TopDowner / top_down.rb

#!/usr/bin/env ruby
# ::Author:: Steven! Ragnarök
# The beginnings of a Recursive Descent Parser for a Context-Free
# Grammar G given below. This is a Two-pass parse. First pass scanning,
# second pass building a parse tree.
#####################################################################
##                 ::The Grammar Being Parsed::
##      Program -> {NonreturnStatement} ReturnStatement
##      NonreturnStatement -> AssignmentStatement | DefineStatement
##      AssignmentStatement -> set Identifier Expr
##      DefineStatement -> define Identifier Arglist Program
##      Arglist -> ( Identifier {Identifier} )
##      Expr -> Integer | Identifier | Application
##      Application -> ( Fname Expr {Expr} )
##      Fname -> Identifier | + | * | -
##      ReturnStatement -> return Expr
#####################################################################
## The terminals of the grammar are +, -, *, set, define, return, (, ).
## The token types are math_op, set, define, return, int_literal, identifier,
## lparen and rparen.

# Define all my token types using a standard Ruby hash.
TokenTypes = {
	:start => /^program$/, :set => /^set$/,	:math_op => /^[-\*+]$/,
	:return => /^return$/, :define => /^define$/,	:lparen => /^\($/,
	:rparen => /^\)$/, :ident => /^[a-zA-Z][a-zA-Z0-9_]*$/,
	:int_lit => /^-?[0-9]+$/,	:eof => /\$/
}

## ::meta-grammar:: * => 0..n, + => 1..n, or => any ONE of the following. ##
Grammar = {
	:Program => [ :*, :NonReturnStatement, :ReturnStatement ],
	:NonReturnStatement => [ :or, :AssignmentStatement, :DefineStatement ],
	:AssignmentStatement => [ :set, :ident, :Expr ],
	:DefineStatement => [ :define, :ident, :Arglist, :Program ],
	:Arglist => [ :lparen, :+, :ident, :rparen ],
	:Expr => [ :or, :int_lit, :ident, :Application ],
	:Application => [ :lparen, :Fname, :*, :Expr, :rparen ],
	:Fname => [ :or, :ident, :math_op ],
	:ReturnStatement => [ :return, :Expr ]
}

# Blow up on invalid args
raise ArgumentError.new "too many arguments" if ARGV.length > 1
raise ArgumentError.new "no argument given" if ARGV.empty?

def match_maker construct
	matcher = Array.new
	rhs = Grammar[construct] || construct
	case rhs
	when Array
		rhs.each do |c|

		end
	end
end

## Terminal Matcher ##
def match(terminal, token)
	if terminal == token[:type]
		token[:token]
	else
		raise ArgumentError.new "[SYNTAX ERROR] Expected:#{terminal} Got:#{token}"
	end
end

## A Token Type Detector ##
def detect(token)
	TokenTypes.each do |type, matcher|
		if token =~ matcher
			return { :token => token, :type => type }
		end
	end
	raise ArgumentError.new "[SCAN ERROR] Unknown token type for #{token}"
end

# Tokenize and scan. 
str = ARGV.shift
tokens = str.gsub(/[\n\t ]+/,' ').split(' ').map {|t| t.to_sym}
parsed_tokens = tokens.map do |token|
	detect(token)
end

puts parsed_tokens
@enumerator = parsed_tokens.each
@tree = Hash.new

## Recognizer ##
# Returns a symbol describing what is constructed by the token 
# and lookahead. For a token :return and a lookahead :"4", it returns
# :ReturnStatement. For a token :lparen and a lookahead :ident it would
# return :Arglist.
def recognize token, lookahead
	@cache[:"#{token},#{lookahead}"] ||=
		Grammar.each do |key, val|
		# Let's assume our key is :ReturnStatement and our val is [:return]
			if token == first(val[0])

				return key
			end
		end
end

# Returns all valid token types of the first terminal in a construct.

def first construct
	return [Grammar.has_key? construct && first(Grammar[construct]) ||
		construct].flatten
end

## ::Example Trees returned by `parse`:: ##
# for the following program:
# 	set x 2
# 	return x
# The parse tree returned should be:
#		{ :Program => [:set => [:x, :"2"], :return => [:x]] }
#	With an output as follows.
#		>Program
#		>	set
#		>		x
#		>		2
#		>	return
#		>		x
##

# ::function:: parse, the recursive call which builds the parse tree.
# ::args:: recognizer: the construct at the root of the tree.
def parse recognizer
	puts "Parsing: #{recognizer}"
	case recognizer
	## Array means nonliteral.
	when Array
		# Divide & Conquer step of recursion
		return recognizer.map {|r| parse Grammar[r] || r}
	## Symbol means literal
	when Symbol
		tkn = @enumerator.next
		puts tkn
		if tkn[:type] == recognizer
			# Base step of recursion
			return tkn[:token]
		else
			raise ArgumentError.new "Got #{tkn}, expected #{recognizer}"
		end
	else raise ArgumentError.new "What the fuck?"
	end
end

@tree[:Program] = parse Grammar[:Program]
puts @tree
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.