Anonymous committed 0dc5525

Yeah, I always get MULTILINE and DOTALL mixed up too.

Comments (0)

Files changed (2)

-a := 65
-print char a
-print string "Hello, world!"
-print string "The value of a is ";
-print a;
-print "!"
+a := 65
+print char a
+print string "Hello, world!"
+print string "The value of a is ";
+print a;
+print "!"
     else:
         raise UnimplementedError, "not an AST type I know: %s" % type
 
+
 class Scanner(object):
     """A Scanner provides facilities for extracting successive
     Xoomonk tokens from a string.
         self.type = None
         self.scan()
 
-    def scan_pattern(self, pattern):
+    def scan_pattern(self, pattern, type):
         pattern = r'^(' + pattern + r')(.*?)$'
-        match = re.match(pattern, self.text, re.MULTILINE)
+        match = re.match(pattern, self.text, re.DOTALL)
         if not match:
             return False
         else:
+            self.type = type
             self.token = match.group(1)
             self.text = match.group(2)
-            #print >>sys.stderr, "(%s)" % (self.token)
+            #print >>sys.stderr, "(%r/%s->%r)" % (self.token, self.type, self.text)
             return True
 
     def scan(self):
-        self.scan_pattern(r'\s*')
+        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
         if not self.text:
             self.token = None
             self.type = 'EOF'
             return
-        if self.scan_pattern(r':=|\;|\{|\}|\*|\.|\^|\$'):
-            self.type = 'operator'
+        if self.scan_pattern(r':=|\;|\{|\}|\*|\.|\^|\$', 'operator'):
             return
-        if self.scan_pattern(r'\d+'):
-            self.type = 'integer literal'
+        if self.scan_pattern(r'\d+', 'integer literal'):
             return
-        if self.scan_pattern(r'\".*?\"'):
-            self.type = 'string literal'
+        if self.scan_pattern(r'\".*?\"', 'string literal'):
             return
-        if self.scan_pattern(r'\w+'):
-            self.type = 'identifier'
+        if self.scan_pattern(r'\w+', 'identifier'):
             return
-        if self.scan_pattern(r'.'):
-            self.type = 'unknown character'
+        if self.scan_pattern(r'.', 'unknown character'):
             return
         else:
             raise ValueError, "this should never happen, self.text=(%s)" % self.text
     >>> a.program()
     AST('Program',[AST('Assignment',[AST('Ref',[AST('Identifier',value='a')]), AST('IntLit',value=5)]), AST('Assignment',[AST('Ref',[AST('Identifier',value='c')]), AST('IntLit',value=4)])])
 
+    >>> a = Parser("a := { b := 1 }")
+    >>> a.program()
+    AST('Program',[AST('Assignment',[AST('Ref',[AST('Identifier',value='a')]), AST('Block',[AST('Assignment',[AST('Ref',[AST('Identifier',value='b')]), AST('IntLit',value=1)])])])])
+
     """
     def __init__(self, text):
         self.scanner = Scanner(text)
     file = open(args[0])
     text = file.read()
     file.close()
+    #print repr(text)
     p = Parser(text)
     ast = p.program()
     result = eval_xoomonk(ast)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.