Jan Borsodi avatar Jan Borsodi committed 3af4649

Implemented Scanner based on the re.Scanner API to make the code run on Python 2.4.

Comments (0)

Files changed (1)

     def __repr__(self):
         """Return a debugging representation built from the message and token."""
         details = (self.message, self.token)
         return "ParseError(%r,%r)" % details
 
class Scanner(object):
    """A regular expression based scanner which mimics the API defined by re.Scanner (Python 2.5+).

    ``lexicon`` is a sequence of ``(pattern, action)`` pairs.  All patterns
    are combined into one alternation; the first alternative that matches
    at the current position wins.  ``action`` may be:

    * ``None`` -- the matched text is skipped,
    * a callable -- called as ``action(scanner, text)``; a non-None return
      value is appended to the result,
    * any other object -- appended to the result as-is.
    """

    def __init__(self, lexicon, flags=0):
        """Compile ``lexicon`` into a single pattern using ``flags``."""
        self.lexicon = lexicon
        # groups[i] is the lexicon entry that owns capture group ``i`` of the
        # combined pattern.  Index 0 (the whole match) is never looked up.
        self.groups = [0]
        alternatives = []
        for entry in lexicon:
            pattern = entry[0]
            # One slot for the wrapping group added below plus one for every
            # group the pattern itself defines, so that any group index
            # reported by ``lastindex`` maps back to this entry.
            inner_groups = re.compile(pattern, flags).groups
            self.groups.extend([entry] * (inner_groups + 1))
            alternatives.append("(" + pattern + ")")
        # Build the expression by placing each lexicon match in a group and
        # ORing them together.
        self.expr = "|".join(alternatives)
        self.reg = re.compile(self.expr, flags)

    def scan(self, string):
        """Tokenize ``string`` from its start.

        Returns ``(results, remainder)`` where ``results`` is the list of
        values produced by the actions and ``remainder`` is the unscanned
        tail of ``string`` (empty when everything was consumed).
        """
        result = []
        append = result.append
        match = self.reg.match
        pos = 0
        while True:
            # Match at an offset instead of re-slicing the input: this avoids
            # copying the tail for every token and keeps ``^``, ``\b`` and
            # lookbehind assertions relative to the real string.
            m = match(string, pos)
            if not m:
                break
            end = m.end()
            if end == pos:
                # A zero-width match makes no progress; stop here (as
                # re.Scanner does) instead of looping forever.
                break
            action = self.groups[m.lastindex][1]
            if callable(action):
                # Expose the match object to the action, like re.Scanner.
                self.match = m
                action = action(self, m.group())
            if action is not None:
                append(action)
            pos = end
        return result, string[pos:]
+
 class Position(object):
     def __init__(self, line, col, offset):
         self.line = line
     def TYPE(s, t): return "TYPE", t
 
     rules = [(r'[ \t\r\n]+', ignore_WHITESPACE),
-             (r'(///.*$|/\*\*(.|[\r\n])*?\*/)', DOCBLOCK),
-             (r'(//.*$|/\*(.|[\r\n])*?\*/)', COMMENT),
+             (r'(?:///.*$|/\*\*(?:.|[\r\n])*?\*/)', DOCBLOCK),
+             (r'(?://.*$|/\*(?:.|[\r\n])*?\*/)', COMMENT),
              (r'syntax(?!\w)', SYNTAX),
              (r'package(?!\w)', PACKAGE),
              (r'import(?!\w)', IMPORT),
              (r'event(?!\w)', EVENT),
              (r'returns(?!\w)', RETURNS),
              (r'enum(?!\w)', ENUM),
-             (r'(required|optional|repeated)(?!\w)', QUANTIFIER),
+             (r'(?:required|optional|repeated)(?!\w)', QUANTIFIER),
              (r'option(?!\w)', OPTION),
              (r'extensions(?!\w)', EXTENSIONS),
              (r'to(?!\w)', TO),
              (r'(true|false)(?!\w)', BOOL),
              (r'(int32|uint32|sint32|int64|uint64|sint64|fixed32|sfixed32|fixed64|sfixed64|float|double|bool|string|bytes)(?!\w)', TYPE),
              (r'"[^"]*"', STRING),
-             (r'[a-zA-Z][a-zA-Z0-9_]*([.][a-zA-Z][a-zA-Z0-9_]*)+', IDENTIFIER_PATH),
+             (r'[a-zA-Z][a-zA-Z0-9_]*(?:[.][a-zA-Z][a-zA-Z0-9_]*)+', IDENTIFIER_PATH),
              (r'[a-zA-Z][a-zA-Z0-9_]*(?!\w)', IDENTIFIER),
              (r'\[', L_SQUARE_BRACKET),
              (r'\]', R_SQUARE_BRACKET),
              (r'=', EQ),
              (r';', SEMI),
              (r',', COMMA),
-             (r'(\.[0-9]+)|([0-9]+(\.[0-9]+)?)', NUMBER),
+             (r'(?:\.[0-9]+)|(?:[0-9]+(?:\.[0-9]+)?)', NUMBER),
              ]
     return rules
 
 
     def scan(self, text):
         rules = list(self.rules)
-        scanner = re.Scanner(rules, re.MULTILINE)
+        scanner = Scanner(rules, re.MULTILINE)
         tokens, remainder = scanner.scan(text)
         for t, v in tokens:
             pos = deepcopy(self.pos)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.