Commits

Eric Snow  committed f66fef6 Draft

marching toward simple Python parsing

  • Participants
  • Parent commits f95d98a

Comments (0)

Files changed (4)

File pylt/tests/.data/python_complex.py

+"""Syntax examples"""
+
+import os
+from collections import namedtuple
+
+
+1 + 2
+a = 1 + 2
+b = 3 + 4  # note
+c = a + b; print(c); pass
+
+if b: pass
+
+if c:  # other note
+    print(a, b)
+
+class Spam:
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+def f(m):
+    def g(n):
+        def h(p):
+            return m * n * p
+        return h
+    return g
+
+
+@f
+def ham(): pass
+
+

File pylt/tests/_util.py

+import os.path
+from contextlib import contextmanager
+
+
+DATAPATH = os.path.join(os.path.dirname(__file__), '.data')
+
+
+def get_datapath(filename):
+    return os.path.join(DATAPATH, filename)
+
+
+@contextmanager
+def get_data(filename):
+    path = get_datapath(filename)
+    with open(path) as datafile:
+        yield datafile

File pylt/tests/test_python_parser.py

 import unittest
 import token
+from token import NAME, OP, NEWLINE, INDENT, DEDENT, ENDMARKER
+from tokenize import tokenize, ENCODING, NL
 
-from .test_tokenizing import SOURCE, SOURCE_UNNORMALIZED, TOKENS_RAW, TOKENS
+from ._util import get_data
+from ..tokenizing import Token
 from ..parsing import python as parsing
 
 
-# XXX populate these!!!
+START = Token(ENCODING, 'utf-8')
+END = Token(ENDMARKER, '')
+
+
+def _wrap_source(tokens):
+    return [START] + list(tokens) + [END]
+
+
+SIMPLE_SOURCE = "c = a + b"
 SIMPLE = [
+        Token(NAME, 'c'),
+        Token(OP,   '='),
+        Token(NAME, 'a'),
+        Token(OP,   '+'),
+        Token(NAME, 'b'),
+        ]
+
+SIMPLE_SEMI_SOURCE = "c = a + b; print(c); pass"
+SIMPLE_SEMI = [
+        Token(NAME, 'c'),
+        Token(OP,   '='),
+        Token(NAME, 'a'),
+        Token(OP,   '+'),
+        Token(NAME, 'b'),
+        Token(OP,   ';'),
+        Token(NAME, 'print'),
+        Token(OP,   '('),
+        Token(NAME, 'c'),
+        Token(OP,   ')'),
+        Token(OP,   ';'),
+        Token(NAME, 'pass'),
         ]
 
-COMPOUND = [
+CLASS_SOURCE = """\
+class Spam:
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+"""
+CLASS = [
+        Token(NAME,      'class'),
+        Token(NAME,      'Spam'),
+        Token(OP,        ':'),
+        Token(NEWLINE,   '\n'),
+        Token(INDENT,    '    '),
+        Token(NAME,      'def'),
+        Token(NAME,      '__init__'),
+        Token(OP,        '('),
+        Token(NAME,      'self'),
+        Token(OP,        ','),
+        Token(NAME,      'x'),
+        Token(OP,        ','),
+        Token(NAME,      'y'),
+        Token(OP,        ')'),
+        Token(OP,        ':'),
+        Token(NEWLINE,   '\n'),
+        Token(INDENT,    '        '),
+        Token(NAME,      'self'),
+        Token(OP,        '.'),
+        Token(NAME,      'x'),
+        Token(OP,        '='),
+        Token(NAME,      'x'),
+        Token(NEWLINE,   '\n'),
+        Token(NAME,      'self'),
+        Token(OP,        '.'),
+        Token(NAME,      'y'),
+        Token(OP,        '='),
+        Token(NAME,      'y'),
+        Token(NEWLINE,   '\n'),
+        Token(NL,        '\n'),
+        Token(DEDENT,    ''),
+        Token(DEDENT,    ''),
         ]
 
+HEADER_SOURCE = "def ham():"
 HEADER = [
+        Token(NAME, 'def'),
+        Token(NAME, 'ham'),
+        Token(OP,   '('),
+        Token(OP,   ')'),
+        Token(OP,   ':'),
         ]
 
+BODY_SOURCE = """\
+    def g(n):
+        def h(p):
+            return m * n * p
+        return h
+    return g
+
+"""
 BODY = [
+        Token(INDENT,  '    '),
+        Token(NAME,    'def'),
+        Token(NAME,    'g'),
+        Token(OP,      '('),
+        Token(NAME,    'n'),
+        Token(OP,      ')'),
+        Token(OP,      ':'),
+        Token(NEWLINE, '\n'),
+        Token(INDENT,  '        '),
+        Token(NAME,    'def'),
+        Token(NAME,    'h'),
+        Token(OP,      '('),
+        Token(NAME,    'p'),
+        Token(OP,      ')'),
+        Token(OP,      ':'),
+        Token(NEWLINE, '\n'),
+        Token(INDENT,  '            '),
+        Token(NAME,    'return'),
+        Token(NAME,    'm'),
+        Token(OP,      '*'),
+        Token(NAME,    'n'),
+        Token(OP,      '*'),
+        Token(NAME,    'p'),
+        Token(NEWLINE, '\n'),
+        Token(DEDENT,  ''),
+        Token(NAME,    'return'),
+        Token(NAME,    'h'),
+        Token(NEWLINE, '\n'),
+        Token(DEDENT,  ''),
+        Token(NAME,    'return'),
+        Token(NAME,    'g'),
+        Token(NEWLINE, '\n'),
+        Token(NL,      '\n'),
+        Token(NL,      '\n'),
+        Token(DEDENT,  ''),
         ]
 
 
 class TestsForParseUnits(unittest.TestCase):
 
     def test_base_get_tokens(self):
-        unit = parsing.ParseUnit(TOKENS)
+        unit = parsing.ParseUnit(SIMPLE)
         tokens = list(unit.get_tokens())
 
-        self.assertEqual(tokens, TOKENS)
+        self.assertEqual(tokens, SIMPLE)
 
     def test_simple_statement_get_tokens(self):
         statement = parsing.SimpleStatement(SIMPLE)
 
 class ParserTests(unittest.TestCase):
 
+    def setUp(self):
+        self.parser = parsing.Parser()
+
     def test_parse_source(self):
-        raise NotImplementedError
+        SIMPLE_FULL = _wrap_source(SIMPLE)
+        SIMPLE_SEMI_FULL = _wrap_source(SIMPLE_SEMI)
+        CLASS_FULL = _wrap_source(CLASS)
+
+        tokens2_a = SIMPLE_SEMI[:6]
+        tokens2_b = SIMPLE_SEMI[6:11]
+        tokens2_c = SIMPLE_SEMI[11:]
+        tokens3_header = CLASS[:4]
+        tokens3_body = CLASS[4:]
+
+        statements1 = list(self.parser.parse_source(SIMPLE_FULL))
+        statements2 = list(self.parser.parse_source(SIMPLE_SEMI_FULL))
+        statements3 = list(self.parser.parse_source(CLASS_FULL))
+
+        self.assertEqual(len(statements1), 3)
+        self.assertEqual(len(statements2), 5)
+        self.assertEqual(len(statements3), 3)
+
+        self.assertEqual(statements1[0], parsing.ParseUnit(START))
+        self.assertEqual(statements2[0], parsing.ParseUnit(START))
+        self.assertEqual(statements3[0], parsing.ParseUnit(START))
+
+        self.assertEqual(statements1[2], parsing.ParseUnit(END))
+        self.assertEqual(statements2[4], parsing.ParseUnit(END))
+        self.assertEqual(statements3[2], parsing.ParseUnit(END))
+
+        self.assertEqual(type(statements1[1]), parsing.SimpleStatement)
+        self.assertEqual(type(statements2[1]), parsing.SimpleStatement)
+        self.assertEqual(type(statements2[2]), parsing.SimpleStatement)
+        self.assertEqual(type(statements2[3]), parsing.SimpleStatement)
+        self.assertEqual(type(statements3[1]), parsing.CompoundStatement)
+
+        self.assertEqual(list(statements1[1].get_tokens()), SIMPLE)
+        self.assertEqual(list(statements2[1].get_tokens()), tokens2_a)
+        self.assertEqual(list(statements2[2].get_tokens()), tokens2_b)
+        self.assertEqual(list(statements2[3].get_tokens()), tokens2_c)
+        self.assertEqual(list(statements3[1].get_tokens()), CLASS)
+        self.assertEqual(list(statements3[1].header), tokens3_header)
+        self.assertEqual(list(statements3[1].body), tokens3_body)
 
     def test_parse_suite(self):
-        raise NotImplementedError
+        indent = parsing.ParseUnit(Token(INDENT, '    '))
+        dedent = parsing.ParseUnit(Token(DEDENT, ''))
+        newline = parsing.ParseUnit(Token(NEWLINE, '\n'))
+        nl = parsing.ParseUnit(Token(NL, '\n'))
+
+        statements = list(self.parser.parse_suite(BODY))
+
+        self.assertEqual(len(statements), 6)
+        self.assertEqual(statements[0], indent)
+        self.assertEqual(statements[2], newline)
+        self.assertEqual(statements[3], nl)
+        self.assertEqual(statements[4], nl)
+        self.assertEqual(statements[5], dedent)
+
+        self.assertEqual(type(statements[1]), parsing.CompoundStatement)
+        self.assertEqual(statements[1].header, BODY[1:8])
+
+        inner = statements[1].body
+        self.assertEqual(list(inner.get_tokens()), BODY[1:32])
+        self.assertEqual(inner.header, BODY[1:8])
+        self.assertEqual(len(inner.body), 3)
 
     def test_parse_simple_statements(self):
         raise NotImplementedError
+
+
+class ComplexTests(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        with get_data('python_complex.py') as sourcefile:
+            cls.COMPLEX_SOURCE = sourcefile.read()
+            sourcefile.seek(0)
+            # NOTE(review): tokenize() is lazy and tokenize.tokenize expects a
+            # bytes readline — materialize before the file closes, and confirm
+            # get_data opens the file in binary mode.
+            cls.COMPLEX = list(tokenize(sourcefile.readline))

File pylt/tests/test_tokenizing.py

 
 class TokenizerTests(unittest.TestCase):
 
-    maxDiff = None
-
     def test_tokenize(self):
         tokens = list(tokenize(SOURCE))