Commits

Leonard Ritter committed 7ea641d

dl parser

  • Parent commits 7ea3419

Files changed (3)

File lib/groundwork/datalayer/dlc.py

 
 from groundwork.datalayer.pyscanner import Scanner
 
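+# Type definitions used by the .dl parser below; primitive types stay as
+# classes, struct/object declarations become instances.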
+class TypeDef(object):
+    linked = False
+    
+class Struct(TypeDef):
+    name = ''
+    def __init__(self):
+        # members are collected in declaration order by _handle_member
+        self.members = []
 
+class Object(Struct):
+    pass
+
+class Context(object):
+    item = None
+
+class Float(TypeDef):
+    pass
+
+class Int(TypeDef):
+    pass
+
+class String(TypeDef):
+    pass
+
+class Blob(TypeDef):
+    pass
+
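+# Linked types wrap another type definition, stored in 'target'.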
+class LinkedTypeDef(TypeDef):
+    linked = True
+    target = None
+
+class Ref(LinkedTypeDef):
+    pass
+
+class Array(LinkedTypeDef):
+    pass
+
+class List(LinkedTypeDef):
+    pass
+
+class Member(object):
+    name = ''
+    typedef = None
+
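+# Parser for .dl schema files, built on the line-oriented Scanner from
+# pyscanner.py.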
+class Parser(Scanner):
+    def __init__(self):
+        Scanner.__init__(self)
+        self.types = {
+            'float' : Float,
+            'int' : Int,
+            'string' : String,
+            'blob' : Blob,
+            'ref' : Ref,
+            'array' : Array,
+            'list' : List,
+        }
+        self.scope = []
+        self.root = None
+        
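+    # The scope stack mirrors the current indentation; each struct or object
+    # declaration pushes a Context whose 'item' owns the members declared
+    # below it.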
+    def new_scope(self):
+        ctx = Context()
+        self.scope.append(ctx)
+        return ctx
+        
+    def top_scope(self):
+        if not self.scope:
+            return Context()
+        return self.scope[-1]
+        
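+    # 'struct <name>:' opens a new named struct; the indented lines below it
+    # declare its members.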
+    def handle_struct(self):
+        name = self.scan_name()
+        if name in self.types:
+            self.error('type %r is already declared.' % name)
+            
+        self.scan(self.tok_colon)
+        
+        item = Struct()
+        item.name = name
+        self.types[item.name] = item
+        
+        ctx = self.new_scope()
+        ctx.item = item
+        
+        return item
+
+    def handle_object(self):
+        name = self.scan_name()
+        if name in self.types:
+            self.error('type %r is already declared.' % name)
+            
+        self.scan(self.tok_colon)
+        
+        item = Object()
+        item.name = name
+        self.types[item.name] = item
+        
+        ctx = self.new_scope()
+        ctx.item = item
+        
+        return item
+        
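+    # 'root <name>' marks a previously declared object type as the document root.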
+    def handle_root(self):
+        symbol = self.scan_name()
+        if not symbol in self.types:
+            self.error('unknown symbol: %r' % symbol)
+        item = self.types[symbol]
+        if not isinstance(item, Object):
+            self.error('root must be of type object')
+        self.root = item
+
+    def scan_names(self):
+        names = [self.scan_name()]
+        while self._try(self.tok_comma):
+            names.append(self.scan_name())
+        return names
+        
+    def _handle_member(self, typeobj):
+        owner = self.top_scope().item
+        if not isinstance(owner, Struct):
+            self.error('unexpected type declaration')
+        for name in self.scan_names():
+            item = Member()
+            item.name = name
+            item.typedef = typeobj
+            owner.members.append(item)
+    
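+    # Resolve a possibly nested linked type, e.g. ref(Item) or list(ref(Item)).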
+    def scan_typedef(self, typeobj):
+        if not typeobj.linked:
+            return typeobj
+        typeobj = typeobj()
+        self.scan(self.tok_bo)
+        symbol = self.scan_name()
+        if not symbol in self.types:
+            self.error('unknown symbol: %r' % symbol)
+        typeobj.target = self.scan_typedef(self.types[symbol])
+        self.scan(self.tok_bc)
+        return typeobj
+    
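+    # One logical line per iteration: dedenting pops scopes, then the leading
+    # symbol either dispatches to a handle_<symbol> method or, if it names a
+    # known type, starts a member declaration.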
+    def parse(self):
+        try:
+            while self.lines.next():
+                while self.level < len(self.scope):
+                    self.scope.pop()
+                
+                symbol = self.scan_name()
+                func_name = 'handle_'+symbol
+                if hasattr(self, func_name):
+                    getattr(self, func_name)()
+                elif symbol in self.types:
+                    typeobj = self.scan_typedef(self.types[symbol])
+                    self._handle_member(typeobj)
+                else:
+                    self.error('unknown symbol: %r' % symbol)
+                
+                if self.line:
+                    self.error('unexpected characters: %r' % self.line)
+        except StopIteration:
+            pass
+            
+parser = Parser()
+parser.read('test.dl')

File lib/groundwork/datalayer/pyscanner.py

 import os
 
 class ParseError(Exception):
-	pass
+    pass
 
 class Token:
-	def __init__(self, name, exp, vgroup = 1, rgroup = 2, **kargs):
-		self.name = name
-		self.rname = kargs.get('rname', self.name)
-		self.exp = re.compile('^[ ]*(%s)[ ]*(.*)$' % exp)
-		self.vgroup = vgroup
-		self.rgroup = rgroup
-		self.replaces = kargs.get('replaces', [])
+    def __init__(self, name, exp, vgroup = 1, rgroup = 2, **kargs):
+        self.name = name
+        self.rname = kargs.get('rname', self.name)
+        self.exp = re.compile('^[ ]*(%s)[ ]*(.*)$' % exp)
+        self.vgroup = vgroup
+        self.rgroup = rgroup
+        self.replaces = kargs.get('replaces', [])
 
 def match_scan_try_funcs(postfix):		
-	match = lambda self: self.match(getattr(self, 'tok_'+postfix))
-	scan = lambda self: self.scan(getattr(self, 'tok_'+postfix))
-	_try = lambda self: self._try(getattr(self, 'tok_'+postfix))
-	return match, scan, _try
+    match = lambda self: self.match(getattr(self, 'tok_'+postfix))
+    scan = lambda self: self.scan(getattr(self, 'tok_'+postfix))
+    _try = lambda self: self._try(getattr(self, 'tok_'+postfix))
+    return match, scan, _try
 
 class Scanner:	
-	string_replaces = [
-		('\\"', '"'),
-		("\\'", "'"),
-		("\\t", "\t"),
-		("\\n", "\n"),
-		("\\r", "\r"),
-	]
-	for i in range(256):
-		string_replaces.append(('\\x%02x' % i, chr(i)))
-	tok_symbol = Token('symbol', r'[A-Za-z_][A-Za-z0-9_]*')
-	tok_colon = Token('colon', r':')
-	tok_bo = Token('bo', r'\(')
-	tok_bc = Token('bc', r'\)')
-	tok_sbo = Token('sbo', r'\[')
-	tok_sbc = Token('sbc', r'\]')
-	tok_eq = Token('eq', r'==')
-	tok_equal = Token('equal', r'(=)([^=].*)', 2, 3)
-	tok_ne = Token('ne', r'!=')
-	tok_excl = Token('excl', r'!')
-	tok_gt = Token('gt', r'(>)([^=].*)', 2, 3)
-	tok_lt = Token('lt', r'(<)([^=].*)', 2, 3)
-	tok_ge = Token('ge', r'>=')
-	tok_le = Token('le', r'<=')
-	tok_at = Token('at', r'\@')
-	tok_asterisk = Token('asterisk', r'\*')
-	tok_add = Token('add', r'(\+)([^\+].*)', 2, 3)
-	tok_sub = Token('sub', r'(\-)([^\-].*)', 2, 3)
-	tok_mul = Token('mul', r'(\*)([^\*].*)', 2, 3)
-	tok_div = Token('div', r'(\/)([^\/].*)', 2, 3)
-	tok_float = Token('float', r'(-?[0-9]*[.]?[0-9]+)([A-Za-z_0-9\%\xb0]+)?', (2,3), 4)
-	tok_int = Token('int', r'[0-9]+')
-	tok_to = Token('to', r'->')
-	tok_dot = Token('dot', r'\.')
-	tok_comma = Token('comma', r',')
-	tok_plus = Token('plus', r'\+')
-	tok_minus = Token('minus', r'\-')
-	tok_string1 = Token('string1', r'["]((\\"|[^"])*)["]', 2, 4, replaces=string_replaces)
-	tok_string2 = Token('string2', r"[']((\\'|[^'])*)[']", 2, 4, replaces=string_replaces)
-	
-	tokens = [
-		tok_colon,
-		tok_string1, tok_string2,
-		tok_float, tok_int, tok_asterisk,
-		tok_symbol, tok_excl, tok_dot,
-		tok_bo, tok_bc, tok_sbo, tok_sbc, tok_to, tok_comma,
-		tok_eq, tok_ne, tok_gt, tok_lt, tok_ge, tok_le,
-		tok_add, tok_sub, tok_mul, tok_div, tok_at, tok_plus, tok_minus, tok_equal
-	]
+    string_replaces = [
+        ('\\"', '"'),
+        ("\\'", "'"),
+        ("\\t", "\t"),
+        ("\\n", "\n"),
+        ("\\r", "\r"),
+    ]
+    for i in range(256):
+        string_replaces.append(('\\x%02x' % i, chr(i)))
+    tok_symbol = Token('symbol', r'[A-Za-z_][A-Za-z0-9_]*')
+    tok_colon = Token('colon', r':')
+    tok_bo = Token('bo', r'\(')
+    tok_bc = Token('bc', r'\)')
+    tok_sbo = Token('sbo', r'\[')
+    tok_sbc = Token('sbc', r'\]')
+    tok_eq = Token('eq', r'==')
+    tok_equal = Token('equal', r'(=)([^=].*)', 2, 3)
+    tok_ne = Token('ne', r'!=')
+    tok_excl = Token('excl', r'!')
+    tok_gt = Token('gt', r'(>)([^=].*)', 2, 3)
+    tok_lt = Token('lt', r'(<)([^=].*)', 2, 3)
+    tok_ge = Token('ge', r'>=')
+    tok_le = Token('le', r'<=')
+    tok_at = Token('at', r'\@')
+    tok_asterisk = Token('asterisk', r'\*')
+    tok_add = Token('add', r'(\+)([^\+].*)', 2, 3)
+    tok_sub = Token('sub', r'(\-)([^\-].*)', 2, 3)
+    tok_mul = Token('mul', r'(\*)([^\*].*)', 2, 3)
+    tok_div = Token('div', r'(\/)([^\/].*)', 2, 3)
+    tok_float = Token('float', r'(-?[0-9]*[.]?[0-9]+)([A-Za-z_0-9\%\xb0]+)?', (2,3), 4)
+    tok_int = Token('int', r'[0-9]+')
+    tok_to = Token('to', r'->')
+    tok_dot = Token('dot', r'\.')
+    tok_comma = Token('comma', r',')
+    tok_plus = Token('plus', r'\+')
+    tok_minus = Token('minus', r'\-')
+    tok_string1 = Token('string1', r'["]((\\"|[^"])*)["]', 2, 4, replaces=string_replaces)
+    tok_string2 = Token('string2', r"[']((\\'|[^'])*)[']", 2, 4, replaces=string_replaces)
+    
+    tokens = [
+        tok_colon,
+        tok_string1, tok_string2,
+        tok_float, tok_int, tok_asterisk,
+        tok_symbol, tok_excl, tok_dot,
+        tok_bo, tok_bc, tok_sbo, tok_sbc, tok_to, tok_comma,
+        tok_eq, tok_ne, tok_gt, tok_lt, tok_ge, tok_le,
+        tok_add, tok_sub, tok_mul, tok_div, tok_at, tok_plus, tok_minus, tok_equal
+    ]
 
-	aliases = {}
-	
-	def __init__(self):
-		self.context = dict(file='',lineno=0,line='')
-		self.level = 0
-		self.line = ''
-		self.pos = 0
-		self.consts = []
-		for tok in self.tokens:
-			import new
-			m,s,t = match_scan_try_funcs(tok.name)
-			setattr(self, 'match_'+tok.name, new.instancemethod(m,self,Parser))
-			setattr(self, 'scan_'+tok.name, new.instancemethod(s,self,Parser))
-			setattr(self, 'try_'+tok.name, new.instancemethod(t,self,Parser))
+    aliases = {}
+    
+    def __init__(self):
+        self.context = dict(file='',lineno=0,line='')
+        self.level = 0
+        self.line = ''
+        self.pos = 0
+        self.consts = []
+        for tok in self.tokens:
+            import new
+            m,s,t = match_scan_try_funcs(tok.name)
+            setattr(self, 'match_'+tok.name, new.instancemethod(m,self,Scanner))
+            setattr(self, 'scan_'+tok.name, new.instancemethod(s,self,Scanner))
+            setattr(self, 'try_'+tok.name, new.instancemethod(t,self,Scanner))
 
-	def warning(self, msg, context=None):
-		if not context:
-			context = self.context
-		traceback = self.format_context('warning: '  + msg, context)
-		print >> sys.stderr, traceback
-		
-	def format_context(self, msg, context):
-		if sys.platform == 'win32':
-			s = '%s(%s) : %s' % (os.path.abspath(context['file']), context['lineno'], msg)
-		else:
-			s = '%s:%s:%s\n' % (context['file'], context['lineno'], msg)
-			s += context['line']
-		return s
+    def warning(self, msg, context=None):
+        if not context:
+            context = self.context
+        traceback = self.format_context('warning: '  + msg, context)
+        print >> sys.stderr, traceback
+        
+    def format_context(self, msg, context):
+        if sys.platform == 'win32':
+            s = '%s(%s) : %s' % (os.path.abspath(context['file']), context['lineno'], msg)
+        else:
+            s = '%s:%s:%s\n' % (context['file'], context['lineno'], msg)
+            s += context['line']
+        return s
 
-	def error(self, msg, context=None):
-		if not context:
-			context = self.context
-		traceback = self.format_context('error: '  + msg, context)
-		print >> sys.stderr, traceback
-		raise ParseError
-		
-	def match(self,token):
-		m = token.exp.match(self.line)
-		if m:
-			line = m.group(token.rgroup)
-			if type(token.vgroup) in (list, tuple):
-				value = [m.group(v) for v in token.vgroup]
-			else:
-				value = m.group(token.vgroup)
-				for before,after in token.replaces:
-					value = value.replace(before,after)
-			return value, line
-		return None
-		
-	def match_any(self):
-		for tok in self.tokens:
-			m = self.match(tok)
-			if m:
-				return tok, m
-		return None
-			
-	def _try(self,token):
-		m = self.match(token)
-		if m:
-			value, self.line = m
-			return value
-			
-	def scan(self,token):
-		value = self._try(token)
-		if value == None:
-			self.error('%s expected at %r.' % (token.rname, self.line))
-		return value
-		
-	def match_name(self):
-		m = self.match_symbol()
-		if m and (m[0] in self.consts):
-			return None
-		return m
-		
-	def try_name(self):
-		m = self.match_symbol()
-		if m and (m[0] in self.consts):
-			return None
-		value = self.try_symbol()
-		if value:
-			value = self.aliases.get(value, value)
-		return value
-		
-	def scan_name(self):
-		m = self.match_symbol()
-		if m and (m[0] in self.consts):
-			self.error('symbol expected, but got constant.')
-		value = self.scan_symbol()
-		if value:
-			value = self.aliases.get(value, value)
-		return value
-		
-	def parse(self):
-		try:
-			while self.lines.next():
-				while self.line:
-					tok,(value,line) = self.match_any()
-					if not tok:
-						self.error('syntax error.')
-					else:
-						value = self.scan(tok)
-						print self.level,tok.name,repr(value)
-		except StopIteration:
-			pass
-			
-	def scan_keyword(self, kw):
-		v = self.scan_name()
-		if not v:
-			self.error('keyword %s expected.' % kw)
-		if v != kw:
-			self.error('keyword %s expected, got %s.' % (kw, v))
-		return v
-			
-	def match_keyword(self, kw):
-		m = self.match_name()
-		if not m:
-			return None
-		value, line = m
-		if value != kw:
-			return None
-		return value
-		
-	def try_keyword(self, kw):		
-		m = self.match_keyword(kw)
-		if not m:
-			return
-		self.scan_keyword(kw)
-		return m
-	
-	def scan_lines(self, infile):
-		prefixline = ''
-		prefixlineno = 0
-		lineno = 0
-		for l in file(infile,'r'):
-			lineno += 1
-			level = 0
-			# strip commentary
-			if '#' in l:
-				l = l[:l.index('#')]
-			# strip right side spaces
-			l = l.rstrip()
-			if prefixline:
-				l = prefixline + l
-				thisline = prefixlineno
-				prefixline = ''
-			else:
-				thisline = lineno
-			# if empty, skip
-			if not l:
-				continue
-			# if right side is backspace, store to prefix
-			if l.endswith('\\'):
-				if not prefixlineno:
-					prefixlineno = lineno
-				l = l[:-1] + ' '
-				prefixline += l
-				continue
-			else:
-				prefixlineno = 0
-			# count left hand tabs
-			while l[0] == '\t':
-				level += 1
-				l = l[1:]
-			# replace all other tabs with spaces
-			l = l.replace('\t', ' ')
-			# fold multiple spaces down to one
-			while '  ' in l:
-				l = l.replace('  ',' ')
-			# now we should have a quite sane string
-			self.context['file'] = infile
-			self.context['lineno'] = thisline
-			self.context['line'] = l
-			self.level = level
-			self.line = l
-			self.pos = 0
-			yield True
-		yield False
-		
-	def read(self, infile):
-		self.infile = infile
-		self.lines = self.scan_lines(infile)
-		try:
-			self.parse()
-		except ParseError:
-			raise ParseError
-		except:
-			import traceback
-			traceback.print_exc()
-			self.error("internal parser error.")
+    def error(self, msg, context=None):
+        if not context:
+            context = self.context
+        traceback = self.format_context('error: '  + msg, context)
+        print >> sys.stderr, traceback
+        raise ParseError
+        
+    def match(self,token):
+        m = token.exp.match(self.line)
+        if m:
+            line = m.group(token.rgroup)
+            if type(token.vgroup) in (list, tuple):
+                value = [m.group(v) for v in token.vgroup]
+            else:
+                value = m.group(token.vgroup)
+                for before,after in token.replaces:
+                    value = value.replace(before,after)
+            return value, line
+        return None
+        
+    def match_any(self):
+        for tok in self.tokens:
+            m = self.match(tok)
+            if m:
+                return tok, m
+        return None
+            
+    def _try(self,token):
+        m = self.match(token)
+        if m:
+            value, self.line = m
+            return value
+            
+    def scan(self,token):
+        value = self._try(token)
+        if value == None:
+            self.error('%s expected at %r.' % (token.rname, self.line))
+        return value
+        
+    def match_name(self):
+        m = self.match_symbol()
+        if m and (m[0] in self.consts):
+            return None
+        return m
+        
+    def try_name(self):
+        m = self.match_symbol()
+        if m and (m[0] in self.consts):
+            return None
+        value = self.try_symbol()
+        if value:
+            value = self.aliases.get(value, value)
+        return value
+        
+    def scan_name(self):
+        m = self.match_symbol()
+        if m and (m[0] in self.consts):
+            self.error('symbol expected, but got constant.')
+        value = self.scan_symbol()
+        if value:
+            value = self.aliases.get(value, value)
+        return value
+        
+    def parse(self):
+        try:
+            while self.lines.next():
+                while self.line:
+                    tok,(value,line) = self.match_any()
+                    if not tok:
+                        self.error('syntax error.')
+                    else:
+                        value = self.scan(tok)
+                        print self.level,tok.name,repr(value)
+        except StopIteration:
+            pass
+            
+    def scan_keyword(self, kw):
+        v = self.scan_name()
+        if not v:
+            self.error('keyword %s expected.' % kw)
+        if v != kw:
+            self.error('keyword %s expected, got %s.' % (kw, v))
+        return v
+            
+    def match_keyword(self, kw):
+        m = self.match_name()
+        if not m:
+            return None
+        value, line = m
+        if value != kw:
+            return None
+        return value
+        
+    def try_keyword(self, kw):		
+        m = self.match_keyword(kw)
+        if not m:
+            return
+        self.scan_keyword(kw)
+        return m
+    
+    def scan_lines(self, infile):
+        prefixline = ''
+        prefixlineno = 0
+        lineno = 0
+        for l in file(infile,'r'):
+            lineno += 1
+            level = 0
+            # strip commentary
+            if '#' in l:
+                l = l[:l.index('#')]
+            # strip right side spaces
+            l = l.rstrip()
+            if prefixline:
+                l = prefixline + l
+                thisline = prefixlineno
+                prefixline = ''
+            else:
+                thisline = lineno
+            # if empty, skip
+            if not l:
+                continue
+            # if the line ends with a backslash, buffer it and continue on the next line
+            if l.endswith('\\'):
+                if not prefixlineno:
+                    prefixlineno = lineno
+                l = l[:-1] + ' '
+                prefixline += l
+                continue
+            else:
+                prefixlineno = 0
+            # convert each group of four spaces to a tab so space indentation is counted as nesting levels
+            l = l.replace('    ','\t')
+            # count left hand tabs
+            while l[0] == '\t':
+                level += 1
+                l = l[1:]
+            # replace all other tabs with spaces
+            l = l.replace('\t', ' ')
+            # fold multiple spaces down to one
+            while '  ' in l:
+                l = l.replace('  ',' ')
+            # now we should have a quite sane string
+            self.context['file'] = infile
+            self.context['lineno'] = thisline
+            self.context['line'] = l
+            self.level = level
+            self.line = l
+            self.pos = 0
+            yield True
+        yield False
+        
+    def read(self, infile):
+        self.infile = infile
+        self.lines = self.scan_lines(infile)
+        try:
+            self.parse()
+        except ParseError:
+            raise ParseError
+        except:
+            import traceback
+            traceback.print_exc()
+            self.error("internal parser error.")
 
-	def parse_file(self, infile):
-		self.read(infile)
+    def parse_file(self, infile):
+        self.read(infile)
 
 def split_camelcase(s):
-	o = ''
-	for c in s:
-		if c.isupper() and o and not o.endswith('_'):
-			o += '_'
-		o += c.lower()
-	return o
+    o = ''
+    for c in s:
+        if c.isupper() and o and not o.endswith('_'):
+            o += '_'
+        o += c.lower()
+    return o

File lib/groundwork/datalayer/test.dl

 # ? how are we going to define the level of logging?
 # ? there should be a central directory of objects that have been referenced.
 
-type vec3:
+struct vec3:
     float x,y,z
 
-type Item:
+object Item:
     int number
     float scalar
     string text
     vec3 origin
 
-type Document:
+object Document:
     int number
     float scalar
     string text