Commits

Robert Brewer  committed 1be58f9

Initial work on making buildParser threadsafe

  • Participants
  • Parent commits ca7086f

Comments (0)

Files changed (14)

+build/*
+.*.so
 .*.pyc

File simpleparse/__init__.py

 general documentation.  See license.txt for licensing
 information.  (This is a BSD-licensed package).
 '''
+__version__="2.1.1"

File simpleparse/common/comments.py

 _p = Parser( eolcomments )
 for name in ["hash_comment", "semicolon_comment", "slashslash_comment"]:
 	c[ name ] = objectgenerator.LibraryElement(
-		generator = _p._generator,
+		builder = _p._generator.buildParser(),
 		production = name,
 	)
 
 _p = Parser( ccomments )
 for name in ["c_comment","slashbang_comment"]:
 	c[ name ] = objectgenerator.LibraryElement(
-		generator = _p._generator,
+		builder = _p._generator.buildParser(),
 		production = "slashbang_comment",
 	)
 
 _p = Parser( nccomments )
 for name in ["c_nest_comment","slashbang_nest_comment"]:
 	c[ name ] = objectgenerator.LibraryElement(
-		generator = _p._generator,
+		builder = _p._generator.buildParser(),
 		production = "slashbang_nest_comment",
 	)
 

File simpleparse/common/numbers.py

 _p = Parser( declaration )
 for name in ["int","hex", "int_unsigned", "number", "float", "binary_number", "float_floatexp", "imaginary_number", "number_full"]:
 	c[ name ] = objectgenerator.LibraryElement(
-		generator = _p._generator,
+		builder = _p._generator.buildParser(),
 		production = name,
 	)
 
 		base = children[0]
 		base = self.mapSet[base[0]](base, buffer)
 		return base * 1j
-	
+	

File simpleparse/common/strings.py

 for name, partial in _stringTypeData:
 	_p = Parser( stringDeclaration + partial )
 	c[ name ] = objectgenerator.LibraryElement(
-		generator = _p._generator,
+		builder = _p._generator.buildParser(),
 		production = "str",
 	)
 common.share( c )
 string :=  string_triple_double/string_triple_single/string_double_quote/string_single_quote
 """ )
 c[ "string"] = objectgenerator.LibraryElement(
-	generator = _p._generator,
+	builder = _p._generator.buildParser(),
 	production = "string",
 )
 

File simpleparse/error.py

 	line = -1
 	production = ""
 	expected = ""
+	error_message = None
 	DEFAULTTEMPLATE = """Failed parsing production "%(production)s" @pos %(position)s (~line %(line)s:%(lineChar)s).\nExpected syntax: %(expected)s\nGot text: %(text)s"""
 	def __str__( self ):
 		"""Create a string representation of the error"""
-		if self.message:
-			return '%s: %s'%( self.__class__.__name__, self.messageFormat(self.message) )
+		if self.error_message:
+			return '%s: %s'%( self.__class__.__name__, self.messageFormat(self.error_message) )
 		else:
 			return '%s: %s'%( self.__class__.__name__, self.messageFormat() )
 	def messageFormat( self, template=None):

File simpleparse/generator.py

 		"""Initialise the Generator"""
 		self.names = []
 		self.rootObjects = []
-		self.methodSource = None
 		self.definitionSources = []
+		self.terminalParserCache = {}
 	def getNameIndex( self, name ):
 		'''Return the index into the main list for the given name'''
 		try:
 			self.names.append( name )
 			self.rootObjects.append( rootElement )
 			return self.getNameIndex( name )
-	def buildParser( self, name, methodSource=None ):
-		'''Build the given parser definition, returning a TextTools parsing tuple'''
+
+	def buildParser( self, methodSource=None ):
+		'''Build the given parser definition, returning a Builder instance'''
+		builder = Builder(self, methodSource)
+		builder.fill_terminals(self.rootObjects)
+		return builder
+
+	def cacheCustomTerminalParser( self, index, flags, parser ):
+		"""Optimization to reuse customized terminal parsers"""
+		self.terminalParserCache[ (index,flags) ] = parser
+	def getCustomTerminalParser( self, index, flags ):
+		"""Retrieve a cached customized terminal parser, or None"""
+		return self.terminalParserCache.get( (index, flags))
+
+	def addDefinitionSource( self, item ):
+		"""Add a source for definitions when the current grammar doesn't supply
+		a particular rule (effectively common/shared items for the grammar)."""
+		self.definitionSources.append( item )
+
+
+class Builder(object):
+
+	def __init__(self, generator, methodSource=None):
+		self.generator = generator
 		self.parserList = []
-		self.terminalParserCache = {}
 		self.methodSource = methodSource
-		i = 0
-		while i < len(self.rootObjects):
-			# XXX Note: rootObjects will grow in certain cases where
-			# a grammar is loading secondary grammars into itself
-			rootObject = self.rootObjects[i]
+
+	def fill_terminals(self, rootObjects):
+		# XXX Note: rootObjects will grow in certain cases where
+		# a grammar is loading secondary grammars into itself
+		for i, rootObject in enumerate(rootObjects):
 			try:
 				if len(self.parserList) <= i or self.parserList[i] is None:
 					parser = tuple(rootObject.toParser( self ))
 					self.setTerminalParser( i, parser )
-			except NameError,err:
-				currentRuleName = self.names[i]
-				err.args = err.args + ('current declaration is %s'%(currentRuleName), )
+			except NameError, err:
+				err.args += (('current declaration is %s' % self.generator.names[i]), )
 				raise
-			i = i + 1
-		assert None not in self.parserList, str( self.parserList)
-		return self.parserList [self.getNameIndex (name)]
+		assert None not in self.parserList, str(self.parserList)
+
+	def tt_tuple(self, name):
+		"""Return a TextTools parsing tuple for the given name."""
+		return self.parserList [self.generator.getNameIndex(name)]
+
 	def setTerminalParser( self, index, parser ):
 		"""Explicitly set the parser value for given name"""
 		while index >= len(self.parserList):
 			self.parserList.append(None)
 		self.parserList[index] = parser
+
 	def getTerminalParser( self, index ):
 		"""Try to retrieve a parser from the parser-list"""
 		try:
 			return self.parserList[ index ]
 		except IndexError:
 			return None
-	def cacheCustomTerminalParser( self, index, flags, parser ):
-		"""Optimization to reuse customized terminal parsers"""
-		self.terminalParserCache[ (index,flags) ] = parser
-	def getCustomTerminalParser( self, index, flags ):
-		"""Retrieved a cached customized terminal parser or None"""
-		return self.terminalParserCache.get( (index, flags))
-		
+
 	def getParserList (self):
 		return self.parserList
 
-
 	def getObjectForName( self, name):
 		"""Determine whether our methodSource has a parsing method for the given name
 
 			else:
 				raise ValueError( """Unrecognised command value %s (not callable, not one of the Append* constants) found in methodSource %s, name=%s"""%( repr(method),repr(methodSource),name))
 		return 0, name
+
 	def getTagObjectForName( self, name ):
 		"""Get any explicitly defined tag object for the given name"""
 		testName = "_o_"+name
 			object = getattr( self.methodSource, testName )
 			return object
 		return name
-	def addDefinitionSource( self, item ):
-		"""Add a source for definitions when the current grammar doesn't supply
-		a particular rule (effectively common/shared items for the grammar)."""
-		self.definitionSources.append( item )
 
 
 ### Compatibility API

File simpleparse/objectgenerator.py

 		updates the object's dictionary with them
 		"""
 		self.__dict__.update( namedarguments )
-	def toParser( self, generator, noReport=0 ):
+	def toParser( self, builder, noReport=0 ):
 		"""Abstract interface for implementing the conversion to a text-tools table
 
-		generator -- an instance of generator.Generator
+		builder -- an instance of generator.Builder
 			which provides various facilities for discovering
 			other productions.
 		noReport -- if true, we're being called recursively
 		CILiteral -- case-insensitive Literal values
 	"""
 	value = ""
-	def toParser( self, generator=None, noReport=0 ):
+	def toParser( self, builder=None, noReport=0 ):
 		"""Create the parser for the element token"""
 		flags = 0
 		if self.lookahead:
 			flags = flags + LookAhead
-		base = self.baseToParser( generator )
+		base = self.baseToParser( builder )
 		if flags or self.errorOnFail:
 			if self.errorOnFail:
 				return [(None, SubTable+flags, tuple(base),1,2),(None, Call, self.errorOnFail)]
 				return [(None, SubTable+flags, tuple(base))]
 		else:
 			return base
-	def baseToParser( self, generator=None ):
+	def baseToParser( self, builder=None ):
 		"""Parser generation without considering flag settings"""
 		svalue = self.value
 		if self.negative:
 	"""
 	value = ""
 	requiresExpandedSet = 1
-	def toParser( self, generator=None, noReport=0 ):
+	def toParser( self, builder=None, noReport=0 ):
 		"""Create the parser for the element token"""
 		flags = 0
 		if self.lookahead:
 			flags = flags + LookAhead
-		base = self.baseToParser( generator )
+		base = self.baseToParser( builder )
 		if flags or self.errorOnFail:
 			if self.errorOnFail:
 				return [(None, SubTable+flags, tuple(base),1,2),(None, Call, self.errorOnFail)]
 ##		in the SimpleParse grammar, of course.
 ##		"""
 ##		requiresExpandedSet = 0
-##		def baseToParser( self, generator=None ):
+##		def baseToParser( self, builder=None ):
 ##			"""Parser generation without considering flag settings"""
 ##			svalue = self.value
 ##			print 'generating range for ', repr(svalue)
 	is unable to handle unicode character sets.  However, it will work with
 	TextTools 2.0.3, which may be needed in some cases.
 	"""
-	def baseToParser( self, generator=None ):
+	def baseToParser( self, builder=None ):
 		"""Parser generation without considering flag settings"""
 		svalue = self.value
 		if not svalue:
 		("a", b, c, "d")
 	i.e. a series of comma-separated element token definitions.
 	"""
-	def toParser( self, generator=None, noReport=0 ):
+	def toParser( self, builder=None, noReport=0 ):
 		elset = []
 		for child in self.children:
-			elset.extend( child.toParser( generator, noReport ) )
+			elset.extend( child.toParser( builder, noReport ) )
 		basic = self.permute( (None, SubTable, tuple( elset)) )
 		if len(basic) == 1:
 			first = basic[0]
 		regular literal or character range
 	"""
 	value = ""
-	def toParser( self, generator=None, noReport=0 ):
+	def toParser( self, builder=None, noReport=0 ):
 		elset = self.ciParse( self.value )
 		if len(elset) == 1:
 			# XXX should be compressing these out during optimisation...
 	def __call__( self, text, position, end ):
 		"""Method called by mxTextTools iff the base production fails"""
 		error = ParserSyntaxError( self.message )
-		error.message = self.message
+		error.error_message = self.message
 		error.production = self.production
 		error.expected= self.expected
 		error.buffer = text
 		("a" / b / c / "d")
 	i.e. a series of slash-separated element token definitions.
 	"""
-	def toParser( self, generator=None, noReport=0 ):
+	def toParser( self, builder=None, noReport=0 ):
 		elset = []
 		# should catch condition where a child is optional
 		# and we are repeating (which causes a crash during
 		# requires analysis of the whole grammar.
 		for el in self.children:
 			assert not el.optional, """Optional child of a FirstOf group created, this would cause an infinite recursion in the engine, child was %s"""%el
-			dataset = el.toParser( generator, noReport )
+			dataset = el.toParser( builder, noReport )
 			if len( dataset) == 1:# and len(dataset[0]) == 3: # we can alter the jump states with impunity
 				elset.append( dataset[0] )
 			else: # for now I'm eating the inefficiency and doing an extra SubTable for all elements to allow for easy calculation of jumps within the FO group
 	by the other element tokens in your grammar.
 	"""
 	value = ()
-	def toParser( self, generator=None, noReport=0 ):
+	def toParser( self, builder=None, noReport=0 ):
 		return self.value
+
 class LibraryElement( ElementToken ):
-	"""Holder for a prebuilt item with it's own generator"""
-	generator = None
+	"""Holder for a prebuilt item with its own builder"""
+	builder = None
 	production = ""
-	methodSource = None
-	def toParser( self, generator=None, noReport=0 ):
-		if self.methodSource is None:
-			source = generator.methodSource
+
+	def toParser( self, builder=None, noReport=0 ):
+		if self.builder is None:
+			b = builder
 		else:
-			source = self.methodSource
-		basetable = self.generator.buildParser( self.production, source )
+			b = self.builder
+		basetable = b.tt_tuple(self.production)
 		try:
 			if type(basetable[0]) == type(()):
 				if len(basetable) == 1 and len(basetable[0]) == 3:
 
 	finally:
 		if the target is not expanded and the Name token
-		should report something, the generator object is
+		should report something, the builder object is
 		asked to supply the tag object and flags for
 		processing the results of the target.  See the
-		generator.MethodSource documentation for details.
+		builder.MethodSource documentation for details.
 
 	Notes:
 		expanded and un-reported productions won't get any
 	value = ""
 	# following two flags are new ideas in the rewrite...
 	report = 1
-	def toParser( self, generator, noReport=0 ):
+	def toParser( self, builder, noReport=0 ):
 		"""Create the table for parsing a name-reference
 
 		Note that currently most of the "compression" optimisations
 		occur here.
 		"""
-		sindex = generator.getNameIndex( self.value )
+		sindex = builder.generator.getNameIndex( self.value )
 		command = TableInList
-		target = generator.getRootObjects()[sindex]
+		target = builder.generator.getRootObjects()[sindex]
 
 		reportSelf = (
 			(not noReport) and # parent hasn't suppressed reporting
 		elif not reportSelf:
 			tagobject = svalue
 		else:
-			flags, tagobject = generator.getObjectForName( svalue )
+			flags, tagobject = builder.getObjectForName( svalue )
 			if flags:
 				command = command | flags
 		if tagobject is None and not flags:
-			if self.terminal(generator):
+			if self.terminal(builder.generator):
 				if extractFlags(self,reportChildren) != extractFlags(target):
 					composite = compositeFlags(self,target, reportChildren)
-					partial = generator.getCustomTerminalParser( sindex,composite)
+					partial = builder.generator.getCustomTerminalParser( sindex,composite)
 					if partial is not None:
 						return partial
 					partial = tuple(copyToNewFlags(target, composite).toParser(
-						generator,
+						builder,
 						not reportChildren
 					))
-					generator.cacheCustomTerminalParser( sindex,composite, partial)
+					builder.generator.cacheCustomTerminalParser( sindex,composite, partial)
 					return partial
 				else:
-					partial = generator.getTerminalParser( sindex )
+					partial = builder.getTerminalParser( sindex )
 					if partial is not None:
 						return partial
 					partial = tuple(target.toParser(
-						generator,
+						builder,
 						not reportChildren
 					))
-					generator.setTerminalParser( sindex, partial)
+					builder.setTerminalParser( sindex, partial)
 					return partial
 		# base, required, positive table...
 		if (
-			self.terminal( generator ) and
+			self.terminal( builder.generator ) and
 			(not flags) and
 			isinstance(target, (SequentialGroup,Literal,Name,Range))
 		):
-			partial = generator.getTerminalParser( sindex )
+			partial = builder.getTerminalParser( sindex )
 			if partial is None:
 				partial = tuple(target.toParser(
-					generator,
+					builder,
 					#not reportChildren
 				))
-				generator.setTerminalParser( sindex, partial)
+				builder.setTerminalParser( sindex, partial)
 			if len(partial) == 1 and len(partial[0]) == 3 and (
 				partial[0][0] is None or tagobject is None
 			):
 		basetable = (
 			tagobject,
 			command, (
-				generator.getParserList (),
+				builder.getParserList (),
 				sindex,
 			)
 		)

File simpleparse/parser.py

 			production = self._rootProduction
 		if processor is None:
 			processor = self.buildProcessor()
-		return self._generator.buildParser(
-			production,
-			methodSource=processor,
-		)
+		builder = self._generator.buildParser(methodSource=processor)
+		return builder.tt_tuple(production)
 	

File simpleparse/printers.py

 	'''
 	def buildTagger( self, name=None, processor = None ):
 		'''Build the tag-table for parsing the EBNF for this parser'''
-		return GENERATOR.buildParser( name, processor )
+		return GENERATOR.buildParser( processor ).tt_tuple(name)
 
 """
 	ITEM = """GENERATOR.addDefinition(

File simpleparse/simpleparsegrammar.py

 		self.generator = processor.generator
 	def buildTagger( self, name=None, processor = None ):
 		"""Build the tag-table for parsing the EBNF for this parser"""
-		return SPGenerator.buildParser( name, processor )
+		return SPGenerator.buildParser( processor ).tt_tuple( name )
 
 class SPGrammarProcessor( DispatchProcessor ):
 	"""Processing object for post-processing an EBNF into a new generator"""

File tests/test_backup_on_subtable_failure.py

 import pprint
 
 
-parser = Parser( declaration ).generator.buildParser('testparser' )
+parser = Parser( declaration ).generator.buildParser().tt_tuple('testparser')
 result = TextTools.tag( testdata, parser )
 if result != expectedResult:
 	print 'backup-on-subtable-test failed'

File tests/test_deep_nesting.py

 ], 4)
 
 
-parser = Parser( declaration ).generator.buildParser( 'testparser' )
+parser = Parser( declaration ).generator.buildParser().tt_tuple( 'testparser' )
 print "About to attempt the deep-nesting test"
 print "If python goes into an infinite loop, then the test failed ;) "
 print

File tests/test_grammarparser.py

 
 from simpleparse.stt.TextTools import print_tagtable
 print_tagtable(
-	SPGenerator.buildParser( 'range' )
+	SPGenerator.buildParser().tt_tuple('range')
 )
 
 
 class SimpleParseGrammarTests(unittest.TestCase):
 	"""Test parsing of the the simpleparse grammar elements"""
 	def doBasicTest(self, parserName, testValue, expected, ):
-		parser = SPGenerator.buildParser( parserName )
+		parser = SPGenerator.buildParser().tt_tuple(parserName)
 		result = TextTools.tag( testValue, parser )
 		assert result == expected, '''\nexpected:%s\n     got:%s\n'''%( expected, result )
+
 	def testChar1( self ):
 		self.doBasicTest(
 			"CHARNODBLQUOTE",
 		)
 	def testDeclarationSet2( self ):
 		'''Just tries to parse and sees that everything was parsed, doesn't predict the result'''
-		parser = SPGenerator.buildParser( "declarationset" )
+		parser = SPGenerator.buildParser().tt_tuple( "declarationset" )
 		result = TextTools.tag( declaration, parser )
 		assert result[-1] == len(declaration), '''Didn't complete parse of the simpleparse declaration, only got %s chars, should have %s'''%(result[-1], len(declaration))