Commits

Geoffrey Sneddon committed 0134409

Add --w3c-compat-xref-normalization.

Comments (0)

Files changed (2)

 	parser.add_option("", "--w3c-compat-xref-a-placement", action="store_true",
 		default=False, dest="w3c_compat_xref_a_placement", help="When cross-referencing elements apart from span, put the a element inside the element instead of outside the element.")
 	
+	parser.add_option("", "--w3c-compat-xref-normalization", action="store_true",
+		default=False, dest="w3c_compat_xref_normalization", help="Only use ASCII letters, numbers, and spaces in comparison of cross-reference terms.")
+	
 	parser.add_option("", "--profile", action="store_true",
 		default=False, dest="profile", help="Profile the execution of %prog.")
 	

specGen/processes/xref.py

 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
+import re
 from lxml import etree
 from copy import deepcopy
 
 w3c_term_elements = ("abbr", "acronym", "b", "bdo", "big", "code", "del", "em", "i", "ins", "kbd", "label", "legend", "q", "samp", "small", "span", "strong", "sub", "sup", "tt", "var")
 term_not_in_stack_with = ("a", "dfn", "datagrid")
 
+non_alphanumeric_spaces = re.compile(r"[^a-zA-Z0-9 ]+")
+
 class xref(object):
 	"""Add cross-references."""
 	
 	
 	def buildReferences(self, ElementTree, allow_duplicate_terms=False, **kwargs):
 		for dfn in ElementTree.iter("dfn"):
-			if dfn.get(u"title") is not None:
-				term = dfn.get(u"title")
-			else:
-				term = utils.textContent(dfn)
-			
-			term = term.strip(utils.spaceCharacters).lower()
+			term = self.getTerm(dfn, **kwargs)
 			
 			if len(term) > 0:
 				if not allow_duplicate_terms and term in self.dfns:
 					raise DuplicateTermException, term
-				term = utils.spacesRegex.sub(" ", term)
 				
 				id = utils.generateID(dfn)
 				
 		to_remove = []
 		for element in ElementTree.iter(tag=etree.Element):
 			if element.tag in term_elements or (w3c_compat or w3c_compat_xref_elements) and element.tag in w3c_term_elements:
-				if element.get(u"title") is not None:
-					term = element.get(u"title")
-				else:
-					term = utils.textContent(element)
-				
-				term = term.strip(utils.spaceCharacters).lower()
-				
-				term = utils.spacesRegex.sub(" ", term)
+				term = self.getTerm(element, **kwargs)
 				
 				if term in self.dfns:
 					goodParentingAndChildren = True
 								to_remove.append(element)
 		for element in to_remove:
 			element.getparent().remove(element)
+	
+	def getTerm(self, element, w3c_compat = False, w3c_compat_xref_normalization = False, **kwargs):
+		"""Return the normalized cross-reference term for an element.
+		
+		The raw term is the element's title attribute when one is present,
+		otherwise the value of utils.textContent(element). It is then
+		stripped of utils.spaceCharacters at both ends, lowercased, and has
+		every match of utils.spacesRegex replaced by a single space.
+		
+		When w3c_compat or w3c_compat_xref_normalization is true, every run
+		of characters other than ASCII letters, digits and the space
+		character is removed as well.
+		
+		Any further keyword arguments are accepted and ignored, so callers
+		can forward their whole option set with **kwargs.
+		"""
+		# An explicit title attribute takes precedence over the text content.
+		if element.get(u"title") is not None:
+			term = element.get(u"title")
+		else:
+			term = utils.textContent(element)
+		
+		term = term.strip(utils.spaceCharacters).lower()
+		
+		# presumably collapses runs of whitespace; confirm against utils.spacesRegex
+		term = utils.spacesRegex.sub(" ", term)
+		
+		# NOTE(review): this removal runs after the strip and space
+		# replacement above, so deleting punctuation that sits between or
+		# next to spaces can leave doubled or leading/trailing spaces in the
+		# result (e.g. "a - b" becomes "a  b"). Confirm this is the intended
+		# compatibility behaviour.
+		if w3c_compat or w3c_compat_xref_normalization:
+			term = non_alphanumeric_spaces.sub("", term)
+		
+		return term
 
 class DuplicateTermException(utils.SpecGenException):
 	"""Term already defined."""
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.