Anonymous avatar Anonymous committed 563118d Draft

more additions to fonts

Comments (0)

Files changed (26)

src/main/java/org/xmlcml/pdf2svg/AMIFontManager.java

 	private final static Logger LOG = Logger.getLogger(AMIFontManager.class);
 	
 	public static final String N_NAME = "Name";
+	public static final String N_BASE_FONT = "BaseFont";
 	
 	public static final String FONT_TRUE_TYPE = "TrueType";
 	public static final String FONT_TYPE1 = "Type1";
 	private FontFamilySet newFontFamilySet;
 
 	private Map<String, Integer> symbol2UnicodeHackMap;
+
+	private boolean nullFontDescriptorReport = true;;
 	
 	public static final int UNKNOWN_CHAR = (char)0X274E; // black square with white cross
 
 Resources = COSDictionary{(COSName{ProcSet}:COSArray{[COSName{PDF}, COSName{ImageB}]}) }
 Encoding = COSDictionary{(COSName{Differences}:COSArray{[COSInt{32}, COSName{space}]}) (COSName{Type}:COSName{Encoding}) }
 CharProcs = COSDictionary{(COSName{space}:COSDictionary{(COSName{Length}:COSInt{67}) (COSName{Filter}:COSName{FlateDecode}) }) }*/
+		
+		// This is messy until we work out what is null and what isn't
 		String typeS = null;
 		String subtypeS = null;
 		String baseFontS = null;
 			}
 			if (cosNameName != null && keyName.equals(N_NAME)) {
 				fontName = cosNameName;
+			} else if(cosNameName != null && keyName.equals(N_BASE_FONT)) {
+				baseFontS = cosNameName;
 			}
 		}
+		if (fontName == null) {
+			fontName = baseFontS;
+		}
 		
 		amiFont = getAmiFontByFontName(fontName);
 		if (amiFont == null) {
 			}
 		}
 		if (fd == null) {
-			LOG.error("****************** Null Font Descriptor : "+pdFont);
+			if (nullFontDescriptorReport) {
+				LOG.error("****************** Null Font Descriptor : "+pdFont+"\n       FURTHER ERRORS HIDDEN");
+				nullFontDescriptorReport = false;
+			}
 			amiFont = getOrCreateFontDict(0, (COSDictionary) pdFont.getCOSObject());
 			fontName = amiFont.getFontName();
 			if (fontName == null) {
 	/** Returns {@link #UNKNOWN_CHAR} as a one-character string. */
 	public static String getUnknownCharacterSymbol() {
 		final char placeholder = (char) UNKNOWN_CHAR;
 		return String.valueOf(placeholder);
 	}
+
+	public void setNullFontDescriptorReport(boolean b) { // true re-enables the one-time null-font-descriptor error log; false suppresses it
+		this.nullFontDescriptorReport = b; // the flag is cleared again after the next report is logged
+	}
 	
 }

src/main/java/org/xmlcml/pdf2svg/PDFPage2SVGConverter.java

 	private HashMap<String, Integer> integerByClipStringMap;
 
 	private SVGElement defs1;
+
+	/** Set once a null-encoding message has been logged so that later ones are suppressed; reset at the start of drawPage. */
+	private boolean reportedEncodingError = false;
 	
 
 	public PDFPage2SVGConverter() throws IOException {
 		pageSize = null;	// reset size for each page
 		this.pdf2svgConverter = converter;
 		this.amiFontManager = converter.getAmiFontManager();
+		amiFontManager.setNullFontDescriptorReport(true);
 		createSVGSVG();
 		drawPage(page);
 		return svg;
 	void drawPage(PDPage p) {
 		ensurePageSize();
 		page = p;
+		reportedEncodingError = false;
 
 		try {
 			if (page.getContents() != null) {
 		}
 
 		if (encoding == null) {
-			LOG.debug("Null encoding for character: "+charCode+" at "+currentXY+" font: "+fontName+" / "+fontFamilyName+" / "+amiFont.getBaseFont());
+			if (!reportedEncodingError ) {
+				LOG.debug("Null encoding for character: "+charCode+" at "+currentXY+" font: "+fontName+" / "+
+			       fontFamilyName+" / "+amiFont.getBaseFont()+
+			       "\n                FURTHER ERRORS HIDDEN");
+				reportedEncodingError = true;
+			}
 		} else {
 //			if (encoding instanceof DictionaryEncoding) {
 				getCharnameThroughEncoding(charCode);

src/main/resources/org/xmlcml/pdf2svg/codepoints/cm/cmsy10.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="CM" id="cmsy10" resource="org/xmlcml/pdf2svg/codepoints/defacto">
+
+    <codePoint unicode="U+226B" name="greatermuch" decimal="29" note="MUCH GREATER-THAN"/>
+<!-- several points are ".notdef" and plus-minus -->    
+    <codePoint unicode="U+00B1" name=".notdef" note="PLUS-MINUS SIGN"/>
+    
+</codePointSet>
+        

src/main/resources/org/xmlcml/pdf2svg/codepoints/defacto/advmt_sy.xml

-<!--
-
-    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-            http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
--->
-<codePointSet encoding="advmt_sy" id="advmt_sy" resource="org/xmlcml/pdf2svg/codepoints/defacto"> 
-    
-    <codePoint unicode="U+00D7" name="C2" note="MULTIPLICATION SIGN"/>  
-    <codePoint unicode="U+00B0" name="C14" note="DEGREE SIGN"/>  
-    
-</codePointSet>
-        

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/ADVMT_MI.xml

     <codePointSet idRef="basicLatin" href="../unicode/basicLatin.xml"/>
 
     <codePoint unicode="U+00D7" name="C2" note="MULTIPLICATION SIGN"/>
+    <codePoint unicode="U+03B1" name="C11" note="GREEK SMALL LETTER ALPHA"/>
+    <codePoint unicode="U+03B2" name="C12" note="GREEK SMALL LETTER BETA"/>
+    <codePoint unicode="U+03B3" name="C13" note="GREEK SMALL LETTER GAMMA"/>
     <codePoint unicode="U+25CB" name="C14" note="WHITE CIRCLE"/>
     <codePoint unicode="U+2022" name="C15" note="BULLET"/>
     <codePoint unicode="U+03B8" name="C18" note="GREEK SMALL LETTER THETA" />
     <codePoint unicode="U+007E" name="C24" note="TILDE" />
     <codePoint unicode="U+03C3" name="C27" note="GREEK SMALL LETTER SIGMA" />
     <codePoint unicode="U+03C7" name="C31" note="GREEK SMALL LETTER CHI" />
+    <codePoint unicode="U+00AE" name="C213" note="REGISTERED SIGN" />
+    <codePoint unicode="U+00A9" name="C223" note="COPYRIGHT SIGN" />
     
 </codePointSet>

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/AdvP4C4E46.xml

     <codePoint unicode="U+222B" decimal="82" name="R" note="INTEGRAL"/>  
     <codePoint unicode="U+2211"              name="X" note="N-ARY SUMMATION" />
     <codePoint unicode="U+222B" decimal="90" name="Z" note="INTEGRAL"/>  
+    <codePoint unicode="U+222B" decimal="240" name="eth" note="INTEGRAL"/>  
+    <codePoint unicode="U+221E" decimal="63" name="question" note="INFINITY"/>  
 	
     <codePoint unicode="U+006C" decimal="8216" note="CURLY LETTER L - no idea what character"/>  
 	
     <codePoint unicode="U+0029" name="C1" note="RIGHT PARENTHESIS"/>
     <codePoint unicode="U+005B" name="C2" note="LEFT SQUARE BRACKET"/>  
     <codePoint unicode="U+005D" name="C3" note="RIGHT SQUARE BRACKET"/>  
-    <codePoint unicode="U+0028" name="C16" note="LEFT BRACKET"/>  
-    <codePoint unicode="U+0029" name="C17" note="RIGHT BRACKET"/>
-    <codePoint unicode="U+0028" name="C18" note="LEFT BRACKET"/>  <!--  larger size -->
-    <codePoint unicode="U+0029" name="C19" note="RIGHT BRACKET"/> <!--  larger size --> 
+    <codePoint unicode="U+0028" name="C16" note="LEFT PARENTHESIS"/>  
+    <codePoint unicode="U+0029" name="C17" note="RIGHT PARENTHESIS"/>
+    <codePoint unicode="U+0028" name="C18" note="LEFT PARENTHESIS"/>  <!--  larger size -->
+    <codePoint unicode="U+0029" name="C19" note="RIGHT PARENTHESIS"/> <!--  larger size --> 
     <codePoint unicode="U+007B" name="C26" note="LEFT CURLY BRACKET"/> <!--  larger size --> 
     <codePoint unicode="U+007D" name="C27" note="RIGHT CURLY BRACKET"/> <!--  larger size --> 
     <codePoint unicode="U+3008" name="C28" note="LEFT ANGLE BRACKET"/> <!--  larger size --> 

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/AdvP4C4E74.xml

 
     <codePoint unicode="U+007C" name="numbersign" decimal="35" note="VERTICAL LINE"/>  
     <codePoint unicode="U+2229" name="backslash" decimal="92"  note="INTERSECTION"/>  
+    <codePoint unicode="U+2208" name="two"       decimal="50"  note="ELEMENT OF"/>  
     <codePoint unicode="U+002F" name="six"                     note="SOLIDUS"/>  
     <codePoint unicode="U+0028" name="eth"                     note="LEFT PARENTHESIS"/>  
     <codePoint unicode="U+0029" name="Thorn"                   note="RIGHT PARENTHESIS"/>  
     <codePoint unicode="U+003D" name="onequarter"              note="EQUALS"/>  
     <codePoint unicode="U+003D" name="ellipsis"                note="EQUALS"/>  
     <codePoint unicode="U+005B" name="onehalf"                 note="LEFT SQUARE BRACKET"/>  
-    <codePoint unicode="U+1D4DD" name="N"                      note="Mathematical bold script capital n"/>  
-    <codePoint unicode="U+1D4E2" name="S"                      note="Mathematical bold script capital s"/>  
-    <codePoint unicode="U+1D4E3" name="T"                      note="Mathematical bold script capital T"/>  
+    
+    <codePointSet idRef="mathBoldScript" href="mathBoldScript.xml"/>
+    
     <codePoint unicode="U+007B" name="f"                       note="LEFT CURLY BRACKET"/>  
     <codePoint unicode="U+007D" name="g"                       note="RIGHT CURLY BRACKET"/>  
     <codePoint unicode="U+3008" name="h" decimal="104"         note="LEFT ANGLE BRACKET"/>  
     <codePoint unicode="U+3009" name="i" decimal="105"         note="RIGHT ANGLE BRACKET"/>  
     <codePoint unicode="U+007C" name="j" decimal="106"         note="VERTICAL LINE"/>
+    <codePoint unicode="U+2225" name="k" decimal="107"         note="PARALLEL TO"/>
     <codePoint unicode="U+2020" name="y" charval="121"         note="DAGGER"/>  
     <codePoint unicode="U+221A" name="p" charval="112"         note="SQUARE ROOT"/>
     <codePoint unicode="U+2021" name="z"                       note="DOUBLE DAGGER"/>  
     <codePoint unicode="U+0028" name="C18"    note="LEFT PARENTHESIS"/> 
     <codePoint unicode="U+0029" name="C19"    note="RIGHT PARENTHESIS"/>     
     <codePoint unicode="U+2264" name="C20"    note="LESS-THAN OR EQUAL TO"/>     
-    <codePoint unicode="U+274E" name="C21"    note="UNK"/>     
+    <codePoint unicode="U+2265" name="C21"    note="GREATER-THAN OR EQUAL TO"/>     
     <codePoint unicode="U+030A" name="C23"    note="COMBINING RING ABOVE" />  
     <codePoint unicode="U+00BB" name="C24"    note="RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" />  
     <codePoint unicode="U+2245" name="C25"    note="APPROXIMATELY EQUAL TO" />  
     <codePoint unicode="U+226A" name="C28"    note="MUCH LESS-THAN"/> 
     <codePoint unicode="U+226B" name="C29"    note="MUCH GREATER-THAN"/> 
     <codePoint unicode="U+005D" name="C138"   note="RIGHT SQUARE BRACKET" />  
+    <codePoint unicode="U+2020" name="C160"   note="DAGGER" />  
     <codePoint unicode="U+00AE" name="C210"   note="REGISTERED SIGN"/> 
     <codePoint unicode="U+00A9" name="C211"   note="COPYRIGHT SIGN"/> 
     <codePoint unicode="U+00A9" name="C223"   note="COPYRIGHT SIGN"/> 

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/AdvP697C.xml

 -->
 <codePointSet encoding="AdvP697C" id="AdvP697C" resource="org/xmlcml/pdf2svg/codepoints/misc"> 
 
+    <codePoint unicode="U+002B" name="plus" decimal="43" note="PLUS SIGN"/>
     <codePoint unicode="U+003C" name="less" note="LESS-THAN SIGN"/>
     <codePoint unicode="U+003E" name="greater" note="GREATER-THAN SIGN"/>
 

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/AdvPS4731B1.xml

 -->
 <codePointSet encoding="AdvPS4731B1" id="AdvPS4731B1" resource="org/xmlcml/pdf2svg/codepoints/misc"> 
 
-    <codePoint unicode="U+221E" name="N" decimal="78" note="INFIMITY"/>  
+    <codePoint unicode="U+221E" name="N" decimal="78" note="INFINITY"/>  
 
 </codePointSet>
 		

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/AdvPS586B.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="AdvPS586B" id="AdvPS586B" resource="org/xmlcml/pdf2svg/codepoints/misc">
+
+    <codePoint unicode="U+003D" name="five" decimal="5" note="EQUALS"/>  
+    <codePoint unicode="U+00B1" name="six" decimal="6" note="PLUS-MINUS SIGN"/>  
+    <codePoint unicode="U+003C" name="comma" decimal="44" note="LESS-THAN SIGN"/>  
+    <codePoint unicode="U+003E" name="period" decimal="46" note="GREATER-THAN SIGN"/>  
+    <codePoint unicode="U+002B" name="one" decimal="49" note="PLUS SIGN"/>  
+    <codePoint unicode="U+00D7" name="three" decimal="51" note="MULTIPLICATION SIGN"/>  
+    <codePoint unicode="U+007E" name="semicolon" decimal="59" note="TILDE"/>  
+    <codePoint unicode="U+2261" name="bracketleft" decimal="91" note="IDENTICAL TO"/>  
+    
+</codePointSet>
+        

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/AdvT001.xml

 <codePointSet encoding="AdvT001" id="AdvT001" resource="org/xmlcml/pdf2svg/codepoints/misc"> 
 
     <codePoint unicode="U+00D0" name="C140" note="LATIN CAPITAL LETTER ETH"/>
+    <codePoint unicode="U+00AE" name="C213" note="REGISTERED SIGN"/>
     <codePoint unicode="U+00A9" name="C223" note="COPYRIGHT SIGN"/>
 </codePointSet>
         

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/advPSSym.xml

 <!-- this *might* be unicode compliant -->
     
     <codePoint unicode="U+00D7" name="periodcentered" decimal="183" note="MULTIPLICATION SIGN"/> 
-    <codePoint unicode="U+2264" name="sterling" decimal="163" note="LESS-THAN OR EQUAL TO"/> 
-    <codePoint unicode="U+0027" name="cent" decimal="162" note="APOSTROPHE"/> 
-    <codePoint unicode="U+00A9" name="ordfeminine" decimal="170" note="COPYRIGHT"/> 
+    <codePoint unicode="U+2264" name="sterling"       decimal="163" note="LESS-THAN OR EQUAL TO"/> 
+    <codePoint unicode="U+0027" name="cent"           decimal="162" note="APOSTROPHE"/> 
+    <codePoint unicode="U+00A9" name="ordfeminine"    decimal="170" note="COPYRIGHT"/> 
     
     <codePoint unicode="U+00B0" name="C176" note="DEGREE SIGN"/> 
     <codePoint unicode="U+00AE" name="C210" note="REGISTERED SIGN"/> 

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/advmt_ex.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="advmt_ex" id="advmt_ex" resource="org/xmlcml/pdf2svg/codepoints/defacto"> 
+    
+    <codePoint unicode="U+239B" name="zero" decimal="48" note="LEFT PARENTHESIS UPPER HOOK"/>  
+    <codePoint unicode="U+239D" name="at"   decimal="64" note="LEFT PARENTHESIS LOWER HOOK"/>  
+    <codePoint unicode="U+239C" name="B"    decimal="66" note="LEFT PARENTHESIS EXTENSION"/>  
+
+    <codePoint unicode="U+239E" name="one" decimal="49" note="RIGHT PARENTHESIS UPPER HOOK"/>  
+    <codePoint unicode="U+23A0" name="A"   decimal="65" note="RIGHT PARENTHESIS LOWER HOOK"/>  
+    <codePoint unicode="U+239F" name="C"    decimal="67" note="RIGHT PARENTHESIS EXTENSION"/>  
+
+    <codePoint unicode="U+220F" name="Y"    decimal="89" note="N-ARY PRODUCT"/>  
+
+    <codePoint unicode="U+0028" name="C18" note="LEFT PARENTHESIS"/>  
+    <codePoint unicode="U+0029" name="C19" note="RIGHT PARENTHESIS"/>  
+    
+</codePointSet>
+        

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/advmt_rm.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="advmt_rm" id="advmt_rm" resource="org/xmlcml/pdf2svg/codepoints/defacto"> 
+
+    <codePointSet idRef="unicode" href="../unicode/unicode.xml"/>
+    
+    <codePoint unicode="U+03A6" name="C8" note="GREEK CAPITAL LETTER PHI"/>  
+    
+</codePointSet>
+        

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/advmt_sy.xml

     limitations under the License.
 
 -->
-<codePointSet encoding="AdvMT_SY" id="advmt_sy" resource="org/xmlcml/pdf2svg/codepoints/misc">
-
-
-    <codePoint unicode="U+00A7" name="x" decimal="120" note="SECTION SIGN"/>  
-    <codePoint unicode="U+0027" name="zero" note="APOSTROPHE"/>  
-
-    <codePoint unicode="U+002D" name="C0" note="MINUS"/>  
-    <codePoint unicode="U+00D7" name="C2" note="MULTIPLICATION SIGN"/>
-    <codePoint unicode="U+25CB" name="C14" note="WHITE CIRCLE"/>
-    <codePoint unicode="U+2022" name="C15" note="BULLET"/>
-    <codePoint unicode="U+03B8" name="C18" note="GREEK SMALL LETTER THETA" />
-    <codePoint unicode="U+03BB" name="C21" note="GREEK SMALL LETTER LAMBDA" />
-    <codePoint unicode="U+03BC" name="C22" note="GREEK SMALL LETTER MU" />
-    <codePoint unicode="U+03C3" name="C27" note="GREEK SMALL LETTER SIGMA" />
-    <codePoint unicode="U+03C7" name="C31" note="GREEK SMALL LETTER CHI" />
+<codePointSet encoding="advmt_sy" id="advmt_sy" resource="org/xmlcml/pdf2svg/codepoints/defacto"> 
+    
+    <codePoint unicode="U+0027" name="zero" decimal="48" note="APOSTROPHE"/>  
+    <codePoint unicode="U+2208" name="two"  decimal="50" note="ELEMENT OF"/>  
+    <codePoint unicode="U+007B" name="f"    decimal="102" note="LEFT CURLY BRACKET"/>  
+    <codePoint unicode="U+0028" name="g"    decimal="103" note="LEFT PARENTHESIS"/>  
+    <codePoint unicode="U+007C" name="j"    decimal="106" note="VERTICAL LINE"/>
+    <codePoint unicode="U+002B" name="onequarter" decimal="188" note="PLUS SIGN"/>  
+    <codePoint unicode="U+0028" name="eth"  decimal="240" note="LEFT PARENTHESIS"/>  
+    <codePoint unicode="U+0029" name="Thorn" decimal="222" note="RIGHT PARENTHESIS"/>  
+    <codePoint unicode="U+003D" name="thorn" decimal="254" note="EQUALS SIGN"/>  
+    
+    <codePoint unicode="U+2212" name="C0" note="MINUS SIGN"/>  
+    <codePoint unicode="U+00D7" name="C2" note="MULTIPLICATION SIGN"/>  
+    <codePoint unicode="U+00B0" name="C14" note="DEGREE SIGN"/>  
+    <codePoint unicode="U+003E" name="C14" note="GREATER-THAN SIGN"/>  
+    <codePoint unicode="U+2264" name="C20" note="LESS-THAN OR EQUAL TO"/>  
+    <codePoint unicode="U+2265" name="C21" note="GREATER-THAN OR EQUAL TO"/>  
+    <codePoint unicode="U+007E" name="C24" note="TILDE"/>  
     
 </codePointSet>
+        

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/cambriaMath.xml

 <codePointSet encoding="Cambria Math" id="cambriaMath" resource="org/xmlcml/pdf2svg/codepoints/misc">
 
 <!--  this may be wrong -->
-    <codePoint unicode="U+0020" decimal="3" note="space"/>
+    <codePoint unicode="U+0020" name="g3" decimal="3" note="space"/>
+
     
-    <codePoint unicode="U+0061" decimal="131" note="a"/>
-    <codePoint unicode="U+0062" decimal="132" note="b"/>
-    <codePoint unicode="U+0063" decimal="133" note="c"/>
-    <codePoint unicode="U+0064" decimal="134" note="d"/>
-    <codePoint unicode="U+0065" decimal="135" note="e"/>
-    <codePoint unicode="U+0066" decimal="136" note="f"/>
-    <codePoint unicode="U+0067" decimal="137" note="g"/>
-    <codePoint unicode="U+0068" decimal="138" note="h"/>
-    <codePoint unicode="U+0069" decimal="139" note="i"/>
-    <codePoint unicode="U+006A" decimal="140" note="j"/>
-    <codePoint unicode="U+006B" decimal="141" note="k"/>
-    <codePoint unicode="U+006C" decimal="142" note="l"/>
-    <codePoint unicode="U+006D" decimal="143" note="m"/>
-    <codePoint unicode="U+006E" decimal="144" note="n"/>
-    <codePoint unicode="U+006F" decimal="145" note="o"/>
-    <codePoint unicode="U+0070" decimal="146" note="p"/>
-    <codePoint unicode="U+0071" decimal="147" note="q"/>
-    <codePoint unicode="U+0072" decimal="148" note="r"/>
-    <codePoint unicode="U+0073" decimal="149" note="s"/>
-    <codePoint unicode="U+0074" decimal="150" note="t"/>
-    <codePoint unicode="U+0075" decimal="151" note="u"/>
-    <codePoint unicode="U+0076" decimal="152" note="v"/>
-    <codePoint unicode="U+0077" decimal="153" note="w"/>
-    <codePoint unicode="U+0078" decimal="154" note="x"/>
-    <codePoint unicode="U+0079" decimal="155" note="y"/>
-    <codePoint unicode="U+007A" decimal="156" note="z"/>
+    <codePointSet idRef="basicLatin" href="../unicode/basicLatin.xml"/>
+
+<!-- everything above 127 seems to be non-unicode -->    
+    <codePoint unicode="U+0061" name="g131" decimal="131" note="a"/>
+    <codePoint unicode="U+0062" name="g132" decimal="132" note="b"/>
+    <codePoint unicode="U+0063" name="g133" decimal="133" note="c"/>
+    <codePoint unicode="U+0064" name="g134" decimal="134" note="d"/>
+    <codePoint unicode="U+0065" name="g135" decimal="135" note="e"/>
+    <codePoint unicode="U+0066" name="g136" decimal="136" note="f"/>
+    <codePoint unicode="U+0067" name="g137" decimal="137" note="g"/>
+    <codePoint unicode="U+0068" name="g138" decimal="138" note="h"/>
+    <codePoint unicode="U+0069" name="g139" decimal="139" note="i"/>
+    <codePoint unicode="U+006A" name="g140" decimal="140" note="j"/>
+    <codePoint unicode="U+006B" name="g141" decimal="141" note="k"/>
+    <codePoint unicode="U+006C" name="g142" decimal="142" note="l"/>
+    <codePoint unicode="U+006D" name="g143" decimal="143" note="m"/>
+    <codePoint unicode="U+006E" name="g144" decimal="144" note="n"/>
+    <codePoint unicode="U+006F" name="g145" decimal="145" note="o"/>
+    <codePoint unicode="U+0070" name="g146" decimal="146" note="p"/>
+    <codePoint unicode="U+0071" name="g147" decimal="147" note="q"/>
+    <codePoint unicode="U+0072" name="g148" decimal="148" note="r"/>
+    <codePoint unicode="U+0073" name="g149" decimal="149" note="s"/>
+    <codePoint unicode="U+0074" name="g150" decimal="150" note="t"/>
+    <codePoint unicode="U+0075" name="g151" decimal="151" note="u"/>
+    <codePoint unicode="U+0076" name="g152" decimal="152" note="v"/>
+    <codePoint unicode="U+0077" name="g153" decimal="153" note="w"/>
+    <codePoint unicode="U+0078" name="g154" decimal="154" note="x"/>
+    <codePoint unicode="U+0079" name="g155" decimal="155" note="y"/>
+    <codePoint unicode="U+007A" name="g156" decimal="156" note="z"/>
     
 
-    <codePoint unicode="U+002C" decimal="481" note=","/>
-    <codePoint unicode="U+002E" decimal="484" note="period"/>
-    <codePoint unicode="U+0021" decimal="488" note="exclam"/>
+    <codePoint unicode="U+002C" name="g481" decimal="481" note=","/>
+    <codePoint unicode="U+002E" name="g484" decimal="484" note="period"/>
+    <codePoint unicode="U+0021" name="g488" decimal="488" note="exclam"/>
 
-    <codePoint unicode="U+007E" decimal="817" note="tilde"/>
+    <codePoint unicode="U+007E" name="g817" decimal="817" note="tilde"/>
 
-    <codePoint unicode="U+0030" decimal="882" note="0"/>
-    <codePoint unicode="U+0031" decimal="883" note="1"/>
-    <codePoint unicode="U+0032" decimal="884" note="2"/>
-    <codePoint unicode="U+0033" decimal="885" note="3"/>
-    <codePoint unicode="U+0034" decimal="886" note="4"/>
-    <codePoint unicode="U+0035" decimal="887" note="5"/>
-    <codePoint unicode="U+0036" decimal="888" note="6"/>
-    <codePoint unicode="U+0037" decimal="889" note="7"/>
-    <codePoint unicode="U+0038" decimal="890" note="8"/>
-    <codePoint unicode="U+0039" decimal="891" note="9"/>
+    <codePoint unicode="U+0030" name="g882" decimal="882" note="0"/>
+    <codePoint unicode="U+0031" name="g883" decimal="883" note="1"/>
+    <codePoint unicode="U+0032" name="g884" decimal="884" note="2"/>
+    <codePoint unicode="U+0033" name="g885" decimal="885" note="3"/>
+    <codePoint unicode="U+0034" name="g886" decimal="886" note="4"/>
+    <codePoint unicode="U+0035" name="g887" decimal="887" note="5"/>
+    <codePoint unicode="U+0036" name="g888" decimal="888" note="6"/>
+    <codePoint unicode="U+0037" name="g889" decimal="889" note="7"/>
+    <codePoint unicode="U+0038" name="g890" decimal="890" note="8"/>
+    <codePoint unicode="U+0039" name="g891" decimal="891" note="9"/>
     
-    <codePoint unicode="U+221E" decimal="955" note="infinty"/>
-    <codePoint unicode="U+221A" decimal="958" note="squareroot"/>
+    <codePoint unicode="U+221E" name="g955" decimal="955" note="infinity"/>
+    <codePoint unicode="U+221A" name="g958" decimal="958" note="squareroot"/>
 
-    <codePoint unicode="U+03A3" decimal="963" note="Sigma"/>
+    <codePoint unicode="U+03A3" name="g963" decimal="963" note="Sigma"/>
 
-    <codePoint unicode="U+2192" decimal="1372" note="rarrow"/>
+    <codePoint unicode="U+2192" name="g1372" decimal="1372" note="rarrow"/>
     
-    <codePoint unicode="U+221D" decimal="1503" note="proportional"/>
-    <codePoint unicode="U+222B" decimal="1516" note="integral"/>
-    <codePoint unicode="U+007E" decimal="1533" note="tilde"/>
-    <codePoint unicode="U+003C" decimal="1575" note="less"/>
-    <codePoint unicode="U+003E" decimal="1576" note="greater"/>
+    <codePoint unicode="U+002A" name="g1499" decimal="1499" note="asterisk"/>
+    
+    <codePoint unicode="U+221D" name="g1503" decimal="1503" note="proportional"/>
+    <codePoint unicode="U+222B" name="g1516" decimal="1516" note="integral"/>
+    <codePoint unicode="U+007E" name="g1533" decimal="1533" note="tilde"/>
+    <codePoint unicode="U+003C" name="g1575" decimal="1575" note="less"/>
+    <codePoint unicode="U+003E" name="g1576" decimal="1576" note="greater"/>
 
-    <codePoint unicode="U+0041" decimal="1827" note="A"/>
-    <codePoint unicode="U+0042" decimal="1828" note="B"/>
-    <codePoint unicode="U+0043" decimal="1829" note="C"/>
-    <codePoint unicode="U+0044" decimal="1830" note="D"/>
-    <codePoint unicode="U+0045" decimal="1831" note="E"/>
-    <codePoint unicode="U+0046" decimal="1832" note="F"/>
-    <codePoint unicode="U+0047" decimal="1833" note="G"/>
-    <codePoint unicode="U+0048" decimal="1834" note="H"/>
-    <codePoint unicode="U+0049" decimal="1835" note="I"/>
-    <codePoint unicode="U+004A" decimal="1836" note="J"/>
-    <codePoint unicode="U+004B" decimal="1837" note="K"/>
-    <codePoint unicode="U+004C" decimal="1838" note="L"/>
-    <codePoint unicode="U+004D" decimal="1839" note="M"/>
-    <codePoint unicode="U+004E" decimal="1840" note="N"/>
-    <codePoint unicode="U+004F" decimal="1841" note="O"/>
-    <codePoint unicode="U+0050" decimal="1842" note="P"/>
-    <codePoint unicode="U+0051" decimal="1843" note="Q"/>
-    <codePoint unicode="U+0052" decimal="1844" note="R"/>
-    <codePoint unicode="U+0053" decimal="1845" note="S"/>
-    <codePoint unicode="U+0054" decimal="1846" note="T"/>
-    <codePoint unicode="U+0055" decimal="1847" note="U"/>
-    <codePoint unicode="U+0056" decimal="1848" note="V"/>
-    <codePoint unicode="U+0057" decimal="1849" note="W"/>
-    <codePoint unicode="U+0058" decimal="1850" note="X"/>
-    <codePoint unicode="U+0059" decimal="1851" note="Y"/>
-    <codePoint unicode="U+005A" decimal="1852" note="Z"/>
+    <codePoint unicode="U+0041" name="g1827" decimal="1827" note="A"/>
+    <codePoint unicode="U+0042" name="g1828" decimal="1828" note="B"/>
+    <codePoint unicode="U+0043" name="g1829" decimal="1829" note="C"/>
+    <codePoint unicode="U+0044" name="g1830" decimal="1830" note="D"/>
+    <codePoint unicode="U+0045" name="g1831" decimal="1831" note="E"/>
+    <codePoint unicode="U+0046" name="g1832" decimal="1832" note="F"/>
+    <codePoint unicode="U+0047" name="g1833" decimal="1833" note="G"/>
+    <codePoint unicode="U+0048" name="g1834" decimal="1834" note="H"/>
+    <codePoint unicode="U+0049" name="g1835" decimal="1835" note="I"/>
+    <codePoint unicode="U+004A" name="g1836" decimal="1836" note="J"/>
+    <codePoint unicode="U+004B" name="g1837" decimal="1837" note="K"/>
+    <codePoint unicode="U+004C" name="g1838" decimal="1838" note="L"/>
+    <codePoint unicode="U+004D" name="g1839" decimal="1839" note="M"/>
+    <codePoint unicode="U+004E" name="g1840" decimal="1840" note="N"/>
+    <codePoint unicode="U+004F" name="g1841" decimal="1841" note="O"/>
+    <codePoint unicode="U+0050" name="g1842" decimal="1842" note="P"/>
+    <codePoint unicode="U+0051" name="g1843" decimal="1843" note="Q"/>
+    <codePoint unicode="U+0052" name="g1844" decimal="1844" note="R"/>
+    <codePoint unicode="U+0053" name="g1845" decimal="1845" note="S"/>
+    <codePoint unicode="U+0054" name="g1846" decimal="1846" note="T"/>
+    <codePoint unicode="U+0055" name="g1847" decimal="1847" note="U"/>
+    <codePoint unicode="U+0056" name="g1848" decimal="1848" note="V"/>
+    <codePoint unicode="U+0057" name="g1849" decimal="1849" note="W"/>
+    <codePoint unicode="U+0058" name="g1850" decimal="1850" note="X"/>
+    <codePoint unicode="U+0059" name="g1851" decimal="1851" note="Y"/>
+    <codePoint unicode="U+005A" name="g1852" decimal="1852" note="Z"/>
 
-    <codePoint unicode="U+0061" decimal="1853" note="a"/>
-    <codePoint unicode="U+0062" decimal="1854" note="b"/>
-    <codePoint unicode="U+0063" decimal="1855" note="c"/>
-    <codePoint unicode="U+0064" decimal="1856" note="d"/>
-    <codePoint unicode="U+0065" decimal="1857" note="e"/>
-    <codePoint unicode="U+0066" decimal="1858" note="f"/>
-    <codePoint unicode="U+0067" decimal="1859" note="g"/>
-    <codePoint unicode="U+0068" decimal="1860" note="h"/>
-    <codePoint unicode="U+0069" decimal="1861" note="i"/>
-    <codePoint unicode="U+006A" decimal="1862" note="j"/>
-    <codePoint unicode="U+006B" decimal="1863" note="k"/>
-    <codePoint unicode="U+006C" decimal="1864" note="l"/>
-    <codePoint unicode="U+006D" decimal="1865" note="m"/>
-    <codePoint unicode="U+006E" decimal="1866" note="n"/>
-    <codePoint unicode="U+006F" decimal="1867" note="o"/>
-    <codePoint unicode="U+0070" decimal="1868" note="p"/>
-    <codePoint unicode="U+0071" decimal="1869" note="q"/>
-    <codePoint unicode="U+0072" decimal="1870" note="r"/>
-    <codePoint unicode="U+0073" decimal="1871" note="s"/>
-    <codePoint unicode="U+0074" decimal="1872" note="t"/>
-    <codePoint unicode="U+0075" decimal="1873" note="u"/>
-    <codePoint unicode="U+0076" decimal="1874" note="v"/>
-    <codePoint unicode="U+0077" decimal="1875" note="w"/>
-    <codePoint unicode="U+0078" decimal="1876" note="x"/>
-    <codePoint unicode="U+0079" decimal="1877" note="y"/>
-    <codePoint unicode="U+007A" decimal="1878" note="z"/>
+    <codePoint unicode="U+0061" name="g1853" decimal="1853" note="a"/>
+    <codePoint unicode="U+0062" name="g1854" decimal="1854" note="b"/>
+    <codePoint unicode="U+0063" name="g1855" decimal="1855" note="c"/>
+    <codePoint unicode="U+0064" name="g1856" decimal="1856" note="d"/>
+    <codePoint unicode="U+0065" name="g1857" decimal="1857" note="e"/>
+    <codePoint unicode="U+0066" name="g1858" decimal="1858" note="f"/>
+    <codePoint unicode="U+0067" name="g1859" decimal="1859" note="g"/>
+    <codePoint unicode="U+0068" name="g1860" decimal="1860" note="h"/>
+    <codePoint unicode="U+0069" name="g1861" decimal="1861" note="i"/>
+    <codePoint unicode="U+006A" name="g1862" decimal="1862" note="j"/>
+    <codePoint unicode="U+006B" name="g1863" decimal="1863" note="k"/>
+    <codePoint unicode="U+006C" name="g1864" decimal="1864" note="l"/>
+    <codePoint unicode="U+006D" name="g1865" decimal="1865" note="m"/>
+    <codePoint unicode="U+006E" name="g1866" decimal="1866" note="n"/>
+    <codePoint unicode="U+006F" name="g1867" decimal="1867" note="o"/>
+    <codePoint unicode="U+0070" name="g1868" decimal="1868" note="p"/>
+    <codePoint unicode="U+0071" name="g1869" decimal="1869" note="q"/>
+    <codePoint unicode="U+0072" name="g1870" decimal="1870" note="r"/>
+    <codePoint unicode="U+0073" name="g1871" decimal="1871" note="s"/>
+    <codePoint unicode="U+0074" name="g1872" decimal="1872" note="t"/>
+    <codePoint unicode="U+0075" name="g1873" decimal="1873" note="u"/>
+    <codePoint unicode="U+0076" name="g1874" decimal="1874" note="v"/>
+    <codePoint unicode="U+0077" name="g1875" decimal="1875" note="w"/>
+    <codePoint unicode="U+0078" name="g1876" decimal="1876" note="x"/>
+    <codePoint unicode="U+0079" name="g1877" decimal="1877" note="y"/>
+    <codePoint unicode="U+007A" name="g1878" decimal="1878" note="z"/>
     
-    <codePoint unicode="U+03B1" decimal="2009" note="alpha"/>
-    <codePoint unicode="U+03B2" decimal="2010" note="beta"/>
-    <codePoint unicode="U+03B3" decimal="2011" note="gamma"/>
-    <codePoint unicode="U+03B4" decimal="2012" note="delta"/>
-    <codePoint unicode="U+03B5" decimal="2013" note="epsilon"/>
+    <codePoint unicode="U+03B1" name="g2009" decimal="2009" note="alpha"/>
+    <codePoint unicode="U+03B2" name="g2010" decimal="2010" note="beta"/>
+    <codePoint unicode="U+03B3" name="g2011" decimal="2011" note="gamma"/>
+    <codePoint unicode="U+03B4" name="g2012" decimal="2012" note="delta"/>
+    <codePoint unicode="U+03B5" name="g2013" decimal="2013" note="epsilon"/>
     
-    <codePoint unicode="U+03B8" decimal="2016" note="theta"/>
+    <codePoint unicode="U+03B8" name="g2016" decimal="2016" note="theta"/>
     
-    <codePoint unicode="U+03BC" decimal="2020" note="mu"/>
-    <codePoint unicode="U+03C0" decimal="2024" note="pi"/>
-    <codePoint unicode="U+03C1" decimal="2025" note="rho"/>
-    <codePoint unicode="U+03C3" decimal="2026" note="sigma"/>
-    <codePoint unicode="U+03C4" decimal="2028" note="tau"/>
-    <codePoint unicode="U+03C6" decimal="2030" note="phi"/>
+    <codePoint unicode="U+03BC" name="g2020" decimal="2020" note="mu"/>
+    <codePoint unicode="U+03C0" name="g2024" decimal="2024" note="pi"/>
+    <codePoint unicode="U+03C1" name="g2025" decimal="2025" note="rho"/>
+    <codePoint unicode="U+03C3" name="g2026" decimal="2026" note="sigma"/>
+    <codePoint unicode="U+03C4" name="g2028" decimal="2028" note="tau"/>
+    <codePoint unicode="U+03C6" name="g2030" decimal="2030" note="phi"/>
     
-    <codePoint unicode="U+0030" decimal="2868" note="0"/>
-    <codePoint unicode="U+0031" decimal="2869" note="1"/>
-    <codePoint unicode="U+0032" decimal="2870" note="2"/>
-    <codePoint unicode="U+0033" decimal="2871" note="3"/>
-    <codePoint unicode="U+0034" decimal="2872" note="4"/>
-    <codePoint unicode="U+0035" decimal="2873" note="5"/>
-    <codePoint unicode="U+0036" decimal="2874" note="6"/>
-    <codePoint unicode="U+0037" decimal="2875" note="7"/>
-    <codePoint unicode="U+0038" decimal="2876" note="8"/>
-    <codePoint unicode="U+0039" decimal="2877" note="9"/>
+    <codePoint unicode="U+0030" name="g2868" decimal="2868" note="0"/>
+    <codePoint unicode="U+0031" name="g2869" decimal="2869" note="1"/>
+    <codePoint unicode="U+0032" name="g2870" decimal="2870" note="2"/>
+    <codePoint unicode="U+0033" name="g2871" decimal="2871" note="3"/>
+    <codePoint unicode="U+0034" name="g2872" decimal="2872" note="4"/>
+    <codePoint unicode="U+0035" name="g2873" decimal="2873" note="5"/>
+    <codePoint unicode="U+0036" name="g2874" decimal="2874" note="6"/>
+    <codePoint unicode="U+0037" name="g2875" decimal="2875" note="7"/>
+    <codePoint unicode="U+0038" name="g2876" decimal="2876" note="8"/>
+    <codePoint unicode="U+0039" name="g2877" decimal="2877" note="9"/>
     
-    <codePoint unicode="U+002D" decimal="2879" note="-"/>
-    <codePoint unicode="U+003D" decimal="2880" note="="/>
+    <codePoint unicode="U+002D" name="g2879" decimal="2879" note="-"/>
+    <codePoint unicode="U+003D" name="g2880" decimal="2880" note="="/>
 
-    <codePoint unicode="U+0061" decimal="2911" note="a"/>
-    <codePoint unicode="U+0062" decimal="2912" note="b"/>
-    <codePoint unicode="U+0063" decimal="2913" note="c"/>
-    <codePoint unicode="U+0064" decimal="2914" note="d"/>
-    <codePoint unicode="U+0065" decimal="2915" note="e"/>
-    <codePoint unicode="U+0066" decimal="2916" note="f"/>
-    <codePoint unicode="U+0067" decimal="2917" note="g"/>
-    <codePoint unicode="U+0068" decimal="2918" note="h"/>
-    <codePoint unicode="U+0069" decimal="2919" note="i"/>
-    <codePoint unicode="U+006A" decimal="2920" note="j"/>
-    <codePoint unicode="U+006B" decimal="2921" note="k"/>
-    <codePoint unicode="U+006C" decimal="2922" note="l"/>
-    <codePoint unicode="U+006D" decimal="2923" note="m"/>
-    <codePoint unicode="U+006E" decimal="2924" note="n"/>
-    <codePoint unicode="U+006F" decimal="2925" note="o"/>
-    <codePoint unicode="U+0070" decimal="2926" note="p"/>
-    <codePoint unicode="U+0071" decimal="2927" note="q"/>
-    <codePoint unicode="U+0072" decimal="2928" note="r"/>
-    <codePoint unicode="U+0073" decimal="2929" note="s"/>
-    <codePoint unicode="U+0074" decimal="2930" note="t"/>
-    <codePoint unicode="U+0075" decimal="2931" note="u"/>
-    <codePoint unicode="U+0076" decimal="2932" note="v"/>
-    <codePoint unicode="U+0077" decimal="2933" note="w"/>
-    <codePoint unicode="U+0078" decimal="2934" note="x"/>
-    <codePoint unicode="U+0079" decimal="2935" note="y"/>
-    <codePoint unicode="U+007A" decimal="2936" note="z"/>
+    <codePoint unicode="U+0061" name="g2911" decimal="2911" note="a"/>
+    <codePoint unicode="U+0062" name="g2912" decimal="2912" note="b"/>
+    <codePoint unicode="U+0063" name="g2913" decimal="2913" note="c"/>
+    <codePoint unicode="U+0064" name="g2914" decimal="2914" note="d"/>
+    <codePoint unicode="U+0065" name="g2915" decimal="2915" note="e"/>
+    <codePoint unicode="U+0066" name="g2916" decimal="2916" note="f"/>
+    <codePoint unicode="U+0067" name="g2917" decimal="2917" note="g"/>
+    <codePoint unicode="U+0068" name="g2918" decimal="2918" note="h"/>
+    <codePoint unicode="U+0069" name="g2919" decimal="2919" note="i"/>
+    <codePoint unicode="U+006A" name="g2920" decimal="2920" note="j"/>
+    <codePoint unicode="U+006B" name="g2921" decimal="2921" note="k"/>
+    <codePoint unicode="U+006C" name="g2922" decimal="2922" note="l"/>
+    <codePoint unicode="U+006D" name="g2923" decimal="2923" note="m"/>
+    <codePoint unicode="U+006E" name="g2924" decimal="2924" note="n"/>
+    <codePoint unicode="U+006F" name="g2925" decimal="2925" note="o"/>
+    <codePoint unicode="U+0070" name="g2926" decimal="2926" note="p"/>
+    <codePoint unicode="U+0071" name="g2927" decimal="2927" note="q"/>
+    <codePoint unicode="U+0072" name="g2928" decimal="2928" note="r"/>
+    <codePoint unicode="U+0073" name="g2929" decimal="2929" note="s"/>
+    <codePoint unicode="U+0074" name="g2930" decimal="2930" note="t"/>
+    <codePoint unicode="U+0075" name="g2931" decimal="2931" note="u"/>
+    <codePoint unicode="U+0076" name="g2932" decimal="2932" note="v"/>
+    <codePoint unicode="U+0077" name="g2933" decimal="2933" note="w"/>
+    <codePoint unicode="U+0078" name="g2934" decimal="2934" note="x"/>
+    <codePoint unicode="U+0079" name="g2935" decimal="2935" note="y"/>
+    <codePoint unicode="U+007A" name="g2936" decimal="2936" note="z"/>
     
-    <codePoint unicode="U+0394" decimal="2940" note="Delta"/>
+    <codePoint unicode="U+0394" name="g2940" decimal="2940" note="Delta"/>
     
     
-    <codePoint unicode="U+003C" decimal="2996" note="less"/>
-    <codePoint unicode="U+221E" decimal="2998" note="infinty"/>
-    <codePoint unicode="U+2264" decimal="3000" note="le"/>
+    <codePoint unicode="U+003C" name="g2996" decimal="2996" note="less"/>
+    <codePoint unicode="U+221E" name="g2998" decimal="2998" note="infinity"/>
+    <codePoint unicode="U+2264" name="g3000" decimal="3000" note="le"/>
 
-    <codePoint unicode="U+0061" decimal="3028" note="a"/>
-    <codePoint unicode="U+0062" decimal="3029" note="b"/>
-    <codePoint unicode="U+0063" decimal="3030" note="c"/>
-    <codePoint unicode="U+0064" decimal="3031" note="d"/>
-    <codePoint unicode="U+0065" decimal="3032" note="e"/>
-    <codePoint unicode="U+0066" decimal="3033" note="f"/>
-    <codePoint unicode="U+0067" decimal="3034" note="g"/>
-    <codePoint unicode="U+0068" decimal="3035" note="h"/>
-    <codePoint unicode="U+0069" decimal="3036" note="i"/>
-    <codePoint unicode="U+006A" decimal="3037" note="j"/>
-    <codePoint unicode="U+006B" decimal="3038" note="k"/>
-    <codePoint unicode="U+006C" decimal="3039" note="l"/>
-    <codePoint unicode="U+006D" decimal="3040" note="m"/>
-    <codePoint unicode="U+006E" decimal="3041" note="n"/>
-    <codePoint unicode="U+006F" decimal="3042" note="o"/>
-    <codePoint unicode="U+0070" decimal="3043" note="p"/>
-    <codePoint unicode="U+0071" decimal="3044" note="q"/>
-    <codePoint unicode="U+0072" decimal="3045" note="r"/>
-    <codePoint unicode="U+0073" decimal="3046" note="s"/>
-    <codePoint unicode="U+0074" decimal="3047" note="t"/>
-    <codePoint unicode="U+0075" decimal="3048" note="u"/>
-    <codePoint unicode="U+0076" decimal="3049" note="v"/>
-    <codePoint unicode="U+0077" decimal="3050" note="w"/>
-    <codePoint unicode="U+0078" decimal="3051" note="x"/>
-    <codePoint unicode="U+0079" decimal="3052" note="y"/>
-    <codePoint unicode="U+007A" decimal="3053" note="z"/>
+    <codePoint unicode="U+0021" name="g3015" decimal="3015" note="exclam"/>
+    <codePoint unicode="U+0061" name="g3028" decimal="3028" note="a"/>
+    <codePoint unicode="U+0062" name="g3029" decimal="3029" note="b"/>
+    <codePoint unicode="U+0063" name="g3030" decimal="3030" note="c"/>
+    <codePoint unicode="U+0064" name="g3031" decimal="3031" note="d"/>
+    <codePoint unicode="U+0065" name="g3032" decimal="3032" note="e"/>
+    <codePoint unicode="U+0066" name="g3033" decimal="3033" note="f"/>
+    <codePoint unicode="U+0067" name="g3034" decimal="3034" note="g"/>
+    <codePoint unicode="U+0068" name="g3035" decimal="3035" note="h"/>
+    <codePoint unicode="U+0069" name="g3036" decimal="3036" note="i"/>
+    <codePoint unicode="U+006A" name="g3037" decimal="3037" note="j"/>
+    <codePoint unicode="U+006B" name="g3038" decimal="3038" note="k"/>
+    <codePoint unicode="U+006C" name="g3039" decimal="3039" note="l"/>
+    <codePoint unicode="U+006D" name="g3040" decimal="3040" note="m"/>
+    <codePoint unicode="U+006E" name="g3041" decimal="3041" note="n"/>
+    <codePoint unicode="U+006F" name="g3042" decimal="3042" note="o"/>
+    <codePoint unicode="U+0070" name="g3043" decimal="3043" note="p"/>
+    <codePoint unicode="U+0071" name="g3044" decimal="3044" note="q"/>
+    <codePoint unicode="U+0072" name="g3045" decimal="3045" note="r"/>
+    <codePoint unicode="U+0073" name="g3046" decimal="3046" note="s"/>
+    <codePoint unicode="U+0074" name="g3047" decimal="3047" note="t"/>
+    <codePoint unicode="U+0075" name="g3048" decimal="3048" note="u"/>
+    <codePoint unicode="U+0076" name="g3049" decimal="3049" note="v"/>
+    <codePoint unicode="U+0077" name="g3050" decimal="3050" note="w"/>
+    <codePoint unicode="U+0078" name="g3051" decimal="3051" note="x"/>
+    <codePoint unicode="U+0079" name="g3052" decimal="3052" note="y"/>
+    <codePoint unicode="U+007A" name="g3053" decimal="3053" note="z"/>
     
-    <codePoint unicode="U+03B4" decimal="3083" note="delta"/>
-    <codePoint unicode="U+03B8" decimal="3087" note="theta"/>
-    <codePoint unicode="U+03C0" decimal="3095" note="pi"/>
-    <codePoint unicode="U+03C9" decimal="3104" note="omega"/>
-    <codePoint unicode="U+2202" decimal="3105" note="diff"/>
+    <codePoint unicode="U+03B4" name="g3083" decimal="3083" note="delta"/>
+    <codePoint unicode="U+03B8" name="g3087" decimal="3087" note="theta"/>
+    <codePoint unicode="U+03C0" name="g3095" decimal="3095" note="pi"/>
+    <codePoint unicode="U+03C9" name="g3104" decimal="3104" note="omega"/>
+    <codePoint unicode="U+2202" name="g3105" decimal="3105" note="diff"/>
     
-    <codePoint unicode="U+0030" decimal="3116" note="0"/>
-    <codePoint unicode="U+0031" decimal="3117" note="1"/>
-    <codePoint unicode="U+0032" decimal="3118" note="2"/>
-    <codePoint unicode="U+0033" decimal="3119" note="3"/>
-    <codePoint unicode="U+0034" decimal="3120" note="4"/>
-    <codePoint unicode="U+0035" decimal="3121" note="5"/>
-    <codePoint unicode="U+0036" decimal="3122" note="6"/>
-    <codePoint unicode="U+0037" decimal="3123" note="7"/>
-    <codePoint unicode="U+0038" decimal="3124" note="8"/>
-    <codePoint unicode="U+0039" decimal="3125" note="9"/>
+    <codePoint unicode="U+0030" name="g3116" decimal="3116" note="0"/>
+    <codePoint unicode="U+0031" name="g3117" decimal="3117" note="1"/>
+    <codePoint unicode="U+0032" name="g3118" decimal="3118" note="2"/>
+    <codePoint unicode="U+0033" name="g3119" decimal="3119" note="3"/>
+    <codePoint unicode="U+0034" name="g3120" decimal="3120" note="4"/>
+    <codePoint unicode="U+0035" name="g3121" decimal="3121" note="5"/>
+    <codePoint unicode="U+0036" name="g3122" decimal="3122" note="6"/>
+    <codePoint unicode="U+0037" name="g3123" decimal="3123" note="7"/>
+    <codePoint unicode="U+0038" name="g3124" decimal="3124" note="8"/>
+    <codePoint unicode="U+0039" name="g3125" decimal="3125" note="9"/>
     
-    <codePoint unicode="U+002B" decimal="3397" note="+"/>
-    <codePoint unicode="U+002D" decimal="3398" note="-"/>
-    <codePoint unicode="U+00B1" decimal="3399" note="plusminus"/>
-    <codePoint unicode="U+00F7" decimal="3402" note="divide"/>
-    <codePoint unicode="U+003D" decimal="3404" note="="/>
-    <codePoint unicode="U+2260" decimal="3405" note="ne"/>
-    <codePoint unicode="U+003C" decimal="3407" note="less"/>
-    <codePoint unicode="U+226A" decimal="3408" note="very much less"/>
-    <codePoint unicode="U+226B" decimal="3409" note="very much greater"/>
-    <codePoint unicode="U+2265" decimal="3410" note="ge"/>
+    <codePoint unicode="U+0020" name="g3364" decimal="3364" note="space"/>
+    <codePoint unicode="U+00AF" name="g3365" decimal="3365" note="macron"/>
+    
+    <codePoint unicode="U+002B" name="g3397" decimal="3397" note="+"/>
+    <codePoint unicode="U+002D" name="g3398" decimal="3398" note="-"/>
+    <codePoint unicode="U+00B1" name="g3399" decimal="3399" note="plusminus"/>
+    <codePoint unicode="U+0026" name="g3400" decimal="3400" note="ampersand"/>
+    <codePoint unicode="U+00F7" name="g3402" decimal="3402" note="divide"/>
+    <codePoint unicode="U+003D" name="g3404" decimal="3404" note="="/>
+    <codePoint unicode="U+2260" name="g3405" decimal="3405" note="ne"/>
+    <codePoint unicode="U+003C" name="g3407" decimal="3407" note="less"/>
+    <codePoint unicode="U+226A" name="g3408" decimal="3408" note="very much less"/>
+    <codePoint unicode="U+226B" name="g3409" decimal="3409" note="very much greater"/>
+    <codePoint unicode="U+2265" name="g3410" decimal="3410" note="ge"/>
 
-    <codePoint unicode="U+0028" decimal="3435" note="("/>
-    <codePoint unicode="U+0029" decimal="3439" note=")"/>
+    <codePoint unicode="U+0028" name="g3435" decimal="3435" note="("/>
+    <codePoint unicode="U+0029" name="g3439" decimal="3439" note=")"/>
 
-    <codePoint unicode="U+221A" decimal="3493" note="squareroot"/>
+    <codePoint unicode="U+221A" name="g3493" decimal="3493" note="squareroot"/>
+    <codePoint unicode="U+0022" name="g3495" decimal="3495" note="quote"/>
 
-    <codePoint unicode="U+222B" decimal="3505" note="integral"/>
+    <codePoint unicode="U+222B" name="g3505" decimal="3505" note="integral"/>
     
-    <codePoint unicode="U+03A3" decimal="3533" note="Sigma"/>
+    <codePoint unicode="U+03A3" name="g3533" decimal="3533" note="Sigma"/>
     
-    <codePoint unicode="U+0028" decimal="4666" note="("/>
-    <codePoint unicode="U+0029" decimal="4667" note=")"/>
+    <codePoint unicode="U+0028" name="g4666" decimal="4666" note="("/>
+    <codePoint unicode="U+0029" name="g4667" decimal="4667" note=")"/>
     
-    <codePoint unicode="U+0028" decimal="4672" note="("/>
-    <codePoint unicode="U+0029" decimal="4673" note=")"/>
-    <codePoint unicode="U+007B" decimal="4676" note="{"/>
+    <codePoint unicode="U+0028" name="g4672" decimal="4672" note="("/>
+    <codePoint unicode="U+0029" name="g4673" decimal="4673" note=")"/>
+    <codePoint unicode="U+007B" name="g4676" decimal="4676" note="{"/>
+    
+    <!--  unicode -->
+    <codePoint unicode="U+2211" decimal="8721" note="N-ARY SUMMATION"/>
     
     
 </codePointSet>

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/cmmi10.xml

 -->
 <codePointSet encoding="CMM" id="cmmi10" resource="org/xmlcml/pdf2svg/codepoints/misc">
 
+<!--  maybe unicode?  -->
+    <codePointSet idRef="greekAndCoptic" href="../unicode/greekAndCoptic.xml"/>
+    
+    <codePoint unicode="U+003E" name="greater" note="GREATER-THAN SIGN" />
+	
     <codePoint unicode="U+2113" name="lscript" note="SCRIPT SMALL L" />
 	
-	<!-- unicode --> 
-    <codePointSet idRef="unicode256" href="../unicode/unicode256.xml"/>
-	
 </codePointSet>

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/cmmi8.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="CMM" id="cmmi8" resource="org/xmlcml/pdf2svg/codepoints/misc">
+
+    <codePoint unicode="U+2113" name="lscript" note="SCRIPT SMALL L" />
+	
+</codePointSet>

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/cmmi9.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="CMM" id="cmmi9" resource="org/xmlcml/pdf2svg/codepoints/misc">
+
+    <codePoint unicode="U+2113" name="lscript" note="SCRIPT SMALL L" />
+	
+	<!-- unicode --> 
+    <codePointSet idRef="unicode256" href="../unicode/unicode256.xml"/>
+	
+</codePointSet>

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/cmsy10.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="CMM" id="cmsy10" resource="org/xmlcml/pdf2svg/codepoints/misc">
+
+    <codePoint unicode="U+226B" name="greatermuch"   decimal="29" note="MUCH GREATER-THAN" />
+    <codePoint unicode="U+002F" name="negationslash" decimal="54" note="SOLIDUS" />
+    <codePoint unicode="U+007B" name="paragraph"     decimal="123" note="LEFT CURLY BRACKET" />
+    <codePoint unicode="U+007C" name="club"          decimal="124" note="VERTICAL LINE" />
+    <codePoint unicode="U+007D" name="diamond"          decimal="125" note="RIGHT CURLY BRACKET" />
+    <codePoint unicode="U+00D7" name=""              decimal="215" note="MULTIPLICATION SIGN" />
+	<codePoint unicode="U+00B1" name=".notdef" note="PLUS-MINUS SIGN"/>
+	
+	<!-- mathScript --> 
+    <codePointSet idRef="mathBoldScript" href="../misc/mathBoldScript.xml"/>
+	
+	<!-- Unicode mapping may be broken here: 8729 and 8733 fail; there may be two overlapping fonts :-( -->
+	
+	
+    <codePoint unicode="U+2020" name="" decimal="8224" note="DAGGER" />
+    <codePoint unicode="U+2021" name="" decimal="8225" note="DOUBLE DAGGER" />
+    <codePoint unicode="U+2022" name="" decimal="8226" note="BULLET" />
+    <codePoint unicode="U+2032" name="" decimal="8242" note="PRIME" />
+    <codePoint unicode="U+2192" name="" decimal="8594" note="RIGHTWARDS ARROW" />
+    <codePoint unicode="U+2208" name="" decimal="8712" note="ELEMENT OF" />
+    <codePoint unicode="U+2212" name="" decimal="8729" note="MINUS SIGN" />
+    <codePoint unicode="U+221A" name="" decimal="8730" note="SQUARE ROOT" />
+    <codePoint unicode="U+2212" name="" decimal="8722" note="MINUS SIGN" />
+    <codePoint unicode="U+221D" name="" decimal="8723" note="PROPORTIONAL TO" />
+    <codePoint unicode="U+221D" name="" decimal="8725" note="PROPORTIONAL TO" />
+    <codePoint unicode="U+2217" name="" decimal="8727" note="ASTERISK OPERATOR" />
+    <codePoint unicode="U+221D" name="" decimal="8733" note="PROPORTIONAL TO" />
+    <codePoint unicode="U+223C" name="" decimal="8764" note="TILDE OPERATOR" />
+    <codePoint unicode="U+2248" name="" decimal="8776" note="ALMOST EQUAL TO" />
+    <codePoint unicode="U+2261" name="" decimal="8801" note="IDENTICAL TO" />
+    <codePoint unicode="U+2264" name="" decimal="8804" note="LESS-THAN OR EQUAL TO" />
+    <codePoint unicode="U+2265" name="" decimal="8805" note="GREATER-THAN OR EQUAL TO" />
+	<!-- 
+    <codePointSet idRef="unicode256" href="../unicode/unicode256.xml"/>	
+    -->
+</codePointSet>

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/mathBoldScript.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="mathBoldScript" id="mathBoldScript" resource="org/xmlcml/pdf2svg/codepoints/misc">
+
+<!--  math bold script referenced through latin characters -->
+    <codePoint unicode="U+1D4D0" name="A"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL A"/>  
+    <codePoint unicode="U+1D4D1" name="B"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL B"/>  
+    <codePoint unicode="U+1D4D2" name="C"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL C"/>  
+    <codePoint unicode="U+1D4D3" name="D"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL D"/>  
+    <codePoint unicode="U+1D4D4" name="E"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL E"/>  
+    <codePoint unicode="U+1D4D5" name="F"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL F"/>  
+    <codePoint unicode="U+1D4D6" name="G"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL G"/>  
+    <codePoint unicode="U+1D4D7" name="H"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL H"/>  
+    <codePoint unicode="U+1D4D8" name="I"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL I"/>  
+    <codePoint unicode="U+1D4D9" name="J"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL J"/>  
+    <codePoint unicode="U+1D4DA" name="K"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL K"/>  
+    <codePoint unicode="U+1D4DB" name="L"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL L"/>  
+    <codePoint unicode="U+1D4DC" name="M"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL M"/>  
+    <codePoint unicode="U+1D4DD" name="N"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL N"/>  
+    <codePoint unicode="U+1D4DE" name="O"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL O"/>  
+    <codePoint unicode="U+1D4DF" name="P"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL P"/>  
+    <codePoint unicode="U+1D4E0" name="Q"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL Q"/>  
+    <codePoint unicode="U+1D4E1" name="R"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL R"/>  
+    <codePoint unicode="U+1D4E2" name="S"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL S"/>  
+    <codePoint unicode="U+1D4E3" name="T"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL T"/>  
+    <codePoint unicode="U+1D4E4" name="U"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL U"/>  
+    <codePoint unicode="U+1D4E5" name="V"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL V"/>  
+    <codePoint unicode="U+1D4E6" name="W"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL W"/>  
+    <codePoint unicode="U+1D4E7" name="X"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL X"/>  
+    <codePoint unicode="U+1D4E8" name="Y"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL Y"/>  
+    <codePoint unicode="U+1D4E9" name="Z"                      note="MATHEMATICAL BOLD SCRIPT CAPITAL Z"/>  
+	
+</codePointSet>

src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/msam10.xml

 <codePointSet encoding="MSAM" id="msam10" resource="org/xmlcml/pdf2svg/codepoints/misc">
 
 	<codePoint unicode="U+2A7E" name="greaterorequalslant" note="GREATER-THAN OR SLANTED EQUAL TO"/> 
+	<codePoint unicode="U+2272" name="lessorsimilar" decimal="46" note="LESS-THAN OR EQUIVALENT TO"/> 
+	<codePoint unicode="U+2273" name="greaterorsimilar" decimal="38" note="GREATER-THAN OR EQUIVALENT TO"/> 
 	
 	<!-- unicode --> 
     <codePointSet idRef="unicode256" href="../unicode/unicode256.xml"/>

src/main/resources/org/xmlcml/pdf2svg/codepoints/symbol/advpi1.xml

     
     <codePoint unicode="U+2264" decimal="35" name="numbersign" note="LESS-THAN OR EQUAL TO"/>
     <codePoint unicode="U+2265" decimal="36" name="dollar" note="GREATER-THAN OR EQUAL TO"/>
+    <codePoint unicode="U+2245" decimal="38" name="ampersand" note="APPROXIMATELY EQUAL TO"/>
     <codePoint unicode="U+003C" decimal="44" name="comma"  note="LESS-THAN SIGN"/>
     <codePoint unicode="U+2192" decimal="45" name="hyphen"  note="RIGHTWARDS ARROW"/>
     <codePoint unicode="U+003E" decimal="46" note="GREATER-THAN SIGN"/>
     <codePoint unicode="U+003E" decimal="52" name="four" note="RIGHT ANGLE BRACKET"/>
     <codePoint unicode="U+00D7" decimal="54" name="six" note="MULTIPLICATION SIGN"/>
     <codePoint unicode="U+00B0" decimal="56" name="eight" note="DEGREE SIGN"/>
+    <codePoint unicode="U+221E" decimal="63" name="question" note="INFINITY"/>
 
 <!--  greek Gamma = G -->    
     <codePoint unicode="U+0391" decimal="65" name="A" note="GREEK CAPITAL LETTER ALPHA"/>

src/main/resources/org/xmlcml/pdf2svg/codepoints/symbol/symbol.xml

     <codePoint unicode="U+03BC" decimal="61472" note="GREEK SMALL LETTER MU"/>
 
     <codePoint unicode="U+002D" decimal="61485" note="HYPHEN-MINUS"/>
+    <!--  have also found combining tilde -->
 
     <codePoint unicode="U+03B1" decimal="61537" note="GREEK SMALL LETTER ALPHA"/>
     <codePoint unicode="U+03B2" decimal="61538" note="GREEK SMALL LETTER BETA"/>

src/main/resources/org/xmlcml/pdf2svg/fontFamilySets/nonStandardFontFamilySet.xml

         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/msam9.xml" />
     <fontFamily name="MSAM10" 
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/msam10.xml" />
+    <fontFamily name="msam10" 
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/msam10.xml" />
         
 <!--  ================================================== -->    
     
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/AdvTTec1d2308I.xml"/>
     <fontFamily name="AdvPS4731B1" symbol="true" 
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/AdvPS4731B1.xml"/>
+    <fontFamily name="AdvPS586B" symbol="true" 
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/AdvPS586B.xml"/>
+
+
+    <fontFamily name="AdvT001" symbol="true" 
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/AdvT001.xml"/>
 
     <fontFamily name="AdvPSMP1" symbol="true" 
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/advPSMP1.xml"/>
     <fontFamily name="AdvPS_TINR" symbol="true" 
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/advPSTINR.xml"/>
 
+    <fontFamily name="AdvMT_EX" symbol="true"
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/advmt_ex.xml" />
     <fontFamily name="AdvMT_MI" symbol="true"
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/advmt_mi.xml" />
     <fontFamily name="ADVMT_MI" symbol="true"
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/advmt_mi.xml" />
+    <fontFamily name="ADVMT_RM" symbol="true"
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/advmt_rm.xml" />
+    <fontFamily name="AdvMT_RM" symbol="true"
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/advmt_rm.xml" />
         
     <fontFamily name="AdvMT_SY" symbol="true"
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/advmt_sy.xml" />
 
     <fontFamily name="Cambria Math" symbol="true" 
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/cambriaMath.xml"/>
+    <fontFamily name="CambriaMath" symbol="true" 
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/cambriaMath.xml"/>
 
 <!-- uses Cddd codepoints -->
     <fontFamily name="AdvMacMthSyN" 
     <fontFamily name="AdvOT863180fb+fb" symbol="false" />
 
 <!-- ====================== CM ==============================  -->
+    <fontFamily name="CMSY10" 
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/cmsy10.xml"/>
+
+    <fontFamily name="cmmi8" 
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/cmmi8.xml"/>
+    <fontFamily name="cmmi9" 
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/cmmi9.xml"/>
     <fontFamily name="cmmi10" 
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/cmmi10.xml"/>
 <!-- ====================== Calibri ==============================  -->

src/test/java/org/xmlcml/pdf2svg/SamplesForTest.java

 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/BotJLinn55", "../pdfs/BotJLinn55");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/IchthyRes79", "../pdfs/IchthyRes79");
 		
-		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/MolBiolEvol51", "../pdfs/MolBiolEvol51");
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/MolBiolEvol51", "../pdfs/MolBiolEvol51");
 		// runs OK // 460 secs
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/Palaeontology44", "../pdfs/Palaeontology44");
 		// runs OK
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/jb/", "../pdfs/jb/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/misc/", "../pdfs/misc/");
 //      new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/npg/", "../pdfs/npg/");
+		// OK
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/ActaPalaeontologicaPolonica", "../pdfs/pdfsByJournal/ActaPalaeontologicaPolonica");
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/ActaZoologica", "../pdfs/pdfsByJournal/ActaZoologica");
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AmericanJournalBotany", "../pdfs/pdfsByJournal/AmericanJournalBotany");
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AmericanMuseumNovitates", "../pdfs/pdfsByJournal/AmericanMuseumNovitates");
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AmericanZoologist", "../pdfs/pdfsByJournal/AmericanZoologist");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AnnualReviewEcologySystematics", "../pdfs/pdfsByJournal/AnnualReviewEcologySystematics");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AppliedMathematicsLetters", "../pdfs/pdfsByJournal/AppliedMathematicsLetters");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiochemicalSystematicsEcology", "../pdfs/pdfsByJournal/BiochemicalSystematicsEcology");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Bioinformatics", "../pdfs/pdfsByJournal/Bioinformatics");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiologicalJournalLinneanSociety", "../pdfs/pdfsByJournal/BiologicalJournalLinneanSociety");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiologicalReviews", "../pdfs/pdfsByJournal/BiologicalReviews");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiologyLetters", "../pdfs/pdfsByJournal/BiologyLetters");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiologicalReviews", "../pdfs/pdfsByJournal/BiologicalReviews");
+		
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/plosone/", "../pdfs/plosone/");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/ppt/", "../pdfs/ppt/");
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/taylorfrancis/", "../pdfs/taylorfrancis/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/psyc/", "../pdfs/psyc/");
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/royalsoc/", "../pdfs/royalsoc/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/rsc/many/", "../pdfs/rsc/many/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/springer/", "../pdfs/springer/");
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/taylorfrancis/", "../pdfs/taylorfrancis/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/thesis/", "../pdfs/thesis/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/wiley/", "../pdfs/wiley/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/word/", "../pdfs/word/");
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.