Commits

Anonymous committed aa0cbf9

added dingbats, more cambria, better cascading of char/code precedence; happy with current version for 0.2

  • Participants
  • Parent commits 16ffdbb

Comments (0)

Files changed (42)

File src/main/java/org/xmlcml/pdf2svg/AMIFont.java

 
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.regex.Matcher;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.encoding.DictionaryEncoding;
 import org.apache.pdfbox.encoding.Encoding;
+import org.apache.pdfbox.pdmodel.common.PDMatrix;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.font.PDFont;
 import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
 import org.apache.pdfbox.pdmodel.font.PDFontFactory;
+import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
 import org.apache.pdfbox.pdmodel.font.PDType0Font;
 
 /** wrapper for PDType1Font. is meant to manage the badFontnames, other
 	public static final String ENCODING = "Encoding";
 	static Pattern LEADER_PATTERN = Pattern.compile("^[A-Z]{6}\\+(.*)$");
 	
-	private Boolean isBold = null;
-	private Boolean isItalic = null;
-	private Boolean isSymbol = null;
+//	private Boolean isBold = null;
+//	private Boolean isItalic = null;
+//	private Boolean isSymbol = null;
+	
 	private String fontFamilyName;
 	private String fontName;
 	
 	private PDFontDescriptor fontDescriptor;
 	private String fontType;
 	
-	private int currentIndex;
-	
-	
 	private String finalSuffix;
-
 	private Encoding encoding;
 	private String fontEncoding;
+	private String baseFont;
+	private Map<String, String> pathStringByCharnameMap;
+	
+	private COSDictionary dictionary;
+	private COSArray dictionaryArray;
+	private COSName dictionaryName;
+	private COSDictionary dictionaryDictionary;
+	private COSInteger dictionaryInteger;
 
-	private String baseFont;
-
-	private Map<String, String> pathStringByCharnameMap;
-
-	private COSDictionary dictionary;
+	private PDFont firstDescendantFont;
 	
 //	static {
 //		String[] standard14Names = PDType1Font.getStandard14Names();
         addAdobeFontMetric( metrics, "ZapfDingbats" );
         return metrics;
     }
-    
-    
         
 	 */
 	
 	/** create font from family and key attributes
 	 * currently used when compiling an external table
 	 */
-	public AMIFont(String fontFamilyName, String encoding, String type, boolean isSymbol) {
+	public AMIFont(String fontFamilyName, String encoding, String type) {
 		this();
 		this.fontFamilyName = fontFamilyName;
 		this.fontEncoding = encoding;
 		this.fontType = type;
-		this.isSymbol = isSymbol;
 	}
 
 	/** create font from family and key attributes
 	 * currently used when compiling an external table
 	 */
-	public AMIFont(String fontFamilyName, String encoding, String type, boolean isSymbol, COSDictionary dictionary) {
-		this(fontFamilyName, encoding, type, isSymbol);
+	public AMIFont(String fontFamilyName, String encoding, String type, COSDictionary dictionary) {
+		this(fontFamilyName, encoding, type);
 		this.dictionary = dictionary;
 		analyzeDictionary();
 	}
 		for (COSName key : keySet) {
 			COSBase object = dictionary.getDictionaryObject(key);
 			if (object instanceof COSArray) {
-				COSArray cosArray = (COSArray) object;
-				for (int i = 0; i < cosArray.size(); i++) {
-					LOG.trace(cosArray.getName(i)+": "+cosArray.getObject(i));
+				dictionaryArray = (COSArray) object;
+				for (int i = 0; i < dictionaryArray.size(); i++) {
+					LOG.trace(dictionaryArray.getName(i)+": "+dictionaryArray.getObject(i));
 				}
 			} else if (object instanceof COSName) {
-				COSName cosName = (COSName) object;
+				this.dictionaryName = (COSName) object;
 			} else if (object instanceof COSDictionary) {
-				COSDictionary cosDictionary = (COSDictionary) object;
+				this.dictionaryDictionary = (COSDictionary) object;
 			} else if (object instanceof COSInteger) {
-				COSInteger cosInt = (COSInteger) object;
+				this.dictionaryInteger = (COSInteger) object;
 			} else {
 				LOG.debug(object.getClass());
-//				COSInt cosInt = (COSInt) object;
 			}
 		}
 	}
 
 	public AMIFont(PDFont pdFont) {
+		// Wraps a PDFBox font: records its descriptor (or the first descendant font's
+		// descriptor), base font, type and encoding, then derives fontName/fontFamilyName.
 		fontDescriptor = getFontDescriptorOrDescendantFontDescriptor(pdFont);
+		this.firstDescendantFont = getFirstDescendantFont(pdFont);
 		this.baseFont = pdFont.getBaseFont();
-		this.fontType = pdFont.getClass().getSimpleName();
-		encoding = pdFont.getFontEncoding();
+//		this.fontType = pdFont.getClass().getSimpleName();
+		this.fontType = pdFont.getType();
+		this.encoding = pdFont.getFontEncoding();
 		if (encoding == null && pdFont instanceof PDType0Font) {
-			// this gets the first descendant font
-			COSDictionary fontx = (COSDictionary) pdFont.getCOSObject();
-			COSArray descendantFontArray = (COSArray) fontx.getDictionaryObject(COSName.DESCENDANT_FONTS);
-			COSDictionary descendantFontDictionary = (COSDictionary) descendantFontArray.getObject(0);
-			try {
-				pdFont = PDFontFactory.createFont(descendantFontDictionary);
-			} catch (IOException e) {
-				// TODO Auto-generated catch block
-				e.printStackTrace();
-			}
+			// getFirstDescendantFont returns null when there is no DescendantFonts array or
+			// the descendant cannot be created; keep the Type0 font in that case (as the
+			// previous code did on IOException) instead of dereferencing null
+			if (firstDescendantFont != null) {
+				pdFont = firstDescendantFont;
+				encoding = pdFont.getFontEncoding();
+			}
 		}
 		fontEncoding = (encoding == null) ? null : encoding.getClass().getSimpleName();
-		processFont(pdFont, fontDescriptor);
+		this.pdFont = pdFont;
+		fontFamilyName = null;
+		if (fontDescriptor != null) {
+			fontName = fontDescriptor.getFontName();
+//			fontFamilySave = fontDescriptor.getFontFamily();
+			
+			stripFontNameComponents();
+			if (fontFamilyName == null) {
+				fontFamilyName = fontName;
+			}
+			LOG.trace("FFFFF "+fontFamilyName);
+//			// take fontDescriptor over name extraction
+//			isBold = fontDescriptor.isForceBold() ? true : isBold;
+//			if (isBold) {
+//				LOG.trace("bold from Font Descriptor");
+//			}
+//			isItalic = fontDescriptor.isItalic() ? true : isItalic;
+//			if (isItalic) {
+//				LOG.trace("italic from Font Descriptor");
+//			}
+//			isSymbol = fontDescriptor.isSymbolic();
+//			if (isSymbol) {
+//				LOG.trace("symbol from Font Descriptor");
+//			}
+			
+			fontName = fontDescriptor.getFontName();
+			LOG.trace("name="+fontName+" fam="+fontFamilyName+" type="+pdFont.getSubType()+" bold="+isForceBold() +" it="+isItalic()+" face="+finalSuffix+" sym="+isSymbolic()+ " enc="+(encoding == null ? "null" : encoding.getClass().getSimpleName()));
+		} else {
+			fontName = baseFont;
+			stripFontNameComponents();
+			if (fontFamilyName == null) {
+				fontFamilyName = fontName;
+			}
+			LOG.debug(this.toString());
+			LOG.warn("font had no descriptor: "+baseFont+" / "+fontFamilyName);
+		}
 	}
 
 	/** do not call without fontName or PDType1Font
 
 	public static AMIFont createAMIFontFromName(String fontName) {
 		AMIFont amiFont = new AMIFont(fontName);
-		return (amiFont.isOK()) ? amiFont : null;
+		return amiFont;
+//		return (amiFont.isOK()) ? amiFont : null;
 	}
 	
-	private void processFont(PDFont pdFont, PDFontDescriptor fd) {
-		this.pdFont = pdFont;
-		fontDescriptor = fd;
-		fontFamilyName = null;
-		if (fontDescriptor != null) {
-			fontName = fontDescriptor.getFontName();
-//			fontFamilySave = fontDescriptor.getFontFamily();
-			
-			stripFontNameComponents();
-			if (fontFamilyName == null) {
-				fontFamilyName = fontName;
-			}
-			LOG.trace("FFFFF "+fontFamilyName);
-			// take fontDescriptor over name extraction
-			isBold = fontDescriptor.isForceBold() ? true : isBold;
-			if (isBold) {
-				LOG.trace("bold from Font Descriptor");
-			}
-			isItalic = fontDescriptor.isItalic() ? true : isItalic;
-			if (isItalic) {
-				LOG.trace("italic from Font Descriptor");
-			}
-			isSymbol = fontDescriptor.isSymbolic();
-			if (isSymbol) {
-				LOG.trace("symbol from Font Descriptor");
-			}
-			
-			fontName = fontDescriptor.getFontName();
-			LOG.trace("name="+fontName+" fam="+fontFamilyName+" type="+pdFont.getSubType()+" bold="+isBold +" it="+isItalic+" face="+finalSuffix+" sym="+isSymbol+ " enc="+(encoding == null ? "null" : encoding.getClass().getSimpleName()));
-		} else {
-			fontName = baseFont;
-			stripFontNameComponents();
-			if (fontFamilyName == null) {
-				fontFamilyName = fontName;
-			}
-			LOG.debug(this.toString());
-			LOG.warn("font had no descriptor: "+baseFont+" / "+fontFamilyName);
-		}
-	}
-
 	private void stripFontNameComponents() {
 		noteAndRemovePrefix();
-		processStandardFamilies();
-		processIsBoldInName();
-		processIsItalicInName();
-		processFinalSuffix();
+//		processStandardFamilies();
+//		processIsBoldInName();
+//		processIsItalicInName();
+//		processFinalSuffix();
 	}
 
 	private void noteAndRemovePrefix() {
 		}
 	}
 
-	private void processStandardFamilies() {
-		processAsFamily("TimesNewRoman");
-		if (fontFamilyName != null) return;
-		processAsFamily("Courier");
-		if (fontFamilyName != null) return;
-		processAsFamily("Helvetica");
-		if (fontFamilyName != null) return;
-		processAsFamily(SYMBOL);
-		if (fontFamilyName != null) return;
-		processAsFamily("ZapfDingbats");
-	}
+//	private void processStandardFamilies() {
+//		processAsFamily("TimesNewRoman");
+//		if (fontFamilyName != null) return;
+//		processAsFamily("Courier");
+//		if (fontFamilyName != null) return;
+//		processAsFamily("Helvetica");
+//		if (fontFamilyName != null) return;
+//		processAsFamily(SYMBOL);
+//		if (fontFamilyName != null) return;
+//		processAsFamily("ZapfDingbats");
+//	}
 	
-	private void processIsBoldInName() {
-		// syntactic variants 
-		boolean isBoldInName = false;
-		for (String bString : BOLD_SUFFIXES) {
-			isBoldInName = isIncluded(bString);
-			if (isBoldInName) break;
-		}
-		isBold = (isBold != null) ? isBold : isBoldInName;
-	}
+//	private void processIsBoldInName() {
+//		// syntactic variants 
+//		boolean isBoldInName = false;
+//		for (String bString : BOLD_SUFFIXES) {
+//			isBoldInName = isIncluded(bString);
+//			if (isBoldInName) break;
+//		}
+//		isBold = (isBold != null) ? isBold : isBoldInName;
+//	}
+//	
+//	private void processIsItalicInName() {
+//		// syntactic variants 
+//		boolean isItalicInName = false;
+//		for (String iString : ITALIC_SUFFIXES) {
+//			isItalicInName = isIncluded(iString);
+//			if (isItalicInName) break;
+//		}
+//		isItalic = (isItalic != null) ? isItalic : isItalicInName;
+//	}
 	
-	private void processIsItalicInName() {
-		// syntactic variants 
-		boolean isItalicInName = false;
-		for (String iString : ITALIC_SUFFIXES) {
-			isItalicInName = isIncluded(iString);
-			if (isItalicInName) break;
-		}
-		isItalic = (isItalic != null) ? isItalic : isItalicInName;
-	}
-	
-	private boolean isOK() {
-		return 
-		isBold != null &&
-		isItalic != null &&
-		isSymbol != null &&
-		fontFamilyName != null &&
-		fontName != null;
-	}
+//	private boolean isOK() {
+//		return 
+//		isBold != null &&
+//		isItalic != null &&
+//		isSymbol != null &&
+//		fontFamilyName != null &&
+//		fontName != null;
+//	}
 
-	private void processAsFamily(String standardFamilyName) {
-		if (fontName != null) {
-			String fontNameLower = fontName.toLowerCase();
-			currentIndex = fontNameLower.indexOf(standardFamilyName.toLowerCase());
-			if (currentIndex != -1) {
-				removeFromFontName(standardFamilyName, currentIndex);
-				fontFamilyName = standardFamilyName;
-			}
-		}
-	}
+//	private void processAsFamily(String standardFamilyName) {
+//		if (fontName != null) {
+//			String fontNameLower = fontName.toLowerCase();
+//			int currentIndex = fontNameLower.indexOf(standardFamilyName.toLowerCase());
+//			if (currentIndex != -1) {
+//				removeFromFontName(standardFamilyName, currentIndex);
+//				fontFamilyName = standardFamilyName;
+//			}
+//		}
+//	}
 
 	private Boolean isIncluded(String suffix) {
 		boolean isIncluded = false;
 		if (fontName != null) {
 			String fontNameLower = fontName.toLowerCase();
-			currentIndex = fontNameLower.indexOf(suffix.toLowerCase());
+			int currentIndex = fontNameLower.indexOf(suffix.toLowerCase());
 			if (currentIndex != -1) {
 				removeFromFontName(suffix, currentIndex);
 				isIncluded = true;
 		}
 	}
 
-//	/** should only be used once for each new fontName
-//	 * 
-//	 * @param fontName
-//	 * @return
-//	 */
-//	AMIFont createAMIFont(String fontName) {
-//		AMIFont amiFont = null;
-//		PDType1Font standardFont = PDType1Font.getStandardFont(fontName);
-//		if (standardFont != null) {
-//			amiFont = new AMIFont(standardFont);
-//		} else {
-//			amiFont = createAMIFontFromName(fontName);
-//		}
-//		return amiFont;
-//	}
-
 	public Encoding getEncoding() {
 		return encoding;
 	}
 		return (encoding instanceof DictionaryEncoding) ? (DictionaryEncoding) encoding : null;
 	}
 
-	public String getFontWeight() {
-		return (isBold != null && isBold) ? AMIFontManager.BOLD : null;
-	}
+//	public String getFontWeight() {
+//		return (isBold != null && isBold) ? AMIFontManager.BOLD : null;
+//	}
 
 	public String getFontName() {
 		return fontName;
 		return fontType;
 	}
 	
-	public boolean isSymbol() {
-		return (isSymbol == null) ? false : isSymbol;
-	}
+//	public boolean isSymbol() {
+//		return (isSymbol == null) ? false : isSymbol;
+//	}
 	
 	public String getBaseFont() {
 		return baseFont;
 	}
 
-	public FontFamily getFontFamily() {
-		// TODO Auto-generated method stub
-		return null;
-	}
-
-	public Boolean isItalic() {
-		return isItalic;
-	}
+//	public Boolean isItalic() {
+//		return isItalic;
+//	}
 	
-	public Boolean isBold() {
-		return isBold;
-	}
+//	public Boolean isBold() {
+//		return isBold;
+//	}
 
 	public Map<String, String> getPathStringByCharnameMap() {
 		ensurePathStringByCharnameMap();
 
 	public static PDFontDescriptor getDescendantFontDescriptor(PDFont pdFont) {
 		PDFontDescriptor fd = null;
+		PDFont descendantFont = getFirstDescendantFont(pdFont);
+		fd = (descendantFont == null) ? null : descendantFont.getFontDescriptor();
+		// fd is null when there is no descendant font (or it has no descriptor);
+		// only dereference it for the trace message when it is present
+		LOG.trace("fd ("+(fd == null ? null : fd.getFontName())+") "+fd);
+		return fd;
+	}
+
+	/** creates the first descendant font of a (composite) font
+	 * 
+	 * @param pdFont font whose COS dictionary may hold a DescendantFonts array
+	 * @return first descendant font, or null if the dictionary or the DescendantFonts
+	 *         array is missing or empty, or the font cannot be created
+	 */
+	public static PDFont getFirstDescendantFont(PDFont pdFont) {
 		COSDictionary dict = (COSDictionary) pdFont.getCOSObject();
-		COSArray array = (COSArray) dict.getDictionaryObject(COSName.DESCENDANT_FONTS);
-		PDFont descendantFont;
+		COSArray array = dict == null ? null : (COSArray) dict.getDictionaryObject(COSName.DESCENDANT_FONTS);
+		PDFont descendantFont = null;
 		try {
-			descendantFont = PDFontFactory.createFont((COSDictionary) array.getObject(0));
-			fd = descendantFont.getFontDescriptor();
-			LOG.trace("fd ("+fd.getFontName()+") "+fd);
+			// an empty DescendantFonts array has no element 0 to build a font from
+			descendantFont = (array == null || array.size() == 0) ? null : PDFontFactory.createFont((COSDictionary) array.getObject(0));
 		} catch (IOException e) {
-			LOG.error("****************** Can't create descendant font!");
+			// pass the exception along so its cause is not lost from the log
+			LOG.error("****************** Can't create descendant font! for "+pdFont, e);
 		}
-		return fd;
+		return descendantFont;
 	}
 
 	public static PDFontDescriptor getFontDescriptorOrDescendantFontDescriptor(PDFont pdFont) {
 		PDFontDescriptor fd = pdFont.getFontDescriptor();
-//		COSBase cosBase = ((PDSimpleFont) pdFont).getToUnicode();
-//		System.out.println("ToUnicode: "+((cosBase== null) ? "NULL" : cosBase));
+//		getToUnicode(pdFont);
 		if (fd == null && pdFont instanceof PDType0Font) {
 			fd = AMIFont.getDescendantFontDescriptor(pdFont);
 		}
 		return fd;
 	}
 
-
+	/** gets the ToUnicode entry of the wrapped font
+	 * 
+	 * @return the ToUnicode object as a COSDictionary, or null if there is no wrapped
+	 *         font, it is not a PDSimpleFont, or its ToUnicode entry is absent or is
+	 *         not a dictionary
+	 */
+	public COSDictionary getToUnicode() {
+		// pdFont is null for fonts built from names only; the other delegates return null then
+		if (!(pdFont instanceof PDSimpleFont)) {
+			return null;
+		}
+		COSBase toUnicode = ((PDSimpleFont) pdFont).getToUnicode();
+		return (toUnicode instanceof COSDictionary) ? (COSDictionary) toUnicode : null;
+	}
 
 	public String toString() {
 
 		StringBuilder sb = new StringBuilder();
 		sb.append("isBold: ");
-		sb.append(isBold);
+		sb.append(isForceBold());
 		sb.append("; isItalic: ");
-		sb.append(isItalic);
+		sb.append(isItalic());
 		sb.append("; isSymbol: ");
-		sb.append(isSymbol);
+		sb.append(isSymbolic());
 		sb.append("; fontFamilyName: ");
 		sb.append(fontFamilyName);
 		sb.append("; fontName: ");
 		sb.append(fontEncoding);
 		sb.append("; baseFont: ");
 		sb.append(baseFont);
+		sb.append("\n");
+		sb.append("; dictionary: ");
+		sb.append(dictionary);
+		sb.append("; dictionaryName: ");
+		sb.append(dictionaryName);
+		sb.append("; dictionaryArray: ");
+		sb.append(dictionaryArray);
+		sb.append("; dictionaryDictionary: ");
+		sb.append(dictionaryDictionary);
+		sb.append("; dictionaryInteger: ");
+		sb.append(dictionaryInteger);
+		sb.append("\n");
+		sb.append("; isFixedPitch(): ");
+		sb.append(isFixedPitch());
+		sb.append("\n");
+		sb.append("; isHeuristicBold(): ");
+		sb.append(isHeuristicBold());
+		sb.append("; isHeuristicFixedPitch(): ");
+		sb.append(isHeuristicFixedPitch());
 		
 		return sb.toString();
 	}
 	public PDFontDescriptor getFontDescriptor() {
 		return fontDescriptor;
 	}
+
+	/** delegates from dictionary
+	 */
+	public COSDictionary getDictionaryDictionary() {
+		return dictionaryDictionary;
+	}
+
+	public COSInteger getDictionaryInteger() {
+		return dictionaryInteger;
+	}
+
+	public COSName getDictionaryName() {
+		return dictionaryName;
+	}
 	
+	/** delegates from PDFont
+	 */
+	public Float getFontWidth(byte[] c, int offset, int length)
+			throws IOException {
+		return pdFont == null ? null : pdFont.getFontWidth(c, offset, length);
+	}
+
+	public Float getFontHeight(byte[] c, int offset, int length)
+			throws IOException {
+		return pdFont == null ? null : pdFont.getFontHeight(c, offset, length);
+	}
+
+	public Float getStringWidth(String string) throws IOException {
+		return pdFont == null ? null : pdFont.getStringWidth(string);
+	}
+
+	public Float getAverageFontWidth() throws IOException {
+		return pdFont == null ? null : pdFont.getAverageFontWidth();
+	}
+
+	public String encode(byte[] c, int offset, int length) throws IOException {
+		return pdFont == null ? null : pdFont.encode(c, offset, length);
+	}
+
+	public Integer encodeToCID(byte[] c, int offset, int length) throws IOException {
+		return pdFont == null ? null : pdFont.encodeToCID(c, offset, length);
+	}
+
+	public String getSubType() {
+		return pdFont == null ? null : pdFont.getSubType();
+	}
+
+	public List<Float> getWidths() {
+		return pdFont == null ? null : pdFont.getWidths();
+	}
+
+	public PDMatrix getFontMatrix() {
+		return pdFont == null ? null : pdFont.getFontMatrix();
+	}
+
+	public PDRectangle getFontBoundingBox() throws IOException {
+		PDRectangle pdRect = null;
+		pdRect = fontDescriptor == null ? null : fontDescriptor.getFontBoundingBox();
+		return pdRect != null ? pdRect : ((pdFont == null) ? null : pdFont.getFontBoundingBox());
+	}
+
+	public Float getFontWidth(int charCode) {
+		return pdFont == null ? null : pdFont.getFontWidth(charCode);
+	}
+
+	/** delegates from fontDescriptor */
+	public String getFontStretch() {
+		return fontDescriptor == null ? null : fontDescriptor.getFontStretch();
+	}
+
+	public Float getFontWeightFloat() {
+		return fontDescriptor == null ? null : fontDescriptor.getFontWeight();
+	}
+
+	public String getFontFamilyString() {
+		return fontDescriptor == null ? null : fontDescriptor.getFontFamily();
+	}
+
+	public Integer getFlags() {
+		return fontDescriptor == null ? null : fontDescriptor.getFlags();
+	}
+
+	public Boolean isFixedPitch() {
+		return fontDescriptor == null ? null : fontDescriptor.isFixedPitch();
+	}
+
+	public Boolean isSerif() {
+		return fontDescriptor == null ? null : fontDescriptor.isSerif();
+	}
+
+	public Boolean isSymbolic() {
+		return fontDescriptor == null ? null : fontDescriptor.isSymbolic();
+	}
+
+	public Boolean isScript() {
+		return fontDescriptor == null ? null : fontDescriptor.isScript();
+	}
+
+	public Boolean isNonSymbolic() {
+		return fontDescriptor == null ? null : fontDescriptor.isNonSymbolic();
+	}
+
+	public Boolean isItalic() {
+		return fontDescriptor == null ? null : fontDescriptor.isItalic();
+	}
+
+	public Boolean isAllCap() {
+		return fontDescriptor == null ? null : fontDescriptor.isAllCap();
+	}
+
+	public Boolean isSmallCap() {
+		return fontDescriptor == null ? null : fontDescriptor.isSmallCap();
+	}
+
+	public Boolean isForceBold() {
+		return fontDescriptor == null ? null : fontDescriptor.isForceBold();
+	}
+
+	public Float getItalicAngle() {
+		return fontDescriptor == null ? null : fontDescriptor.getItalicAngle();
+	}
+
+	public Float getAscent() {
+		return fontDescriptor == null ? null : fontDescriptor.getAscent();
+	}
+
+	public Float getDescent() {
+		return fontDescriptor == null ? null : fontDescriptor.getDescent();
+	}
+
+	public Float getLeading() {
+		return fontDescriptor == null ? null : fontDescriptor.getLeading();
+	}
+
+	public Float getCapHeight() {
+		return fontDescriptor == null ? null : fontDescriptor.getCapHeight();
+	}
+
+	public Float getXHeight() {
+		return fontDescriptor == null ? null : fontDescriptor.getXHeight();
+	}
+
+	public Float getStemV() {
+		return fontDescriptor == null ? null : fontDescriptor.getStemV();
+	}
+
+	public Float getStemH() {
+		return fontDescriptor == null ? null : fontDescriptor.getStemH();
+	}
+
+	public Float getAverageWidth() throws IOException {
+		return fontDescriptor == null ? null : fontDescriptor.getAverageWidth();
+	}
+
+	public Float getMaxWidth() {
+		return fontDescriptor == null ? null : fontDescriptor.getMaxWidth();
+	}
+
+	public String getCharSet() {
+		return fontDescriptor == null ? null : fontDescriptor.getCharSet();
+	}
+
+	public Float getMissingWidth() {
+		return fontDescriptor == null ? null : fontDescriptor.getMissingWidth();
+	}
+
+	public AMIFontFamily getAMIFontFamily() {
+		String ff = getFontFamilyString();
+		return ff == null ? null : new AMIFontFamily(ff);
+	}
+
+	/** guesses bold from name
+	 * 
+	 * @return
+	 */
+	public boolean isHeuristicBold() {
+		boolean bold = fontName.toLowerCase().contains("bold") || fontName.toLowerCase().contains(".b");
+		return bold;
+	}
+
+	/** guesses italic from name
+	 * 
+	 * @return
+	 */
+	public boolean isHeuristicItalic() {
+		boolean bold = fontName.toLowerCase().contains("ital") || fontName.toLowerCase().contains(".i");
+		return bold;
+	}
+
+	/** guesses fixed pitch from name
+	 * 
+	 * @return true if fontFamilyName or fontName contains "cmtt" (ignoring case);
+	 *         a null name is treated as not matching
+	 */
+	public boolean isHeuristicFixedPitch() {
+		// either name may be unset; toString() calls this, so it must not throw
+		boolean familyMatches = fontFamilyName != null
+				&& fontFamilyName.toLowerCase(java.util.Locale.ROOT).contains("cmtt");
+		boolean nameMatches = fontName != null
+				&& fontName.toLowerCase(java.util.Locale.ROOT).contains("cmtt");
+		return familyMatches || nameMatches;
+	}
+
 }

File src/main/java/org/xmlcml/pdf2svg/AMIFontFamily.java

+/**
+ * Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.xmlcml.pdf2svg;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.log4j.Logger;
+import org.xmlcml.euclid.Util;
+
+import nu.xom.Attribute;
+import nu.xom.Builder;
+import nu.xom.Element;
+import nu.xom.Elements;
+import nu.xom.ParsingException;
+import nu.xom.ValidityException;
+
+/** manages a generic set of fonts
+ * should not depend on prefix, bold, italic, MT or PS suffixes, etc.
+ * 
+ * A family is created either from a name or from a fontFamily XML element
+ * (see createFromElement()) and can be written back with createElement().
+ * It may carry a CodePointSet used to map character names to Unicode.
+ * 
+ * NOTE(review): the examples below use a "font" element with a "family" attribute,
+ * whereas createFromElement() requires a "fontFamily" element with a "name"
+ * attribute - confirm which form is current.
+ * @author pm286
+ *
+ // standard
+    <font family="Courier" fontType="PDType1Font" note="a standard14 font" serif="yes" unicode="yes"/>
+    or
+  // non-standard
+    <font family="FooBar" fontType="PDType1Font" standardFont="Helvetica" note="" serif="" unicode="guessed"/>
+
+ *
+ */
+public class AMIFontFamily {
+
+	public final static Logger LOG = Logger.getLogger(AMIFontFamily.class);
+	// XML
+	public static final String CODE_POINT_SET = "codePointSet";
+	public final static String FONT_FAMILY = "fontFamily";
+	public static final String NAME = "name";
+	public static final String FONT_TYPE = "fontType";
+	// NOTE(review): MONOSPACED and SERIF hold font names but are used below as XML
+	// attribute names, so the attributes read/written are "Courier" and "TimesNewRoman",
+	// not "monospaced"/"serif" as the class comment suggests - confirm intent before changing
+	public static final String MONOSPACED = "Courier";
+	public static final String NOTE = "note";
+	public static final String SERIF = "TimesNewRoman";
+	public static final String SANS_SERIF = "Helvetica";
+	public static final String STANDARD_FONT = "standardFont";
+	public static final String UNICODE = "unicode";
+
+	private String name;
+	private String fontType;
+	private String standardFont;
+	private String unicode;
+	private String serif;
+	private String monospaced;
+	private String note;
+	private CodePointSet codePointSet;
+
+	/** creates a family with no name; createElement() will fail until setName() is called
+	 */
+	public AMIFontFamily() {
+		
+	}
+
+	/** creates a family with the given name
+	 * 
+	 * @param name family name
+	 */
+	public AMIFontFamily(String name) {
+		this.name = name;
+	}
+
+	/** reads a family from a fontFamily element
+	 * 
+	 * @param fontFamilyElement element whose local name must be FONT_FAMILY and which
+	 *        must have a NAME attribute; the other attributes are optional
+	 * @return the new family, with its CodePointSet read if a CODE_POINT_SET attribute is present
+	 * @throws RuntimeException wrapping any failure (null or wrong element, missing name,
+	 *         unreadable codePointSet)
+	 */
+	public static AMIFontFamily createFromElement(Element fontFamilyElement) {
+		AMIFontFamily fontFamily = null;
+		try {
+			fontFamily = new AMIFontFamily();
+			if (!(FONT_FAMILY.equals(fontFamilyElement.getLocalName()))) {
+				throw new RuntimeException("FontFamilySet children must be: "+FONT_FAMILY);
+			}
+			fontFamily.name = fontFamilyElement.getAttributeValue(NAME);
+			if (fontFamily.name == null) {
+				throw new RuntimeException("<fontFamily> must have name attribute");
+			}
+			fontFamily.fontType = fontFamilyElement.getAttributeValue(FONT_TYPE);
+			fontFamily.standardFont = fontFamilyElement.getAttributeValue(STANDARD_FONT);
+			fontFamily.unicode = fontFamilyElement.getAttributeValue(UNICODE);
+			fontFamily.serif = fontFamilyElement.getAttributeValue(SERIF);
+			fontFamily.monospaced = fontFamilyElement.getAttributeValue(MONOSPACED);
+			fontFamily.note = fontFamilyElement.getAttributeValue(NOTE);
+			String codePointSetName = fontFamilyElement.getAttributeValue(CODE_POINT_SET);
+			if (codePointSetName != null) {
+				CodePointSet codePointSet = CodePointSet.readCodePointSet(codePointSetName);
+				if (codePointSet == null) {
+					throw new RuntimeException("Cannot read codePointSet: "+codePointSetName);
+				}
+				fontFamily.setCodePointSet(codePointSet);
+				LOG.trace("CPS: "+fontFamily.getCodePointSet());
+			}
+		} catch (Exception e) {
+			// a null element reaches here as a NullPointerException, hence the null check
+			throw new RuntimeException("invalid FontFamilyElement: "+((fontFamilyElement == null) ? null : fontFamilyElement.toXML()), e);
+		}
+		return fontFamily;
+	}
+
+	private void setCodePointSet(CodePointSet codePointSet) {
+		this.codePointSet = codePointSet;
+	}
+
+	/** writes this family as a fontFamily element
+	 * only non-null fields become attributes; the codePointSet is not written
+	 * 
+	 * @return new element
+	 * @throws RuntimeException if name is null
+	 */
+	public Element createElement() {
+		Element fontFamilyElement = new Element(FONT_FAMILY);
+		if (name == null) {
+			throw new RuntimeException("familyName must not be null");
+		}
+		fontFamilyElement.addAttribute(new Attribute(NAME, name));
+		if (standardFont != null) {
+			fontFamilyElement.addAttribute(new Attribute(STANDARD_FONT, standardFont));
+		}
+		if (note != null) {
+			fontFamilyElement.addAttribute(new Attribute(NOTE, note));
+		}
+		if (unicode != null) {
+			fontFamilyElement.addAttribute(new Attribute(UNICODE, unicode));
+		}
+		if (serif != null) {
+			fontFamilyElement.addAttribute(new Attribute(SERIF, serif));
+		}
+		if (monospaced != null) {
+			fontFamilyElement.addAttribute(new Attribute(MONOSPACED, monospaced));
+		}
+		if (fontType != null) {
+			fontFamilyElement.addAttribute(new Attribute(FONT_TYPE, fontType));
+		}
+		return fontFamilyElement;
+	}
+
+	public String getUnicode() {
+		return unicode;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	public void setName(String name) {
+		this.name = name;
+	}
+
+	public CodePointSet getCodePointSet() {
+		return codePointSet;
+	}
+
+	/** looks up a character name in this family's codePointSet
+	 * 
+	 * @param charname character name
+	 * @return unicode value of the matching CodePoint, or null if there is no
+	 *         codePointSet or no CodePoint with that name
+	 */
+	public String convertSymbol2UnicodeValue(String charname) {
+		String unicodeValue = null;
+		if (codePointSet != null) {
+			CodePoint codePoint = codePointSet.getByName(charname);
+			unicodeValue = (codePoint == null) ? null : codePoint.getUnicodeValue(); 
+		}
+		return unicodeValue;
+	}
+
+	/** looks up a character name and returns the first code point of its unicode value
+	 * 
+	 * @param charname character name
+	 * @return first Unicode code point of the value, or null if not found or empty
+	 */
+	public Integer convertSymbol2UnicodePoint(String charname) {
+		String unicodeValue = convertSymbol2UnicodeValue(charname);
+		if (unicodeValue == null || unicodeValue.length() == 0) {
+			return null;
+		}
+		// codePointAt (not charAt) so a supplementary character is returned whole
+		// and not as its high surrogate
+		return Integer.valueOf(unicodeValue.codePointAt(0));
+	}
+
+}

File src/main/java/org/xmlcml/pdf2svg/AMIFontManager.java

 		String fontName = AMIFont.getFontName(dict);
 		
 		String typeS = null;
-		boolean isSymbol = false;
 		amiFont = getAmiFontByFontName(fontName);
 		if (amiFont == null) {
 			// some confusion here between fontName and fontFamilyName
-			amiFont = new AMIFont(fontName, null, typeS, isSymbol, dict);
+			amiFont = new AMIFont(fontName, null, typeS, dict);
 			amiFont.setFontName(fontName);
 			amiFontByFontNameMap.put(fontName, amiFont);
 	
 	 * individual AMIFonts
 	 * @param symbol2UnicodeResource
 	 */
-	public void ensureSymbol2UnicodeHackMap(String symbol2UnicodeResource) {
-		if (symbol2UnicodeHackMap == null) {
-			symbol2UnicodeHackMap = new HashMap<String, Integer>();
-			try {
-				InputStream is = Util.getResourceUsingContextClassLoader(symbol2UnicodeResource, this.getClass());
-				Element conversionElement = new Builder().build(is).getRootElement();
-				Elements charDataElements = conversionElement.getChildElements();
-				for (int i = 0; i < charDataElements.size(); i++) {
-					Element charDataElement = charDataElements.get(i);
-					String charname = charDataElement.getAttributeValue(CHARNAME);
-					String unicodeS = charDataElement.getAttributeValue(CODEPOINT);
-					Integer unicodePoint = new Integer(unicodeS);
-					symbol2UnicodeHackMap.put(charname, unicodePoint);
-				}
-			} catch (Exception e) {
-				throw new RuntimeException("Cannot read/parse symbolConverter: "+symbol2UnicodeResource, e);
-			}
-		}
-	}
+//	private void ensureSymbol2UnicodeHackMap(String symbol2UnicodeResource) {
+//		if (symbol2UnicodeHackMap == null) {
+//			symbol2UnicodeHackMap = new HashMap<String, Integer>();
+//			try {
+//				InputStream is = Util.getResourceUsingContextClassLoader(symbol2UnicodeResource, this.getClass());
+//				Element conversionElement = new Builder().build(is).getRootElement();
+//				Elements charDataElements = conversionElement.getChildElements();
+//				for (int i = 0; i < charDataElements.size(); i++) {
+//					Element charDataElement = charDataElements.get(i);
+//					String charname = charDataElement.getAttributeValue(CHARNAME);
+//					String unicodeS = charDataElement.getAttributeValue(CODEPOINT);
+//					Integer unicodePoint = new Integer(unicodeS);
+//					symbol2UnicodeHackMap.put(charname, unicodePoint);
+//				}
+//			} catch (Exception e) {
+//				throw new RuntimeException("Cannot read/parse symbolConverter: "+symbol2UnicodeResource, e);
+//			}
+//		}
+//	}
 	
-	/** convert a text symbol to Unicode codepoint via PDFBox's StandardEncoding.INSTANCE
-	 * Thus "two" is converted to 50 (0X32) or character '2'
-	 * we test that all StandardEncoding returns are single characters and assume this
-	 * as test of correct interpretation. If any chars are ligatures ('ffl'we'll deal with that
-	 * when it comes up)
-	 * @param symbol e.g. "two", "A", "comma"
-	 * @return codepoint 50, 65, 44 or null if not converted
-	 */
-	public static Integer convertSymbol2UnicodeStandard(String symbol) {
-		Integer codePoint = null;
-		String s = (symbol == null) ? null : convertToUnicodeWithPDFStandardEncoding(symbol);
-		if (s != null) {
-			// all converted characters should have length 1
-			if (s.length() == 1) {
-				LOG.trace(symbol+" => "+s);
-				codePoint = new Integer((int) s.charAt(0));
-			} else {
-				// for ?unknown? charnames - we may need glyphs
-				s = null; 
-			}
-		}
-		return codePoint;
-	}
+//	/** convert a text symbol to Unicode codepoint via PDFBox's StandardEncoding.INSTANCE
+//	 * Thus "two" is converted to 50 (0X32) or character '2'
+//	 * we test that all StandardEncoding returns are single characters and assume this
+//	 * as test of correct interpretation. If any chars are ligatures ('ffl'we'll deal with that
+//	 * when it comes up)
+//	 * @param symbol e.g. "two", "A", "comma"
+//	 * @return codepoint 50, 65, 44 or null if not converted
+//	 */
+//	private static Integer convertSymbol2UnicodeStandard(String symbol) {
+//		Integer codePoint = null;
+//		String s = (symbol == null) ? null : convertToUnicodeWithPDFStandardEncoding(symbol);
+//		if (s != null) {
+//			// all converted characters should have length 1
+//			if (s.length() == 1) {
+//				LOG.trace(symbol+" => "+s);
+//				codePoint = new Integer((int) s.charAt(0));
+//			} else {
+//				// for ?unknown? charnames - we may need glyphs
+//				s = null; 
+//			}
+//		}
+//		return codePoint;
+//	}
 
 	/** uses PDFBox list of standard symbols to convert to characters.
 	 * e.g. "two" converts to "2" (unicode codePoint 50)
 		return StandardEncoding.INSTANCE.getCharacter(symbol);
 	}
 	
-	/** has a messy collection of character names from MathematicalPi, Cddd and elsewhere
-	 * no guarantee of uniqueness
-	 * FIXME should be tied to individual fonts asap
-	 * 
-	 * @param symbol
-	 * @return
-	 */
-	public Integer convertSymbol2UnicodeHack(String symbol, String fontF) {
-		Integer codePoint = null;
-		if (symbol != null) {
-			codePoint = symbol2UnicodeHackMap.get(symbol);
-			LOG.trace("Used lashed-up symbol2UnicodeMap FIX THIS : "+symbol+" => "+((codePoint == null) ? null : (char)(int)codePoint));
-		}
-		return codePoint;
-	}
+//	/** has a messy collection of character names from MathematicalPi, Cddd and elsewhere
+//	 * no guarantee of uniqueness
+//	 * FIXME should be tied to individual fonts asap
+//	 * 
+//	 * @param symbol
+//	 * @return
+//	 */
+//	private Integer convertSymbol2UnicodeHack(String symbol, String fontF) {
+//		Integer codePoint = null;
+//		if (symbol != null) {
+//			codePoint = symbol2UnicodeHackMap.get(symbol);
+//			LOG.trace("Used lashed-up symbol2UnicodeMap FIX THIS : "+symbol+" => "+((codePoint == null) ? null : (char)(int)codePoint));
+//		}
+//		return codePoint;
+//	}
 
 	public static Map<String, AMIFont> readAmiFonts() {
 		return readAmiFonts(FontFamilySet.STANDARD_FONT_FAMILY_SET_XML);
 				if (fontMap.get(familyName) != null) {
 					throw new RuntimeException("AMIFont map ("+resourceName+") already contains family: "+familyName);
 				}
-				String symbol = amiFontElement.getAttributeValue(IS_SYMBOL);
-				Boolean isSymbol = (symbol == null) ? false : new Boolean(symbol);
-				AMIFont amiFont = new AMIFont(familyName, encoding, type, isSymbol);
+//				String symbol = amiFontElement.getAttributeValue(IS_SYMBOL);
+//				Boolean isSymbol = (symbol == null) ? false : new Boolean(symbol);
+				AMIFont amiFont = new AMIFont(familyName, encoding, type);
 				fontMap.put(familyName, amiFont);
 			}
 		} catch (Exception e) {
 				font.addAttribute(new Attribute(FONT_ENCODING, encoding));
 			}
 			font.addAttribute(new Attribute(TYPE, amiFont.getFontType()));
-			Boolean isSymbol = amiFont.isSymbol();
-			if (isSymbol != null) {
-				font.addAttribute(new Attribute(IS_SYMBOL, isSymbol.toString()));
-			}
+//			Boolean isSymbol = amiFont.isSymbol();
+//			if (isSymbol != null) {
+//				font.addAttribute(new Attribute(IS_SYMBOL, isSymbol.toString()));
+//			}
 		}
 		return fontList;
 	}
 
-	public FontFamily getFontFamily(String fontFamilyName) {
-		FontFamily fontFamily = standardFontFamilySet.getFontFamilyByName(fontFamilyName);
+	public AMIFontFamily getFontFamilyByFamilyName(String fontFamilyName) {
+		AMIFontFamily fontFamily = standardFontFamilySet.getFontFamilyByName(fontFamilyName);
 		if (fontFamily == null) {
 			fontFamily = nonStandardFontFamilySet.getFontFamilyByName(fontFamilyName);
 		}
 		return fontFamily;
 	}
 
-	public FontFamily recordExistingOrAddNewFontFamily(String fontName, AMIFont amiFont) {
+	public AMIFontFamily recordExistingOrAddNewFontFamily(String fontName, AMIFont amiFont) {
 		String fontFamilyName = amiFont.getFontFamilyName();
-		FontFamily fontFamily = amiFont.getFontFamily();
+		AMIFontFamily fontFamily = amiFont.getAMIFontFamily();
 		if (standardFontFamilySet.containsKey(fontFamilyName)) {
 			LOG.trace(fontFamilyName+" is a standard FontFamily");
 		} else if (nonStandardFontFamilySet.containsKey(fontFamilyName)) {
 			LOG.trace(fontName+" is being added as new FontFamily ("+fontFamilyName+")");
 			if (fontFamily == null) {
 				LOG.trace("ami: "+amiFont.toString());
-				fontFamily = new FontFamily();
+				fontFamily = new AMIFontFamily();
 				fontFamily.setName(""+fontName);
 				LOG.trace("created new FontFamily: "+fontFamilyName);
 			}

File src/main/java/org/xmlcml/pdf2svg/CodePoint.java

 	static final String TAG = "codePoint";
 	
 	private static final String DECIMAL = "decimal";
+	private static final String HEX = "hex";
 	private static final String NAME = "name";
 	private static final String NOTE = "note";
 	private static final String REPLACE_BY_UNICODE = "replaceByUnicode";
 				throw new RuntimeException("CodePointSet children must be <codePoint>");
 			}
 			String decimalS = codePointElement.getAttributeValue(DECIMAL);
+			String hexS = codePointElement.getAttributeValue(HEX);
 			codePoint.name = codePointElement.getAttributeValue(NAME);
-			if (decimalS == null && codePoint.name == null) {
-				throw new RuntimeException("<codePoint> must have decimal attribute and/or name");
+			if ((decimalS == null && hexS == null) && codePoint.name == null) {
+				throw new RuntimeException("<codePoint> must have decimal-or-hex attribute and/or name");
 			}
 			if (decimalS != null) {
-				codePoint.nonUnicodeDecimal = new Integer(decimalS); 
+				Integer decimal = new Integer(decimalS); 
+				codePoint.nonUnicodeDecimal = decimal; 
+			} else if (hexS != null) {
+				Integer decimal = Integer.parseInt(hexS, 16); 
+				codePoint.nonUnicodeDecimal = decimal; 
 			}
 			codePoint.unicodePoint = UnicodePoint.createUnicodeValue(codePointElement.getAttributeValue(UNICODE));
 			if (codePoint.unicodePoint == null) {

File src/main/java/org/xmlcml/pdf2svg/CodePointSet.java

 		}
 	}
 
+	/** tests whether this codePointSet is declared as Unicode-encoded.
+	 * 
+	 * @return true if the encoding of this set equals the UNICODE constant;
+	 *         false otherwise (including when the encoding is null)
+	 */
+	public boolean isUnicodeEncoded() {
+		return UNICODE.equals(encoding);
+	}
 	
 }

File src/main/java/org/xmlcml/pdf2svg/FontFamily.java

-/**
- * Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.xmlcml.pdf2svg;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-
-import org.apache.log4j.Logger;
-import org.xmlcml.euclid.Util;
-
-import nu.xom.Attribute;
-import nu.xom.Builder;
-import nu.xom.Element;
-import nu.xom.Elements;
-import nu.xom.ParsingException;
-import nu.xom.ValidityException;
-
-/** manages a generic set of fonts
- * should not depend on prefix, bold, italic, MT or PS suffixes, etc.
- * @author pm286
- *
- // standard
-    <font family="Courier" fontType="PDType1Font" note="a standard14 font" serif="yes" unicode="yes"/>
-    or
-  // non-standard
-    <font family="FooBar" fontType="PDType1Font" standardFont="Helvetica" note="" serif="" unicode="guessed"/>
-
- *
- */
-public class FontFamily {
-
-	public final static Logger LOG = Logger.getLogger(FontFamily.class);
-	// XML
-	public static final String CODE_POINT_SET = "codePointSet";
-	public final static String FONT_FAMILY = "fontFamily";
-	public static final String NAME = "name";
-	public static final String FONT_TYPE = "fontType";
-	public static final String MONOSPACED = "monospaced";
-	public static final String NOTE = "note";
-	public static final String SERIF = "serif";
-	public static final String STANDARD_FONT = "standardFont";
-	public static final String UNICODE = "unicode";
-
-	private String name;
-	private String fontType;
-	private String standardFont;
-	private String unicode;
-	private String serif;
-	private String monospaced;
-	private String note;
-	private CodePointSet codePointSet;
-
-	public FontFamily() {
-		
-	}
-
-	public static FontFamily createFromElement(Element fontFamilyElement) {
-		FontFamily fontFamily = null;
-		try {
-			fontFamily = new FontFamily();
-			if (!(FONT_FAMILY.equals(fontFamilyElement.getLocalName()))) {
-				throw new RuntimeException("FontFamilySet children must be: "+FONT_FAMILY);
-			}
-			fontFamily.name = fontFamilyElement.getAttributeValue(NAME);
-			if (fontFamily.name == null) {
-				throw new RuntimeException("<fontFamily> must have name attribute");
-			}
-			fontFamily.fontType = fontFamilyElement.getAttributeValue(FONT_TYPE);
-			fontFamily.standardFont = fontFamilyElement.getAttributeValue(STANDARD_FONT);
-			fontFamily.unicode = fontFamilyElement.getAttributeValue(UNICODE);
-			fontFamily.serif = fontFamilyElement.getAttributeValue(SERIF);
-			fontFamily.monospaced = fontFamilyElement.getAttributeValue(MONOSPACED);
-			fontFamily.note = fontFamilyElement.getAttributeValue(NOTE);
-			String codePointSetName = fontFamilyElement.getAttributeValue(CODE_POINT_SET);
-			if (codePointSetName != null) {
-				CodePointSet codePointSet = CodePointSet.readCodePointSet(codePointSetName);
-				if (codePointSet == null) {
-					throw new RuntimeException("Cannot read codePointSet: "+codePointSetName);
-				}
-				fontFamily.setCodePointSet(codePointSet);
-				LOG.trace("CPS: "+fontFamily.getCodePointSet());
-			}
-		} catch (Exception e) {
-			throw new RuntimeException("invalid FontFamilyElement: "+((fontFamilyElement == null) ? null : fontFamilyElement.toXML()), e);
-		}
-		return fontFamily;
-	}
-
-	private void setCodePointSet(CodePointSet codePointSet) {
-		this.codePointSet = codePointSet;
-	}
-
-	public Element createElement() {
-		Element FontFamilyElement = new Element(FONT_FAMILY);
-		if (name == null) {
-			throw new RuntimeException("familyName must not be null");
-		}
-		FontFamilyElement.addAttribute(new Attribute(NAME, ""+name));
-		if (standardFont != null) {
-			FontFamilyElement.addAttribute(new Attribute(STANDARD_FONT, standardFont));
-		}
-		if (note != null) {
-			FontFamilyElement.addAttribute(new Attribute(NOTE, note));
-		}
-		if (unicode != null) {
-			FontFamilyElement.addAttribute(new Attribute(UNICODE, unicode));
-		}
-		if (serif != null) {
-			FontFamilyElement.addAttribute(new Attribute(SERIF, serif));
-		}
-		if (monospaced != null) {
-			FontFamilyElement.addAttribute(new Attribute(MONOSPACED, monospaced));
-		}
-		if (fontType != null) {
-			FontFamilyElement.addAttribute(new Attribute(FONT_TYPE, fontType));
-		}
-		return FontFamilyElement;
-	}
-
-	public String getUnicode() {
-		return unicode;
-	}
-
-	public String getName() {
-		return name;
-	}
-
-	public void setName(String name) {
-		this.name = name;
-	}
-
-	public CodePointSet getCodePointSet() {
-		return codePointSet;
-	}
-
-	public String convertSymbol2UnicodeValue(String charname) {
-		String unicodeValue = null;
-		if (codePointSet != null) {
-			CodePoint codePoint = codePointSet.getByName(charname);
-			unicodeValue = (codePoint == null) ? null : codePoint.getUnicodeValue(); 
-		}
-		return unicodeValue;
-	}
-
-	public Integer convertSymbol2UnicodePoint(String charname) {
-		String unicodeValue = convertSymbol2UnicodeValue(charname);
-		return (unicodeValue == null || unicodeValue.length() == 0) ? null : (Integer) (int) unicodeValue.charAt(0);
-	}
-
-}

File src/main/java/org/xmlcml/pdf2svg/FontFamilySet.java

 	public static final String STANDARD_FONT_FAMILY_SET_XML = FONT_FAMILY_DIR+"/"+"standardFontFamilySet.xml";
 	public static final String NON_STANDARD_FONT_FAMILY_SET_XML = FONT_FAMILY_DIR+"/"+"nonStandardFontFamilySet.xml";
 
-	private Map<String, FontFamily> fontFamilyByFamilyName;
+	private Map<String, AMIFontFamily> fontFamilyByFamilyName;
 
 	public FontFamilySet() {
 		ensureMaps();
 
 	private void ensureMaps() {
 		if (fontFamilyByFamilyName == null) {
-			fontFamilyByFamilyName = new HashMap<String, FontFamily>();
+			fontFamilyByFamilyName = new HashMap<String, AMIFontFamily>();
 		}
 	}
 
 	 * @return
 	 */
 	public static FontFamilySet readFontFamilySet(String fontFamilySetXmlResource) {
-		LOG.debug("FFS"+fontFamilySetXmlResource);
+		LOG.trace("FFS"+fontFamilySetXmlResource);
 		FontFamilySet fontFamilySet = null;
 		try {
 			Element fontFamilySetElement = new Builder().build(
 		Elements childElements = fontFamilySetElement.getChildElements();
 		for (int i = 0; i < childElements.size(); i++) {
 			Element fontFamilyElement = childElements.get(i);
-			FontFamily fontFamily = FontFamily.createFromElement(fontFamilyElement);
+			AMIFontFamily fontFamily = AMIFontFamily.createFromElement(fontFamilyElement);
 			if (fontFamily == null) {
 				throw new RuntimeException("Cannot read/parse fontFamilyElement: "+((fontFamilyElement == null) ? null : fontFamilyElement.toXML()));
 			}
 		return fontFamilyByFamilyName.containsKey(name);
 	}
 
-	FontFamily getFontFamilyByName(String fontFamilyName) {
+	AMIFontFamily getFontFamilyByName(String fontFamilyName) {
 		return fontFamilyByFamilyName.get(fontFamilyName);
 	}
 
-	void add(String fontFamilyName, FontFamily fontFamily) {
+	void add(String fontFamilyName, AMIFontFamily fontFamily) {
 		if (fontFamily == null) {
 			throw new RuntimeException("Cannot add null fontFamily");
 		}
 	Element createElement() {
 		Element fontsElement = new Element(FONT_FAMILY_SET);
 		for (String fontFamilyName : fontFamilyByFamilyName.keySet()) {
-			FontFamily fontFamily = fontFamilyByFamilyName.get(fontFamilyName);
+			AMIFontFamily fontFamily = fontFamilyByFamilyName.get(fontFamilyName);
 			if (fontFamily == null) {
 				throw new RuntimeException("BUG null fontFamily should never happen: ");
 			}

File src/main/java/org/xmlcml/pdf2svg/PDF2SVGConverter.java

 
 	public static final String DEBUG_CHAR_CODE = "-debugCharCode";
 	public static final String DEBUG_CHAR_NAME = "-debugCharName";
+	public static final String DEBUG_FONT_NAME = "-debugFontName";
 	public static final String EXITONERR = "-exitonerr";
 	public static final String INFO_FILES = "-infofiles";
 	public static final String LOGGER = "-logger";
 
 	public String debugCharname = null;
 	public Integer debugCharCode = null;
+	public String debugFontName = null;
 
 	public int getMaxPage() {
 		return maxPage;
 						+ "  %s              log lots more characters (could produce a VERY big log)%n"
 						+ "  %s            Attempt to include char glyphs as svg paths in the XML logger%n"
 						+ "  %s            exit on PDF parse error (otherwise continue to next pdf)%n"
+						+ "  %s            enter debug loop (with charCode)%n"
+						+ "  %s            enter debug loop (with charName)%n"
+						+ "  %s            enter debug loop (with fontName)%n"
 						+ "  <input-file(s)>       The PDF document(s) to be loaded%n%n",
 						PASSWORD, NONSEQ, PAGES, PUB, OUTDIR, MKDIR, NO_SVG,
 						INFO_FILES, LOGGER, LOGFILE, LOGMORE, LOGGLYPHS,
 						EXITONERR, PASSWORD, NONSEQ, PAGES, PUB, OUTDIR, MKDIR,
 						NO_SVG, INFO_FILES, LOGGER, LOGFILE, LOGMORE,
-						LOGGLYPHS, EXITONERR);
+						LOGGLYPHS, EXITONERR, DEBUG_CHAR_CODE, DEBUG_CHAR_NAME, DEBUG_FONT_NAME);
 	}
 
 	private void openPDFFile(File file) throws Exception {
 
 		for (iarg = 0; iarg < args.length; iarg++) {
 
-			System.out.println(args[iarg]);
+			LOG.trace(args[iarg]);
 			if (args[iarg].equals(PASSWORD)) {
 				if (!incrementArg(args))
 					return false;
 				continue;
 			}
 
+			if (args[iarg].equals(DEBUG_FONT_NAME)) {
+				if (!incrementArg(args))
+					return false;
+				debugFontName = args[iarg];
+				continue;
+			}
+
 			if (args[iarg].equals(INFO_FILES)) {
 				writeInfoFiles = true;
 				continue;

File src/main/java/org/xmlcml/pdf2svg/PDFPage2SVGConverter.java

 import org.xmlcml.euclid.Real2;
 import org.xmlcml.euclid.Real2Range;
 import org.xmlcml.euclid.Transform2;
+import org.xmlcml.graphics.svg.GraphicsElement.FontStyle;
+import org.xmlcml.graphics.svg.GraphicsElement.FontWeight;
 import org.xmlcml.graphics.svg.SVGClipPath;
 import org.xmlcml.graphics.svg.SVGDefs;
 import org.xmlcml.graphics.svg.SVGElement;
  */
 public class PDFPage2SVGConverter extends PageDrawer {
 	
+	private static final int _BOLD_FONT_MIN = 410;
 	private static final String SYMBOL = "Symbol";
 	private static final String ENCODING = "Encoding";
 	private static final String ITALIC = "italic";
 
 	private AMIFont amiFont;
 	private String lastFontName;
-	private FontFamily fontFamily;
+	private AMIFontFamily fontFamily;
 
 	private HashMap<String, Integer> integerByClipStringMap;
 	private SVGElement defs1;
 		amiFont = amiFontManager.getAmiFontByFont(pdFont);
 
 		setAndProcessFontNameAndFamilyName();
+		debugFont();
 		getCharCodeAndSetEncodingAndCharname();
 
 		SVGText svgText = new SVGText();
 		if (pdf2svgConverter.useXMLLogger) {
 			pdf2svgConverter.xmlLogger.newFont(amiFont);
 			if (pdf2svgConverter.xmlLoggerLogGlyphs) {
-				captureAndIndexGlyphVector(charCode);
+				captureAndIndexGlyphVector();
 			}
 		}
 
 		createAndReOrientateTextPosition(svgText);
 
 
-		if (amiFont.isSymbol() || amiFont.getDictionaryEncoding() != null ||
+		if (amiFont.isSymbolic() || amiFont.getDictionaryEncoding() != null ||
 				(fontFamily != null && fontFamily.getCodePointSet() != null)) {
 //			convertNonUnicodeCharacterEncodings();
 //			annotateContent(svgText, textContent, charCode, charname, charCode, encoding);
 		LOG.trace("Fn: "+fontName+"; Ff: "+fontFamilyName+"; "+textContent+"; "+charCode+"; "+charname);
 
 		addContentAndAttributesToSVGText(svgText);
+		changeFontStyles(svgText);
 		svg.appendChild(svgText);
 	}
 
+	/** applies font-family, weight, style and pitch to the text using the current amiFont.
+	 * 
+	 * The order of the calls matters: useStandardFonts() sets the font-family first
+	 * and computeFixedPitch() may set it again (to monospaced) afterwards.
+	 * 
+	 * @param svgText text element whose font attributes are updated
+	 */
+	private void changeFontStyles(SVGText svgText) {
+		// font-family first, so that later steps can override it
+		useStandardFonts(svgText);
+		computeBold(svgText);
+		computeItalic(svgText);
+		computeFixedPitch(svgText);
+	}
+
+	/** sets the font-weight of the text from the current amiFont.
+	 * 
+	 * The weight of the font: according to the PDF spec "possible values are
+	 * 100, 200, 300, 400, 500, 600, 700, 800 or 900" where a higher number is
+	 * more weight and appears to be more bold.
+	 * 
+	 * The text is set to BOLD if the weight exceeds _BOLD_FONT_MIN, or the font
+	 * reports forceBold, or the font is heuristically bold; otherwise NORMAL.
+	 * A missing (null) weight or forceBold value is treated as "not bold".
+	 * 
+	 * @param svgText text element whose font-weight is set
+	 */
+	private void computeBold(SVGText svgText) {
+		Float weight = amiFont.getFontWeightFloat();
+		Boolean forceBold = amiFont.isForceBold();
+		// boxed values are checked before use so that a missing value cannot cause an NPE on unboxing
+		boolean heavy = (weight != null) && (weight > _BOLD_FONT_MIN);
+		boolean bold = heavy
+				|| Boolean.TRUE.equals(forceBold)
+				|| Boolean.TRUE.equals(amiFont.isHeuristicBold());
+		svgText.setFontWeight(bold ? FontWeight.BOLD : FontWeight.NORMAL);
+	}
+
+	/** sets the font-family of the text to monospaced if the current amiFont is fixed pitch.
+	 * 
+	 * The font's own fixedPitch value is consulted first; only if that is not true
+	 * is the heuristic fixed-pitch test used. A missing (null) value is treated as
+	 * "not fixed pitch". The font-family is left unchanged for other fonts.
+	 * 
+	 * @param svgText text element whose font-family may be set
+	 */
+	private void computeFixedPitch(SVGText svgText) {
+		// Boolean.TRUE.equals() guards against a null (unknown) value
+		boolean fixedPitch = Boolean.TRUE.equals(amiFont.isFixedPitch());
+		if (!fixedPitch) {
+			fixedPitch = Boolean.TRUE.equals(amiFont.isHeuristicFixedPitch());
+			LOG.trace("FIXED PITCH: "+fixedPitch);
+		}
+		if (fixedPitch) {
+			LOG.trace("FIXED PITCH!!!!!!!!!: "+fixedPitch);
+			svgText.setFontFamily(AMIFontFamily.MONOSPACED);
+		}
+	}
+
+	/** sets the font-style of the text to italic if the current amiFont is italic.
+	 * 
+	 * Three tests are tried in order, stopping at the first that succeeds:
+	 * the font's italic value, a non-zero italic angle, and the heuristic
+	 * italic test. Missing (null) values are treated as "not italic".
+	 * The font-style is left unchanged for non-italic fonts.
+	 * 
+	 * @param svgText text element whose font-style may be set
+	 */
+	private void computeItalic(SVGText svgText) {
+		// isItalic() may be null (it is null-checked elsewhere in this class)
+		boolean italic = Boolean.TRUE.equals(amiFont.isItalic());
+		if (!italic) {
+			Float angle = amiFont.getItalicAngle();
+			// the PDF ItalicAngle is measured counterclockwise from the vertical and is
+			// negative for fonts that slope to the right, so test the magnitude
+			italic = (angle != null) && (Math.abs(angle) > 0.001);
+		}
+		if (!italic) {
+			italic = Boolean.TRUE.equals(amiFont.isHeuristicItalic());
+		}
+		if (italic) {
+			svgText.setFontStyle(FontStyle.ITALIC);
+		}
+	}
+
+	/** replaces the font-family of the text by a generic family.
+	 * 
+	 * Fixed pitch fonts (declared or heuristic) become monospaced; otherwise
+	 * serif fonts become serif and non-serif fonts become sans-serif. If the
+	 * serif value is missing (null) the font-family is left unchanged.
+	 * 
+	 * @param svgText text element whose font-family is set
+	 */
+	private void useStandardFonts(SVGText svgText) {
+		boolean fixedPitch = Boolean.TRUE.equals(amiFont.isFixedPitch())
+				|| Boolean.TRUE.equals(amiFont.isHeuristicFixedPitch());
+		if (fixedPitch) {
+			svgText.setFontFamily(AMIFontFamily.MONOSPACED);
+			return;
+		}
+		// query once; a null value means "unknown" and leaves the family alone
+		Boolean serif = amiFont.isSerif();
+		if (serif != null) {
+			svgText.setFontFamily(serif ? AMIFontFamily.SERIF : AMIFontFamily.SANS_SERIF);
+		}
+	}
+
 	private void debugChar() {
 		if (pdf2svgConverter.debugCharname != null && pdf2svgConverter.debugCharname.equals(charname)) {
 			LOG.debug("Logging charname: "+charname);
 			LOG.debug("Logging charCode: "+charCode);
 		}
 	}
+	/** logs details of the current font when its name equals the
+	 * font name given by the -debugFontName option.
+	 * Does nothing if no debug font name was given or the names differ.
+	 */
+	private void debugFont() {
+		String wantedFontName = pdf2svgConverter.debugFontName;
+		if (wantedFontName == null || !wantedFontName.equals(fontName)) {
+			return;
+		}
+		LOG.debug("Logging fontName: "+fontName+ " / "+amiFont +" / "+amiFont.isHeuristicFixedPitch());
+	}
 
 	private void setAndProcessFontNameAndFamilyName() {
 		fontName = amiFont.getFontName();
 			lastFontName = fontName;
 		}
 		fontFamilyName = amiFont.getFontFamilyName();
-		fontFamily = amiFontManager.getFontFamily(fontFamilyName);
+		fontFamily = amiFontManager.getFontFamilyByFamilyName(fontFamilyName);
 	}
 
 	private void getCharCodeAndSetEncodingAndCharname() {
 		} else {
 			getCharnameThroughEncoding();
 		}
-		convertIllegalXMLCharacters();
+		convertCharactersToUnicode();
 	}
 
-	private void convertIllegalXMLCharacters() {
-		try {
-			new Element("foo").appendChild(textContent);
-		} catch (RuntimeException e) {
-			CodePointSet codePointSet = fontFamily.getCodePointSet();
+	private void convertCharactersToUnicode() {
+		// must have a codePointSet
+		CodePointSet codePointSet = fontFamily.getCodePointSet();
+		// no point if its already unicode
+		if (codePointSet != null && !codePointSet.isUnicodeEncoded()) {
+			// use charname first
+			CodePoint codePoint = (charname != null) ? codePointSet.getByName(charname) : null;
+			if (codePoint == null) {
+				charCode = textContent.codePointAt(0);
+				// try charCode as last resort
+				codePoint = codePointSet.getByDecimal(charCode);
+			}
 			annotateText = true;
-			if (codePointSet != null) {
-				CodePoint codePoint = (charname != null) ? codePointSet.getByName(charname) : null;
-				if (codePoint == null) {
-					LOG.trace("Null code point"+codePointSet.getCodePoints());
-				}
-				charCode = (codePoint != null) ? codePoint.getUnicodeDecimal() : charCode;
+			if (codePoint != null) {
+				charCode = codePoint.getUnicodeDecimal();
 				textContent = ""+(char)charCode;
 				annotateText = false;
 			}
 		getFontSizeAndSetNotZeroRotations(svgText);
 		float width = getCharacterWidth(pdFont, textContent);
 		addAttributesToSVGText(width, svgText);
-		addTooltips(svgText, charCode);
-		svgText.setFontWeight(amiFont.getFontWeight());
+		addTooltips(svgText);
 		if (amiFont.isItalic() != null && amiFont.isItalic()) {
 			svgText.setFontStyle(ITALIC);
 		}
-		if (amiFont.isBold() != null && amiFont.isBold()) {
-			svgText.setFontWeight("bold");
-		}
-		addCodePointToHighPoints();
+//		addCodePointToHighPoints();
 		if (SYMBOL.equals(svgText.getFontFamily())) {
 			svgText.setFontFamily("Symbol-X"); // to stop browsers misbehaving
 		}
 		}
 	}
 
-	private void annotateCharacter(SVGText svgText) {
-		svgText.setFill("red");
-	}
+//	private void annotateCharacter(SVGText svgText) {
+//		svgText.setFill("red");
+//	}
 
 	private void convertNonUnicodeCharacterEncodings() {
 		CodePointSet codePointSet = fontFamily.getCodePointSet();
 		}
 	}
 
-	private void captureAndIndexGlyphVector(int charCode) {
+	private void captureAndIndexGlyphVector() {
 		String key = charname;
 		if (key == null) {
 			key = "" + charCode;
 		LOG.trace("pathString: "+pathString);
 	}
 
-	private void addTooltips(SVGText svgText, int charCode) {
+	private void addTooltips(SVGText svgText) {
 		if (pdf2svgConverter.addTooltipDebugTitles) {
-			String enc = (encoding == null) ? null : encoding.getClass().getSimpleName();
-			enc =(enc != null && enc.endsWith(AMIFont.ENCODING)) ? enc.substring(0, enc.length()-AMIFont.ENCODING.length()) : enc;
-			String title = "char: "+charCode+"; name: "+charname+"; f: "+fontFamilyName+"; fn: "+fontName+"; e: "+enc;
-			SVGTitle svgTitle = new SVGTitle(title);
-			svgText.appendChild(svgTitle);
+			addTitleChildAsTooltip(svgText);
 		}
 	}
 
-	private int addCodePointToHighPoints() {
-		pdf2svgConverter.ensureCodePointSets();
-		int charCode = textPosition.getCharacter().charAt(0);
-		if (charCode > 255) {
-			if (pdf2svgConverter.knownCodePointSet.containsKey((Integer)charCode)) {
-				// known
-			} else if (pdf2svgConverter.newCodePointSet.containsKey((Integer) charCode)) {
-				// known 
-			} else if (encoding != null) {
-				pdf2svgConverter.newCodePointSet.ensureEncoding(encoding.toString());
-				CodePoint codePoint = new CodePoint((Integer)charCode, charname); // creates as UNKNOWN unicode
-				pdf2svgConverter.newCodePointSet.add(codePoint);
-				LOG.trace("added to new codePointSet: "+charCode);
-			} else {
-				LOG.warn("Font name: "+fontName+" No encoding, so cannot add codePoint ("+charCode+") to codePointSet");
-			}
-		}
-		return charCode;
+	/** appends an SVGTitle child describing the current character to the text.
+	 * 
+	 * The title holds charCode, charname, font family name, font name and the
+	 * simple class name of the encoding (with a trailing "Encoding" removed),
+	 * or null for the encoding if there is none.
+	 * 
+	 * @param svgText text element which receives the title child
+	 */
+	private void addTitleChildAsTooltip(SVGText svgText) {
+		String encodingLabel = null;
+		if (encoding != null) {
+			encodingLabel = encoding.getClass().getSimpleName();
+			if (encodingLabel.endsWith(AMIFont.ENCODING)) {
+				// drop the suffix to keep the tooltip short
+				int stemLength = encodingLabel.length()-AMIFont.ENCODING.length();
+				encodingLabel = encodingLabel.substring(0, stemLength);
+			}
+		}
+		StringBuilder title = new StringBuilder();
+		title.append("char: ").append(charCode);
+		title.append("; name: ").append(charname);
+		title.append("; f: ").append(fontFamilyName);
+		title.append("; fn: ").append(fontName);
+		title.append("; e: ").append(encodingLabel);
+		svgText.appendChild(new SVGTitle(title.toString()));
 	}
 
+//	private int addCodePointToHighPoints() {
+//		pdf2svgConverter.ensureCodePointSets();
+//		int charCode = textPosition.getCharacter().charAt(0);
+//		if (charCode > 255) {
+//			if (pdf2svgConverter.knownCodePointSet.containsKey((Integer)charCode)) {
+//				// known
+//			} else if (pdf2svgConverter.newCodePointSet.containsKey((Integer) charCode)) {
+//				// known 
+//			} else if (encoding != null) {
+//				pdf2svgConverter.newCodePointSet.ensureEncoding(encoding.toString());
+//				CodePoint codePoint = new CodePoint((Integer)charCode, charname); // creates as UNKNOWN unicode
+//				pdf2svgConverter.newCodePointSet.add(codePoint);
+//				LOG.trace("added to new codePointSet: "+charCode);
+//			} else {
+//				//assume identity
+//				CodePoint codePoint = new CodePoint((Integer)charCode, null); // creates as UNKNOWN unicode
+//				pdf2svgConverter.newCodePointSet.add(codePoint);
+////				LOG.trace("Font name: "+fontName+" No encoding, so cannot add codePoint ("+charCode+") to codePointSet");
+//			}
+//		}
+//		return charCode;
+//	}
+
 	private void addAttributesToSVGText(float width, SVGText svgText) {
 		//svgText.setClipPath(clipString);
 		setClipPath(svgText, clipString, (Integer) integerByClipStringMap.get(clipString));
 		svgText.setFill("red");
 		svgText.setFontFamily("Helvetica");
 		svgText.setStrokeWidth(0.5);
+		addTitleChildAsTooltip(svgText);
 	}
 
 	/** translates java color to CSS RGB

File src/main/java/org/xmlcml/pdf2svg/SVGSerializer.java

 		StringBuilder sb = new StringBuilder();
 		for (int i = 0; i < s.length(); i++) {
 			int ch = s.charAt(i);
-			if (ch > 255) {
+			if (ch > 127) {
 				sb.append("&#");
 				sb.append(ch);
 				sb.append(";");

File src/main/java/org/xmlcml/pdf2svg/log/XMLLogger.java

 		font.addAttribute(new Attribute(BASEFONT, baseFont == null ? NULL
 				: baseFont));
 
-		addAttribute(font, BOLD, amiFont.isBold());		
+		addAttribute(font, BOLD, amiFont.isForceBold());		
 		addAttribute(font, ITALIC, amiFont.isItalic());
-		addAttribute(font, SYMBOL, amiFont.isSymbol());
+		addAttribute(font, SYMBOL, amiFont.isSymbolic());
 
 		fontlist.appendChild(font);
 	}

File src/main/resources/org/xmlcml/pdf2svg/codepoints/defacto/dingbats.xml

+<!--
+    Gathered from various observations
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<!-- 
+from http://www.tug.org/pipermail/tex-live/2011-June/029475.html
+
+[MTMI] doesn't exist : "the font MTMI is a virtual one, combining characters
+ from Times Italic with additional characters (such as an italic
+ letter v looking less like the Greek letter nu) contained in a font
+ RMTMI."
+ 
+ -->
+<!-- appears that names take priority over charvals 
+e.g. 
+<character charval="5" charname="Delta1" font="MTMI"/> 
+<character charval="946" charname="null" font="MTMI"/>
+
+the charname appears to take precedence over the charval
+-->
+<codePointSet encoding="Dingbats" id="dingbats" resource="org/xmlcml/pdf2svg/codepoints/defacto">
+
+<!--
+#
+#  Name:             Adobe Zapf Dingbats Encoding to Unicode
+#  Unicode version:  2.0
+#  Table version:    1.0
+#  Date:             2011 July 12
+#  
+#  Copyright (c) 1991-2011 Unicode, Inc. All Rights reserved.
+#  
+#  This file is provided as-is by Unicode, Inc. (The Unicode Consortium). No
+#  claims are made as to fitness for any particular purpose. No warranties of
+#  any kind are expressed or implied. The recipient agrees to determine
+#  applicability of information provided. If this file has been provided on
+#  magnetic media by Unicode, Inc., the sole remedy for any claim will be
+#  exchange of defective media within 90 days of receipt.
+#  
+#  Unicode, Inc. hereby grants the right to freely use the information
+#  supplied in this file in the creation of products supporting the
+#  Unicode Standard, and to make copies of this file in any form for
+#  internal or external distribution as long as this notice remains
+#  attached.
+#  
+#  Format: Three tab-delimited fields:
+#
+#    (1) The Unicode value (in hexadecimal)
+#    (2) The Zapf Dingbats Encoding code point (in hexadecimal)
+#    (3) # Unicode 2.0 name
+#    (4) # PostScript character name
+#  
+#  General Notes:
+# 
+#    The Unicode values in this table were produced as the result of
+#    applying the algorithm described in the section "Populating a Unicode
+#    space" in the document "Unicode and Glyph Names," at
+#    http://partners.adobe.com/asn/developer/typeforum/unicodegn.html
+#    to the characters in Zapf Dingbats. Note that some characters, such as
+#    "space", are mapped to 2 Unicode values. 14 characters have assignments in
+#    the Corporate Use Subarea; these are indicated by "(CUS)" in field 4.
+#    Refer to the above document for more details.
+#
+#    2011 July 12: The above link is no longer valid. For comparable,
+#    more current information, see the document, "Glyph", at:
+#    <http://www.adobe.com/devnet/opentype/archives/glyph.html>
+#
+#  Revision History:
+#
+#    [v1.0, 2011 July 12]
+#    Updated terms of use to current wording.
+#    Updated contact information and document link.
+#    No changes to the mapping data.
+#
+#    [v0.2, 30 March 1999] Different algorithm to produce Unicode values (see
+#    notes above)