Commits

petermr  committed bc56334

more testing; enhanced the logger/viewer; BMC regression fails on one symbol

  • Participants
  • Parent commits 80be5d4

Comments (0)

Files changed (47)

File src/main/java/org/xmlcml/pdf2svg/PDF2SVGConverter.java

 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.util.PDFStreamEngine;
 import org.xmlcml.graphics.svg.SVGSVG;
+import org.xmlcml.pdf2svg.log.XMLLogger;
 import org.xmlcml.pdf2svg.util.MenuSystem;
 import org.xmlcml.pdf2svg.util.PConstants;
-import org.xmlcml.pdf2svg.util.XMLLogger;
 
 /**
  * Simple app to read PDF documents ... based on ... * PDFReader.java
 
 		svgPageList = null;
 		page2svgConverter = new PDFPage2SVGConverter();
-		LOG.debug("Parsing PDF file "+ file.getAbsolutePath());
+		LOG.debug("PDF "+ file.getCanonicalPath());
 		readDocument(file, useNonSeqParser, PDFpassword);
 
 		@SuppressWarnings("unchecked")

File src/main/java/org/xmlcml/pdf2svg/PDFPage2SVGConverter.java

 import java.awt.Shape;
 import java.awt.geom.AffineTransform;
 import java.awt.geom.GeneralPath;
-import java.awt.geom.PathIterator;
 import java.awt.image.BufferedImage;
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
-import javax.imageio.ImageIO;
+import nu.xom.Element;
 
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.pdfbox.pdmodel.text.PDTextState;
 import org.apache.pdfbox.util.Matrix;
 import org.apache.pdfbox.util.TextPosition;
-import org.apache.xerces.impl.dv.util.Base64;
 import org.xmlcml.euclid.Angle;
 import org.xmlcml.euclid.Real2;
 import org.xmlcml.euclid.Real2Range;
-import org.xmlcml.euclid.RealArray;
-import org.xmlcml.euclid.RealMatrix;
 import org.xmlcml.euclid.Transform2;
 import org.xmlcml.graphics.svg.SVGClipPath;
 import org.xmlcml.graphics.svg.SVGDefs;
 
 		setAndProcessFontNameAndFamilyName();
 
-		int charCode = getCharCodeAndSetEncodingAndCharname();
+		getCharCodeAndSetEncodingAndCharname();
 
 		SVGText svgText = new SVGText();
 		
 
 		LOG.trace("Fn: "+fontName+"; Ff: "+fontFamilyName+"; "+textContent+"; "+charCode+"; "+charname);
 
-		float width = getCharacterWidth(pdFont, textContent);
-		addContentAndAttributesToSVGText(svgText, width, charCode);
-
+		addContentAndAttributesToSVGText(svgText);
 		svg.appendChild(svgText);
 	}
 
 		fontFamily = amiFontManager.getFontFamily(fontFamilyName);
 	}
 
-	private int getCharCodeAndSetEncodingAndCharname() {
+	private void getCharCodeAndSetEncodingAndCharname() {
 
 		encoding = amiFont.getEncoding();
 		int[] codePoints = textPosition.getCodePoints();
-		LOG.trace("codePoints: "+(codePoints == null ? null : codePoints.length));
 		charCode = -1;
 		if (encoding == null) {
 			if (codePoints != null) {
 			textContent = textPosition.getCharacter();
 			if (textContent.length() > 1) {
 				// this can happen for ligatures
-				LOG.trace("multi-char string: "+textPosition.getCharacter());
+				LOG.trace("multi-char string: "+textContent);
 			} 
 			charCode = textContent.charAt(0);
 		}
 
 		if (encoding == null) {
 			if (!reportedEncodingError ) {
-				LOG.debug("Null encoding for character: "+charCode+" at "+currentXY+" font: "+fontName+" / "+
+				LOG.debug("Null encoding for character: "+charname+" / "+charCode+" at "+currentXY+" font: "+fontName+" / "+
 			       fontFamilyName+" / "+amiFont.getBaseFont()+
-			       "\n                FURTHER ERRORS HIDDEN");
+			       "\n                FURTHER NULL ENCODING ERRORS HIDDEN");
 				reportedEncodingError = true;
 			}
 		} else {
-			getCharnameThroughEncoding(charCode);
+			getCharnameThroughEncoding();
 		}
-
-		return charCode;
+		convertIllegalXMLCharacters();
 	}
 
-	private void getCharnameThroughEncoding(int charCode) {
+	/**
+	 * Checks whether {@code textContent} is accepted as XML text and, when it is
+	 * not, tries to replace it using the code point registered for {@code charname}
+	 * in the current font family's code point set.
+	 */
+	private void convertIllegalXMLCharacters() {
+		try {
+			// throwaway element: built only so that appendChild() can reject textContent
+			new Element("foo").appendChild(textContent);
+		} catch (RuntimeException e) {
+			// NOTE(review): fontFamily is dereferenced without a null check - confirm it is always set here
+			CodePointSet codePointSet = fontFamily.getCodePointSet();
+			if (codePointSet != null) {
+				CodePoint codePoint = (charname != null) ? codePointSet.getByName(charname) : null;
+				// charCode keeps its current value when no code point is registered under charname
+				charCode = (codePoint != null) ? codePoint.getUnicodeDecimal() : charCode;
+				// NOTE(review): this runs even when no code point was found, rebuilding textContent
+				// from the unchanged charCode narrowed to one char - confirm that is intended
+				textContent = ""+(char)charCode;
+			}
+		}
+	}
+
+	private void getCharnameThroughEncoding() {
 		try {
 			// NOTE: charname is the formal name for the character such as "period", "bracket" or "a", "two"
 			charname = encoding.getName(charCode);
 		}
 	}
 
-	private void addContentAndAttributesToSVGText(SVGText svgText, float width, int charCode) {
+	private void addContentAndAttributesToSVGText(SVGText svgText) {
 		try {
-			svgText.setText(textPosition.getCharacter());
+			svgText.setText(textContent);
 		} catch (RuntimeException e) {
 			// drops here if cannot encode as XML character
 			annotateUnusualCharacters(svgText);
 		}
 		
 		getFontSizeAndSetNotZeroRotations(svgText);
+		float width = getCharacterWidth(pdFont, textContent);
 		addAttributesToSVGText(width, svgText);
 		addTooltips(svgText, charCode);
 		if (amiFont.isItalic() != null && amiFont.isItalic()) {
 		if ("Symbol".equals(svgText.getFontFamily())) {
 			svgText.setFontFamily("Symbol-X"); // to stop browsers misbehaving
 		}
-
 	}
 
 	private void convertNonUnicodeCharacterEncodings() {
 			}
 			if (codePoint == null) {
 				//or add Bad Character Glyph
-				int ch = (int) textContent.charAt(0);
+//				int ch = (int) textContent.charAt(0);
 				if (pdf2svgConverter.useXMLLogger && !charWasLogged) {
-					pdf2svgConverter.xmlLogger.newCharacter(fontName, fontFamilyName, charname, ch);
+					pdf2svgConverter.xmlLogger.newCharacter(fontName, fontFamilyName, charname, charCode);
 					charWasLogged = true;
+				} else {
+					LOG.error("Cannot convert character: "+textContent+" char: "+charCode+" charname: "+charname+" fn: "+fontFamilyName);
 				}
-				else
-					LOG.error("Cannot convert character: "+textContent+" char: "+ch+" charname: "+charname+" fn: "+fontFamilyName);
-				textContent = ""+AMIFontManager.getUnknownCharacterSymbol()+ch;
+				textContent = ""+AMIFontManager.getUnknownCharacterSymbol()+charCode;
 			} else {
 				Integer codepoint = codePoint.getUnicodeDecimal();
 				textContent = ""+(char)(int) codepoint;
 	}
 
 	private void annotateUnusualCharacters(SVGText svgText) {
-//		char cc = textPosition.getCharacter().charAt(0);
 		String s = AMIFontManager.BADCHAR_S+(int)charCode+AMIFontManager.BADCHAR_E;
 		if (pdf2svgConverter.useXMLLogger && !charWasLogged) {
 			pdf2svgConverter.xmlLogger.newCharacter(fontName, fontFamilyName, charname, charCode);
 			charWasLogged = true;
+		} else {
+			LOG.debug(s+" "+fontName+" ("+fontSubType+") charname: "+charname);
 		}
-		else
-			LOG.debug(s+" "+fontName+" ("+fontSubType+") charname: "+charname);
 		s = ""+(char)(BADCHAR+Math.min(9, charCode));
 		svgText.setText(s);
 		svgText.setStroke("red");

File src/main/java/org/xmlcml/pdf2svg/log/LogAggregator.java

+package org.xmlcml.pdf2svg.log;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import nu.xom.Attribute;
+import nu.xom.Element;
+import nu.xom.Nodes;
+
+import org.xmlcml.cml.base.CMLUtil;
+
+public class LogAggregator {
+
+	private Element pdfLogRoot;
+
+	/** Creates an aggregator; this constructor does no initialisation of its own. */
+	public LogAggregator() {
+		
+	}
+
+	/**
+	 * Builds a fresh aggregated log from the log files found under {@code topDir}
+	 * and then prunes it with a fixed sequence of clean-up passes.
+	 *
+	 * The aggregated document starts with a header of this shape:
+	 * <pre>
+	<pdfLog glyphs="true">
+    <fontList>
+        <font name="DENAEF+Frutiger-LightCn"
+        family="Frutiger-LightCn" type="PDType1Font"
+        encoding="DictionaryEncoding" fontencoding="DictionaryEncoding"
+        basefont="DENAEF+Frutiger-LightCn"
+        bold="false" italic="false" symbol="false"/>
+    </fontList>
+	 * </pre>
+	 *
+	 * @param topDir directory that is searched (recursively) for log files
+    */
+	public void aggregateAllLogs(String topDir) {
+		File topFile = new File(topDir);
+		System.err.println("Top "+topFile.getAbsolutePath());
+		createDummyLog();
+		aggregateLogs(topFile);
+		removeCharactersWithEmptyPaths();
+		// font attributes are stripped before de-duplication, so characters that
+		// differ only in their font attribute serialize identically and collapse
+		removeFontAttributes();
+		removeDuplicateCharacters();
+		removeNullCharacters();
+		// pages are pruned before files so that a pdf left with no pages is removed too
+		removeEmptyPages();
+		removeEmptyFiles();
+	}
+
+	/** Detaches every character named "null" whose code is below 32 and reports how many were removed. */
+	private void removeNullCharacters() {
+		Nodes candidates = pdfLogRoot.query(".//character[@name='null']");
+		int detachedCount = 0;
+		int index = candidates.size();
+		while (index-- > 0) {
+			Element candidate = (Element) candidates.get(index);
+			int code = Integer.parseInt(candidate.getAttributeValue("code"));
+			if (code >= 32) {
+				continue;
+			}
+			candidate.detach();
+			detachedCount++;
+		}
+		System.err.println("removed "+detachedCount+" null characters");
+	}
+
+	/** Detaches character elements whose XML serialization has already been seen (scanning from the end). */
+	private void removeDuplicateCharacters() {
+		Set<String> seenXML = new HashSet<String>();
+		Nodes allCharacters = pdfLogRoot.query(".//character");
+		int index = allCharacters.size();
+		while (index-- > 0) {
+			Element current = (Element) allCharacters.get(index);
+			// Set.add returns false when an identical serialization was recorded earlier
+			boolean firstOccurrence = seenXML.add(current.toXML());
+			if (!firstOccurrence) {
+				current.detach();
+			}
+		}
+	}
+
+	/** Detaches every attribute named "font" found anywhere below the log root. */
+	private void removeFontAttributes() {
+		Nodes fontAttributes = pdfLogRoot.query(".//@font");
+		int index = fontAttributes.size();
+		while (index-- > 0) {
+			fontAttributes.get(index).detach();
+		}
+	}
+
+	private void removeEmptyPages() {
+		Nodes emptyPages = pdfLogRoot.query(".//page[count(*)=0]");
+		for (int j = emptyPages.size()-1; j >= 0; j--) {
+			emptyPages.get(j).detach();
+		}
+		System.err.println("removed "+emptyPages.size()+" empty pages");
+	}
+
+	/**
+	 * Detaches pdf elements that have no child elements and reports the count.
+	 * Intended to run after removeEmptyPages(), so a pdf whose pages were all
+	 * removed is dropped here as well.
+	 */
+	private void removeEmptyFiles() {
+		Nodes emptyFiles = pdfLogRoot.query(".//pdf[count(*)=0]");
+		for (int j = emptyFiles.size()-1; j >= 0; j--) {
+			emptyFiles.get(j).detach();
+		}
+		// report files, not pages: the old message duplicated removeEmptyPages()
+		System.err.println("removed "+emptyFiles.size()+" empty files");
+	}
+
+	/**
+      <character font="LinLibertine" family="LinLibertine" name="null" code="3">
+        <path stroke="black" fill="none" d="" stroke-width="0.0050" xmlns="http://www.w3.org/2000/svg"/>
+      </character>
+	 */
+	private void removeCharactersWithEmptyPaths() {
+		Nodes emptyPathNodes = pdfLogRoot.query(".//character[*[local-name()='path' and @d='']]");
+		for (int j = emptyPathNodes.size()-1; j >= 0; j--) {
+			emptyPathNodes.get(j).detach();
+		}
+		System.err.println("removed "+emptyPathNodes.size()+" empty paths");
+	}
+
+	/** Replaces the log root with a new pdfLog element holding a fontList with one placeholder font. */
+	private void createDummyLog() {
+		Element placeholderFont = new Element("font");
+		placeholderFont.addAttribute(new Attribute("name", "dummy"));
+
+		Element fontListElement = new Element("fontList");
+		fontListElement.appendChild(placeholderFont);
+
+		Element newRoot = new Element("pdfLog");
+		newRoot.addAttribute(new Attribute("glyphs", "true"));
+		newRoot.appendChild(fontListElement);
+		pdfLogRoot = newRoot;
+	}
+
+	/**
+	 * Walks the directory tree below {@code topFile}, adding every entry named
+	 * "pdfLog.xml" to the aggregated log and recursing into other directories.
+	 */
+	private void aggregateLogs(File topFile) {
+		File[] entries = topFile.listFiles();
+		if (entries == null) {
+			// listFiles() gave nothing to iterate over
+			return;
+		}
+		for (int i = 0; i < entries.length; i++) {
+			File entry = entries[i];
+			boolean isLogFile = "pdfLog.xml".equals(entry.getName());
+			if (isLogFile) {
+				addLogFile(entry);
+			} else if (entry.isDirectory()) {
+				aggregateLogs(entry);
+			}
+		}
+	}
+
+	/**
+	 * Parses one log file and moves its non-empty {@code pdf} elements under the
+	 * aggregated log root.
+	 *
+	 * Only {@code pdf} elements that have at least one {@code page} with child
+	 * elements are selected. Abbreviated example of the input:
+	 * <pre>
+    <pdf filename="C:\Users\pm286\workspace\pdf2svg\..\pdfs\pdfsByJournal\ZoologicaScripta\Liu,
+    Yang - 2006.pdf" pageCount="18">
+        <page num="1"/>
+        <page num="2"/>
+        <page num="3"/>
+        <page num="4"/>
+        <page num="5"/>
+        <page num="6"/>
+        <page num="7">
+            <character font="GNBAPP+Universal-NewswithCommPi"
+            family="Universal-NewswithCommPi"
+            name="H20040" code="1">
+                <path stroke="black" fill="none"
+	 * </pre>
+	 * @param file the log file to read
+	 */
+	private void addLogFile(File file) {
+		// progress output: one line per log file processed
+		System.err.println(file.getAbsolutePath());
+		Element logElement = CMLUtil.parseQuietlyToDocument(file).getRootElement();
+		// add children with characters
+		Nodes nodes = logElement.query(".//pdf[page[count(*)>0]]");
+		for (int i = 0; i < nodes.size(); i++) {
+			Element pdfElement = (Element) nodes.get(i);
+			// detach from the parsed document before re-parenting under the aggregate root
+			pdfElement.detach();
+			pdfLogRoot.appendChild(pdfElement);
+		}
+	}
+	
+	/**
+	 * Writes the aggregated log to {@code file}.
+	 *
+	 * @param file destination file
+	 * @throws IOException if the file cannot be opened, written or closed
+	 */
+	private void writeLog(File file) throws IOException {
+		FileOutputStream fos = new FileOutputStream(file);
+		try {
+			// third argument is passed through to CMLUtil.debug (presumably the indent - confirm)
+			CMLUtil.debug(pdfLogRoot, fos, 2);
+		} finally {
+			// always release the file handle, even when serialization fails
+			fos.close();
+		}
+	}
+	
+	/**
+	 * Command-line entry point: aggregates the logs below a hard-coded directory
+	 * and writes the combined result to target/pdfLog.xml.
+	 *
+	 * @param args not used
+	 * @throws IOException if the combined log cannot be written
+	 */
+	public static void main(String[] args) throws IOException {
+		LogAggregator aggregator = new LogAggregator();
+		// alternative input directory, left in place for switching by hand
+//		aggregator.aggregateAllLogs("target/pdfsByJournal");
+		aggregator.aggregateAllLogs("target/minorJournals");
+		aggregator.writeLog(new File("target/pdfLog.xml"));
+	}
+
+}

File src/main/java/org/xmlcml/pdf2svg/log/XMLLogger.java

+package org.xmlcml.pdf2svg.log;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import nu.xom.Attribute;
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Serializer;
+
+import org.apache.log4j.Logger;
+import org.apache.pdfbox.encoding.Encoding;
+import org.xmlcml.graphics.svg.SVGPath;
+import org.xmlcml.pdf2svg.AMIFont;
+
+public class XMLLogger {
+
+
+
+	private final static Logger LOG = Logger.getLogger(XMLLogger.class);
+
+	private static final String BASEFONT = "basefont";
+	private static final String CHARACTER = "character";
+	private static final String CODE = "code";
+	private static final String ENCODING = "encoding";
+	private static final String FAMILY = "family";
+	private static final String FILENAME = "filename";
+	private static final String FONT = "font";
+	private static final String FONT_LIST = "fontList";
+	private static final String FONTENCODING = "fontencoding";
+	private static final String GLYPHS = "glyphs";
+	private static final String NAME = "name";
+	private static final String NULL = "null";
+	private static final String NUM = "num";
+	private static final String PAGE = "page";
+	private static final String PAGE_COUNT = "pageCount";
+	private static final String PDF = "pdf";
+	private static final String PDF_LOG = "pdfLog";
+	private static final String TYPE = "type";
+
+	private static final String BOLD = "bold";
+	private static final String ITALIC = "italic";
+	private static final String SYMBOL = "symbol";
+
+	private static final String UTF_8 = "UTF-8";
+
+	private Element root;
+	private Element fontlist;
+	private Element file;
+	private Element page;
+
+	private List<String> fontnames; // names of all fonts in the fontlist
+	private Map<String, AMIFont> fontmap; // only valid for the current PDF
+	private final boolean logGlyphs;
+
+	/** Creates a logger that does not record glyph outlines. */
+	public XMLLogger() {
+		this(false);
+	}
+
+	/**
+	 * Creates a logger and initialises an empty log document.
+	 *
+	 * @param logGlyphs whether glyph paths are attached to logged characters
+	 */
+	public XMLLogger(boolean logGlyphs) {
+		this.logGlyphs = logGlyphs;
+		reset();
+	}
+
+	public void reset() {
+		root = new Element(PDF_LOG);
+		root.addAttribute(new Attribute(GLYPHS, Boolean.toString(logGlyphs)));
+
+		fontlist = new Element(FONT_LIST);
+		root.appendChild(fontlist);
+
+		file = null;
+		page = null;
+
+		fontnames = new ArrayList<String>();
+		if (logGlyphs)
+			fontmap = null;
+	}
+
+	/**
+	 * Starts the log entry for a new PDF and makes it the current file.
+	 *
+	 * @param fileName  value stored in the filename attribute
+	 * @param pageCount value stored in the pageCount attribute
+	 */
+	public void newPDFFile(String fileName, int pageCount) {
+		Element pdfElement = new Element(PDF);
+		pdfElement.addAttribute(new Attribute(FILENAME, fileName));
+		String pageCountText = Integer.toString(pageCount);
+		pdfElement.addAttribute(new Attribute(PAGE_COUNT, pageCountText));
+		file = pdfElement;
+		root.appendChild(pdfElement);
+
+		if (logGlyphs) {
+			fontmap = new HashMap<String, AMIFont>();
+		}
+	}
+
+	public void newPDFPage(int pageNumber) {
+		if (file == null)
+			throw new RuntimeException("no current PDF file!");
+		page = new Element(PAGE);
+		page.addAttribute(new Attribute(NUM, Integer.toString(pageNumber)));
+		file.appendChild(page);
+	}
+
+	/**
+	 * Records a font in the font list the first time its name is seen. Fonts
+	 * without a name are ignored; missing string properties are written as "null".
+	 *
+	 * @param amiFont font to record
+	 */
+	public void newFont(AMIFont amiFont) {
+		final String fontName = amiFont.getFontName();
+		if (fontName == null) {
+			return;
+		}
+		if (logGlyphs) {
+			// refreshed on every call, even for names already present in the font list
+			fontmap.put(fontName, amiFont);
+		}
+		if (fontnames.contains(fontName)) {
+			return;
+		}
+		fontnames.add(fontName);
+
+		Element fontElement = new Element(FONT);
+		fontElement.addAttribute(new Attribute(NAME, fontName));
+
+		String family = amiFont.getFontFamilyName();
+		if (family == null) {
+			family = NULL;
+		}
+		fontElement.addAttribute(new Attribute(FAMILY, family));
+
+		String type = amiFont.getFontType();
+		if (type == null) {
+			type = NULL;
+		}
+		fontElement.addAttribute(new Attribute(TYPE, type));
+
+		Encoding encoding = amiFont.getEncoding();
+		String encodingName = NULL;
+		if (encoding != null) {
+			encodingName = encoding.getClass().getSimpleName();
+		}
+		fontElement.addAttribute(new Attribute(ENCODING, encodingName));
+
+		String fontEncoding = amiFont.getFontEncoding();
+		if (fontEncoding == null) {
+			fontEncoding = NULL;
+		}
+		fontElement.addAttribute(new Attribute(FONTENCODING, fontEncoding));
+
+		String baseFont = amiFont.getBaseFont();
+		if (baseFont == null) {
+			baseFont = NULL;
+		}
+		fontElement.addAttribute(new Attribute(BASEFONT, baseFont));
+
+		addAttribute(fontElement, BOLD, amiFont.isBold());
+		addAttribute(fontElement, ITALIC, amiFont.isItalic());
+		addAttribute(fontElement, SYMBOL, amiFont.isSymbol());
+
+		fontlist.appendChild(fontElement);
+	}
+
+	/** Adds a boolean attribute to {@code font}; a null value adds nothing. */
+	private void addAttribute(Element font, String attName, Boolean value) {
+		if (value == null) {
+			return;
+		}
+		String text = Boolean.toString(value);
+		font.addAttribute(new Attribute(attName, text));
+	}
+
+	public void newCharacter(String fontName, String fontFamilyName, String charName, int charCode) {
+		if (file == null || page == null)
+			throw new RuntimeException("no current PDF file or page!");
+
+		if (fontName == null) {
+			LOG.error("fontName is null! (charName=" + charName + ",charValue="
+					+ charCode + ")");
+			return;
+		}
+
+		if (!fontnames.contains(fontName)) {
+			LOG.error("new character (" + charName + "," + charCode
+					+ ") specifies font name '" + fontName
+					+ "' - which doesn't exist!");
+		}
+
+		Element character = new Element(CHARACTER);
+
+		character.addAttribute(new Attribute(FONT, fontName));
+		character.addAttribute(new Attribute(FAMILY,
+				fontFamilyName == null ? NULL : fontFamilyName));
+		character.addAttribute(new Attribute(NAME, charName == null ? NULL
+				: charName));
+		character
+				.addAttribute(new Attribute(CODE, Integer.toString(charCode)));
+
+		if (logGlyphs) {
+			AMIFont amiFont = fontmap.get(fontName);
+			if (amiFont == null) {
+				LOG.error(String.format("no AMIFont available for (%s,%s,%d)",
+						fontName, charName, charCode));
+			} else {
+				String key = charName;
+				if (key == null)
+					key = "" + charCode;
+				String d = amiFont.getPathStringByCharnameMap().get(key);
+				if (d != null) {
+					SVGPath path = new SVGPath(d);
+					path.setStrokeWidth(0.005);
+					character.appendChild(path);
+				}
+			}
+		}
+
+		page.appendChild(character);
+	}
+
+	/**
+	 * Appends an exception element to the current page with one stackTrace
+	 * child per stack frame of {@code e}.
+	 *
+	 * @param e exception whose stack trace is recorded
+	 * @throws RuntimeException if there is no current PDF file or page
+	 */
+	public void newException(Exception e) {
+		boolean hasContext = file != null && page != null;
+		if (!hasContext) {
+			throw new RuntimeException("no current PDF file or page!");
+		}
+
+		Element exceptionElement = new Element("exception");
+		StackTraceElement[] frames = e.getStackTrace();
+		for (int i = 0; i < frames.length; i++) {
+			Element frameElement = new Element("stackTrace");
+			exceptionElement.appendChild(frameElement);
+			frameElement.appendChild(frames[i].toString());
+		}
+		page.appendChild(exceptionElement);
+	}
+
+	/**
+	 * Serializes the log document as UTF-8 XML to {@code outputStream}.
+	 * The stream is flushed but not closed; closing is left to the caller.
+	 * I/O failures are logged and not rethrown.
+	 *
+	 * @param outputStream destination stream
+	 */
+	public void writeXMLFile(OutputStream outputStream) {
+		Document doc = new Document(root);
+		try {
+			Serializer serializer = new Serializer(outputStream, UTF_8);
+			serializer.setIndent(4);
+			serializer.setMaxLength(50);
+			serializer.write(doc);
+			serializer.flush();
+		} catch (IOException ex) {
+			// use the class logger so the failure carries its stack trace
+			LOG.error("failed to write XML log", ex);
+		}
+	}
+
+	/**
+	 * Writes the log to {@code <outdir>/<pdfname without .pdf>-log.xml}.
+	 * A trailing ".pdf" is stripped case-insensitively before "-log.xml" is appended.
+	 *
+	 * @param outdir  directory the log file is created in
+	 * @param pdfname name of the PDF the log belongs to
+	 * @throws RuntimeException if the log file cannot be opened for writing
+	 */
+	public void writeXMLFile(String outdir, String pdfname) {
+		String logname = pdfname.replaceFirst("(?i)\\.pdf$", "") + "-log.xml";
+
+		File outputFile = new File(outdir, logname);
+		OutputStream outputStream;
+		try {
+			outputStream = new FileOutputStream(outputFile);
+		} catch (FileNotFoundException e) {
+			// keep the original exception as the cause so the underlying reason is not lost
+			throw new RuntimeException(
+					"caught File Not Found exception while creating logfile '"
+							+ outputFile.getAbsolutePath() + "'.", e);
+		}
+
+		try {
+			writeXMLFile(outputStream);
+		} finally {
+			// this method opened the stream, so it must also release it
+			try {
+				outputStream.close();
+			} catch (IOException e) {
+				LOG.warn("could not close logfile '" + outputFile.getAbsolutePath() + "'", e);
+			}
+		}
+	}
+}

File src/main/java/org/xmlcml/pdf2svg/util/XMLLogger.java

-package org.xmlcml.pdf2svg.util;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import nu.xom.Attribute;
-import nu.xom.Document;
-import nu.xom.Element;
-import nu.xom.Serializer;
-
-import org.apache.log4j.Logger;
-import org.apache.pdfbox.encoding.Encoding;
-import org.xmlcml.graphics.svg.SVGPath;
-import org.xmlcml.pdf2svg.AMIFont;
-
-public class XMLLogger {
-
-
-
-	private final static Logger LOG = Logger.getLogger(XMLLogger.class);
-
-	private static final String BASEFONT = "basefont";
-	private static final String CHARACTER = "character";
-	private static final String CODE = "code";
-	private static final String ENCODING = "encoding";
-	private static final String FAMILY = "family";
-	private static final String FILENAME = "filename";
-	private static final String FONT = "font";
-	private static final String FONT_LIST = "fontList";
-	private static final String FONTENCODING = "fontencoding";
-	private static final String GLYPHS = "glyphs";
-	private static final String NAME = "name";
-	private static final String NULL = "null";
-	private static final String NUM = "num";
-	private static final String PAGE = "page";
-	private static final String PAGE_COUNT = "pageCount";
-	private static final String PDF = "pdf";
-	private static final String PDF_LOG = "pdfLog";
-	private static final String TYPE = "type";
-
-	private static final String BOLD = "bold";
-	private static final String ITALIC = "italic";
-	private static final String SYMBOL = "symbol";
-
-	private static final String UTF_8 = "UTF-8";
-
-	private Element root;
-	private Element fontlist;
-	private Element file;
-	private Element page;
-
-	private List<String> fontnames; // names of all fonts in the fontlist
-	private Map<String, AMIFont> fontmap; // only valid for the current PDF
-	private final boolean logGlyphs;
-
-	public XMLLogger() {
-		logGlyphs = false;
-		reset();
-	}
-
-	public XMLLogger(boolean logGlyphs) {
-		this.logGlyphs = logGlyphs;
-		reset();
-	}
-
-	public void reset() {
-		root = new Element(PDF_LOG);
-		root.addAttribute(new Attribute(GLYPHS, Boolean.toString(logGlyphs)));
-
-		fontlist = new Element(FONT_LIST);
-		root.appendChild(fontlist);
-
-		file = null;
-		page = null;
-
-		fontnames = new ArrayList<String>();
-		if (logGlyphs)
-			fontmap = null;
-	}
-
-	public void newPDFFile(String fileName, int pageCount) {
-		file = new Element(PDF);
-		file.addAttribute(new Attribute(FILENAME, fileName));
-		file.addAttribute(new Attribute(PAGE_COUNT, Integer
-				.toString(pageCount)));
-		root.appendChild(file);
-
-		if (logGlyphs)
-			fontmap = new HashMap<String, AMIFont>();
-	}
-
-	public void newPDFPage(int pageNumber) {
-		if (file == null)
-			throw new RuntimeException("no current PDF file!");
-		page = new Element(PAGE);
-		page.addAttribute(new Attribute(NUM, Integer.toString(pageNumber)));
-		file.appendChild(page);
-	}
-
-	public void newFont(AMIFont amiFont) {
-		String fontName = amiFont.getFontName();
-		if (fontName == null)
-			return;
-		if (logGlyphs)
-			fontmap.put(fontName, amiFont);
-
-		if (fontnames.contains(fontName))
-			return;
-		fontnames.add(fontName);
-
-		Element font = new Element(FONT);
-
-		font.addAttribute(new Attribute(NAME, fontName));
-		String fontFamilyName = amiFont.getFontFamilyName();
-		font.addAttribute(new Attribute(FAMILY,
-				fontFamilyName == null ? NULL : fontFamilyName));
-
-		String fontType = amiFont.getFontType();
-		font.addAttribute(new Attribute(TYPE, fontType == null ? NULL
-				: fontType));
-		Encoding encoding = amiFont.getEncoding();
-		font.addAttribute(new Attribute(ENCODING, encoding == null ? NULL
-				: encoding.getClass().getSimpleName()));
-		String fontEncoding = amiFont.getFontEncoding();
-
-		font.addAttribute(new Attribute(FONTENCODING,
-				fontEncoding == null ? NULL : fontEncoding));
-		String baseFont = amiFont.getBaseFont();
-		font.addAttribute(new Attribute(BASEFONT, baseFont == null ? NULL
-				: baseFont));
-
-		addAttribute(font, BOLD, amiFont.isBold());		
-		addAttribute(font, ITALIC, amiFont.isItalic());
-		addAttribute(font, SYMBOL, amiFont.isSymbol());
-
-		fontlist.appendChild(font);
-	}
-
-	private void addAttribute(Element font, String attName, Boolean value) {
-		if (value != null) {
-			font.addAttribute(new Attribute(attName, Boolean.toString(value)));
-		}
-	}
-
-	public void newCharacter(String fontName, String fontFamilyName,
-			String charName, int charCode) {
-		if (file == null || page == null)
-			throw new RuntimeException("no current PDF file or page!");
-
-		if (fontName == null) {
-			LOG.error("fontName is null! (charName=" + charName + ",charValue="
-					+ charCode + ")");
-			return;
-		}
-
-		if (!fontnames.contains(fontName)) {
-			LOG.error("new character (" + charName + "," + charCode
-					+ ") specifies font name '" + fontName
-					+ "' - which doesn't exist!");
-		}
-
-		Element character = new Element(CHARACTER);
-
-		character.addAttribute(new Attribute(FONT, fontName));
-		character.addAttribute(new Attribute(FAMILY,
-				fontFamilyName == null ? NULL : fontFamilyName));
-		character.addAttribute(new Attribute(NAME, charName == null ? NULL
-				: charName));
-		character
-				.addAttribute(new Attribute(CODE, Integer.toString(charCode)));
-
-		if (logGlyphs) {
-			AMIFont amiFont = fontmap.get(fontName);
-			if (amiFont == null) {
-				LOG.error(String.format("no AMIFont available for (%s,%s,%d)",
-						fontName, charName, charCode));
-			} else {
-				String key = charName;
-				if (key == null)
-					key = "" + charCode;
-				String d = amiFont.getPathStringByCharnameMap().get(key);
-				if (d != null) {
-					SVGPath path = new SVGPath(d);
-					path.setStrokeWidth(0.005);
-					character.appendChild(path);
-				}
-			}
-		}
-
-		page.appendChild(character);
-	}
-
-	public void newException(Exception e) {
-		if (file == null || page == null) {
-			throw new RuntimeException("no current PDF file or page!");
-		}
-
-		Element exceptionElement = new Element("exception");
-		StackTraceElement[] steArray = e.getStackTrace();
-		for (StackTraceElement ste : steArray) {
-			Element ste0 = new Element("stackTrace");
-			exceptionElement.appendChild(ste0);
-			ste0.appendChild(ste.toString());
-		}
-		page.appendChild(exceptionElement);
-	}
-
-	public void writeXMLFile(OutputStream outputStream) {
-		Document doc = new Document(root);
-		try {
-			Serializer serializer = new Serializer(outputStream, UTF_8);
-			serializer.setIndent(4);
-			serializer.setMaxLength(50);
-			serializer.write(doc);
-			serializer.flush();
-		} catch (IOException ex) {
-			System.err.println(ex);
-		}
-	}
-
-	public void writeXMLFile(String outdir, String pdfname) {
-		String logname = pdfname.replaceFirst("(?i)\\.pdf$", "") + "-log.xml";
-
-		File outputFile = new File(outdir, logname);
-		OutputStream outputStream;
-		try {
-			outputStream = new FileOutputStream(outputFile);
-		} catch (FileNotFoundException e) {
-			throw new RuntimeException(
-					"caught File Not Found exception while creating logfile '"
-							+ outputFile.getAbsolutePath() + "'.");
-		}
-
-		writeXMLFile(outputStream);
-	}
-}

File src/main/resources/org/xmlcml/pdf2svg/codepoints/defacto/astrologypi1.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="AstrologyPi1" id="astrologypi1" resource="org/xmlcml/pdf2svg/codepoints/defacto">
+
+    <codePoint unicode="U+2640" name="H50488" note="FEMALE SIGN"/>
+    
+    	
+</codePointSet>
+		

File src/main/resources/org/xmlcml/pdf2svg/codepoints/defacto/mathpi6.xml

     <codePoint unicode="U+274E" name="H" note="UNKNOWN" />
     <codePoint unicode="U+0020" name="space" decimal="32" note="SPACE" />
 
+    <codePoint unicode="U+25C7" name="H17003" note="WHITE DIAMOND" /> <!-- square on side?-->
     <codePoint unicode="U+25C7" name="H17004" note="WHITE DIAMOND" />
     <codePoint unicode="U+25B3" name="H17005" note="WHITE UPWARDS-POINTING TRIANGLE" />
     <codePoint unicode="U+25BD" name="H17006" note="WHITE DOWNWARDS-POINTING TRIANGLE" />
     <codePoint unicode="U+25A0" name="H17039" note="BLACK SQUARE" />
     <codePoint unicode="U+25A1" name="H17040" note="WHITE SQUARE" />
     
+    <codePoint unicode="U+25E6" name="H11568" note="WHITE BULLET" />
     <codePoint unicode="U+25A1" name="H11623" note="WHITE SQUARE" />
     
     <codePoint unicode="U+2606" name="H22841" note="WHITE STAR" />

File src/main/resources/org/xmlcml/pdf2svg/codepoints/defacto/mtsyn.xml

     <codePoint unicode="U+007C" decimal="124"  name="bar" note="VERTICAL LINE" />
     
     <codePoint unicode="U+002D" decimal="161" name="exclamdown" note="HYPHEN-MINUS" />
-    <codePoint unicode="U+002A" decimal="163" note="ASTERISK" />
-    <codePoint unicode="U+002A" decimal="164" name="currency" note="ASTERISK" />
-    <codePoint unicode="U+002B" decimal="163" name="plus" note="PLUS SIGN" />
-    <codePoint unicode="U+00B1" decimal="167" name="section" note="PLUS-MINUS SIGN" />
+    <codePoint unicode="U+002A" decimal="163"                   note="ASTERISK" />
+    <codePoint unicode="U+002A" decimal="164" name="currency"   note="ASTERISK" />
+    <codePoint unicode="U+002B" decimal="163" name="plus"       note="PLUS SIGN" />
+    <codePoint unicode="U+00B1" decimal="167" name="section"    note="PLUS-MINUS SIGN" />
     <codePoint unicode="U+25E6" decimal="169" name="quotesingle" note="WHITE BULLET" />
-    <codePoint unicode="U+00B1" decimal="177" name="endash" note="PLUS-MINUS SIGN" />
-    <codePoint unicode="U+00B1" name="plusminus" note="PLUS-MINUS SIGN" />
-    <codePoint unicode="U+00B7" decimal="183" name="bullet" note="MIDDLE DOT" />
+    <codePoint unicode="U+00B1" decimal="177" name="endash"     note="PLUS-MINUS SIGN" />
+    <codePoint unicode="U+00B7" decimal="183" name="bullet"     note="MIDDLE DOT" />
     <codePoint unicode="U+007E" decimal="187" name="guillemotright" note="TILDE" />
-    <codePoint unicode="U+00D7" decimal="215" name="multiply" note="MULTIPLICATION SIGN" />
+    <codePoint unicode="U+00D7" decimal="215" name="multiply"   note="MULTIPLICATION SIGN" />
 
+    <codePoint unicode="U+00B1"               name="plusminus"  note="PLUS-MINUS SIGN" />
+    <codePoint unicode="U+002F"               name="negationslash" note="SOLIDUS" />
     
 <!-- unicode --> 
     <codePointSet idRef="unicode256" href="../unicode/unicode256.xml"/>

File src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/AdvP4C4E59.xml

     <codePoint unicode="U+005E" name="asciicircum" note="CIRCUMFLEX ACCENT"/>  
     <codePoint unicode="U+002A" name="asterisk" note="ASTERISK"/> 
 -->
-    
+    <codePoint unicode="U+0393" name="C0" note="GREEK CAPITAL LETTER GAMMA"/>
     <codePoint unicode="U+2018" name="C18" note="LEFT SINGLE QUOTATION MARK"/>  
     <codePoint unicode="U+007E" name="C19" note="TILDE"/>  
     <codePoint unicode="U+02C7" name="C20" note="CARON"/>  

File src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/AdvSPSASORT.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="AdvSPSASORT" id="AdvSPSASORT" resource="org/xmlcml/pdf2svg/codepoints/misc"> 
+
+    <codePoint name="C223" unicode="U+002C" unicodeName="COMMA"/> 
+    
+</codePointSet>
+        

File src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/advPSSym.xml

     <codePoint unicode="U+00A9" name="ordfeminine"    decimal="170" note="COPYRIGHT"/> 
     
     <codePoint unicode="U+00B0" name="C176" note="DEGREE SIGN"/> 
+    <codePoint unicode="U+221D" name="C181" unicodeName="PROPORTIONAL TO"/> 
     <codePoint unicode="U+00AE" name="C210" note="REGISTERED SIGN"/> 
     <codePoint unicode="U+00A9" name="C211" note="COPYRIGHT SIGN"/> 
     <codePoint unicode="U+2122" name="C212" note="TRADE MARK SIGN"/> 
     <codePoint unicode="U+2038" name="C217" note="CARET"/> 
+    <codePoint unicode="U+2122" name="C228" note="TRADE MARK SIGN"/> 
 
     <codePointSet idRef="unicode256" href="../unicode/unicode256.xml"/> 
     

File src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/advmathsymb.xml

+<!--
+
+    Copyright (C) 2012 pm286 <peter.murray.rust@googlemail.com>
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<codePointSet encoding="AdvMathSymb" id="advmathsymb" resource="org/xmlcml/pdf2svg/codepoints/misc">
+
+    <codePoint unicode="U+002A" name="C3"     note="ASTERISK"/>  
+    
+</codePointSet>
+        

File src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/advmt_mi.xml

     <codePoint unicode="U+03BB" name="C21" note="GREEK SMALL LETTER LAMBDA" />
     <codePoint unicode="U+03BC" name="C22" note="GREEK SMALL LETTER MU" />
     <codePoint unicode="U+007E" name="C24" note="TILDE" />
+    <codePoint unicode="U+03C1" name="C26" note="GREEK SMALL LETTER RHO" />
     <codePoint unicode="U+03C3" name="C27" note="GREEK SMALL LETTER SIGMA" />
     <codePoint unicode="U+03C7" name="C31" note="GREEK SMALL LETTER CHI" />
     <codePoint unicode="U+00AE" name="C213" note="REGISTERED SIGN" />

File src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/advmt_rm.xml

     <codePointSet idRef="unicode" href="../unicode/unicode.xml"/>
     
     <codePoint unicode="U+03A6" name="C8" note="GREEK CAPITAL LETTER PHI"/>  
-    
+    <codePoint unicode="U+00AF" name="C22" note="MACRON" />    
 </codePointSet>
         

File src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/advmt_sy.xml

     
     <codePoint unicode="U+2212" name="C0" note="MINUS SIGN"/>  
     <codePoint unicode="U+00D7" name="C2" note="MULTIPLICATION SIGN"/>  
+    <codePoint unicode="U+002A" name="C3" note="ASTERISK"/>  
     <codePoint unicode="U+00B0" name="C14" note="DEGREE SIGN"/>  
     <codePoint unicode="U+003E" name="C14" note="GREATER-THAN SIGN"/>  
+    <codePoint unicode="U+2022" name="C15" note="BULLET"/>  
     <codePoint unicode="U+2264" name="C20" note="LESS-THAN OR EQUAL TO"/>  
     <codePoint unicode="U+2265" name="C21" note="GREATER-THAN OR EQUAL TO"/>  
     <codePoint unicode="U+007E" name="C24" note="TILDE"/>  

File src/main/resources/org/xmlcml/pdf2svg/codepoints/misc/msam10.xml

 -->
 <codePointSet encoding="MSAM" id="msam10" resource="org/xmlcml/pdf2svg/codepoints/misc">
 
+<!--	<codePoint unicode="U+0000" name="lessorequalslant"    note="LESS-THAN OR SLANTED EQUAL TO"/>  CHECK UNICODE -->
 	<codePoint unicode="U+2A7E" name="greaterorequalslant" note="GREATER-THAN OR SLANTED EQUAL TO"/> 
-	<codePoint unicode="U+2272" name="lessorsimilar" decimal="46" note="LESS-THAN OR EQUIVALENT TO"/> 
-	<codePoint unicode="U+2273" name="greaterorsimilar" decimal="38" note="GREATER-THAN OR EQUIVALENT TO"/> 
+	<codePoint unicode="U+2273" name="greaterorsimilar"    note="GREATER-THAN OR EQUIVALENT TO" decimal="38" /> 
+	<codePoint unicode="U+2272" name="lessorsimilar"       note="LESS-THAN OR EQUIVALENT TO" decimal="46" /> 
+	<codePoint unicode="U+25A0" name="square"              note="BLACK SQUARE"/> 
 	
 	<!-- unicode --> 
     <codePointSet idRef="unicode256" href="../unicode/unicode256.xml"/>

File src/main/resources/org/xmlcml/pdf2svg/codepoints/symbol/symbol.xml

     <codePoint name="C210" unicode="U+00AE" note="REGISTERED SIGN"/> 
     <codePoint name="C211" unicode="U+00A9" note="COPYRIGHT SIGN"/> 
     <codePoint name="C212" unicode="U+2122" note="TRADE-MARK SIGN"/> 
+<!-- maybe the same? -->    
+    <codePoint name="c174" unicode="U+2192" note="RIGHTWARDS ARROW"/> 
+    <codePoint name="c176" unicode="U+25E6" note="WHITE BULLET"/> 
+    <codePoint name="c183" unicode="U+2022" note="BULLET"/> 
+    <codePoint name="c215" unicode="U+015C" note="LATIN CAPITAL LETTER S WITH CIRCUMFLEX"/>
     
     
 </codePointSet>

File src/main/resources/org/xmlcml/pdf2svg/fontFamilySets/nonStandardFontFamilySet.xml

     <fontFamily name="Universal-NewswithCommPi"
         codePointSet="org/xmlcml/pdf2svg/codepoints/defacto/commpi.xml"/>
 
+<!--  =================AstrologyPi================================= -->    
+    <fontFamily name="LAstrologyPi-One"
+        codePointSet="org/xmlcml/pdf2svg/codepoints/defacto/astrologypi1.xml"/>
 
 
 <!--  ================== MATH TIME =================== -->    
 
 <!--  ==================Adv misc =========================== -->    
     
+    
+    <fontFamily name="AdvMathSymb" symbol="true" 
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/advmathsymb.xml"/>
+    
     <fontFamily name="AdvP4C4E74" symbol="true" 
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/AdvP4C4E74.xml"/>
     <fontFamily name="AdvP4C4E59" symbol="true" 
 <!-- uses Cddd codepoints -->
     <fontFamily name="AdvMacMthSyN" 
         codePointSet="org/xmlcml/pdf2svg/codepoints/misc/AdvMacMthSyN.xml"/>
+    <fontFamily name="AdvMacms"  
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/AdvMacMthSyN.xml"/>
 
     <!-- ligatures -->
     <fontFamily name="AdvOT863180fb" symbol="false" />
     
     <fontFamily name="AdvMyriad_R" />
 
+<!-- ====================== Other ==============================  -->
+    <fontFamily name="AdvSPSASORT" symbol="true" 
+        codePointSet="org/xmlcml/pdf2svg/codepoints/misc/AdvSPSASORT.xml"/>
+
     
 <!-- ====================== Other ==============================  -->
     <fontFamily name="TimesNRMTBold" />

File src/test/java/org/xmlcml/pdf2svg/CharactersInPDFTest.java

+package org.xmlcml.pdf2svg;
+
+import org.junit.Test;
+
+public class CharactersInPDFTest {
+
+	@Test
+	public void testChars() {
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/test", "../pdfs/test");
+	}
+
+}

File src/test/java/org/xmlcml/pdf2svg/RegressionTest.java

+package org.xmlcml.pdf2svg;
+
+import java.io.File;
+
+import junit.framework.Assert;
+import nu.xom.Element;
+
+import org.junit.Ignore;
+import org.junit.Test;
+import org.xmlcml.cml.base.CMLUtil;
+import org.xmlcml.cml.testutil.JumboTestUtils;
+
+/** Regression tests for complete conversions: each generated page SVG is
+ *  compared (with a float tolerance) against a reference SVG kept under
+ *  src/test/resources/regression.
+ *  
+ * @author pm286
+ *
+ */
+public class RegressionTest {
+
+	/** Tolerance used when comparing floating-point values in the SVG. */
+	private static final double EPS = 0.00001;
+
+	@Test
+	@Ignore // FIXME ASAP symbol fails on p12
+	public void testBMCRegression() {
+		// same procedure as every other paper: delegate instead of duplicating the loop
+		testMultipage("src/test/resources/regression/BMCBioinfLin2009.pdf", "target/regression/", "src/test/resources/regression/", "bmcbioinflin2009", 15);
+	}
+	
+	@Test
+	public void testBMCRegression313() {
+		testMultipage("src/test/resources/regression/313.pdf", "target/regression/", "src/test/resources/regression/", "313", 8);
+	}
+
+	/** Converts a PDF and compares every generated page with its reference SVG.
+	 * 
+	 * @param pdffile   path of the PDF to convert
+	 * @param outdir    output directory (with trailing separator) passed to the converter
+	 * @param refroot   directory (with trailing separator) holding the reference SVGs
+	 * @param paperroot file-name stem shared by generated and reference pages
+	 * @param npage     number of pages expected (pages are numbered from 1)
+	 */
+	private void testMultipage(String pdffile, String outdir, String refroot, String paperroot, int npage) {
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", outdir, pdffile);
+		for (int i = 1; i <= npage; i++) {
+			System.out.print("t"+i+"=");
+			String pageName = paperroot+"-page"+i+".svg";
+			Element test = parseExistingFile(new File(outdir+pageName));
+			Element ref = parseExistingFile(new File(refroot+pageName));
+			JumboTestUtils.assertEqualsIncludingFloat("page"+i, ref, test, true, EPS);
+		}
+	}
+
+	/** Asserts that the file exists, then parses it and returns its root element. */
+	private static Element parseExistingFile(File file) {
+		Assert.assertTrue(file.toString(), file.exists());
+		return CMLUtil.parseQuietlyToDocument(file).getRootElement();
+	}
+}

File src/test/java/org/xmlcml/pdf2svg/SamplesForTest.java

 //      new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/npg/", "../pdfs/npg/");
 		// OK
 		
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BMCBioinformatics", "../pdfs/pdfsByJournal/BMCBioinformatics");
-		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/test", "../pdfs/test");
+//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/test", "../pdfs/test");
+
 		
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/ActaPalaeontologicaPolonica", "../pdfs/pdfsByJournal/ActaPalaeontologicaPolonica");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/ActaZoologica", "../pdfs/pdfsByJournal/ActaZoologica");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AmericanJournalBotany", "../pdfs/pdfsByJournal/AmericanJournalBotany");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AmericanMuseumNovitates", "../pdfs/pdfsByJournal/AmericanMuseumNovitates");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AmericanZoologist", "../pdfs/pdfsByJournal/AmericanZoologist");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AnnualReviewEcologySystematics", "../pdfs/pdfsByJournal/AnnualReviewEcologySystematics");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/AppliedMathematicsLetters", "../pdfs/pdfsByJournal/AppliedMathematicsLetters");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiochemicalSystematicsEcology", "../pdfs/pdfsByJournal/BiochemicalSystematicsEcology");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Bioinformatics", "../pdfs/pdfsByJournal/Bioinformatics");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiologicalJournalLinneanSociety", "../pdfs/pdfsByJournal/BiologicalJournalLinneanSociety");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiologicalReviews", "../pdfs/pdfsByJournal/BiologicalReviews");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiologyLetters", "../pdfs/pdfsByJournal/BiologyLetters");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiologicalReviews", "../pdfs/pdfsByJournal/BiologicalReviews");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BiologyPhilosophy", "../pdfs/pdfsByJournal/BiologyPhilosophy");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/BulletinAmericanMuseumNaturalHistory", "../pdfs/pdfsByJournal/BulletinAmericanMuseumNaturalHistory");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/CanadianJournalEarthSciences", "../pdfs/pdfsByJournal/CanadianJournalEarthSciences");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Cladistics", "../pdfs/pdfsByJournal/Cladistics");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Comptes Rendus Palevol", "../pdfs/pdfsByJournal/Comptes Rendus Palevol");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Copeia", "../pdfs/pdfsByJournal/Copeia");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Cretaceous Research", "../pdfs/pdfsByJournal/CretaceousResearch");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/EarthEnvironmentalScienceTransactionsRoyalSocietyEdinburgh", "../pdfs/pdfsByJournal/EarthEnvironmentalScienceTransactionsRoyalSocietyEdinburgh");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Evolution", "../pdfs/pdfsByJournal/Evolution");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/EvolutionaryBiology", "../pdfs/pdfsByJournal/EvolutionaryBiology");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Exs", "../pdfs/pdfsByJournal/Exs");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/FungalBiology", "../pdfs/pdfsByJournal/FungalBiology");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Geobios", "../pdfs/pdfsByJournal/Geobios");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/Geodiversitas", "../pdfs/pdfsByJournal/Geodiversitas");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/HerpetologicalMonographs", "../pdfs/pdfsByJournal/HerpetologicalMonographs");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/IchthyologicalResearch", "../pdfs/pdfsByJournal/IchthyologicalResearch");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/InvertebrateBiology", "../pdfs/pdfsByJournal/InvertebrateBiology");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/JournalBiogeography", "../pdfs/pdfsByJournal/JournalBiogeography");
-		
-		
-		
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/JournalEvolutionaryBiology", "../pdfs/pdfsByJournal/JournalEvolutionaryBiology");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/JournalHumanEvolution", "../pdfs/pdfsByJournal/JournalHumanEvolution");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/JournalMammalianEvolution", "../pdfs/pdfsByJournal/JournalMammalianEvolution");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/JournalMolluscanStudies", "../pdfs/pdfsByJournal/JournalMolluscanStudies");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/JournalPaleontology", "../pdfs/pdfsByJournal/JournalPaleontology");
-//		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/pdfsByJournal/JournalSystematicPalaeontology", "../pdfs/pdfsByJournal/JournalSystematicPalaeontology");
-			
+//		mainAB();
+//		mainCJ();
+//		mainMZ();
+
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/minorJournals/abc", "../pdfs/minorJournals/abc");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/minorJournals/def", "../pdfs/minorJournals/def");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/minorJournals/ghijk", "../pdfs/minorJournals/ghijk");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/minorJournals/lmnop", "../pdfs/minorJournals/lmnop");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/minorJournals/rst", "../pdfs/minorJournals/rst");
+		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/minorJournals/uvwxyz", "../pdfs/minorJournals/uvwxyz");
+
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/plosone/", "../pdfs/plosone/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/taylorfrancis/", "../pdfs/taylorfrancis/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/psyc/", "../pdfs/psyc/");
 //		new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir", "target/word/", "../pdfs/word/");
 	}
 
+	/** Runs the converter once per journal directory (M to Z) under ../pdfs/mainJournals. */
+	private static void mainMZ() {
+		String[] journals = {
+			"MolecularBiologyEvolution",
+			"Nature",
+			"Naturwissenschaften",
+			"Palaeontology",
+			"PaläontologischeZeitschrift",
+			"PhilosophicalTransB",
+			"PlantSystematicsEvolution",
+			"PLoSBiology",
+			"PNAS",
+			"PNAS1",
+			"ProcRoySocB",
+			"Science",
+			"SystBiol",
+			"SystematicEntomology",
+			"SystematicZoology",
+			"ZoologicaScripta",
+			"ZoologicalJournalLinneanSociety",
+			"Zootaxa",
+		};
+		for (String journal : journals) {
+			// a fresh converter per directory, exactly as in the unrolled form
+			new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir",
+					"target/mainJournals/" + journal, "../pdfs/mainJournals/" + journal);
+		}
+	}
+
+	/** Runs the converter once per journal directory (A to B) under ../pdfs/mainJournals.
+	 *  Each journal is listed exactly once; the output directory mirrors the input name.
+	 */
+	private static void mainAB() {
+		String[] journals = {
+			"ActaPalaeontologicaPolonica",
+			"ActaZoologica",
+			"AmericanJournalBotany",
+			"AmericanMuseumNovitates",
+			"AmericanZoologist",
+			"AnnualReviewEcologySystematics",
+			"AppliedMathematicsLetters",
+			"BiochemicalSystematicsEcology",
+			"Bioinformatics",
+			"BiologicalJournalLinneanSociety",
+			"BiologicalReviews",
+			"BiologyLetters",
+			"BiologyPhilosophy",
+			"BMCBioinformatics",
+			"BulletinAmericanMuseumNaturalHistory",
+		};
+		for (String journal : journals) {
+			// a fresh converter per directory
+			new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir",
+					"target/mainJournals/" + journal, "../pdfs/mainJournals/" + journal);
+		}
+	}
+
+	/** Runs the converter once per journal directory (C to J) under ../pdfs/mainJournals.
+	 *  The output directory always mirrors the input directory name.
+	 */
+	private static void mainCJ() {
+		String[] journals = {
+			"CanadianJournalEarthSciences",
+			"Cladistics",
+			"Copeia",
+			"CretaceousResearch",
+			"EarthEnvironmentalScienceTransactionsRoyalSocietyEdinburgh",
+			"Evolution",
+			"EvolutionaryBiology",
+			"Exs",
+			"FungalBiology",
+			"Geobios",
+			"Geodiversitas",
+			"HerpetologicalMonographs",
+			"IchthyologicalResearch",
+			"InvertebrateBiology",
+			"JournalBiogeography",
+			"JournalEvolutionaryBiology",
+			"JournalHumanEvolution",
+			"JournalMammalianEvolution",
+			"JournalMolluscanStudies",
+			"JournalPaleontology",
+			"JournalSystematicPalaeontology",
+			"JournalVertebratePaleontology",
+		};
+		for (String journal : journals) {
+			// a fresh converter per directory
+			new PDF2SVGConverter().run("-logger", "-infofiles", "-logglyphs", "-outdir",
+					"target/mainJournals/" + journal, "../pdfs/mainJournals/" + journal);
+		}
+	}
+
 }

File src/test/java/org/xmlcml/pdf2svg/XMLLoggerTest.java

 
 import org.junit.Assert;
 import org.junit.Test;
-import org.xmlcml.pdf2svg.util.XMLLogger;
+import org.xmlcml.pdf2svg.log.XMLLogger;
 
 public class XMLLoggerTest {
 

File src/test/resources/regression/313-page1.svg

Added
New image

File src/test/resources/regression/313-page2.svg

Added
New image