Commits

petermr  committed 896cb47 Draft

added axis analysis

  • Participants
  • Parent commits 1c97c1b

Comments (0)

Files changed (38)

File src/main/java/org/xmlcml/graphics/control/CommandElement.java

 import org.xmlcml.graphics.control.document.FontManagerElement;
 import org.xmlcml.graphics.control.document.PageIteratorElement;
 import org.xmlcml.graphics.control.document.PageSelectorElement;
+import org.xmlcml.graphics.control.page.AxisAnalyzerElement;
 import org.xmlcml.graphics.control.page.BoxDrawerElement;
 import org.xmlcml.graphics.control.page.BoxProcessorElement;
 import org.xmlcml.graphics.control.page.ElementDeleterElement;
 		} else if (tag.equals(DocumentWriterElement.TAG)) {
 			newElement = new DocumentWriterElement();
 			
+		} else if (tag.equals(AxisAnalyzerElement.TAG)) {
+			newElement = new AxisAnalyzerElement();
 		} else if (tag.equals(BoxDrawerElement.TAG)) {
 			newElement = new BoxDrawerElement();
 		} else if (tag.equals(BoxProcessorElement.TAG)) {

File src/main/java/org/xmlcml/graphics/control/document/DocumentActionElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(ACTION);
+		ATTNAMES.add(ACTION);  // is this needed?
 		ATTNAMES.add(FILE);
 		ATTNAMES.add(FILENAME);
 	}

File src/main/java/org/xmlcml/graphics/control/document/DocumentBreakElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
+//		ATTNAMES.add(PageActionElement.ACTION);
 	}
 
 	/** constructor

File src/main/java/org/xmlcml/graphics/control/document/DocumentDebuggerElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
+//		ATTNAMES.add(PageActionElement.ACTION);
 	}
 
 	/** constructor

File src/main/java/org/xmlcml/graphics/control/document/DocumentPageRunnerAction.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(AbstractActionElement.ACTION);
+//		ATTNAMES.add(AbstractActionElement.ACTION);
 		ATTNAMES.add(AbstractActionElement.FILENAME);
 	}
 	protected List<String> getAttributeNames() {

File src/main/java/org/xmlcml/graphics/control/document/DocumentPageSelectorAction.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(AbstractActionElement.ACTION);
+//		ATTNAMES.add(AbstractActionElement.ACTION);
 		ATTNAMES.add(PageActionElement.PAGE_RANGE);
 	}
 	protected List<String> getAttributeNames() {

File src/main/java/org/xmlcml/graphics/control/document/DocumentReaderElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
+//		ATTNAMES.add(PageActionElement.ACTION);
 		ATTNAMES.add(PageActionElement.FILENAME);
 		ATTNAMES.add(PageActionElement.FORMAT);
 //		ATTNAMES.add(PageActionElement.SKIP);

File src/main/java/org/xmlcml/graphics/control/document/DocumentWriterElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
+//		ATTNAMES.add(PageActionElement.ACTION);
 		ATTNAMES.add(PageActionElement.FILENAME);
 		ATTNAMES.add(PageActionElement.FORMAT);
 		ATTNAMES.add(PageActionElement.REGEX);

File src/main/java/org/xmlcml/graphics/control/page/Axis.java

+package org.xmlcml.graphics.control.page;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import nu.xom.Attribute;
+import nu.xom.Element;
+
+import org.apache.log4j.Logger;
+import org.xmlcml.cml.base.CMLConstants;
+import org.xmlcml.cml.base.CMLUtil;
+import org.xmlcml.cml.element.CMLArray;
+import org.xmlcml.cml.element.CMLScalar;
+import org.xmlcml.cml.interfacex.HasDataType;
+import org.xmlcml.euclid.Real;
+import org.xmlcml.euclid.Real2;
+import org.xmlcml.euclid.Real2Range;
+import org.xmlcml.euclid.RealArray;
+import org.xmlcml.euclid.Transform2;
+import org.xmlcml.graphics.paths.ComplexLine;
+import org.xmlcml.graphics.paths.ComplexLine.CombType;
+import org.xmlcml.graphics.paths.ComplexLine.LineOrientation;
+import org.xmlcml.graphics.paths.Joint;
+import org.xmlcml.graphics.pdf2svg.BoundingBoxManager;
+import org.xmlcml.graphics.pdf2svg.BoundingBoxManager.BoxEdge;
+import org.xmlcml.graphics.svg.SVGElement;
+import org.xmlcml.graphics.svg.SVGLine;
+import org.xmlcml.graphics.svg.SVGText;
+import org.xmlcml.graphics.svg.SVGUtil;
+import org.xmlcml.graphics.text.ConstantYCharacterList;
+import org.xmlcml.graphics.text.TextAnalyzer;
+import org.xmlcml.graphics.text.WordSequence;
+
+public class Axis {
+	private final static Logger LOG = Logger.getLogger(Axis.class);
+
+	public static final String AXIS = "axis";
+	
+	private double eps = 0.001;
+	private ComplexLine complexLine;
+	private Real2 axisWorldCoordStart = null;
+	private Real2 axisWorldCoordEnd = null;
+	private String axisLabel = null;
+	private String axisUnits = null;
+	private Transform2 application2Pixel = null;
+	private Transform2 pixel2Application = null;
+	private CombType combType;
+	private List<SVGElement> texts;
+	private double boxThickness;
+	private double boxLengthExtension;
+	private AxisAnalyzer axisAnalyzer;
+	private TextAnalyzer textAnalyzer;
+	private String id;
+
+	private double minTickLengthPixels;
+	private double maxTickLengthPixels;
+	private List<Joint> majorTickJointList;
+	private List<Joint> minorTickJointList;
+	private int majorTickSpacingPixelsToMinorTick;
+	private Double majorTickSpacingInPixels = null;
+	private Double minorTickSpacingInPixels = null;
+	private CMLArray majorTickMarkValues;
+
+	private CMLScalar scalar;
+
+
+	/** Creates an axis owned by the given analyzer.
+	 * 
+	 * @param axisAnalyzer analyzer that owns this axis; it is later asked for
+	 *        its page analyzer and its joint tolerance
+	 */
+	public Axis(AxisAnalyzer axisAnalyzer) {
+		this.axisAnalyzer = axisAnalyzer;
+	}
+
+	/** @return the comb type stored by {@link #setCombType}, or null if none was set */
+	public CombType getCombType() {
+		return combType;
+	}
+
+	/** @param combType comb type to store for this axis */
+	public void setCombType(CombType combType) {
+		this.combType = combType;
+	}
+
+	/** @return spacing between major ticks in pixels; null until set or computed */
+	public Double getMajorTickPixelSpacing() {
+		return majorTickSpacingInPixels;
+	}
+
+	/** @param majorTickPixelSpacing spacing between major ticks in pixels */
+	public void setMajorTickPixelSpacing(Double majorTickPixelSpacing) {
+		this.majorTickSpacingInPixels = majorTickPixelSpacing;
+	}
+
+	/** @return spacing between minor ticks in pixels; null until set or computed */
+	public Double getMinorTickPixelSpacing() {
+		return minorTickSpacingInPixels;
+	}
+
+	/** @param minorTickPixelSpacing spacing between minor ticks in pixels */
+	public void setMinorTickPixelSpacing(Double minorTickPixelSpacing) {
+		this.minorTickSpacingInPixels = minorTickPixelSpacing;
+	}
+
+	/** Returns the start of the axis, initialising it on first use from
+	 * point 0 of the complex line's backbone.
+	 * 
+	 * Requires the complex line to have been set.
+	 * 
+	 * @return start coordinate
+	 */
+	public Real2 getAxisWorldCoordStart() {
+		if (axisWorldCoordStart == null) {
+			axisWorldCoordStart = complexLine.getBackbone().getXY(0);
+		}
+		return axisWorldCoordStart;
+	}
+
+	/** Returns the end of the axis, initialising it on first use from
+	 * point 1 of the complex line's backbone.
+	 * 
+	 * Requires the complex line to have been set.
+	 * 
+	 * @return end coordinate
+	 */
+	public Real2 getAxisWorldCoordEnd() {
+		if (axisWorldCoordEnd == null) {
+			axisWorldCoordEnd = complexLine.getBackbone().getXY(1);
+		}
+		return axisWorldCoordEnd;
+	}
+
+	/** @return the axis label, or null if none was set */
+	public String getAxisLabel() {
+		return axisLabel;
+	}
+
+	/** @param axisLabel label to store for this axis */
+	public void setAxisLabel(String axisLabel) {
+		this.axisLabel = axisLabel;
+	}
+
+	/** @return the axis units, or null if none were set */
+	public String getAxisUnits() {
+		return axisUnits;
+	}
+
+	/** @param axisUnits units to store for this axis */
+	public void setAxisUnits(String axisUnits) {
+		this.axisUnits = axisUnits;
+	}
+
+	/** @return the complex line (backbone plus joints) this axis was built from */
+	public ComplexLine getComplexLine() {
+		return complexLine;
+	}
+
+	/** @return the joint list produced by the last call to trimJointList, or null before that */
+	public List<Joint> getMinorTickJointList() {
+		return minorTickJointList;
+	}
+
+	/** Stores the complex line for this axis.
+	 * 
+	 * Note that normalizeDirection(eps) is called on the backbone of the
+	 * argument before it is stored, so the passed-in object is modified.
+	 * 
+	 * @param complexLine must not be null
+	 */
+	public void setComplexLine(ComplexLine complexLine) {
+		complexLine.getBackbone().normalizeDirection(eps);
+		this.complexLine = complexLine;
+	}
+	
+	/** Keeps the joints whose length lies within the given limits (inclusive).
+	 * 
+	 * The result replaces the stored minor tick joint list.
+	 * 
+	 * @param jointList joints to filter
+	 * @param minTickLength smallest accepted joint length
+	 * @param maxTickLength largest accepted joint length
+	 * @return the new minor tick joint list
+	 */
+	List<Joint> trimJointList(List<Joint> jointList, double minTickLength, double maxTickLength) {
+		List<Joint> keptJoints = new ArrayList<Joint>();
+		minorTickJointList = keptJoints;
+		for (Joint candidate : jointList) {
+			double length = candidate.getLength();
+			boolean withinRange = length >= minTickLength && length <= maxTickLength;
+			if (!withinRange) {
+				continue;
+			}
+			keptJoints.add(candidate);
+		}
+		return minorTickJointList;
+	}
+
+
+	/** Builds a short multi-line summary of this axis for logging.
+	 * 
+	 * Requires the minor tick joint list and the complex line to be set.
+	 * 
+	 * @param msg heading placed on the first line
+	 * @return the summary text
+	 */
+	public String debug(String msg) {
+		StringBuilder summary = new StringBuilder();
+		summary.append(msg).append("\n");
+		summary.append(" TrimmedJoints: ").append(minorTickJointList.size());
+		summary.append(" Spacing: ").append(minorTickSpacingInPixels);
+		summary.append(" Orient: ").append(complexLine.getBackboneOrientation()).append("\n");
+		summary.append(" start: ").append(complexLine.getBackbone().getXY(0));
+		summary.append(" end ").append(complexLine.getBackbone().getXY(1)).append("\n");
+		return summary.toString();
+	}
+	
+	/** Returns the orientation reported by the complex line for its backbone.
+	 * 
+	 * Requires the complex line to have been set.
+	 * 
+	 * @return backbone orientation
+	 */
+	public LineOrientation getOrientation() {
+		return complexLine.getBackboneOrientation();
+	}
+
+	/** Finds the texts lying in a box alongside the axis backbone and tries to
+	 * interpret the unrotated ones as scale values.
+	 * 
+	 * only works for correctly oriented text
+	 * may have to rotate for other text
+	 * 
+	 * Debug copies of the container are written to target/axis*.svg.
+	 * If no scale values or no major ticks were found the mapping of values
+	 * to ticks is skipped (previously this threw a NullPointerException).
+	 * 
+	 * @param container element whose svg:text descendants are searched
+	 * @param boxThickness extent of the search box perpendicular to a horizontal axis
+	 * @param boxLengthExtension amount by which the search box extends beyond the axis
+	 */
+	public void processScaleValuesAndTitles(SVGElement container, double boxThickness, double boxLengthExtension) {
+		CMLUtil.outputQuietly(container, new File("target/axis0.svg"), 1);
+		this.boxThickness = boxThickness;
+		this.boxLengthExtension = boxLengthExtension;
+		texts = SVGUtil.getQuerySVGElements(container, ".//svg:text");
+		LOG.debug("TEXTS "+texts.size());
+		Real2Range textBox = getTextBox(complexLine.getBackbone());
+		BoxEdge edge = (LineOrientation.HORIZONTAL.equals(getOrientation())) ? BoxEdge.XMIN : BoxEdge.YMIN;
+		List<SVGElement> sortedTexts = BoundingBoxManager.getElementsSortedByEdge(texts, edge);
+		List<SVGText> boundedTexts = getTextsInBox(textBox, sortedTexts); 
+		LOG.debug("BB "+boundedTexts.size());
+		List<SVGText> horizontalTexts = getTexts(boundedTexts, LineOrientation.HORIZONTAL);
+		for (SVGText horizontalText : horizontalTexts) {
+			LOG.trace("HOR "+horizontalText.getValue());
+		}
+		CMLUtil.outputQuietly(container, new File("target/axis1HorizontalText.svg"), 1);
+		processScaleValuesAndScaleTitle(horizontalTexts);
+		if (majorTickMarkValues == null || majorTickJointList == null) {
+			// the last text line may not have produced an array, or no ticks were analyzed
+			LOG.debug("ARRAY: no major tick values or no major ticks; cannot map values to ticks");
+		} else if (majorTickMarkValues.getSize() == majorTickJointList.size()) {
+			mapScaleNumbersToTicks();
+			transformArrayFromPixelsToScale();
+		} else {
+			LOG.debug("ARRAY: "+majorTickMarkValues.getSize()+ " != "+majorTickJointList.size());
+		}
+		// textAnalyzer is never null here: processScaleValuesAndScaleTitle creates it
+		List<WordSequence> wordSequenceList = textAnalyzer.getWordSequenceList();
+		if (wordSequenceList == null) {
+			LOG.debug("NULL WordSequenceList");
+		} else {
+			for (WordSequence wordSequence : wordSequenceList) {
+				LOG.debug("WS "+wordSequence);
+			}
+		}
+		CMLUtil.outputQuietly(container, new File("target/axis2WordSequence.svg"), 1);
+		
+		List<SVGText> rotatedTexts = getTexts(boundedTexts, LineOrientation.VERTICAL);
+		for (SVGText rotatedText : rotatedTexts) {
+			LOG.trace("ROT "+rotatedText.getValue()+" .. "+
+		       rotatedText.getTransform().getAngleOfRotation().getDegrees());
+		}
+	}
+
+	/** Placeholder: conversion of pixel coordinates to scale values is not implemented yet.
+	 */
+	private void transformArrayFromPixelsToScale() {
+	}
+
+	/** Computes and logs the mean difference between successive major tick values.
+	 * 
+	 * The result is only logged. At least two values are required;
+	 * with fewer the spacing is reported as null instead of indexing
+	 * outside the array or dividing by zero.
+	 */
+	private void mapScaleNumbersToTicks() {
+		int size = majorTickMarkValues.getSize();
+		LOG.debug("FOUND AXIS: "+size);
+		Double arraySpacingInPixels = null;
+		if (size >= 2) {
+			if (CMLConstants.XSD_INTEGER.equals(majorTickMarkValues.getDataType())) {
+				arraySpacingInPixels = ((double) majorTickMarkValues.getInts()[size-1] - (double) majorTickMarkValues.getInts()[0])  / (double )(size - 1);
+			} else if (CMLConstants.XSD_DOUBLE.equals(majorTickMarkValues.getDataType())) {
+				arraySpacingInPixels = ((double) majorTickMarkValues.getDoubles()[size-1] - (double) majorTickMarkValues.getDoubles()[0])  / (double )(size - 1);
+			}
+		}
+		LOG.debug("SCALE/TICK "+arraySpacingInPixels);
+	}
+
+	/** Runs the text analyzer over the parent of the first text and converts
+	 * each resulting line to a CML array.
+	 * 
+	 * The array from the last line processed is left in majorTickMarkValues
+	 * (it may be null). Arrays of size 1 are added next to the word sequence
+	 * as a scalar, larger arrays as the array itself.
+	 * The text analyzer is always created, even when there is nothing to analyze.
+	 * 
+	 * @param texts unrotated texts near the axis; if null or empty nothing is analyzed
+	 */
+	private void processScaleValuesAndScaleTitle(List<SVGText> texts) {
+		ensureTextAnalyzer();
+		textAnalyzer.setCreateTSpans(false);
+		textAnalyzer.setCreateHTML(false);
+		if (texts == null || texts.isEmpty()) {
+			// previously texts.get(0) threw IndexOutOfBoundsException here
+			LOG.debug("no texts to analyze for axis "+id);
+			return;
+		}
+		List<SVGElement> elements = new ArrayList<SVGElement>();
+		SVGElement parent = (SVGElement)texts.get(0).getParent();
+		elements.add(parent);
+		textAnalyzer.analyzeSingleWordsOrLines(elements);
+		List<ConstantYCharacterList> sortedLineList = textAnalyzer.getYSortedLineList();
+		LOG.debug("SORTED LINES: "+sortedLineList.size());
+		for (ConstantYCharacterList sortedLine : sortedLineList) {
+			LOG.debug("LINE: "+sortedLine);
+			WordSequence wordSequence = sortedLine.getWordSequence();
+			String s = wordSequence.getStringValue();
+			LOG.debug("SLWS "+s+" "+wordSequence.getId());
+			majorTickMarkValues = wordSequence.createCMLArray();
+			if (s.trim().length() > 0 && majorTickMarkValues != null) {
+				int nValues = majorTickMarkValues.getSize();
+				if (nValues == 1) {
+					scalar = majorTickMarkValues.getElementAt(0);
+					wordSequence.addAsSibling(scalar);
+				} else if (nValues > 1) {
+					wordSequence.addAsSibling(majorTickMarkValues);
+				}
+				// nValues == 0: nothing to add
+			}
+		}
+	}
+
+	/** @return the text analyzer, or null if ensureTextAnalyzer has not yet created one */
+	public TextAnalyzer getTextAnalyzer() {
+		return textAnalyzer;
+	}
+
+	/** Creates the text analyzer on first use, passing it the owning
+	 * axis analyzer's page analyzer.
+	 */
+	private void ensureTextAnalyzer() {
+		if (textAnalyzer == null) {
+			textAnalyzer = new TextAnalyzer(axisAnalyzer.getPageAnalyzer());
+		}
+	}
+
+	/** Selects texts by whether they are rotated.
+	 * 
+	 * A text counts as rotated when it has a transform whose rotation angle
+	 * differs from zero by more than eps degrees. Rotated texts are returned
+	 * for VERTICAL, all others for any other orientation.
+	 * 
+	 * @param textList texts to select from
+	 * @param orientation VERTICAL selects rotated texts, anything else unrotated ones
+	 * @return the selected texts in their original order
+	 */
+	private List<SVGText> getTexts(List<SVGText> textList, LineOrientation orientation) {
+		boolean wantRotated = LineOrientation.VERTICAL.equals(orientation);
+		List<SVGText> selected = new ArrayList<SVGText>();
+		for (SVGText candidate : textList) {
+			Transform2 transform = candidate.getTransform();
+			boolean rotated = transform != null
+					&& Math.abs(transform.getAngleOfRotation().getDegrees()) > eps;
+			if (rotated == wantRotated) {
+				selected.add(candidate);
+			}
+		}
+		return selected;
+	}
+
+	/** Returns the elements whose bounding box has a non-null intersection
+	 * with the given box, cast to SVGText.
+	 * 
+	 * crude at present
+	 * 
+	 * @param textBox box to test against
+	 * @param sortedTexts candidate elements; each accepted one must be an SVGText
+	 * @return accepted texts in the order given
+	 */
+	private List<SVGText> getTextsInBox(Real2Range textBox, List<SVGElement> sortedTexts) {
+		List<SVGText> inBox = new ArrayList<SVGText>();
+		for (SVGElement candidate : sortedTexts) {
+			boolean overlaps = candidate.getBoundingBox().intersectionWith(textBox) != null;
+			if (overlaps) {
+				inBox.add((SVGText) candidate);
+			}
+		}
+		return inBox;
+	}
+
+	/** Builds the box in which texts belonging to this axis are searched.
+	 * 
+	 * For a horizontal axis the box spans the backbone lengthened by
+	 * boxLengthExtension at both ends, from the backbone's y to y + boxThickness.
+	 * For a vertical axis (only LHS at present) it spans the backbone's y range,
+	 * from x - boxLengthExtension to x.
+	 * 
+	 * @param backbone the axis line
+	 * @return the search box, or null if the axis is neither horizontal nor vertical
+	 */
+	private Real2Range getTextBox(SVGLine backbone) {
+		LineOrientation axisOrientation = getOrientation();
+		if (LineOrientation.HORIZONTAL.equals(axisOrientation)) {
+			Real2 start = backbone.getXY(0);
+			Real2 end = backbone.getXY(1);
+			Real2 corner0 = new Real2(start.getX() - boxLengthExtension, start.getY());
+			Real2 corner1 = new Real2(end.getX() + boxLengthExtension, start.getY() + boxThickness);
+			return new Real2Range(corner0, corner1);
+		}
+		if (LineOrientation.VERTICAL.equals(axisOrientation)) {
+			Real2 start = backbone.getXY(0);
+			Real2 end = backbone.getXY(1);
+			Real2 corner0 = new Real2(start.getX() - boxLengthExtension, start.getY());
+			Real2 corner1 = new Real2(start.getX(), end.getY());
+			return new Real2Range(corner0, corner1);
+		}
+		return null;
+	}
+
+	/** @return the id of this axis, or null if none was set */
+	public String getId() {
+		return this.id;
+	}
+
+	/** @param string id to store; it is also used as the value of the "axis" attribute added to the axis lines */
+	public void setId(String string) {
+		this.id = string;
+	}
+
+	/** Separates major ticks from minor ticks by length.
+	 * 
+	 * Records the shortest and longest tick length. If longest/shortest is below
+	 * tickEpsRatio all ticks are treated as the same size and the minor tick list
+	 * itself is returned (the major tick field is not touched in that case).
+	 * Otherwise the ticks longer than the midpoint of shortest and longest become
+	 * the stored major tick list.
+	 * 
+	 * @param tickEpsRatio ratio of longest to shortest tick below which ticks count as equal
+	 * @return the major ticks
+	 */
+	public List<Joint> getMajorTicks(double tickEpsRatio) {
+		RealArray tickLengths = new RealArray();
+		for (Joint tick : minorTickJointList) {
+			tickLengths.addElement(tick.getLength());
+		}
+		minTickLengthPixels = tickLengths.getMin();
+		maxTickLengthPixels = tickLengths.getMax();
+		boolean allSameSize = maxTickLengthPixels / minTickLengthPixels < tickEpsRatio;
+		if (allSameSize) {
+			return minorTickJointList;
+		}
+		double midLength = (minTickLengthPixels + maxTickLengthPixels) / 2.0;
+		List<Joint> longTicks = new ArrayList<Joint>();
+		majorTickJointList = longTicks;
+		for (Joint tick : minorTickJointList) {
+			if (tick.getLength() > midLength) {
+				longTicks.add(tick);
+			}
+		}
+		return majorTickJointList;
+	}
+
+	/** Tags the axis lines with this axis's id and works out tick spacings.
+	 * 
+	 * Adds the "axis" attribute to the backbone of the given complex line and to
+	 * the line of every minor tick joint, computes minor and major tick spacing,
+	 * and stores the rounded ratio of major to minor spacing.
+	 * 
+	 * @param complexLine line whose backbone is tagged
+	 * @throws RuntimeException if the spacing ratio is more than 0.1 away from an integer
+	 */
+	void analyzeMajorMinorTicks(ComplexLine complexLine) {
+		String axisId = getId();
+		addAxisAttribute(complexLine.getBackbone(), axisId);
+		for (Joint tick : getMinorTickJointList()) {
+			addAxisAttribute(tick.getLine(), getId());
+		}
+		double tolerance = axisAnalyzer.jointEps;
+		minorTickSpacingInPixels = ComplexLine.calculateInterJointSpacing(minorTickJointList, tolerance);
+		majorTickJointList = getMajorTicks(AxisAnalyzer._MAJOR_MINOR_TICK_RATIO);
+		majorTickSpacingInPixels = ComplexLine.calculateInterJointSpacing(majorTickJointList, axisAnalyzer.jointEps);
+		double spacingRatio = majorTickSpacingInPixels/minorTickSpacingInPixels;
+		majorTickSpacingPixelsToMinorTick = (int) Math.rint(spacingRatio);
+		double deviation = Math.abs(spacingRatio - majorTickSpacingPixelsToMinorTick);
+		if (deviation > 0.1) {
+			throw new RuntimeException("Cannot get integer tick mark ratio: "+spacingRatio + "/" +majorTickSpacingPixelsToMinorTick);
+		}
+		LOG.trace("MAJOR/MINOR "+(majorTickSpacingPixelsToMinorTick)+" majorTicks: "+majorTickJointList.size()+" ");
+		LOG.debug(debug("NEW COMB"));
+	}
+	
+	/** Adds an "axis" attribute with the given value to the element.
+	 * 
+	 * @param element element to tag
+	 * @param id attribute value
+	 */
+	void addAxisAttribute(SVGElement element, String id) {
+		element.addAttribute(new Attribute(AXIS, id));
+	}
+
+	/** Describes the major ticks, the major tick values (if any) and the minor ticks.
+	 * 
+	 * Safe to call at any time: missing or empty tick lists, unknown spacings
+	 * and an empty value array are reported rather than causing an exception.
+	 * 
+	 * @return multi-line description
+	 */
+	@Override
+	public String toString() {
+		String s = "\n";
+		s += tickDetail("major", majorTickSpacingInPixels, majorTickJointList)+"\n";
+		if (majorTickMarkValues != null) {
+			int nValues = majorTickMarkValues.getSize();
+			if (nValues > 0) {
+				s += " "+nValues+" major values "+majorTickMarkValues.getElementAt(0).getValue()+" ... "+(nValues-1)+" gaps ... "+
+				" "+majorTickMarkValues.getElementAt(nValues-1).getValue()+"\n";
+			}
+		}
+		s += tickDetail("minor", minorTickSpacingInPixels, minorTickJointList)+"\n";
+		return s;
+	}
+
+	/** Formats first tick, number of gaps, spacing and last tick of a tick list.
+	 * 
+	 * @param title kind of tick, used in the text
+	 * @param spacing tick spacing in pixels; null is reported as "unknown"
+	 * @param jointList ticks; null or empty is reported as zero ticks
+	 * @return one-line description
+	 */
+	private String tickDetail(String title, Double spacing, List<Joint> jointList) {
+		if (jointList == null || jointList.isEmpty()) {
+			return " 0 "+title+" ticks";
+		}
+		int nTicks = jointList.size();
+		// spacing is a Double field that stays null until the ticks have been analyzed
+		String spacingText = (spacing == null) ? "unknown" : ""+Real.normalize(spacing.doubleValue(), 3);
+		return " "+nTicks+" "+title+" ticks (pixels): "+jointList.get(0).getPoint().format(3)+" ... "+(nTicks-1)+" gaps "+spacingText+"(pixels) ... "+jointList.get(nTicks-1).getPoint().format(3);
+	}
+}

File src/main/java/org/xmlcml/graphics/control/page/AxisAnalyzer.java

+package org.xmlcml.graphics.control.page;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import nu.xom.Attribute;
+
+import org.apache.log4j.Logger;
+import org.xmlcml.graphics.paths.ComplexLine;
+import org.xmlcml.graphics.paths.ComplexLine.CombType;
+import org.xmlcml.graphics.paths.ComplexLine.LineOrientation;
+import org.xmlcml.graphics.pdf2svg.PageAnalyzer;
+import org.xmlcml.graphics.svg.SVGElement;
+import org.xmlcml.graphics.svg.SVGLine;
+
+/** Finds axes (a backbone line carrying perpendicular tick marks) among SVG lines.
+ * 
+ * Lines are split into horizontal and vertical subsets, combined into complex
+ * lines, and each complex line that qualifies as a comb is turned into an Axis.
+ */
+public class AxisAnalyzer {
+
+	/** passed to Axis.getMajorTicks as the longest/shortest tick ratio below which all ticks count as equal */
+	static final double _MAJOR_MINOR_TICK_RATIO = 1.1;
+
+	static final Logger LOG = Logger.getLogger(AxisAnalyzer.class);
+
+	
+	private List<ComplexLine> horizontalComplexLines;
+	private List<SVGLine> horizontalLines;
+	private List<ComplexLine> verticalComplexLines;
+	private List<SVGLine> verticalLines;
+
+	private double maxTickLength = 50.d;
+	private double minTickLength = 1.0d;
+	/** tolerance used by Axis when calculating inter-joint spacing */
+	double jointEps = 0.5;
+	private int minJointCount = 2;
+	private int maxJointCount = 999;
+	private double boxThickness = 100.;
+	private double boxLengthExtension = 50.;
+
+	private List<Axis> horizontalAxisList;
+	private List<Axis> verticalAxisList;
+	private SVGElement container;
+	private PageAnalyzer pageAnalyzer;
+
+	/** 
+	 * @param container element searched for axis texts
+	 * @param pageAnalyzer page analyzer handed on to the text analyzer of each axis
+	 */
+	public AxisAnalyzer(SVGElement container, PageAnalyzer pageAnalyzer) {
+		this.container = container;
+		this.setPageAnalyzer(pageAnalyzer);
+	}
+	
+	/** Analyzes the lines for axes; the work is done on the first call only.
+	 * 
+	 * NOTE(review): vertical axes are currently skipped, so the returned
+	 * vertical list is always empty and the horizontal axes that were found
+	 * are only kept internally. Confirm the intended return value before relying on it.
+	 * 
+	 * @param svgLines lines to analyze
+	 * @param eps tolerance used when classifying and combining lines
+	 * @return the list of vertical axes
+	 */
+	public List<Axis> createAxisList(List<SVGLine> svgLines, double eps) {
+		if (verticalAxisList == null) {
+			verticalAxisList = new ArrayList<Axis>();
+			horizontalAxisList = new ArrayList<Axis>();
+			this.verticalLines = ComplexLine.createSubset(svgLines, LineOrientation.VERTICAL, eps);
+			this.horizontalLines = ComplexLine.createSubset(svgLines, LineOrientation.HORIZONTAL, eps);
+			this.verticalComplexLines = ComplexLine.createComplexLines(this.verticalLines, this.horizontalLines, eps);
+			this.horizontalComplexLines = ComplexLine.createComplexLines(this.horizontalLines, this.verticalLines, eps);
+			LOG.warn("SKIPPED VERTICAL");
+//			createAxisList(verticalComplexLines, verticalAxisList);
+			createAxisList(horizontalComplexLines, horizontalAxisList);
+			
+		}
+		return verticalAxisList;
+	}
+
+	/** Creates an axis for each complex line that qualifies and processes its scale texts.
+	 * 
+	 * @param complexLines candidate lines; null is treated as no candidates
+	 * @param axisList list to which the axes found are added
+	 */
+	private void createAxisList(List<ComplexLine> complexLines, List<Axis> axisList) {
+		if (complexLines != null) {
+			for (ComplexLine complexLine : complexLines) {
+				Axis axis = createAxis(complexLine);
+				if (axis != null) {
+					axisList.add(axis);
+					axis.processScaleValuesAndTitles(container, boxThickness, boxLengthExtension);
+					LOG.debug("AXIS "+axis);
+				}
+			}
+		}
+	}
+
+	/** Tries to interpret a complex line as an axis.
+	 * 
+	 * @param complexLine candidate backbone with joints
+	 * @return the axis, or null if the line (after trimming its joints to the
+	 *         permitted tick lengths) does not form a comb
+	 */
+	private Axis createAxis(ComplexLine complexLine) {
+		Axis axis = new Axis(this);
+		axis.setId("a_"+complexLine.getBackbone().getId());
+		axis.setComplexLine(complexLine);
+		complexLine.setMinMaxJointLength(minTickLength, maxTickLength);
+		// use the configured minimum rather than a second hard-coded 2
+		complexLine.setMinJointCount(minJointCount);
+		complexLine.setRequirePerpendicularJoints(true);
+		CombType combType = complexLine.getCombType();
+		if (combType != null) {
+			axis.trimJointList(complexLine.getJointList(), minTickLength, maxTickLength);
+			axis.setCombType(ComplexLine.getCombType(axis.getMinorTickJointList(), minJointCount, maxJointCount));
+		}
+		if (axis.getCombType() != null) {
+			axis.analyzeMajorMinorTicks(complexLine);
+			LOG.debug("AXIS "+axis.toString());
+		} else {
+			axis = null;
+		}
+		return axis;
+	}
+
+	/** @return longest joint accepted as a tick */
+	public double getMaxTickLength() {
+		return maxTickLength;
+	}
+
+	/** @param maxTickLength longest joint accepted as a tick */
+	public void setMaxTickLength(double maxTickLength) {
+		this.maxTickLength = maxTickLength;
+	}
+
+	/** @return the page analyzer given at construction or by the setter */
+	public PageAnalyzer getPageAnalyzer() {
+		return pageAnalyzer;
+	}
+
+	/** @param pageAnalyzer page analyzer to use */
+	public void setPageAnalyzer(PageAnalyzer pageAnalyzer) {
+		this.pageAnalyzer = pageAnalyzer;
+	}
+
+}

File src/main/java/org/xmlcml/graphics/control/page/AxisAnalyzerAction.java

+package org.xmlcml.graphics.control.page;
+
+import java.util.List;
+
+import org.apache.log4j.Logger;
+import org.xmlcml.graphics.control.AbstractActionElement;
+import org.xmlcml.graphics.paths.ComplexLine;
+import org.xmlcml.graphics.paths.PathAnalyzer;
+import org.xmlcml.graphics.svg.SVGElement;
+import org.xmlcml.graphics.svg.SVGLine;
+import org.xmlcml.graphics.svg.SVGUtil;
+
+/** Page action that runs the axis analyzer over the svg:line elements of the
+ * current page and then writes a debug copy of the page.
+ */
+public class AxisAnalyzerAction extends PageAction {
+
+	private final static Logger LOG = Logger.getLogger(AxisAnalyzerAction.class);
+	
+	/**
+	 * @param pageActionCommand command element that configures this action
+	 */
+	public AxisAnalyzerAction(AbstractActionElement pageActionCommand) {
+		super(pageActionCommand);
+	}
+	
+	/** Looks for axes on the current page, then dumps the page to target/axisAnalyzer1Axes.svg.
+	 */
+	@Override
+	public void run() {
+		SVGElement page = getSVGPage();
+		findAxes(page);
+		debugFile("target/axisAnalyzer1Axes.svg");
+	}
+
+	/** Collects all svg:line descendants of the container and hands them to a new AxisAnalyzer.
+	 * 
+	 * @param container element to search
+	 */
+	private void findAxes(SVGElement container) {
+		List<SVGElement> lineElements = SVGUtil.getQuerySVGElements(container, ".//svg:line");
+		List<SVGLine> svgLines = ComplexLine.extractLines(lineElements);
+		AxisAnalyzer axisAnalyzer = new AxisAnalyzer(container, pageAnalyzer);
+		axisAnalyzer.createAxisList(svgLines, PathNormalizerAction.EPS);
+	}
+
+
+}

File src/main/java/org/xmlcml/graphics/control/page/AxisAnalyzerElement.java

+package org.xmlcml.graphics.control.page;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import nu.xom.Node;
+import nu.xom.Nodes;
+
+import org.xmlcml.graphics.control.AbstractActionElement;
+import org.xmlcml.graphics.control.CommandElement;
+import org.xmlcml.graphics.pdf2svg.DocumentAnalyzer;
+import org.xmlcml.graphics.svg.SVGSVG;
+
+
+/** Command element with tag "axisAnalyzer"; it requires an xpath attribute.
+ */
+public class AxisAnalyzerElement extends AbstractActionElement {
+
+	/** tag of this command element */
+	public final static String TAG ="axisAnalyzer";
+
+	/** attribute names returned by getAttributeNames()
+	 */
+	private static final List<String> ATTNAMES = new ArrayList<String>();
+
+	static {
+		ATTNAMES.add(PageActionElement.XPATH);
+	}
+
+	/** constructor
+	 */
+	public AxisAnalyzerElement() {
+		super(TAG);
+		init();
+	}
+
+	/** copy constructor
+	 * 
+	 * @param element element to copy
+	 */
+	public AxisAnalyzerElement(CommandElement element) {
+        super(element);
+	}
+
+	/** initialisation hook called by the no-argument constructor; currently does nothing
+	 */
+	protected void init() {
+	}
+
+    /**
+     * copy node .
+     *
+     * @return Node
+     */
+    public Node copy() {
+        return new AxisAnalyzerElement(this);
+    }
+
+	/**
+	 * @return tag
+	 */
+	public String getTag() {
+		return TAG;
+	}
+
+	/**
+	 * @return the attribute names registered for this element
+	 */
+	protected List<String> getAttributeNames() {
+		return ATTNAMES;
+	}
+
+	/**
+	 * @return the names of the attributes that must be present (xpath only)
+	 */
+	protected List<String> getRequiredAttributeNames() {
+		return Arrays.asList(AbstractActionElement.XPATH);
+	}
+
+
+}

File src/main/java/org/xmlcml/graphics/control/page/BoxDrawerElement.java

 	 */
 
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
+//		ATTNAMES.add(PageActionElement.ACTION);
 		ATTNAMES.add(PageActionElement.FILL);
 		ATTNAMES.add(PageActionElement.OPACITY);
 		ATTNAMES.add(PageActionElement.STROKE_WIDTH);

File src/main/java/org/xmlcml/graphics/control/page/BoxProcessorElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
+//		ATTNAMES.add(PageActionElement.ACTION);
 		ATTNAMES.add(PageActionElement.BOX_COUNT);
 		ATTNAMES.add(PageActionElement.MARGIN_X);
 		ATTNAMES.add(PageActionElement.MARGIN_Y);

File src/main/java/org/xmlcml/graphics/control/page/ElementDeleterElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
+//		ATTNAMES.add(PageActionElement.ACTION);
 		ATTNAMES.add(PageActionElement.PAGE_RANGE);
 		ATTNAMES.add(PageActionElement.TITLE);
 		ATTNAMES.add(PageActionElement.XPATH);

File src/main/java/org/xmlcml/graphics/control/page/ElementStylerElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
+//		ATTNAMES.add(PageActionElement.ACTION);
 		ATTNAMES.add(PageActionElement.FILL);
 		ATTNAMES.add(PageActionElement.OPACITY);
 		ATTNAMES.add(PageActionElement.STROKE_WIDTH);

File src/main/java/org/xmlcml/graphics/control/page/PageAction.java

 package org.xmlcml.graphics.control.page;
 
+import java.io.File;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 
 import org.apache.log4j.Logger;
 import org.xmlcml.cml.base.CMLConstants;
+import org.xmlcml.cml.base.CMLUtil;
 import org.xmlcml.graphics.control.AbstractAction;
 import org.xmlcml.graphics.control.AbstractActionElement;
 import org.xmlcml.graphics.pdf2svg.AbstractAnalyzer;
 import org.xmlcml.graphics.pdf2svg.DocumentAnalyzer;
 import org.xmlcml.graphics.pdf2svg.PageAnalyzer;
+import org.xmlcml.graphics.svg.SVGElement;
 import org.xmlcml.graphics.svg.SVGSVG;
+import org.xmlcml.graphics.svg.SVGUtil;
 import org.xmlcml.graphics.util.GraphUtil;
 
 
 		info(string);
 	}
 
+	/** Writes a debugging snapshot of the current page to a file.
+	 * 
+	 * A new SVGSVG is built from the current page, every svg:defs element
+	 * in it is detached, and the result is output quietly to the file.
+	 * 
+	 * @param filename path of the file to write
+	 */
+	protected void debugFile(String filename) {
+		SVGSVG snapshot = new SVGSVG(getSVGPage());
+		List<SVGElement> defsElements = SVGUtil.getQuerySVGElements(snapshot, ".//svg:defs");
+		for (SVGElement defsElement : defsElements) {
+			defsElement.detach();
+		}
+		File target = new File(filename);
+		CMLUtil.outputQuietly(snapshot, target, 1);
+	}
+
 }

File src/main/java/org/xmlcml/graphics/control/page/PageActionElement.java

 	public static final String FILL = "fill";
 	public static final String MARGIN_X = "marginX";
 	public static final String MARGIN_Y = "marginY";
+	public static final String NORMALIZE_HIGH_CODE_POINTS = "normalizeHighCodePoints";
 	public static final String OPACITY = "opacity";
 	public static final String PAGE_RANGE = "pageRange";
 	public static final String REMOVE_IMAGE_DATA = "removeImageData";
 		ATTNAMES.add(MARGIN_Y);
 		ATTNAMES.add(MESSAGE);
 		ATTNAMES.add(NAME);
+		ATTNAMES.add(NORMALIZE_HIGH_CODE_POINTS);
 		ATTNAMES.add(OPACITY);
 		ATTNAMES.add(PAGE_RANGE);
 		ATTNAMES.add(REGEX);
 
 	public final static String TAG = "pageAction";
 
+
 	/** constructor 
 	 */
 	public PageActionElement() {
 
 	protected List<String> getRequiredAttributeNames() {
 		return Arrays.asList(new String[]{
-				ACTION,
+//				ACTION,
 		});
 	}
 

File src/main/java/org/xmlcml/graphics/control/page/PageActionFactory.java

 
 		PageAction pageAction = null;
 		if (false) {
+		} else if(command instanceof AxisAnalyzerElement) {
+			pageAction = new AxisAnalyzerAction(command);
 		} else if(command instanceof BoxDrawerElement) {
 			pageAction = new BoxDrawerAction(command);
 		} else if(command instanceof BoxProcessorElement) {

File src/main/java/org/xmlcml/graphics/control/page/PageNormalizerAction.java

 	
 	
 	private static final String STYLE = "style";
-//	private static final double X_OFFSET = 100.;
-//	private static final double Y_OFFSET = 100.;
 	private static final double X_OFFSET = 0.;
 	private static final double Y_OFFSET = 0.;
 	private static final String ANGLE = "angle";
 	}
 	
 	public void run() {
+		if (isTrue(PageActionElement.NORMALIZE_HIGH_CODE_POINTS)) {
+			normalizeHighCodePoints(getSVGPage());
+		}
 		if (isTrue(PageActionElement.REMOVE_IMAGE_DATA)) {
 			removeImageData(getSVGPage());
 		}
 		if (isTrue(PageActionElement.APPLY_AND_REMOVE_CUMULATIVE_TRANSFORMS)) {
 			SVGUtil.applyAndRemoveCumulativeTransformsFromDocument(getSVGPage());
 		}
-		// debug
-		debugFile("target/norm1.svg");
 				
 		if (isTrue(PageActionElement.CAN_ROTATE_LANDSCAPE)) {
 			guessAndApplyConvertToLandscape();
+			debugFile("target/pageNorm1Rotation.svg");
 		}
 		
 		Integer decimalPlaces = getDecimalPlaces();
 		if (decimalPlaces != null) {
 			getSVGPage().format(decimalPlaces);
+			debugFile("target/pageNorm2Decimal.svg");
 		}
-		debugFile("target/norm2.svg");
 	}
 
-	private void debugFile(String filename) {
-		SVGSVG svg = new SVGSVG(getSVGPage());
-		List<SVGElement> defs = SVGUtil.getQuerySVGElements(svg, ".//svg:defs");
-		for (SVGElement def : defs) def.detach();
-		CMLUtil.outputQuietly(svg, new File(filename), 1);
+	/** Replaces every code point above 127 in the page's texts by a numeric
+	 * character reference of the form &#NNN;.
+	 * 
+	 * Texts with a null or empty value are skipped (previously an empty value
+	 * caused a StringIndexOutOfBoundsException). All code points of a text are
+	 * examined, so characters following a high code point are preserved instead
+	 * of being dropped; for single-character texts the result is unchanged.
+	 * 
+	 * @param svgPage page whose svg:text descendants are normalized
+	 */
+	private void normalizeHighCodePoints(SVGSVG svgPage) {
+		List<SVGText> texts = SVGText.extractTexts(SVGUtil.getQuerySVGElements(svgPage, ".//svg:text"));
+		for (SVGText text : texts) {
+			String s = text.getValue();
+			if (s == null || s.length() == 0) {
+				continue;
+			}
+			StringBuilder normalized = new StringBuilder();
+			boolean changed = false;
+			// step by code point, not char, so surrogate pairs are handled as one unit
+			for (int i = 0; i < s.length(); ) {
+				int codePoint = s.codePointAt(i);
+				if (codePoint > 127) {
+					normalized.append("&#").append(codePoint).append(";");
+					changed = true;
+					LOG.debug("codePoint " + codePoint);
+				} else {
+					normalized.appendCodePoint(codePoint);
+				}
+				i += Character.charCount(codePoint);
+			}
+			if (changed) {
+				text.setText(normalized.toString());
+			}
+		}
+ 	}
-	
+
 	private void guessAndApplyConvertToLandscape() {
 		List<SVGElement> textsNoTransform = SVGUtil.getQuerySVGElements(getSVGPage(), ".//svg:text[not(@transform)]");
 		List<SVGElement> textsTransform = SVGUtil.getQuerySVGElements(getSVGPage(), ".//svg:text[@transform]");

File src/main/java/org/xmlcml/graphics/control/page/PageNormalizerElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
+//		ATTNAMES.add(PageActionElement.ACTION);
 		ATTNAMES.add(PageActionElement.APPLY_AND_REMOVE_CUMULATIVE_TRANSFORMS);
 		ATTNAMES.add(PageActionElement.CAN_ROTATE_LANDSCAPE);
 		ATTNAMES.add(PageActionElement.CLEAN_SVG_STYLES);
 		ATTNAMES.add(PageActionElement.DENORMALIZE_FONT_SIZES);
 		ATTNAMES.add(PageActionElement.FORMAT_DECIMAL_PLACES);
+		ATTNAMES.add(PageActionElement.NORMALIZE_HIGH_CODE_POINTS);
 		ATTNAMES.add(PageActionElement.REMOVE_IMAGE_DATA);
 		ATTNAMES.add(PageActionElement.REMOVE_UNWANTED_ATTRIBUTES);
 	}

File src/main/java/org/xmlcml/graphics/control/page/PathNormalizerAction.java

 import java.util.List;
 
 import org.apache.log4j.Logger;
+import org.xmlcml.cml.base.CMLUtil;
 import org.xmlcml.graphics.control.AbstractActionElement;
+import org.xmlcml.graphics.paths.ComplexLine;
 import org.xmlcml.graphics.paths.PathAnalyzer;
 import org.xmlcml.graphics.svg.SVGElement;
-import org.xmlcml.graphics.svg.SVGRect;
+import org.xmlcml.graphics.svg.SVGLine;
 import org.xmlcml.graphics.svg.SVGUtil;
 
 /**
 
 	private final static Logger LOG = Logger.getLogger(PathNormalizerAction.class);
 	
+	public final static double EPS = 0.001;
+	
 	public PathNormalizerAction(AbstractActionElement pageActionCommand) {
 		super(pageActionCommand);
 	}
 	
 	@Override
 	public void run() {
+		debugFile("target/pathNorm0.svg"); // NOTE(review): hard-coded debug output path; presumably dumps the page before any normalization - confirm debugFile()
 		PathAnalyzer pathAnalyzer = pageAnalyzer.ensurePathAnalyzer();
 		if (isTrue(PathNormalizerElement.REMOVE_DUPLICATE_PATHS)) {
-				pathAnalyzer.removeDuplicatePaths();
+			pathAnalyzer.removeDuplicatePaths();
+			debugFile("target/pathNorm1Duplicate.svg");
 		}
 		if (isTrue(PathNormalizerElement.CREATE_HIGHER_PRIMITIVES)) {
 			pathAnalyzer.removeRedundantMoveCommands();
 			pathAnalyzer.splitAtMoveCommands();
 			pathAnalyzer.interpretPathsAsRectCirclePolylineAndReplace();
 		}
+		// polylines are always split into lines here; this step is not controlled by any of the flags above
 		Integer minLinesInPolyline = getMinLinesInPolyline();
 		pathAnalyzer.splitPolylinesToLines(minLinesInPolyline);
 		
+		debugFile("target/pathNorm2Polyline.svg");
+		if (isTrue(PathNormalizerElement.JOIN_POLYLINES)) {
+			pathAnalyzer.mergePolylinesAtContiguousEndPoints(EPS); // EPS (0.001) is the tolerance handed to the end-point merge
+			debugFile("target/pathNorm3Merge.svg");
+		}
 		if (isTrue(PathNormalizerElement.REMOVE_EMPTY_SVGG)) {
 			getSVGPage().removeEmptySVGG();
+			debugFile("target/pathNorm4EmptySVG.svg");
 		}
 	}
 

File src/main/java/org/xmlcml/graphics/control/page/PathNormalizerElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static final String CREATE_HIGHER_PRIMITIVES = "createHigherPrimitives";
-	static final String REMOVE_DUPLICATE_PATHS = "removeDuplicatePaths";
-	static final String REMOVE_EMPTY_SVGG = "removeEmptySVGG";
-	static final String MIN_LINES_IN_POLY = "minLinesInPolyline";
+    static final String FIND_AXES                = "findAxes";
+	static final String JOIN_POLYLINES           = "joinPolylines";
+	static final String MIN_LINES_IN_POLY        = "minLinesInPolyline";
+	static final String REMOVE_DUPLICATE_PATHS   = "removeDuplicatePaths";
+	static final String REMOVE_EMPTY_SVGG        = "removeEmptySVGG";
 
 	static {
-		ATTNAMES.add(PageActionElement.ACTION);
 		ATTNAMES.add(REMOVE_DUPLICATE_PATHS);
 		ATTNAMES.add(CREATE_HIGHER_PRIMITIVES);
+		ATTNAMES.add(FIND_AXES);
+		ATTNAMES.add(JOIN_POLYLINES);
 		ATTNAMES.add(MIN_LINES_IN_POLY);
 		ATTNAMES.add(REMOVE_EMPTY_SVGG);
 	}

File src/main/java/org/xmlcml/graphics/control/page/TextChunkerAction.java

 import org.apache.log4j.Logger;
 import org.xmlcml.graphics.control.AbstractActionElement;
 import org.xmlcml.graphics.svg.SVGElement;
+import org.xmlcml.graphics.svg.SVGText;
 import org.xmlcml.graphics.svg.SVGUtil;
 import org.xmlcml.graphics.text.TextAnalyzer;
 

File src/main/java/org/xmlcml/graphics/figure/FigureAnalyzerAction.java

 import org.xmlcml.graphics.control.page.PageAction;
 import org.xmlcml.graphics.pdf2svg.PConstants;
 import org.xmlcml.graphics.svg.SVGElement;
+import org.xmlcml.graphics.svg.SVGText;
 import org.xmlcml.graphics.svg.SVGUtil;
 import org.xmlcml.graphics.text.TextAnalyzer;
 import org.xmlcml.graphics.util.GraphUtil;

File src/main/java/org/xmlcml/graphics/font/SimpleFont.java

 	}
 
 	public SimpleCharacter getSimpleCharacter(String ch) {
+		if (characterMap == null) { // fail with an explicit message rather than a bare NullPointerException on the lookup below
+			throw new RuntimeException("null characterMap");
+		}
 		return characterMap.get(ch);
 	}
 

File src/main/java/org/xmlcml/graphics/paths/ComplexLine.java

 public class ComplexLine {
 
 	private final static Logger LOG = Logger.getLogger(ComplexLine.class);
+	private static final double JOINT_EPS = 0.3;
 	
 	public enum LineOrientation {
 		HORIZONTAL,
 	private LineOrientation backboneOrientation;
 	private Direction backboneDirection;
 	private double eps;
+	private Double spacing;
+	private double maxJointLength = 999.;
+	private double minJointLength = 0.0;
+	private CombType combType;
+	private int minJointCount = 0;
+	private int maxJointCount = 999;
+	private boolean requirePerpendicularJoints = true;
 
 	
 	private ComplexLine(SVGLine backbone, LineOrientation backboneOrientation, double eps) {
 		return complexLines; 
 	}
 	
-	Joint createPerpendicularJoint(SVGLine line, double eps) {
+	/** Minimum number of joints for which {@link #getCombType()} assigns a comb type. */
+	public int getMinJointCount() {
+		return minJointCount;
+	}
+
+	/**
+	 * Sets the minimum joint count.
+	 * The cached comb type is discarded because it was computed with the previous limit.
+	 */
+	public void setMinJointCount(int minJointCount) {
+		this.minJointCount = minJointCount;
+		this.combType = null;
+	}
+
+	/** Maximum number of joints for which {@link #getCombType()} assigns a comb type. */
+	public int getMaxJointCount() {
+		return maxJointCount;
+	}
+
+	/**
+	 * Sets the maximum joint count.
+	 * The cached comb type is discarded because it was computed with the previous limit.
+	 */
+	public void setMaxJointCount(int maxJointCount) {
+		this.maxJointCount = maxJointCount;
+		this.combType = null;
+	}
+
+	/** Sets the longest line that createPerpendicularJoint() will accept as a joint. */
+	public void setMaxJointLength(double maxJointLength) {
+		this.maxJointLength = maxJointLength;
+	}
+
+	/** Sets the shortest line that createPerpendicularJoint() will accept as a joint. */
+	public void setMinJointLength(double minJointLength) {
+		this.minJointLength = minJointLength;
+	}
+
+	/** Longest line that createPerpendicularJoint() will accept as a joint. */
+	public double getMaxJointLength() {
+		return maxJointLength;
+	}
+
+	/** Shortest line that createPerpendicularJoint() will accept as a joint. */
+	public double getMinJointLength() {
+		return minJointLength;
+	}
+
+	/**
+	 * Sets both joint-length limits at once.
+	 *
+	 * @throws IllegalArgumentException if minJointLength is greater than maxJointLength
+	 */
+	public void setMinMaxJointLength(double minJointLength, double maxJointLength) {
+		if (minJointLength > maxJointLength) {
+			throw new IllegalArgumentException("Bad tick constraints: "+minJointLength+", "+maxJointLength);
+		}
+		setMinJointLength(minJointLength);
+		setMaxJointLength(maxJointLength);
+	}
+
+	/** Alias for {@link #setMinJointCount(int)}; kept so existing callers continue to work. */
+	public void setMinJoints(int minJoints) {
+		setMinJointCount(minJoints);
+	}
+
+	/** Whether createPerpendicularJoint() insists that a joint line is perpendicular to the backbone. */
+	public boolean isRequirePerpendicularJoints() {
+		return requirePerpendicularJoints;
+	}
+
+	/** Switches the perpendicularity requirement used by createPerpendicularJoint() on or off. */
+	public void setRequirePerpendicularJoints(boolean requirePerpendicularJoints) {
+		this.requirePerpendicularJoints = requirePerpendicularJoints;
+	}
+
+	public Joint createPerpendicularJoint(SVGLine line, double eps) { // returns null when the line does not qualify as a joint on the backbone
 		Joint joint = null;
 		Real2 point = null;
-		RealRange backboneXRange = backbone.getReal2Range().getXRange();
-		RealRange backboneYRange = backbone.getReal2Range().getYRange();
+		RealRange backboneXRange = this.backbone.getReal2Range().getXRange();
+		RealRange backboneYRange = this.backbone.getReal2Range().getYRange();
 		RealRange lineXRange = line.getReal2Range().getXRange();
 		RealRange lineYRange = line.getReal2Range().getYRange();
 		SideOrientation sideOrientation = null;
-		LineOrientation lineOrientation = ComplexLine.getLineOrientation(line, eps);
-		if (horizontalAndVerticalLinesMeet(backbone, line, eps)) {
-			if (lineOrientation == null) {
-				// not orthogonal
-			} else if (this.backboneOrientation.equals(LineOrientation.HORIZONTAL) &&
-				lineOrientation.equals(LineOrientation.VERTICAL)) {
-				point = new Real2(lineXRange.getMin(), backboneYRange.getMin());
-				sideOrientation = getSideOrientation( point.getY(), lineYRange, eps);
-			} else if (this.backboneOrientation.equals(LineOrientation.VERTICAL) && 
-				lineOrientation.equals(LineOrientation.HORIZONTAL)) {
-				point = new Real2(backboneXRange.getMin(), lineYRange.getMin());
-				sideOrientation = getSideOrientation( point.getX(), lineXRange, eps);
+		Double lineLength = line.getLength(); // NOTE(review): boxed value is auto-unboxed in the length check below; would throw if getLength() can return null - confirm
+		if (horizontalAndVerticalLinesMeet(this.backbone, line, eps)) {
+			if (!isRequirePerpendicularJoints() || backbone.isPerpendicularTo(line, eps)) { // perpendicularity test is bypassed when the requirement is switched off
+				LineOrientation currentJointOrientation = ComplexLine.getLineOrientation(line, eps);
+				this.createPointAndSideOrientation(currentJointOrientation); // NOTE(review): the callee is an empty stub, so this call currently does nothing
+				if (currentJointOrientation == null) {
+					// not orthogonal
+				} else if (this.backboneOrientation.equals(LineOrientation.HORIZONTAL) &&
+					currentJointOrientation.equals(LineOrientation.VERTICAL)) {
+					point = new Real2(lineXRange.getMin(), backboneYRange.getMin()); // joint point: x taken from the line, y from the backbone
+					sideOrientation = this.getSideOrientation( point.getY(), lineYRange, eps);
+				} else if (this.backboneOrientation.equals(LineOrientation.VERTICAL) && 
+					currentJointOrientation.equals(LineOrientation.HORIZONTAL)) {
+					point = new Real2(backboneXRange.getMin(), lineYRange.getMin()); // joint point: x taken from the backbone, y from the line
+					sideOrientation = this.getSideOrientation( point.getX(), lineXRange, eps);
+				}
 			}
 		}
-		if (point != null) {
-			joint = new Joint(point, backbone, line, sideOrientation, eps);
+		if (point != null && lineLength <= this.getMaxJointLength() && lineLength >= this.getMinJointLength()) { // length must lie within [minJointLength, maxJointLength]
+			joint = new Joint(point, this.backbone, line, sideOrientation, eps);
 		}
 		return joint;
 	}
 
+	private void createPointAndSideOrientation(LineOrientation lineOrientation) {
+		// TODO unimplemented placeholder: the body is empty and lineOrientation is ignored
+		
+	}
+
 	public static Direction getLineDirection(SVGLine line, double eps) {
 		Direction direction = null;
 		Real2 coord0 = line.getXY(0);
 		return new RealRange(lineRange.getMin()-eps, lineRange.getMax()+eps);
 	}
 	
-	public boolean isComb() {
-		return false;
-	}
-
 	public LineOrientation getBackboneOrientation() {
 		if (backboneOrientation == null) {
 			backboneOrientation = getLineOrientation(backbone, eps);
 	}
 
 	public CombType getCombType() {
+		if (combType == null) { // computed on first use from jointList and the min/max joint counts, then cached in the field
+			combType = getCombType(jointList, minJointCount, maxJointCount);
+		}
+		return combType;
+	}
+
+	public static CombType getCombType(List<Joint> jointList, int minJointCount, int maxJointCount) { // NOTE(review): jointList is dereferenced without a null check
 		CombType combType = null;
-		SideOrientation sideOrientation0 = null;
-		List<SideOrientation> sideOrientations = Joint.getSideOrientations(jointList);
+		if (jointList.size() >= minJointCount && jointList.size() <= maxJointCount) { // outside this inclusive range the result stays null
+			SideOrientation sideOrientation0 = null;
+			List<SideOrientation> sideOrientations = Joint.getSideOrientations(jointList);
+			sideOrientation0 = getSideOrientation(sideOrientation0, sideOrientations);
+			combType = getCombTypeFromSideOrientation(sideOrientation0, sideOrientations);
+		}
+		return combType;
+	}
+
+	private static SideOrientation getSideOrientation(SideOrientation sideOrientation0,
+			List<SideOrientation> sideOrientations) { // returns sideOrientation0 unchanged if non-null, otherwise the first list element (null for an empty list)
 		for  (SideOrientation sideOrientation : sideOrientations) {
 			if (sideOrientation0 == null) {
 				sideOrientation0 = sideOrientation;
 				break; // only the first element is ever taken
 			}
 		}
-		
+		return sideOrientation0;
+	}
+
+	private static CombType getCombTypeFromSideOrientation(SideOrientation sideOrientation0,
+			List<SideOrientation> sideOrientations) {
+		CombType combType = null;
 		if (sideOrientation0 == null) {
 			if (sideOrientations.size() > 0) {
 				combType = CombType.MIXED;
 		}
 		return subset;
 	}
+	
+	public static Double calculateInterJointSpacing(List<Joint> jointList, double jointEps) {
+		Double spacing = null;
+		if (jointList != null && jointList.size() > 1) {
+			Joint lastJoint = jointList.get(0);
+			for (int i = 1; i < jointList.size(); i++) {
+				Joint joint = jointList.get(i);
+				Double length = joint.getPoint().getDistance(lastJoint.getPoint());
+				if (length < jointEps) {
+					// coincident joint???
+				} else if (spacing == null) {
+					spacing = length;
+				} else {
+					if (Math.abs(spacing - length) > jointEps) {
+						spacing = null;
+						break;
+					}
+				}
+				lastJoint = joint;
+			}
+		}
+		return spacing;
+	}
+
+	/**
+	 * Logs the inter-joint spacing and every joint of this line at debug level.
+	 *
+	 * @param string label written on the header line so the output can be told apart
+	 */
+	public void debug(String string) {
+		LOG.debug("=== "+string);
+		LOG.debug("jointSpacing: "+calculateInterJointSpacing(jointList, JOINT_EPS));
+		for (Joint joint : jointList) {
+			LOG.debug("J "+joint);
+		}
+	}
 
 }

File src/main/java/org/xmlcml/graphics/paths/Joint.java

 		return result0 || result1;
 	}
 	
+	/**
+	 * Length of the line that forms this joint.
+	 *
+	 * @return the length of the line's Euclid line, or null if no line is set
+	 */
+	public Double getLength() {
+		if (line == null) {
+			return null;
+		}
+		return line.getEuclidLine().getLength();
+	}
+	
+	/**
+	 * Short description used in debug output.
+	 *
+	 * @return a space, the joint line's length and a newline; the empty string if no line is set
+	 */
+	@Override
+	public String toString() {
+		if (line == null) {
+			return "";
+		}
+		return " "+line.getEuclidLine().getLength()+"\n";
+	}
+
 }

File src/main/java/org/xmlcml/graphics/paths/PathAnalyzer.java

 import org.apache.log4j.Logger;
 import org.xmlcml.cml.base.CMLConstants;
 import org.xmlcml.cml.base.CMLUtil;
+import org.xmlcml.euclid.Axis.Axis2;
 import org.xmlcml.euclid.Point2;
 import org.xmlcml.euclid.Real2;
 import org.xmlcml.euclid.Real2Range;
+import org.xmlcml.euclid.RealArray.Monotonicity;
 import org.xmlcml.euclid.RealRange;
 import org.xmlcml.graphics.control.page.PageNormalizerAction;
 import org.xmlcml.graphics.pdf2svg.AbstractSVGAnalyzer;
+import org.xmlcml.graphics.pdf2svg.BoundingBoxManager;
+import org.xmlcml.graphics.pdf2svg.BoundingBoxManager.BoxEdge;
 import org.xmlcml.graphics.pdf2svg.Chunk;
 import org.xmlcml.graphics.pdf2svg.ChunkStyle;
 import org.xmlcml.graphics.pdf2svg.PDF2SVGUtil;
 
 	private static final Double DEFAULT_MARGIN_X = 5.0;
 	private static final Double DEFAULT_MARGIN_Y = 5.0;
+
+	private static final String MERGED = "merged";
 	
 	private SVGG annotatedPathListG;
 
+	private SVGPolygon polygon;
+
 	public PathAnalyzer() {
 	}
 
 		return strings;
 	}
 
+	/**
+	 * Joins polylines whose end points coincide into single longer polylines.
+	 *
+	 * Polylines are processed first along X and then along Y. A polyline that was
+	 * used as a merge seed is flagged with the MERGED attribute and is excluded from
+	 * later passes, so the Y pass only sees polylines the X pass could not handle.
+	 *
+	 * @param eps tolerance within which two end points count as the same point
+	 */
+	public void mergePolylinesAtContiguousEndPoints(double eps) {
+		mergePolylinesAtContiguousEndPoints(Axis2.X, eps);
+		mergePolylinesAtContiguousEndPoints(Axis2.Y, eps);
+	}
+
+	/**
+	 * Repeatedly merges the not-yet-flagged polylines along one axis.
+	 * Stops when every polyline is flagged, or when none of the remaining ones
+	 * is monotonic along the axis (a further pass could not make progress).
+	 */
+	private void mergePolylinesAtContiguousEndPoints(Axis2 axis, double eps) {
+		while (true) {
+			if (LOG.isTraceEnabled()) {
+				// the full count is only needed for tracing, so do not query for it otherwise
+				LOG.trace("POL "+SVGUtil.getQuerySVGElements(getSVGPage(), ".//svg:polyline").size());
+			}
+			List<SVGElement> polylines = SVGUtil.getQuerySVGElements(getSVGPage(), ".//svg:polyline[not(@"+MERGED+")]");
+			if (polylines.size() == 0) {
+				break;
+			}
+			if (!mergePolylinesAtContiguousPoints(axis, eps, polylines)) {
+				// nothing was flagged in this pass, so the same list would come back forever
+				break;
+			}
+		}
+	}
+
+	/**
+	 * Performs one merge pass: the first polyline (sorted by its minimum edge along
+	 * the axis) becomes the seed, is flagged MERGED, and absorbs every following
+	 * polyline that starts where the growing seed currently ends.
+	 *
+	 * @return true if a seed was created and flagged; false if no polyline in the
+	 *         list is monotonic along the axis, in which case nothing was changed
+	 *         apart from any reversal done while normalizing direction
+	 */
+	private boolean mergePolylinesAtContiguousPoints(Axis2 axis, double eps, List<SVGElement> polylines) {
+		// will modify all polylines so they are monotonic increasing
+		List<SVGPolyline> polylinesIncreasing = getNormalizedMonotonicity(polylines, Monotonicity.INCREASING, axis);
+		if (polylinesIncreasing.isEmpty()) {
+			return false;
+		}
+		BoxEdge boxEdge = (Axis2.X.equals(axis)) ? BoxEdge.XMIN : BoxEdge.YMIN;
+		List<SVGElement> sortedPolylines = BoundingBoxManager.getElementsSortedByEdge(polylinesIncreasing, boxEdge);
+		if (sortedPolylines == null || sortedPolylines.isEmpty()) {
+			return false;
+		}
+		for (SVGElement pp : sortedPolylines) {
+			SVGPolyline p = (SVGPolyline) pp;
+			LOG.trace(""+p.getFirst()+" ==> "+p.getLast());
+		}
+		SVGPolyline newPolyline = null;
+		Real2 lastXY = null;
+		for (int i = 0; i < sortedPolylines.size(); i++) {
+			SVGPolyline polyline = (SVGPolyline) sortedPolylines.get(i);
+			if (newPolyline == null) {
+				// seed: a flagged copy replaces the original in the document
+				newPolyline = new SVGPolyline(polyline);
+				polyline.getParent().replaceChild(polyline, newPolyline);
+				newPolyline.addAttribute(new Attribute(MERGED, "true"));
+			} else {
+				Real2 firstXY = polyline.getFirst();
+				double delta = (axis.equals(Axis2.X)) ? 
+						firstXY.getX() - lastXY.getX() : firstXY.getY() - lastXY.getY(); 
+				if (delta > eps) { // no remaining lines in range
+					break;
+				} else if (delta < -eps) {
+					// else skip overlapping lines
+				} else if (firstXY.getDistance(lastXY) < eps) {
+					newPolyline.appendIntoSingleLine(polyline, 1);
+					LOG.trace("SIZE: "+newPolyline.getPointList().size());
+					polyline.detach();
+				}
+			}
+			lastXY = newPolyline.getLast();
+		}
+		LOG.debug("new points "+newPolyline.getPointList().size());
+		return true;
+	}
+
+
+	/**
+	 * Selects the polylines that are monotonic along the axis and makes them all
+	 * run in the requested direction.
+	 *
+	 * Polylines for which getMonotonicity(axis) returns null are left out.
+	 * Polylines running the opposite way are reversed in place.
+	 *
+	 * @param polylines candidate elements; each is cast to SVGPolyline
+	 * @param monotonicity direction every returned polyline should have
+	 * @param axis axis along which monotonicity is judged
+	 * @return the monotonic polylines, in input order
+	 */
+	private List<SVGPolyline> getNormalizedMonotonicity(List<SVGElement> polylines, Monotonicity monotonicity, Axis2 axis) {
+		List<SVGPolyline> monotonicPolylines = new ArrayList<SVGPolyline>();
+		for (SVGElement element : polylines) {
+			SVGPolyline candidate = (SVGPolyline) element;
+			Monotonicity actual = candidate.getMonotonicity(axis);
+			if (actual == null) {
+				continue;
+			}
+			if (!monotonicity.equals(actual)) {
+				candidate.reverse();
+			}
+			monotonicPolylines.add(candidate);
+		}
+		return monotonicPolylines;
+	}
+
 	public void formatClipPaths() {
 		List<SVGElement> clipPaths = SVGUtil.getQuerySVGElements(svgPage, ".//svg:clipPath/svg:path");
 		for (SVGElement clipPath : clipPaths) {
 					replace(path, line);
 					newSVGElement = line;
 				} else {
-					SVGPolygon polygon = polyline.createPolygon(RECT_EPS);
+					polygon = polyline.createPolygon(RECT_EPS);
 					if (polygon != null) {
 						newSVGElement = polygon;
 						polygon.setId("polygon"+id);

File src/main/java/org/xmlcml/graphics/pdf2svg/AbstractSVGAnalyzer.java

 		ensureStyleManager();
 	}
 
-//	public AbstractSVGAnalyzer(PageAnalyzer pageAnalyzer, AbstractSVGAnnotator annotator) {
-//		this(pageAnalyzer);
-//		this.annotator = annotator;
-//	}
-//
 	public void setSVGPage(SVGSVG svgPage) {
 		this.svgPage = svgPage;
 	}
 		return pageAnalyzer;
 	}
 
-//	public AbstractSVGAnnotator getAnnotator() {
-//		return annotator;
-//	}
-//	
-//	public void addAnnotatedPrimitives(List<? extends SVGElement> primitiveList) {
-//		this.annotator.addAnnotatedPrimitives(primitiveList);
-//	}
-//	
-//	public void addAnnotation(SVGElement element) {
-//		annotator.addAnnotation(element);
-//	}
-//
 	protected StyleManager ensureStyleManager() {
 		if (styleManager == null) {
 			styleManager = pageAnalyzer.getStyleManager();

File src/main/java/org/xmlcml/graphics/pdf2svg/PDF2SVGReader.java

 				pageList.add(svg);
 				svg.addAttribute(new Attribute(FILE_SIZE, ""+fileSize));
 			} catch (Exception e) {
-				throw new RuntimeException("Cannot build/parse file: "+filename);
+				throw new RuntimeException("Cannot build/parse file: "+filename, e);
 			}
 		}
 		LOG.debug("read "+pageList.size()+" SVG files");

File src/main/java/org/xmlcml/graphics/pdf2svg/PDFSVGGraphics2D.java

 //		System.err.println("draw GlyphVector");
 	}
 
-//	@Override
-//	public void draw(Shape shape) {
-//		super.draw(shape);
-//		System.out.print("D_*");
-//	}
-//	
-//	@Override
-//	public void drawString(String string, float x, float y) {
-//		super.drawString(string, x, y);
-////		System.out.print("("+format(x)+" "+ format(y)+") "+ string);
-//		System.out.print("_"+ string);
-//	}
-//	
-//	@Override
-//	public void drawString(AttributedCharacterIterator aci, float x, float y) {
-//		super.drawString(aci, x, y);
-//		throw new RuntimeException("AttributedCharacterIterator NYI");
-//	}
-//	
-//	@Override
-//	public void fill(Shape shape) {
-//		super.fill(shape);
-////		System.out.print("F_"+createPathString(shape));
-//		System.out.print("F_*");
-//	}
-//	
-//	@Override
-//	public void setColor(Color color) {
-//		super.setColor(color);
-//		if (this.color == null || !color.toString().equals(this.color.toString())) {
-//			System.out.print("_COL_");
-//			if (!this.colorSet.contains(color)) {
-//				this.colorSet.add(color);
-//				System.out.print(""+color);
-//			}
-//		}
-//		this.color = color;
-//	}
 	
 	@Override
 	public void setFont(Font font) {
 		this.font = font;
 	}
 	
-//	@Override
-//	public void setStroke(Stroke stroke) {
-//		super.setStroke(stroke);
-//		if (!stroke.equals(this.stroke)) {
-//			System.out.println("CHANGED STROKE: "+stroke);
-//		}
-//		this.stroke = stroke;
-//	}
-//	
-//	@Override
-//	public void setTransform(AffineTransform at) {
-//		super.setTransform(at);
-//		if (this.affineTransform == null || !(at.toString().equals(affineTransform.toString()))) {
-//			System.out.println("NEW AFFINE "+at);
-//		}
-//		this.affineTransform = at;
-////		throw new RuntimeException("NYI");
-//	}
-//	
-//	@Override
-//	public void transform(AffineTransform at) {
-//		super.transform(at);
-//		if (this.lastAffine == null || !(at.toString().equals(lastAffine.toString()))) {
-////			System.out.println("NEW TRANSFORM "+at);
-//		}
-//		this.lastAffine = at;
-//	}
-//
-//	@Override
-//    public void stream(Writer svgwriter, boolean useCSS) throws SVGGraphics2DIOException {
-//		super.stream(svgwriter, useCSS);
-//		System.out.println("STREAM");
-//		Document document = this.getDOMFactory();
-//		Element root = document.getDocumentElement();
-//		System.out.println(root.getLocalName());
-//		for (int i = 0; i < root.getChildNodes().getLength(); i++) {
-//			System.out.println("CHILD "+root.getChildNodes().item(i).getLocalName());
+
+//	private static void addCoords(StringBuilder sb, double[] coords, int n) {
+//		for (int i = 0; i < n; i++) {
+//			sb.append(format(coords[i]));
+//			sb.append(" ");
 //		}
 //	}
-//
-//	// ===========================
-//	
-//	public static String createPathString(Shape shape) {
-//		return createPathString(shape, new AffineTransform());
-//		
-//	}
-//		
-//	public static String createPathString(Shape shape, AffineTransform at) {
-//		PathIterator pi = shape.getPathIterator(at);
-//		double coords[] = new double[6];
-//		StringBuilder sb = new StringBuilder();
-//		while (!pi.isDone()) {
-//			int type = pi.currentSegment(coords);
-//			pi.next();
-//			if (PathIterator.SEG_CLOSE == type) {
-//				sb.append("Z");
-//			} else if (PathIterator.SEG_CUBICTO == type) {
-//				sb.append("C");
-//				addCoords(sb, coords, 6);
-//			} else if (PathIterator.SEG_QUADTO == type) {
-//				sb.append("Q");
-//				addCoords(sb, coords, 4);
-//			} else if (PathIterator.SEG_LINETO == type) {
-//				sb.append("L");
-//				addCoords(sb, coords, 2);
-//			} else if (PathIterator.SEG_MOVETO == type) {
-//				sb.append("M");
-//				addCoords(sb, coords, 2);
-//			} else {
-//				throw new RuntimeException("unknown pathIterator code: "+type);
-//			}
-//		}
-//		return sb.toString();
-//	}
-	
-
-	private static void addCoords(StringBuilder sb, double[] coords, int n) {
-		for (int i = 0; i < n; i++) {
-			sb.append(format(coords[i]));
-			sb.append(" ");
-		}
-	}
 
 	private static String format(double coord) {
 		return ""+((double) ((int) Math.round(coord*PLACES_CONST)))/PLACES_CONST;

File src/main/java/org/xmlcml/graphics/text/ConstantYCharacterList.java

 	private static final double EPS = 0.05;
 	private static final double COORD_EPS = 0.0001;
 	private static final double FONT_EPS = 0.001;
+	
 	private List<SVGText> characterList;
 	private Double yCoord = null;
 	private List<Double> yCoordList = null;
 	private List<ConstantYCharacterList> subLines;
 	private WordSequence wordSequence;
 	private TextAnalyzer textAnalyzer;
-//	private SimpleFont simpleFont = SimpleFont.SIMPLE_FONT;
 	private SimpleFont simpleFont;
+	private Integer y;
 	
 	public ConstantYCharacterList(TextAnalyzer textAnalyzer, List<SVGText> characterList) {
 		this.characterList = characterList;
 			for (int i = 0; i < characterList.size(); i++) {
 				SVGText text = characterList.get(i);
 				Double fontSize = text.getFontSize();
+				LOG.trace("fontSize "+fontSize);
 				Double yCoord = text.getY();
 				String physicalStyle = getPhysicalStyle(text);
 				if (i == 0 || LineAttributesHaveChanged(lastFontSize, lastYCoord, lastPhysicalStyle, fontSize, yCoord,
 					chList.normalize();
 				}
 			}
+			if (lastFontSize != null) {
+				fontSize = lastFontSize;
+			}
 		}
 		return getSubLines();
 	}
 		characterList.add(svgText);
 	}
 
+	
+	public WordSequence getWordSequence() { // plain accessor: returns the field as-is; presumably null until createWords() has run - confirm
+		return wordSequence;
+	}
+
 	public int size() {
 		return characterList.size();
 	}
 		return characterList.iterator();
 	}
 
-	ConstantYCharacterList sortByX() {
+    void sortLineByX() {
 		// assumes no coincident text??
 		Map<Integer, SVGText> lineByXCoordMap = new HashMap<Integer, SVGText>();
 		for (SVGText text : this) {
 		Set<Integer> xCoords = lineByXCoordMap.keySet();
 		Integer[] xArray = xCoords.toArray(new Integer[xCoords.size()]);
 		Arrays.sort(xArray);
-		ConstantYCharacterList sortedText = new ConstantYCharacterList(this.textAnalyzer);
+		List<SVGText> newCharacterList = new ArrayList<SVGText>();
 		for (int x : xArray) {
-			sortedText.add(lineByXCoordMap.get(x));
+			newCharacterList.add(lineByXCoordMap.get(x));
 		}
+		this.characterList = newCharacterList;
 		getFontSize();
 		getYCoord();
 		getSinglePhysicalStyle();
 		getLineContent();
 		splitLineByCharacterAttributes();
-		return sortedText;
 	}
 	
 	/** 
 	 * @param simpleFont
 	 * @return
 	 */
-	public /* for test */String guessAndApplySpacingInLine(SimpleFont simpleFont) {
+	public /* for test */String guessAndApplySpacingInLine() {
 		if (lineContentIncludingSpaces == null) {
 			StringBuilder sb = new StringBuilder();
+			ensureSimpleFont();
+			LOG.trace("SF "+simpleFont);
 			RealArray realArray = getInterCharacterWidth();
 			for (int i = 0; i < characterList.size(); i++) {
 				SVGText text = characterList.get(i);
 				String ch = text.getText();
+				LOG.trace("CH "+ch+ " "+simpleFont);
 				SimpleCharacter simpleCharacter = simpleFont.getSimpleCharacter(ch);
 				if (simpleCharacter == null) {
 					simpleCharacter = simpleFont.getSimpleCharacter(DEFAULT_CHAR);
 	 * 
 	 * @return
 	 */
-	public /*for test*/ WordSequence createWords() {
+	public WordSequence createWords() {
 		if (wordSequence == null) {
 			ensureSimpleFont();
 			Real2 xy = (this.size() > 0) ? this.get(0).getXY() : null;
 			Double characterSeparation = null;
 			Double sumdeltax = 0.0;
 			for (SVGText text : this) {
+				fontSize = fontSize == null ? text.getFontSize() : fontSize;
+				if (fontSize == null) {
+					throw new RuntimeException("missing fontSize: "+text.getText());
+				}
 				String ch = text.getText();
+				LOG.trace("CH "+ch+" "+text.getXY());
 				if (origin == null) {
 					origin = text.getXY();
 					sumdeltax = 0.0;
 						LOG.trace("char not in font "+lastChar);
 						width = widths.get("e");
 					}
+					LOG.trace("FS "+fontSize);
 					estimatedLastWidth = width * fontSize * spaceFactor;
 					characterSeparation = x - lastX;
 					if (characterSeparation / estimatedLastWidth > 1.0) {
 				s += "   "+splitList+"\n";
 			}
 		} else {
+			
 			s = "chars: "+characterList.size() +
 				" Y: "+yCoord+
 				" fontSize: "+fontSize+
 				" physicalStyle: "+physicalStyle+
-				" >>"+lineContent;
+				" >>"+getLineContent();