Commits

Anonymous committed 04b4bd7 Draft

subscripts, axes, typed numbers

Comments (0)

Files changed (33)

src/main/java/org/xmlcml/graphics/control/CommandElement.java

 import org.xmlcml.graphics.control.page.ChunkAnalyzerElement;
 import org.xmlcml.graphics.control.page.BoxDrawerElement;
 import org.xmlcml.graphics.control.page.BoxProcessorElement;
-import org.xmlcml.graphics.control.page.ElementDeleterElement;
+import org.xmlcml.graphics.control.page.NodeDeleterElement;
 import org.xmlcml.graphics.control.page.ElementStylerElement;
 import org.xmlcml.graphics.control.page.PageActionElement;
 import org.xmlcml.graphics.control.page.PageAnalyzerElement;
 			newElement = new BoxDrawerElement();
 		} else if (tag.equals(BoxProcessorElement.TAG)) {
 			newElement = new BoxProcessorElement();
-		} else if (tag.equals(ElementDeleterElement.TAG)) {
-			newElement = new ElementDeleterElement();
+		} else if (tag.equals(NodeDeleterElement.TAG)) {
+			newElement = new NodeDeleterElement();
 		} else if (tag.equals(ElementStylerElement.TAG)) {
 			newElement = new ElementStylerElement();
 		} else if (tag.equals(FigureAnalyzerElement.TAG)) {

src/main/java/org/xmlcml/graphics/control/page/Axis.java

 	 */
 	public void processScaleValuesAndTitles(SVGElement container) {
 		texts = SVGUtil.getQuerySVGElements(container, ".//svg:text");
+		countTSpanChildren("ALL ", texts);
 		Real2Range textBox = getTextBox(complexLine.getBackbone());
 		BoxEdge edge = (LineOrientation.HORIZONTAL.equals(getOrientation())) ? BoxEdge.XMIN : BoxEdge.YMIN;
 		List<SVGElement> sortedTexts = BoundingBoxManager.getElementsSortedByEdge(texts, edge);
+		countTSpanChildren("SORTED ", texts);
 		List<SVGText> boundedTexts = getTextsInBox(textBox, sortedTexts); 
+		countTSpanChildren("BOUND ", texts);
 		ensureTickmarks();
 		if (LineOrientation.HORIZONTAL.equals(lineOrientation)) {
 			List<SVGText> horizontalTexts = getTexts(boundedTexts, LineOrientation.HORIZONTAL);
+			countTSpanChildren("HOR ", horizontalTexts);
+			for (SVGText horizontalText : horizontalTexts) {
+				horizontalText.debug("HOR TEXT");
+			}
 			analyzeHorizontalAxis(horizontalTexts);
 		} else if (LineOrientation.VERTICAL.equals(lineOrientation)) {
 			List<SVGText> verticalTexts = getTexts(boundedTexts, LineOrientation.HORIZONTAL);
 			}
 		}
 	}
+
+	/**
+	 * Logs, at debug level, the summed number of child tspans of the given texts.
+	 *
+	 * @param msg label written at the start of the log message
+	 * @param texts elements to inspect; each one is cast to SVGText
+	 */
+	private void countTSpanChildren(String msg, List<? extends SVGElement> texts) {
+		int total = 0;
+		for (int index = 0; index < texts.size(); index++) {
+			SVGText svgText = (SVGText) texts.get(index);
+			total += svgText.getChildTSpans().size();
+		}
+		LOG.debug(msg+" TSPANS****************"+total);
+	}
 	
 	public void createAxisGroup() {
 /*
 
 	private CMLArray createNumericValues(List<SVGText> numericTexts) {
 		CMLArray array = null;
-		String dataType = getCommonDataType(numericTexts);
-		if (dataType != null) {
-			List<String> values = new ArrayList<String>();
-			for (SVGText numericText : numericTexts) {
-				values.add(numericText.getValue());
-			}
+		// Builds a CMLArray from the numeric texts; the result stays null when no
+		// integer/double data type is found.
+		// A lone text is read through its NUMBERS attribute only when it actually has one.
+		// Texts are classed as numeric when they carry either NUMBER or NUMBERS, so a single
+		// text with just NUMBER must take the per-text path below; otherwise a null string
+		// could be handed to the IntArray/RealArray constructors.
+		if (numericTexts.size() == 1 && numericTexts.get(0).getAttributeValue(TextAnalyzer.NUMBERS) != null) {
+			SVGText text = numericTexts.get(0);
+			String dataType = text.getAttributeValue(TextAnalyzer.DATA_TYPE);
+			String numbers = text.getAttributeValue(TextAnalyzer.NUMBERS);
+			LOG.debug("NUMBERS: "+numbers);
 			if (CMLConstants.XSD_INTEGER.equals(dataType)) {
-				IntArray intArray = new IntArray(values.toArray(new String[0]));
+				IntArray intArray = new IntArray(numbers);
 				array = new CMLArray(intArray.getArray());
 			} else if (CMLConstants.XSD_DOUBLE.equals(dataType)) {
-				RealArray realArray = new RealArray(values.toArray(new String[0]));
+				RealArray realArray = new RealArray(numbers);
 				array = new CMLArray(realArray.getArray());
 			}
+		} else {
+			String dataType = getCommonDataType(numericTexts);
+			if (dataType != null) {
+				List<String> values = new ArrayList<String>();
+				for (SVGText numericText : numericTexts) {
+					values.add(TextAnalyzer.getNumericValue(numericText));
+				}
+				if (CMLConstants.XSD_INTEGER.equals(dataType)) {
+					IntArray intArray = new IntArray(values.toArray(new String[0]));
+					array = new CMLArray(intArray.getArray());
+				} else if (CMLConstants.XSD_DOUBLE.equals(dataType)) {
+					RealArray realArray = new RealArray(values.toArray(new String[0]));
+					array = new CMLArray(realArray.getArray());
+				}
+			}
 		}
 		return array;
 	}
 			numericTexts = new ArrayList<SVGText>();
 			nonNumericTexts = new ArrayList<SVGText>();
 			for (SVGText text : texts) {
-				if (text.query("@"+TextAnalyzer.NUMBER).size() > 0) {
+				if (text.query("@"+TextAnalyzer.NUMBER).size() > 0 ||
+					text.query("@"+TextAnalyzer.NUMBERS).size() > 0  ) {
 					numericTexts.add(text);
 				} else {
 					if (text.getValue().trim().length() != 0) {
 	}
 
 	private List<SVGText> getTexts(List<SVGText> textList, LineOrientation orientation) {
+		LOG.trace("ORIENT "+orientation+" texts "+textList.size());
 		List<SVGText> subTextList = new ArrayList<SVGText>();
 		for (SVGText text : textList) {
 			Transform2 transform = text.getTransform();
 			boolean isRotated = false;
+			Double degrees = null;
 			if (transform != null) {
-				double degrees = transform.getAngleOfRotation().getDegrees();
-				isRotated = Math.abs(degrees) > eps;
+				degrees = transform.getAngleOfRotation().getDegrees();
+			} else {
+				degrees = 0.0;
 			}
+			isRotated = Math.abs(degrees) > eps;
+			LOG.trace("IS ROT "+isRotated);
 			if (isRotated == LineOrientation.VERTICAL.equals(orientation)) {
+				LOG.trace("ADDED TEXT ");
 				subTextList.add(text);
+			} else {
+				text.debug("NOT ADDED");
 			}
 		}
 		return subTextList;
 
 	private List<SVGText> getTextsInBox(Real2Range textBox, List<SVGElement> sortedTexts) {
 		// crude at present
+		LOG.debug("TEXTBOX "+textBox);
 		List<SVGText> textList = new ArrayList<SVGText>();
 		for (int i = 0; i < sortedTexts.size(); i++) {
-			Real2Range bb = sortedTexts.get(i).getBoundingBox();
-			if (bb.intersectionWith(textBox) != null) {
-				if (textBox.includes(bb)) {
-					textList.add((SVGText)sortedTexts.get(i));
-				}
+			SVGText sortedText = (SVGText) sortedTexts.get(i);
+			Real2Range bb = sortedText.getBoundingBox();
+			LOG.debug("   BOX? "+bb);
+			if (textBox.includes(bb)) {
+				textList.add(sortedText);
+			} else {
+				sortedText.getBoundingBox();
+				sortedText.debug(bb+ " NOT INCLUDED in "+textBox);
 			}
 		}
 		return textList;
 
 	public RealArray createScaledArrayToRange(RealArray polylinePixelCoords) {
 		ensureTickmarks();
-		RealArray realArray =  polylinePixelCoords.createScaledArrayToRange(
-				lowestMajorTickCoordInPixels, highestMajorTickCoordInPixels, lowestTickMarkValue, highestTickMarkValue);
+		// Scale only when both ends of the tick range are known, in pixels and in values;
+		// all four are passed to the scaling call, so all four are checked.
+		// Returns null when any of them is missing.
+		RealArray realArray = null;
+		if (lowestTickMarkValue != null && lowestMajorTickCoordInPixels != null
+				&& highestTickMarkValue != null && highestMajorTickCoordInPixels != null) {
+			realArray =  polylinePixelCoords.createScaledArrayToRange(
+					lowestMajorTickCoordInPixels, highestMajorTickCoordInPixels, lowestTickMarkValue, highestTickMarkValue);
+		}
 		return realArray;
 	}
 	

src/main/java/org/xmlcml/graphics/control/page/AxisAnalyzer.java

 
 	static final Logger LOG = Logger.getLogger(AxisAnalyzer.class);
 
-	
+	public static final String AXES_BOX = "axesBox";
 	static final double _MAJOR_MINOR_TICK_RATIO = 1.1;
 
 	private List<ComplexLine> horizontalComplexLines;
 	private PageAnalyzer pageAnalyzer;
 	double eps;
 
-
-	private Real2Range plotBox;
-
+	private PlotBox plotBox;
 
 	public AxisAnalyzer(SVGElement container, PageAnalyzer pageAnalyzer) {
 		this.container = container;
 		}
 	}
 	
-	private Real2Range createPlotBox() {
+	private PlotBox createPlotBox() {
 		if (horizontalAxis != null && verticalAxis != null) {
-			plotBox = new Real2Range(horizontalAxis.getAxisRangeInPixels(), verticalAxis.getAxisRangeInPixels());
+			plotBox = new PlotBox(horizontalAxis, verticalAxis);
 			LOG.debug("PLOT BOX "+plotBox);
 			drawBox();
 		}
 	}
 	
 	private void drawBox() {
-		SVGRect bbox = new SVGRect(plotBox);
+		SVGRect bbox =plotBox.createRect();
+		bbox.setClassName(AXES_BOX);
 		bbox.setOpacity(0.3);
 		bbox.setStroke("cyan");
 		bbox.setStrokeWidth(5.0);
 	}
 
 
-	/** craete axis for given orientation
+	/** create axis for given orientation
 	 * 
 	 * @param complexLines
 	 * @param orientation
 				Axis axis = createAxis(complexLine, orientation);
 				if (axis != null) {
 					axisList.add(axis);
+//					container.debug("AXIS CONT");
 					axis.processScaleValuesAndTitles(container);
 					axis.createAxisGroup();
 					LOG.debug("************  AXIS "+axis);
 		this.boxThickness = boxThickness;
 	}
 
+	public PlotBox getPlotBox() {
+		return plotBox;
+	}
+
 }

src/main/java/org/xmlcml/graphics/control/page/CMLAnalyzer.java

+package org.xmlcml.graphics.control.page;
+
+import java.util.List;
+
+import nu.xom.Attribute;
+
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.xmlcml.cml.element.CMLFormula;
+import org.xmlcml.graphics.pdf2svg.AbstractSVGAnalyzer;
+import org.xmlcml.graphics.svg.SVGG;
+import org.xmlcml.graphics.svg.SVGSVG;
+import org.xmlcml.graphics.svg.SVGText;
+import org.xmlcml.graphics.svg.SVGUtil;
+import org.xmlcml.graphics.text.SubSupAnalyzer;
+import org.xmlcml.graphics.text.SubSupAnalyzer.SubSup;
+import org.xmlcml.molutil.ChemicalElement;
+
+/**
+ * Looks for inline chemical formulas in SVG text: an element symbol followed by a
+ * subscripted integer count. Matching texts are marked with a "formula" attribute and
+ * each run of element/count pairs is collected into a CMLFormula inserted into the group.
+ */
+public class CMLAnalyzer extends AbstractSVGAnalyzer {
+
+	private final static Logger LOG = Logger.getLogger(CMLAnalyzer.class);
+	static {
+		LOG.setLevel(Level.DEBUG);
+	}
+
+	private static final String ELEMENT_COUNT = "elementCount";
+	private static final String ELEMENT = "element";
+	private static final String FORMULA = "formula";
+
+	public CMLAnalyzer() {
+		super(new PageAnalyzer());
+	}
+
+	public CMLAnalyzer(SVGSVG svgPage) {
+		setSVGPage(svgPage);
+	}
+
+	public CMLAnalyzer(PageAnalyzer pageAnalyzer) {
+		super(pageAnalyzer);
+	}
+
+	/**
+	 * Analyzes every descendant g of svgg that has a child text whose script-type
+	 * attribute is SUBSCRIPT.
+	 *
+	 * @param svgg group to search
+	 */
+	public void analyzeInlineFormulas(SVGG svgg) {
+		List<SVGG> gList = SVGG.extractGs(SVGUtil.getQuerySVGElements(svgg, ".//svg:g[svg:text[@"+SubSupAnalyzer.SCRIPT_TYPE+"='"+SubSup.SUBSCRIPT+"']]"));
+		LOG.debug("SUBSCRIPTS"+gList.size());
+		for (SVGG g : gList) {
+			this.analyzeSubscriptsAsInlineFormulas(g);
+		}
+	}
+
+	/** Marks element/count pairs among the child texts of g, then groups them into formulas. */
+	private void analyzeSubscriptsAsInlineFormulas(SVGG g) {
+		List<SVGText> subscriptedTexts = SVGText.extractTexts(SVGUtil.getQuerySVGElements(g, "./svg:text[@"+SubSupAnalyzer.SCRIPT_TYPE+"='"+SubSup.SUBSCRIPT+"']"));
+		for (SVGText subscriptText : subscriptedTexts) {
+			markIfElementAndCount(subscriptText);
+		}
+		groupIntoInlineFormulas(g);
+	}
+
+	/**
+	 * Walks the child texts of g and collects consecutive (element, count) pairs into a
+	 * CMLFormula. A formula is closed by the first text that is not a marked element and
+	 * is inserted into g immediately before that text; a formula still open after the
+	 * last text is appended to g.
+	 */
+	private void groupIntoInlineFormulas(SVGG g) {
+		List<SVGText> textChildElements = SVGText.extractTexts(SVGUtil.getQuerySVGElements(g,  "./svg:text"));
+		int size = textChildElements.size();
+		CMLFormula formula = null;
+		for (int i = 0; i < size; i++) {
+			SVGText textChild = textChildElements.get(i);
+			if (ifIsElement(textChild)) {
+				// the count, when present, is the text right after the element symbol;
+				// guard the lookahead so a trailing element cannot run off the list
+				SVGText nextChild = (i + 1 < size) ? textChildElements.get(i + 1) : null;
+				if (nextChild != null && ifIsElementCount(nextChild)) {
+					if (formula == null) {
+						formula = new CMLFormula();
+					}
+					formula.add(textChild.getText(), Integer.valueOf(nextChild.getText()));
+					i++; // the count text has been consumed
+				}
+			} else {
+				if (formula != null) {
+					insertFormula(g, formula, textChild);
+				}
+				formula = null;
+			}
+		}
+		if (formula != null) {
+			// the texts ended while a formula was still being built
+			insertFormula(g, formula, null);
+		}
+	}
+
+	/**
+	 * Inserts formula into g before the given child, or appends it when before is null
+	 * or is not a child of g. The position is looked up in g itself because the list of
+	 * texts being iterated is a separate list whose indexes need not match g's children.
+	 */
+	private void insertFormula(SVGG g, CMLFormula formula, SVGText before) {
+		formula.debug("FFFFFFFFFFFFFF");
+		int position = (before == null) ? -1 : g.indexOf(before);
+		if (position >= 0) {
+			g.insertChild(formula, position);
+		} else {
+			g.appendChild(formula);
+		}
+	}
+
+	private boolean ifIsElementCount(SVGText nextChild) {
+		return ELEMENT_COUNT.equals(nextChild.getAttributeValue(FORMULA));
+	}
+
+	private boolean ifIsElement(SVGText nextChild) {
+		return ELEMENT.equals(nextChild.getAttributeValue(FORMULA));
+	}
+
+	/**
+	 * Marks subscriptText as an element count, and its immediately preceding sibling as
+	 * an element, when the subscript parses as an integer, the sibling is an SVGText
+	 * without a script-type attribute, and the sibling's text is a known chemical element.
+	 * Anything else leaves both texts unmarked.
+	 */
+	private void markIfElementAndCount(SVGText subscriptText) {
+		String countString = subscriptText.getText();
+		if (countString == null) {
+			return;
+		}
+		try {
+			Integer.parseInt(countString);
+		} catch (NumberFormatException ignored) {
+			// not an integer, so this subscript cannot be an element count
+			return;
+		}
+		nu.xom.Nodes preceding = subscriptText.query("preceding-sibling::*[1]");
+		if (preceding.size() == 0 || !(preceding.get(0) instanceof SVGText)) {
+			// no previous sibling, or it is not a text
+			return;
+		}
+		SVGText precedingText = (SVGText) preceding.get(0);
+		if (precedingText.getAttribute(SubSupAnalyzer.SCRIPT_TYPE) != null) {
+			return;
+		}
+		String content = precedingText.getText();
+		if (content != null && ChemicalElement.getChemicalElement(content) != null) {
+			CMLAnalyzer.markAsElement(precedingText);
+			CMLAnalyzer.markAsElementCount(subscriptText);
+		}
+	}
+
+	private static void markAsElement(SVGText text) {
+		text.addAttribute(new Attribute(FORMULA, ELEMENT));
+	}
+
+	private static void markAsElementCount(SVGText text) {
+		text.addAttribute(new Attribute(FORMULA, ELEMENT_COUNT));
+	}
+
+}

src/main/java/org/xmlcml/graphics/control/page/Chunk.java

 	private BoxEdge edge;
 	protected ChunkStyle chunkStyle;
 	private Set<Class<?>> svgClassSet;
-	private PageChunkSplitter chunkAnalyzer;
+	private PageChunkSplitter pageChunkSplitter;
 	protected StyleManager styleManager;
 
 	public Chunk() {
 		super();
 	}
 	
-	public Chunk(PageChunkSplitter chunkAnalyzer) {
-		this.chunkAnalyzer = chunkAnalyzer;
+	public Chunk(PageChunkSplitter pageChunkSplitter) {
+		this.pageChunkSplitter = pageChunkSplitter;
 		ensureBoundingBoxManager();
 	}
 
 		this(null, element);
 	}
 		
-	public Chunk(PageChunkSplitter chunkAnalyzer, SVGElement element) {
-		this(chunkAnalyzer);
+	public Chunk(PageChunkSplitter pageChunkSplitter, SVGElement element) {
+		this(pageChunkSplitter);
 		createElementListAndCalculateBoundingBoxes(element);
 	}
 
 	
 	private Chunk makeChunk(Double chunkWidth, BoxEdge edge, Integer decimalPlaces, int count) {
 		Chunk chunk;
-		chunk = new Chunk(chunkAnalyzer);
+		chunk = new Chunk(pageChunkSplitter);
 		chunk.setBoundingBoxCached(true);
 		chunk.setBoundingBoxAttribute(decimalPlaces);
 		chunk.addAttribute(new Attribute("edge", ""+edge));
 
 	List<Chunk> splitByPhysicalStyle(PageAnalyzer pageAnalyzer) {
 		ensureStyleManager(pageAnalyzer);
-		PageChunkSplitter chunkAnalyzer = pageAnalyzer.getChunkAnalyzer();
+		PageChunkSplitter pageChunkSplitter = pageAnalyzer.getPageChunkSplitter();
 		List<Chunk> chunkList = new ArrayList<Chunk>();
 		String lastPhysicalStyle = null;
 		Chunk chunk = null;
 				continue;
 			}
 			if (!physicalStyle.equals(lastPhysicalStyle)) {
-				chunk = new Chunk(chunkAnalyzer);
+				chunk = new Chunk(pageChunkSplitter);
 				addPhysicalStyle(chunk, physicalStyle);
 				chunkList.add(chunk);
 				this.appendChild(chunk);
 				System.out.println("...................................."+clazz.getName());
 			}
 		}
-		SVGSVG svgPage = chunkAnalyzer.getSVGPage();
+		SVGSVG svgPage = pageChunkSplitter.getSVGPage();
 		SVGUtil.drawBoxes(Arrays.asList(this), svgPage, "black", fill, 1., 0.3);
 	}
 	private void ensureClassSet() {

src/main/java/org/xmlcml/graphics/control/page/ChunkAnalyzer.java

 import org.xmlcml.graphics.svg.SVGPolyline;
 import org.xmlcml.graphics.svg.SVGText;
 import org.xmlcml.graphics.svg.SVGUtil;
+import org.xmlcml.graphics.text.SubSupAnalyzer;
+import org.xmlcml.graphics.text.SubSupAnalyzer.SubSup;
 import org.xmlcml.graphics.text.TextAnalyzer;
 
 public class ChunkAnalyzer extends AbstractSVGAnalyzer {
 	private LineAnalyzer lineAnalyzer;
 	private List<SVGPolyline> polylines;
 	private PolylineAnalyzer polylineAnalyzer;
+	private CMLAnalyzer cmlAnalyzer;
 	private SVGG svgg;
+	private PlotBox plotBox;
 
 	public ChunkAnalyzer(PageAnalyzer pageAnalyzer) {
 		this.pageAnalyzer = pageAnalyzer;
 		this.svgPage = pageAnalyzer.getSVGPage();
+		pageAnalyzer.setCurrentChunkAnalyzer(this);
 	}
 
 	public void analyzeChunk(SVGG g) {
 		CMLUtil.outputQuietly(svgg, new File("target/line"+svgg.getId()+".svg"), 1);
 		analyzePolylines();
 		CMLUtil.outputQuietly(svgg, new File("target/poly"+svgg.getId()+".svg"), 1);
+		analyzeCML();
+		CMLUtil.outputQuietly(svgg, new File("target/poly"+svgg.getId()+".svg"), 1);
 	}
 
 	private void analyzeTexts() {
-		texts = SVGText.extractTexts(SVGUtil.getQuerySVGElements(svgg, ".//svg:text"));
+		analyzeTexts(0);
+		analyzeTexts(90);
+		analyzeTexts(180);
+	}
+
+	private void analyzeTexts(int angle) {
+		ensureTextAnalyzer();
+		String angleCondition = (angle == 0) ? "@angle='0' or not(@angle)" : "@angle='"+angle+"'";
+		texts = SVGText.extractTexts(SVGUtil.getQuerySVGElements(svgg, ".//svg:text["+angleCondition+"]"));
+		LOG.trace("ROT "+angle+": "+texts.size());
 		if (texts.size() > 0) {
-			textAnalyzer = new TextAnalyzer(pageAnalyzer);
+//			textAnalyzer = new TextAnalyzer(pageAnalyzer);
 			textAnalyzer.analyzeTexts(svgg, texts);
 		}
 	}
 	
+	private TextAnalyzer ensureTextAnalyzer() {
+		if (textAnalyzer == null) {
+			textAnalyzer = new TextAnalyzer(pageAnalyzer);
+		}
+		return textAnalyzer;
+	}
+
 	private void analyzeLines() {
 		lines = SVGLine.extractLines(SVGUtil.getQuerySVGElements(svgg, ".//svg:line"));
 		if (lines.size() > 0) {
 		}
 	}
 
+	/**
+	 * @return this chunk's TextAnalyzer, created on first use
+	 */
+	public TextAnalyzer getTextAnalyzer() {
+		return ensureTextAnalyzer();
+	}
+
 	private void analyzePolylines() {
 		polylines = SVGPolyline.extractPolylines(SVGUtil.getQuerySVGElements(svgg, ".//svg:polyline"));
 		if (polylines.size() > 0) {
 		}
 	}
 
+	private void analyzeCML() {
+		ensureCMLAnalyzer();
+		cmlAnalyzer.analyzeInlineFormulas(svgg);
+	}
+
+	/** Creates the CMLAnalyzer on first use. */
+	private void ensureCMLAnalyzer() {
+		if (cmlAnalyzer == null) {
+			// share this chunk's PageAnalyzer, as ensureTextAnalyzer() does,
+			// rather than building the analyzer on a detached, empty PageAnalyzer
+			cmlAnalyzer = new CMLAnalyzer(pageAnalyzer);
+		}
+	}
+
 	private void debugLeaf() {
 		List<SVGElement> gList = SVGUtil.getQuerySVGElements(svgg, "./svg:g");
 		LOG.trace("G children: "+gList.size());
 //		LOG.debug("G "+texts.size()+" texts;    "+lines.size()+" lines;    "+polylines.size()+" polylines; ");
 	}
 
-
+	/**
+	 * Returns the plot box, fetching it from the line analyzer the first time it is
+	 * asked for.
+	 *
+	 * @return the plot box, or null when none is cached and there is no line analyzer
+	 *         (or the line analyzer has none)
+	 */
+	public PlotBox getPlotBox() {
+		boolean canFetch = plotBox == null && lineAnalyzer != null;
+		if (canFetch) {
+			plotBox = lineAnalyzer.getPlotBox();
+		}
+		return plotBox;
+	}
 
 }

src/main/java/org/xmlcml/graphics/control/page/ChunkAnalyzerAction.java

 public class ChunkAnalyzerAction extends PageAction {
 
 	private final static Logger LOG = Logger.getLogger(ChunkAnalyzerAction.class);
-	
+	private boolean subSup;
+	private boolean removeNumericTSpans;
+	private boolean splitAtSpaces;
 	
 	public ChunkAnalyzerAction(AbstractActionElement pageActionCommand) {
 		super(pageActionCommand);
 		if (xpath != null) {
 			List<SVGElement> elements = SVGUtil.getQuerySVGElements(getSVGPage(), xpath);
 			LOG.debug("LEAFS "+elements.size());
+			this.subSup = isTrue(ChunkAnalyzerElement.SUBSUP);
+			this.splitAtSpaces = isTrue(ChunkAnalyzerElement.SPLIT_AT_SPACES);
+			this.removeNumericTSpans = isTrue(ChunkAnalyzerElement.REMOVE_NUMERIC_TSPANS);
+
 			for (SVGElement element : elements) {
 				if (!(element instanceof SVGG)) {
 					throw new RuntimeException("Must operate on <g> elements");
 				}
 				LOG.trace("*********************ELEMENT "+element.getId());
 				analyzeChunk((SVGG)element);
-				if (isTrue(ChunkAnalyzerElement.SUBSUP)) {
-					processSubSup(element);
-				}
 			}
 			debugFile("target/chunkAnalyzer1Axes.svg");
 		}
 	}
 	
-	private void processSubSup(SVGElement element) {
-		List<SVGText> texts =SVGText.extractTexts(SVGUtil.getQuerySVGElements(element, ".//svg:text"));
-		TextAnalyzer textAnalyzer = new TextAnalyzer(pageAnalyzer);
-		textAnalyzer.mergeSubSup(texts);
+	private void analyzeChunk(SVGG svgg) {
+		ChunkAnalyzer chunkAnalyzer = new ChunkAnalyzer(pageAnalyzer);
+		createTextAnalyzer(chunkAnalyzer);
+		chunkAnalyzer.analyzeChunk(svgg);
 	}
 
-	private void analyzeChunk(SVGG svgg) {
-		ChunkAnalyzer chunkAnalyzer = new ChunkAnalyzer(pageAnalyzer);
-		chunkAnalyzer.analyzeChunk(svgg);
+	private void createTextAnalyzer(ChunkAnalyzer chunkAnalyzer) {
+		TextAnalyzer textAnalyzer = chunkAnalyzer.getTextAnalyzer();
+		textAnalyzer.setSubSup(subSup);
+		textAnalyzer.setRemoveNumericTSpans(removeNumericTSpans);
+		textAnalyzer.setSplitAtSpaces(splitAtSpaces);
 	}
 	
 }

src/main/java/org/xmlcml/graphics/control/page/ChunkAnalyzerElement.java

 	
 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	public static final String SUBSUP = "subSup";
+	public static final String REMOVE_NUMERIC_TSPANS = "removeNumericTSpans";
+	public static final String SPLIT_AT_SPACES = "splitAtSpaces";
 	
 	/** attribute names
 	 * 
 	static {
 		ATTNAMES.add(PageActionElement.XPATH);
 		ATTNAMES.add(SUBSUP);
+		ATTNAMES.add(REMOVE_NUMERIC_TSPANS);
+		// splitAtSpaces is read by ChunkAnalyzerAction, so it must be listed as well
+		ATTNAMES.add(SPLIT_AT_SPACES);
 	}
 
 	/** constructor

src/main/java/org/xmlcml/graphics/control/page/ElementDeleterAction.java

-package org.xmlcml.graphics.control.page;
-
-
-import org.apache.log4j.Logger;
-import org.xmlcml.graphics.control.AbstractActionElement;
-
-public class ElementDeleterAction extends PageAction {
-
-	private final static Logger LOG = Logger.getLogger(ElementDeleterAction.class);
-	
-	public ElementDeleterAction(AbstractActionElement pageActionCommand) {
-		super(pageActionCommand);
-	}
-	
-	@Override
-	public void run() {
-		deleteNodes(getXPath());
-	}
-
-}

src/main/java/org/xmlcml/graphics/control/page/ElementDeleterElement.java

-package org.xmlcml.graphics.control.page;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-
-import nu.xom.Node;
-import nu.xom.Nodes;
-
-import org.xmlcml.graphics.control.AbstractActionElement;
-import org.xmlcml.graphics.control.CommandElement;
-import org.xmlcml.graphics.pdf2svg.DocumentAnalyzer;
-import org.xmlcml.graphics.svg.SVGSVG;
-
-
-public class ElementDeleterElement extends AbstractActionElement {
-
-	public final static String TAG ="deleteElements";
-	private static final List<String> ATTNAMES = new ArrayList<String>();
-	
-	static {
-//		ATTNAMES.add(PageActionElement.ACTION);
-		ATTNAMES.add(PageActionElement.PAGE_RANGE);
-		ATTNAMES.add(PageActionElement.TITLE);
-		ATTNAMES.add(PageActionElement.XPATH);
-	}
-
-	/** constructor
-	 */
-	public ElementDeleterElement() {
-		super(TAG);
-		init();
-	}
-	
-	protected void init() {
-	}
-	
-	/** constructor
-	 */
-	public ElementDeleterElement(CommandElement element) {
-        super(element);
-	}
-	
-    /**
-     * copy node .
-     *
-     * @return Node
-     */
-    public Node copy() {
-        return new ElementDeleterElement(this);
-    }
-
-	/**
-	 * @return tag
-	 */
-	public String getTag() {
-		return TAG;
-	}
-
-	protected List<String> getAttributeNames() {
-		return ATTNAMES;
-	}
-
-	protected List<String> getRequiredAttributeNames() {
-		return Arrays.asList(new String[]{
-				AbstractActionElement.XPATH,
-		});
-	}
-
-
-}

src/main/java/org/xmlcml/graphics/control/page/NodeDeleterAction.java

+package org.xmlcml.graphics.control.page;
+
+
+import org.apache.log4j.Logger;
+import org.xmlcml.graphics.control.AbstractActionElement;
+
+/**
+ * Page action for the deleteNodes command: hands the command's XPath to deleteNodes.
+ */
+public class NodeDeleterAction extends PageAction {
+
+	private final static Logger LOG = Logger.getLogger(NodeDeleterAction.class);
+
+	/**
+	 * @param pageActionCommand command element passed on to the PageAction constructor
+	 */
+	public NodeDeleterAction(AbstractActionElement pageActionCommand) {
+		super(pageActionCommand);
+	}
+
+	/** Calls deleteNodes with the value of getXPath(). */
+	@Override
+	public void run() {
+		deleteNodes(getXPath());
+	}
+
+}

src/main/java/org/xmlcml/graphics/control/page/NodeDeleterElement.java

+package org.xmlcml.graphics.control.page;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import nu.xom.Node;
+import nu.xom.Nodes;
+
+import org.xmlcml.graphics.control.AbstractActionElement;
+import org.xmlcml.graphics.control.CommandElement;
+import org.xmlcml.graphics.pdf2svg.DocumentAnalyzer;
+import org.xmlcml.graphics.svg.SVGSVG;
+
+
+/**
+ * Command element with tag "deleteNodes". It accepts the pageRange, title and xpath
+ * attributes; xpath is required.
+ */
+public class NodeDeleterElement extends AbstractActionElement {
+
+	public final static String TAG ="deleteNodes";
+
+	/** Attribute names accepted by this element. */
+	private static final List<String> ATTNAMES = new ArrayList<String>(Arrays.asList(
+			PageActionElement.PAGE_RANGE,
+			PageActionElement.TITLE,
+			PageActionElement.XPATH));
+
+	/** Creates an empty element carrying this class's tag. */
+	public NodeDeleterElement() {
+		super(TAG);
+		init();
+	}
+
+	/** Copy constructor, used by copy(). */
+	public NodeDeleterElement(CommandElement element) {
+		super(element);
+	}
+
+	/** Nothing to initialise at present. */
+	protected void init() {
+	}
+
+	/**
+	 * Copies this node.
+	 *
+	 * @return a new NodeDeleterElement built from this one
+	 */
+	public Node copy() {
+		return new NodeDeleterElement(this);
+	}
+
+	/**
+	 * @return the tag of this element
+	 */
+	public String getTag() {
+		return TAG;
+	}
+
+	/**
+	 * @return the attribute names accepted by this element
+	 */
+	protected List<String> getAttributeNames() {
+		return ATTNAMES;
+	}
+
+	/**
+	 * @return the attribute names that must be present (only xpath)
+	 */
+	protected List<String> getRequiredAttributeNames() {
+		return Arrays.asList(AbstractActionElement.XPATH);
+	}
+
+}

src/main/java/org/xmlcml/graphics/control/page/PageActionElement.java

 	public static final String PAGE_RANGE = "pageRange";
 	public static final String REMOVE_DEFS = "removeDefs";
 	public static final String REMOVE_IMAGE_DATA = "removeImageData";
+	public static final String REMOVE_UNIT_TRANSFORMS = "removeUnitTransforms";
 	public static final String REMOVE_UNWANTED_ATTRIBUTES = "removeUnwantedAttributes";
 	public static final String STROKE = "stroke";
 	public static final String STROKE_WIDTH = "strokeWidth";
 		ATTNAMES.add(REGEX);
 		ATTNAMES.add(REMOVE_DEFS);
 		ATTNAMES.add(REMOVE_IMAGE_DATA);
+		ATTNAMES.add(REMOVE_UNIT_TRANSFORMS);
 		ATTNAMES.add(REMOVE_UNWANTED_ATTRIBUTES);
 		ATTNAMES.add(STROKE);
 		ATTNAMES.add(STROKE_WIDTH);
 	public final static String TAG = "pageAction";
 
 
+
 	/** constructor 
 	 */
 	public PageActionElement() {

src/main/java/org/xmlcml/graphics/control/page/PageActionFactory.java

 			pageAction = new BoxProcessorAction(command);
 		} else if(command instanceof ElementStylerElement) {
 			pageAction = new ElementStylerAction(command);
-		} else if(command instanceof ElementDeleterElement) {
-			pageAction = new ElementDeleterAction(command);
+		} else if(command instanceof NodeDeleterElement) {
+			pageAction = new NodeDeleterAction(command);
 		} else if(command instanceof FigureAnalyzerElement) {
 			pageAction = new FigureAnalyzerAction(command);
 		} else if(command instanceof PageNormalizerElement) {

src/main/java/org/xmlcml/graphics/control/page/PageAnalyzer.java

 	DocumentAnalyzer documentAnalyzer;
 	private PageClipPathAnalyzer clipPathAnalyzer;
 	private PageFontSizeAnalyzer fontSizeAnalyzer;
-	private PageChunkSplitter chunkAnalyzer;
+	private PageChunkSplitter pageChunkSplitter;
 	private TextAnalyzer textAnalyzer;
 	private FigureAnalyzer figureAnalyzer;
 	private TableAnalyzer tableAnalyzer;
+	private ChunkAnalyzer currentChunkAnalyzer;
+
+	private Integer rotationAngle;
 
 	public PageAnalyzer() {
 	}
 		return fontSizeAnalyzer;
 	}
 
-	public PageChunkSplitter ensureChunkAnalyzer() {
-		if (chunkAnalyzer == null) {
-			chunkAnalyzer = new PageChunkSplitter(this);
+	public PageChunkSplitter ensurePageChunkSplitter() {
+		if (pageChunkSplitter == null) {
+			pageChunkSplitter = new PageChunkSplitter(this);
 		}
-		return chunkAnalyzer;
+		return pageChunkSplitter;
 	}
 
 	public TextAnalyzer ensureTextAnalyzer() {
 		return fontSizeAnalyzer;
 	}
 
-	public PageChunkSplitter getChunkAnalyzer() {
-		return chunkAnalyzer;
+	public PageChunkSplitter getPageChunkSplitter() {
+		return pageChunkSplitter;
 	}
 
 	public StyleManager getStyleManager() {
 	public String getNamePrefix() {
 		return NAME_PREFIX;
 	}
+
+	public void setCurrentChunkAnalyzer(ChunkAnalyzer chunkAnalyzer) {
+		this.currentChunkAnalyzer = chunkAnalyzer;
+	}
+
+	public ChunkAnalyzer getCurrentChunkAnalyzer() {
+		return currentChunkAnalyzer;
+	}
+
+	public void setRotationAngle(Integer angle) {
+		this.rotationAngle = angle;
+	}
 }

src/main/java/org/xmlcml/graphics/control/page/PageChunkSplitter.java

  * 
  * each of these has a special analyzer (TextAnalyzer, etc.)
  * 
- * chunkAnalyzer communicates upwards through pageAnalyzer
+ * pageChunkSplitter communicates upwards through pageAnalyzer
  * @author pm286
  *
  */
 				"fill-rule",
 		});
 		Long time = System.currentTimeMillis();
-		LOG.trace("chunkAnalyzer: "+(time-time0));
+		LOG.trace("pageChunkSplitter: "+(time-time0));
 		return subSubSubChunkList;
 	}
 

src/main/java/org/xmlcml/graphics/control/page/PageNormalizerAction.java

 import java.util.List;
 
 import nu.xom.Attribute;
+import nu.xom.Nodes;
 
 import org.apache.log4j.Logger;
 import org.xmlcml.cml.base.CMLUtil;
 	private static final double X_OFFSET = 0.;
 	private static final double Y_OFFSET = 0.;
 	private static final String ANGLE = "angle";
+	private static final double[] UNIT_ARRAY = new double[]{1.,0.,0.,0.,1.,0.,0.,0.,1.};	
+	private static final RealArray UNIT_REAL_ARRAY = new RealArray(UNIT_ARRAY);
+	
 	private Multimap<Integer, SVGText> textByRotation;
 	private CodePointConverter codePointConverter;
 
 			getSVGPage().format(decimalPlaces);
 			debugFile("target/pageNorm2Decimal.svg");
 		}
+		
+		if (isTrue(PageActionElement.REMOVE_UNIT_TRANSFORMS)) {
+			removeUnitTransforms(getSVGPage());
+//			debugFile("target/pageNorm1Rotation.svg");
+		}
+		
+	}
+
+	/**
+	 * Detaches every transform attribute found by ".//@transform" from element whose
+	 * matrix equals the unit matrix within EPS.
+	 *
+	 * @param element root of the subtree to clean
+	 */
+	private void removeUnitTransforms(SVGElement element) {
+		Nodes transformAttributes = element.query(".//@transform");
+		LOG.debug("TRANSFORM "+transformAttributes.size());
+		for (int index = 0; index < transformAttributes.size(); index++) {
+			Attribute attribute = (Attribute) transformAttributes.get(index);
+			Transform2 transform = SVGElement.createTransform2FromTransformAttribute(attribute.getValue());
+			RealArray matrixArray = new RealArray(transform.getMatrixAsArray());
+			boolean isUnit = UNIT_REAL_ARRAY.equals(matrixArray, EPS);
+			if (isUnit) {
+				attribute.detach();
+			}
+		}
+	}
 
 	private void normalizeHighCodePoints(SVGSVG svgPage) {
 		List<SVGText> texts = SVGText.extractTexts(SVGUtil.getQuerySVGElements(svgPage, ".//svg:text"));
+		// i tracks the position of the current text so that debug() can show its neighbours
+		int i =0;
 		for (SVGText text : texts) {
 			String s = text.getValue();
 			int codePoint = s.codePointAt(0);
 			if (codePoint > 127) {
 				ensureCodePointConverter();
-				Character newChar = codePointConverter.convertCharacter(new Character((char)s.charAt(0)));
+				String newChar = codePointConverter.convertCharacter(new Character((char)s.charAt(0)));
 				if (newChar == null) {
 					LOG.debug(" unknown codePoint " + codePoint);
 				}
 				text.setText(""+newChar);
+				LOG.trace(" codePoint " + codePoint+ "["+text.getValue()+"]");
+				// newChar is null for an unknown code point (logged above), so it must
+				// be checked before it is dereferenced
+				if (newChar != null && newChar.startsWith("[")) {
+					LOG.debug(debug(texts, i));
+				}
 			}
+			i++;
 		}
 	}
 
+	/**
+	 * Builds a one-line view of the text at ipos with up to ten texts on either side.
+	 * Values are joined with "_"; the value at ipos has a doubled "_" on each side.
+	 *
+	 * @param texts all texts, in order
+	 * @param ipos index of the text of interest
+	 * @return the joined values
+	 */
+	private String debug(List<SVGText> texts, int ipos) {
+		int first = Math.max(ipos-10,  0);
+		int last = Math.min(ipos+10,  texts.size()-1);
+		StringBuilder context = new StringBuilder();
+		// texts before the one of interest, each followed by a separator
+		int k = first;
+		while (k < ipos) {
+			context.append(texts.get(k).getValue()).append("_");
+			k++;
+		}
+		// the text of interest, with an extra separator on both sides
+		context.append("_").append(texts.get(ipos).getValue()).append("_");
+		// texts after it, each preceded by a separator
+		k = ipos+1;
+		while (k <= last) {
+			context.append("_").append(texts.get(k).getValue());
+			k++;
+		}
+		return context.toString();
+	}
+
 	private void ensureCodePointConverter() {
 		if (codePointConverter == null) {
 			codePointConverter = new CodePointConverter();
 		LOG.debug("unrotated  : "+zeroCount);
 		LOG.debug("rotated 90 : "+plus90Count);
 		LOG.debug("upsideDown : "+upsideDownCount);
+		Integer pageRotation = null;
 		if (plus90Count > 0 && minus90Count == 0) {
-			rotatePage(new Angle(-Math.PI/2., Units.RADIANS));
+			pageRotation = -90;
 		} else if (plus90Count == 0 && minus90Count > 0) {
-			rotatePage(new Angle(Math.PI/2., Units.RADIANS));
+			pageRotation = 90;
 		} else if (zeroCount > 0 && minus90Count == 0 && plus90Count == 0) {
 			LOG.debug("no rotation required");
 		}
+		if (pageRotation != null) {
+			rotatePage(pageRotation);
+		}
 	}
 
 	private void storeRotationCounts(List<SVGElement> textsTransform) {
 		}
 	}
 
-	private void rotatePage(Angle newAngle) {
+	private void rotatePage(Integer angle) {
+		this.pageAnalyzer.setRotationAngle(angle);
+		Angle newAngle = new Angle((double) angle, Units.DEGREES);
 		Transform2 t2 = new Transform2(newAngle);
-		List<SVGElement> elements = SVGUtil.getQuerySVGElements(getSVGPage(), ".//svg:*[count(*)=0]");
+		// get leaf nodes or text (might have tspans)
+		List<SVGElement> elements = SVGUtil.getQuerySVGElements(getSVGPage(), ".//svg:*[count(*)=0 or self::svg:text]");
 		for (SVGElement element : elements) {
 			Double fontSize = null;
 			if (element instanceof SVGText) {

src/main/java/org/xmlcml/graphics/control/page/PageNormalizerElement.java

 	private static final List<String> ATTNAMES = new ArrayList<String>();
 	
 	static {
-//		ATTNAMES.add(PageActionElement.ACTION);
 		ATTNAMES.add(PageActionElement.APPLY_AND_REMOVE_CUMULATIVE_TRANSFORMS);
 		ATTNAMES.add(PageActionElement.CAN_ROTATE_LANDSCAPE);
 		ATTNAMES.add(PageActionElement.CLEAN_SVG_STYLES);
 		ATTNAMES.add(PageActionElement.FORMAT_DECIMAL_PLACES);
 		ATTNAMES.add(PageActionElement.NORMALIZE_HIGH_CODE_POINTS);
 		ATTNAMES.add(PageActionElement.REMOVE_IMAGE_DATA);
+		ATTNAMES.add(PageActionElement.REMOVE_UNIT_TRANSFORMS);
 		ATTNAMES.add(PageActionElement.REMOVE_UNWANTED_ATTRIBUTES);
 	}
 

src/main/java/org/xmlcml/graphics/control/page/PlotBox.java

+package org.xmlcml.graphics.control.page;
+
+import org.xmlcml.euclid.Real2Range;
+import org.xmlcml.euclid.RealRange;
+import org.xmlcml.graphics.svg.SVGRect;
+
+/**
+ * Pairs a horizontal and a vertical Axis and derives the rectangle they span.
+ * The pixel range of each axis is captured when the axis is set; the combined
+ * box range is only computed by createRect().
+ */
+public class PlotBox {
+
+
+	private Axis horizontalAxis;
+	private Axis verticalAxis;
+	private RealRange horizontalRange;
+	private RealRange verticalRange;
+	private Real2Range boxRange;
+
+
+	/**
+	 * @param horizontalAxis may be null
+	 * @param verticalAxis may be null
+	 */
+	public PlotBox(Axis horizontalAxis, Axis verticalAxis) {
+		setHorizontalAxis(horizontalAxis);
+		setVerticalAxis(verticalAxis);
+	}
+	
+	public Axis getHorizontalAxis() {
+		return horizontalAxis;
+	}
+
+	/**
+	 * Stores the axis and its pixel range (null range for a null axis).
+	 */
+	public void setHorizontalAxis(Axis horizontalAxis) {
+		this.horizontalAxis = horizontalAxis;
+		if (horizontalAxis == null) {
+			this.horizontalRange = null;
+		} else {
+			this.horizontalRange = horizontalAxis.getAxisRangeInPixels();
+		}
+	}
+
+	public Axis getVerticalAxis() {
+		return verticalAxis;
+	}
+
+	/**
+	 * Stores the axis and its pixel range (null range for a null axis).
+	 */
+	public void setVerticalAxis(Axis verticalAxis) {
+		this.verticalAxis = verticalAxis;
+		if (verticalAxis == null) {
+			this.verticalRange = null;
+		} else {
+			this.verticalRange = verticalAxis.getAxisRangeInPixels();
+		}
+	}
+
+	/**
+	 * Computes the box range from both pixel ranges and wraps it in a rect.
+	 *
+	 * @return the rect, or null (box range left untouched) when either range is missing
+	 */
+	SVGRect createRect() {
+		if (horizontalRange == null || verticalRange == null) {
+			return null;
+		}
+		boxRange = new Real2Range(horizontalRange, verticalRange);
+		return new SVGRect(boxRange);
+	}
+
+	/**
+	 * @return the range computed by the last successful createRect(); null before that
+	 */
+	public Real2Range getBoxRange() {
+		return boxRange;
+	}
+}

src/main/java/org/xmlcml/graphics/control/page/WhitespaceChunkerAction.java

 	
 	@Override
 	public void run() {
-		PageChunkSplitter chunkAnalyzer = pageAnalyzer.ensureChunkAnalyzer();
+		PageChunkSplitter pageChunkSplitter = pageAnalyzer.ensurePageChunkSplitter();
 		Integer depth = getDepth();
 		if (depth != null) {
 			LOG.trace("DEPTH cannot yet be set");
 		}
-		List<Chunk> finalChunkList = chunkAnalyzer.splitByWhitespace();
-		chunkAnalyzer.labelLeafNodes(finalChunkList);
-		chunkAnalyzer.splitByPhysicalStyle(finalChunkList);
+		List<Chunk> finalChunkList = pageChunkSplitter.splitByWhitespace();
+		pageChunkSplitter.labelLeafNodes(finalChunkList);
+		pageChunkSplitter.splitByPhysicalStyle(finalChunkList);
 		
 	}
 

src/main/java/org/xmlcml/graphics/paths/LineAnalyzer.java

 import java.util.Arrays;
 import java.util.List;
 
+import nu.xom.Element;
+
 import org.apache.log4j.Logger;
 import org.xmlcml.euclid.Angle;
 import org.xmlcml.euclid.Vector2;
 import org.xmlcml.graphics.control.page.AxisAnalyzer;
 import org.xmlcml.graphics.control.page.PageAnalyzer;
 import org.xmlcml.graphics.control.page.PathNormalizerAction;
+import org.xmlcml.graphics.control.page.PlotBox;
 import org.xmlcml.graphics.pdf2svg.AbstractSVGAnalyzer;
 import org.xmlcml.graphics.svg.SVGG;
 import org.xmlcml.graphics.svg.SVGLine;
 import org.xmlcml.graphics.svg.SVGSVG;
-import org.xmlcml.graphics.svg.SVGUtil;
 
 import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.Multimap;
 	private Multimap<Integer, SVGLine> lineAngleMap;
 	private Axis horizontalAxis;
 	private Axis verticalAxis;
+	private List<PlotBox> plotBoxList;
+
+	private PlotBox plotBox;
 
 	/** Creates an analyzer backed by a freshly constructed PageAnalyzer. */
 	public LineAnalyzer() {
 		super(new PageAnalyzer());
 	}
 
 	/**
 	 * Runs an AxisAnalyzer over the current lines and records the plot box it reports.
 	 * plotBoxList is recreated on every call and receives the plot box only when it is non-null.
 	 */
 	private void findAxes() {
+		plotBoxList = new ArrayList<PlotBox>();
 		AxisAnalyzer axisAnalyzer = new AxisAnalyzer(svgg, pageAnalyzer);
 		axisAnalyzer.createVerticalHorizontalAxisList(lines, PathNormalizerAction.EPS);
+		plotBox = axisAnalyzer.getPlotBox();
+		if (plotBox != null) {
+			plotBoxList.add(plotBox);
+		}
 	}
 
 	public String debug() {
 		}
 		return sb.toString();
 	}
+
+	/**
+	 * @return the plot box last stored by findAxes(); may be null
+	 */
+	public PlotBox getPlotBox() {
+		return plotBox;
+	}
 }

src/main/java/org/xmlcml/graphics/paths/PolylineAnalyzer.java

 import java.util.List;
 import java.util.Map;
 
+import nu.xom.Attribute;
 import nu.xom.Element;
 
 import org.apache.log4j.Logger;
+import org.xmlcml.cml.element.CMLArray;
 import org.xmlcml.euclid.Real;
 import org.xmlcml.euclid.Real2;
 import org.xmlcml.euclid.Real2Array;
 import org.xmlcml.graphics.control.page.Axis;
 import org.xmlcml.graphics.control.page.ChunkAnalyzer;
 import org.xmlcml.graphics.control.page.PageAnalyzer;
+import org.xmlcml.graphics.control.page.PlotBox;
+import org.xmlcml.graphics.paths.ComplexLine.LineOrientation;
 import org.xmlcml.graphics.pdf2svg.AbstractSVGAnalyzer;
 import org.xmlcml.graphics.svg.SVGCircle;
 import org.xmlcml.graphics.svg.SVGElement;
 	private static final String VERT = "VERT";
 	private static final String HOR = "HOR";
 	private static final String IS_SAME_AS = "isSameAs";
+	private static final String PLOT = "plot";
 	private static final String POLYLINES = "extractedPolylines";
+	private static final String XAXIS = "xaxis";
+	private static final String YAXIS = "yaxis";
+	private static final String ROLE = "role";
+
 	private Double eps = 0.0000001;
 	private Double eps1 = 0.01;
 	private int lineCount;
 	
 	public void createNetwork() {
 		indexHorizonalVertical();
-//		findTJoints();
-//		addTJointNodes();
-//		mergeTJointNodes();
 	}
 	
 	private void indexHorizonalVertical() {
 		}
 	}
 
-
 	private void splitIntoLinesAndIndex(SVGPolyline polyline) {
 		List<SVGLine> lineList = polyline.createLineList();
 		for (SVGLine line : lineList) {
 
 
 	public void analyzePolylines(SVGG svgg, List<SVGPolyline> polylines) {
-		this.analyzePolylines(svgg, polylines, null, null);
+		this.svgg = svgg;
+		ChunkAnalyzer chunkAnalyzer = this.pageAnalyzer.getCurrentChunkAnalyzer();
+		PlotBox plotBox = chunkAnalyzer.getPlotBox();
+//		List<SVGRect> axisBoxList = SVGRect.extractRects(SVGUtil.getQuerySVGElements(
+//				svgg, ".//svg:rect[@class='"+AxisAnalyzer.AXES_BOX+"']"));
+//		for (SVGRect axisBox : axisBoxList) {
+			this.analyzePolylines(svgg, polylines, plotBox);
+//		}
 	}
 
-	public void analyzePolylines(SVGG svgg, List<SVGPolyline> polylines, Axis horizontalAxis, Axis verticalAxis) {
-		this.svgg = svgg;
+	public void analyzePolylines(SVGG svgg, List<SVGPolyline> polylines, PlotBox plotBox) {
+		Real2Range boxRange = (plotBox == null) ? null : plotBox.getBoxRange();
+		if (boxRange != null)
 		for (SVGPolyline polyline : polylines) {
-			LOG.trace("Polyline "+polyline.getBoundingBox());
-			Real2Array polylineCoords = polyline.getReal2Array();
-			LOG.trace("COORDS "+polylineCoords.size());
-			if (horizontalAxis != null) {
-				RealArray xValueCoords = extractAndScaleCoords(horizontalAxis, polylineCoords.getXArray());
-			}
-			if (verticalAxis != null) {
-				RealArray yValueCoords = extractAndScaleCoords(verticalAxis, polylineCoords.getYArray());
+			SVGG parentG = (SVGG) polyline.getParent();
+			Real2Range polyBox = polyline.getBoundingBox();
+			LOG.debug("Polyline "+polyBox);
+			if (boxRange.includes(polyBox)) {
+				Real2Array polylineCoords = polyline.getReal2Array();
+				LOG.debug("COORDS "+polylineCoords.size());
+				Axis horizontalAxis = plotBox.getHorizontalAxis();
+				Axis verticalAxis = plotBox.getVerticalAxis();
+				CMLArray xArray = createCoordinateArray(polylineCoords, horizontalAxis, LineOrientation.HORIZONTAL);
+				if (xArray != null) {
+					parentG.appendChild(xArray);
+				}
+				CMLArray yArray = createCoordinateArray(polylineCoords, verticalAxis, LineOrientation.VERTICAL);
+				if (yArray != null) {
+					parentG.appendChild(yArray);
+				}
+				parentG.setClassName(PLOT);
+//				polyline.setFill("blue");
+				/// for debugging
+				polyline.setStroke("red");
+				polyline.setStrokeWidth(2.5);
 			}
 		}
 	}
 
+	/**
+	 * Creates a CMLArray holding one coordinate of a polyline, tagged with its axis role.
+	 * The coordinates are scaled through the axis; if scaling returns null the
+	 * unscaled coordinates are used instead.
+	 *
+	 * @param polylineCoords points of the polyline
+	 * @param axis axis used for scaling; when null no array is made
+	 * @param lineOrientation HORIZONTAL selects x values and the xaxis role, anything else y values and yaxis
+	 * @return the new array, or null when axis is null
+	 */
+	private CMLArray createCoordinateArray(Real2Array polylineCoords, Axis axis, LineOrientation lineOrientation) {
+		if (axis == null) {
+			LOG.debug("CANNOT MAKE AXIS: "+lineOrientation);
+			return null;
+		}
+		boolean horizontal = LineOrientation.HORIZONTAL.equals(lineOrientation);
+		RealArray pixelCoords = horizontal ? polylineCoords.getXArray() : polylineCoords.getYArray();
+		RealArray values = extractAndScaleCoords(axis, pixelCoords);
+		if (values == null) {
+			values = pixelCoords;  // fall back to the unscaled coordinates
+		}
+		CMLArray result = new CMLArray(values);
+		result.addAttribute(new Attribute(ROLE, horizontal ? XAXIS : YAXIS));
+		return result;
+	}
+
 	private RealArray extractAndScaleCoords(Axis axis, RealArray pixelCoords) {
 		RealArray valueCoords = axis.createScaledArrayToRange(pixelCoords);
-		GraphUtil.format(valueCoords, ChunkAnalyzer.PLACES);
+		if (valueCoords != null) {
+			GraphUtil.format(valueCoords, ChunkAnalyzer.PLACES);
+		}
 		return valueCoords;
 	}
+	
 	private void addMarker(SVGLine line0, Map<Integer, List<SVGLine>> map, Real2 xy, String orient) {
 		Double xx = ((HOR.equals(orient)) ? xy.getY() : xy.getX());
 		Double yy = ((HOR.equals(orient)) ? xy.getX() : xy.getY());

src/main/java/org/xmlcml/graphics/text/SubSupAnalyzer.java

 
 public class SubSupAnalyzer extends AbstractSVGAnalyzer {
 
+	public static final String SCRIPT_TYPE = "scriptType";
+
 	public enum SubSup {
 		SUBSCRIPT("sub", 1),
 		SUPERSCRIPT("sup", -1);
 		return mergedList;
 	}
 
-	private SVGText markSubSup(SubSup subSup, SVGText svgText) {
+	public static SVGText markSubSup(SubSup subSup, SVGText svgText) {
 		if (subSup != null) {
-			svgText.addAttribute(new Attribute("scriptType", ""+subSup));
+			svgText.addAttribute(new Attribute(SCRIPT_TYPE, ""+subSup));
 		}
 		return svgText;
 	}

src/main/java/org/xmlcml/graphics/text/TextAnalyzer.java

 
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
-import org.xmlcml.cml.base.CMLConstants;
+import org.xmlcml.cml.base.CMLUtil;
+import org.xmlcml.euclid.EuclidConstants;
 import org.xmlcml.euclid.Real;
 import org.xmlcml.euclid.Real2;
+import org.xmlcml.euclid.Real2Array;
 import org.xmlcml.euclid.Real2Range;
 import org.xmlcml.euclid.RealArray;
 import org.xmlcml.euclid.RealRange;
 import org.xmlcml.graphics.svg.SVGG;
 import org.xmlcml.graphics.svg.SVGRect;
 import org.xmlcml.graphics.svg.SVGSVG;
+import org.xmlcml.graphics.svg.SVGTSpan;
 import org.xmlcml.graphics.svg.SVGText;
 import org.xmlcml.graphics.svg.SVGUtil;
 
 	}
 	
 	public static final String TEXT1 = "text1";
+	public static final String CHUNK = "chunk";
+	public static final String DATA_TYPE = "dataType";
 	private static final Double INDENT = 6.0; // appears to be about 1.5 char widths for BMC
-	private static final double YPARA_SEPARATION_FACTOR = 1.15;
-	public static final String CHUNK = "chunk";
 	public static final String TEXT = "TEXT";
 	private static final String WORD_LIST = "wordList";
 	public static final String NUMBER = "number";
-	public static final String DATA_TYPE = "dataType";
+	public static final String NUMBERS = "numbers";
+	private static final String PREVIOUS = "previous";
+	private static final double YPARA_SEPARATION_FACTOR = 1.15;
 	
 	public static final double DEFAULT_TEXTWIDTH_FACTOR = 0.9;
 	public static Double TEXT_EPS = 1.0;
 	private boolean createHTML;
 	private SimpleFont simpleFont;
 	
+	private boolean subSup;
+	private boolean removeNumericTSpans;
+	private boolean splitAtSpaces;
+	
 	/** Creates an analyzer backed by a freshly constructed PageAnalyzer. */
 	public TextAnalyzer() {
 		super(new PageAnalyzer());
 	}
 	}
 
 	/**
 	 * Runs the text pipeline on the given characters, in this order: build horizontal
 	 * character lists, analyze spaces, create words, merge sub/superscripts (only when
 	 * the subSup flag is set), add numeric attributes, split at spaces, and finally
 	 * turn tspans into separate texts.
 	 *
 	 * @param svgg group that the later steps query and modify; stored in this.svgg
 	 * @param textCharacters characters to analyze
 	 */
 	public void analyzeTexts(SVGG svgg, List<SVGText> textCharacters) {
+		LOG.trace("ANALYZE TEXT "+textCharacters.size());
 		this.svgg = svgg;
 		createHorizontalCharacterLists(textCharacters);
 		analyzeSpaces();
 		createWordsInSublines();
+		mergeSubSup();
 		addNumericValues();
+		splitAtSpaces();
+		normalizeTSpanToText();
 	}
 
 
+	/**
+	 * Replaces every svg:text that has svg:tspan children by one new svg:text per tspan.
+	 * The new texts are appended to the parent of the original text, which is then detached.
+	 */
+	private void normalizeTSpanToText() {
+		List<SVGElement> hits = SVGUtil.getQuerySVGElements(svgg, ".//svg:text[svg:tspan]");
+		for (SVGText spannedText : SVGText.extractTexts(hits)) {
+			SVGElement container = (SVGElement) spannedText.getParent();
+			for (SVGTSpan child : spannedText.getChildTSpans()) {
+				container.appendChild(createText(child));
+			}
+			spannedText.detach();
+		}
+	}
+
+	/**
+	 * Makes a new SVGText carrying the attributes and the text of a tspan.
+	 *
+	 * @param tSpan source span
+	 * @return the new, unattached text
+	 */
+	public static SVGText createText(SVGTSpan tSpan) {
+		SVGText copy = new SVGText();
+		CMLUtil.copyAttributes(tSpan, copy);
+		String content = tSpan.getText();
+		copy.setText(content);
+		return copy;
+	}
+
+	/**
+	 * Empty placeholder: the body has no statements, so calling it has no effect.
+	 */
+	private void normalizeTSpanToText(SVGTSpan tSpan, SVGElement parent) {
+	}
+
 	private void analyzeSpaces() {
 		this.getWidthByCharacterNormalizedByFontSize();
 		this.getMedianWidthOfCharacterNormalizedByFont();
 	public void applyAndRemoveCumulativeTransforms() {
 		Long time0 = System.currentTimeMillis();
 		SVGUtil.applyAndRemoveCumulativeTransformsFromDocument(getSVGPage());
-		LOG.debug("cumulative transforms on text: "+(System.currentTimeMillis()-time0));
+		LOG.trace("cumulative transforms on text: "+(System.currentTimeMillis()-time0));
 	}
 
 	@Deprecated // use getRawCharacterListText
 		if (rawCharacterList == null) {
 			List<SVGElement> textElements = SVGUtil.getQuerySVGElements(svgParent, ".//svg:text");
 			getRawCharacterList(textElements);
-			LOG.debug("read "+rawCharacterList.size()+" raw characters "+rawCharacterList.toString());
+			LOG.trace("read "+rawCharacterList.size()+" raw characters "+rawCharacterList.toString());
 		}
 		return rawCharacterList;
 	}
 	}
 	
 	/**
 	 * Annotates every svg:text directly inside a g[@class='word'] with numeric attributes.
 	 * Each text is passed to interpretTypedNumberList and then to interpretTypedNumber,
 	 * which add the attributes as a side effect; their return values are not needed here.
 	 */
 	private void addNumericValues() {
+		// not quite sure when the TSpans get added so this is messy
 		List<SVGText> texts = SVGText.extractTexts(SVGUtil.getQuerySVGElements(svgg, ".//svg:g[@class='word']/svg:text"));
 		for (SVGText text : texts) {
-			String value = text.getValue().trim();
-			Number number = null;
-			String dataType = null;
-			try {
-				Integer integer = new Integer(value);
-				number = integer;
-				dataType = CMLConstants.XSD_INTEGER;
-			} catch (Exception e) {
-				try {
-					Double dubble = new Double(value);
-					number = dubble;
-					dataType = CMLConstants.XSD_DOUBLE;
-				} catch (Exception e1) {
-					// not a  number
-				}
-			}
-			if (number != null) {
-				text.addAttribute(new Attribute(NUMBER, ""+number));
-				text.addAttribute(new Attribute(DATA_TYPE, dataType));
+			// diagnostic dump goes through the class logger, not stdout
+			LOG.trace(text.toXML());
+			interpretTypedNumberList(text);
+			interpretTypedNumber(text);
+		}
+	}
+
+	/**
+	 * Tries to read a single number from the text via TypedNumber.createNumber.
+	 * On success the NUMBER and DATA_TYPE attributes are added to the text and,
+	 * when the removeNumericTSpans flag is set, its tspans are collapsed.
+	 *
+	 * @param text text to interpret; attributes are added in place
+	 * @return the typed number, or null when none was created
+	 */
+	public TypedNumber interpretTypedNumber(SVGText text) {
+		TypedNumber parsed = TypedNumber.createNumber(text);
+		if (parsed == null) {
+			return null;
+		}
+		String numberString = ""+parsed.getNumber();
+		text.addAttribute(new Attribute(NUMBER, numberString));
+		text.addAttribute(new Attribute(DATA_TYPE, parsed.getDataType()));
+		if (removeNumericTSpans) {
+			removeNumericTSpans(text, numberString);
+		}
+		return parsed;
+	}
 	
+	/**
+	 * Tries to read a list of numbers from the text via TypedNumberList.createFromTextSpans.
+	 * On success the NUMBERS and DATA_TYPE attributes are added to the text and,
+	 * when the removeNumericTSpans flag is set, its tspans are replaced by the number string.
+	 *
+	 * @param text text to interpret; attributes are added in place
+	 * @return the typed number list, or null when none was created
+	 */
+	public TypedNumberList interpretTypedNumberList(SVGText text) {
+		TypedNumberList parsedList = TypedNumberList.createFromTextSpans(text);
+		if (parsedList == null) {
+			return null;
+		}
+		String numberString = parsedList.getNumberString();
+		text.addAttribute(new Attribute(NUMBERS, numberString));
+		text.addAttribute(new Attribute(DATA_TYPE, parsedList.getDataType()));
+		if (removeNumericTSpans) {
+			removeNumericTSpanList(text, numberString);
+		}
+		return parsedList;
+	}
+	
+	/**
+	 * Collapses a text with exactly one or two child tspans into plain text:
+	 * the tspans are detached in order and the text content is set to the number.
+	 * Texts with any other number of tspans are left unchanged.
+	 *
+	 * @param text text whose tspans are removed
+	 * @param number string that becomes the text content
+	 */
+	private void removeNumericTSpans(SVGText text, String number) {
+		List<SVGTSpan> children = text.getChildTSpans();
+		int count = children.size();
+		if (count != 1 && count != 2) {
+			return;
+		}
+		for (int i = 0; i < count; i++) {
+			children.get(i).detach();
+		}
+		text.setText(number);
+	}
+
+	/**
+	 * Sets the text content to the given string and detaches every child tspan
+	 * that was present before the content was set.
+	 *
+	 * @param text text whose tspans are removed
+	 * @param number string that becomes the text content
+	 */
+	private void removeNumericTSpanList(SVGText text, String number) {
+		List<SVGTSpan> tSpans = text.getChildTSpans();
+		// NOTE(review): the content is set before the tspans are detached, the reverse
+		// of the order used in removeNumericTSpans -- confirm the order is intended
+		text.setText(number);
+		for (SVGTSpan tSpan : tSpans) {
+			tSpan.detach();
+		}
+	}
+
 	/** uses all lines to estimate the width of a character
 	 * 
 	 * @param lineListlist
 
 	private static Real2Range scaleBoxX(double scale,	Real2Range rawBoundingBox) {
 		if (rawBoundingBox == null) {
-			LOG.debug("RAWBB");
+			LOG.trace("RAWBB");
 		}
 		RealRange xRange = rawBoundingBox.getXRange();
 		double r = xRange.getRange();
 	
 	public void analyzeRawText(SVGElement element) {
 		this.svgParent = element;
-		//svgParent.debug("TEXT");
 		this.getRawTextCharacterList();
 		this.createRawTextCharacterPositionMaps();
 		this.createHorizontalCharacterListAndCreateWords();
 
 	private void copyWordSequencesToParent() {
 		if (rawCharacterList.size() > 0) {
-			LOG.debug("WordSequences "+wordSequenceList.size());
+			LOG.trace("WordSequences "+wordSequenceList.size());
 			SVGElement parent = (SVGElement) rawCharacterList.get(0).getParent();
 			if (parent == null) {
 				LOG.warn("No parent for rawCharacterList: "+rawCharacterList);
 			} else {
-				LOG.debug("P "+parent.getId());
+				LOG.trace("P "+parent.getId());
 				int wsSerial = 0;
 				for (WordSequence ws : wordSequenceList) {
 					SVGText svgText = ws.createSVGText();
-					LOG.debug("Text "+svgText.getId());
+					LOG.trace("Text "+svgText.getId());
 					List<Word> words = ws.getWords();
 					LOG.trace("words "+words.size());
 					if (words.size() > 0) {
 			for (SVGElement paraElement : paraList) {
 				Paragraph paragraph = Paragraph.createElement(paraElement);
 				paragraph.createAndAddHTML();
-//				paragraph.debug("PPP");
 			}
 		}
-//		removeOriginaSVGTextFromParagraphs();
-//		interpretTextChunks();
 	}
 
 	public void setCreateTSpans(boolean createTSpans) {
 	 * @param texts
 	 * @return
 	 */
+	public void mergeSubSup() {
+		// no-argument variant: does nothing unless the subSup flag is set,
+		// otherwise merges the texts found directly inside the word groups of svgg
+		if (!subSup) {
+			return;
+		}
+		List<SVGElement> wordTextElements = SVGUtil.getQuerySVGElements(svgg, ".//svg:g[@class='word']/svg:text");
+		List<SVGText> wordTexts = SVGText.extractTexts(wordTextElements);
+		LOG.trace("SUBSUP....."+wordTexts.size());
+		mergeSubSup(wordTexts);
+	}
+
 	/**
 	 * Hands the texts to a new SubSupAnalyzer created for this analyzer.
 	 *
 	 * @param texts texts passed to SubSupAnalyzer.mergeTexts
 	 */
 	public void mergeSubSup(List<SVGText> texts) {
 		new SubSupAnalyzer(this).mergeTexts(texts);
 	}
+
+	public void splitAtSpaces() {
+		// select texts which contains spaces
+		List<SVGText> texts = SVGText.extractTexts(SVGUtil.getQuerySVGElements(
+				svgg, ".//svg:text[contains(text(),' ')] | .//svg:tspan[contains(.,' ')]"));
+		for (SVGText text : texts) {
+			splitAtSpaces(text);
+		}
+	}
+
+	public void splitAtSpaces(SVGText textOrSpan) {
+		String s = textOrSpan.getText();
+		String id = textOrSpan.getId();
+		String coords = textOrSpan.getAttributeValue(Word.COORDS);
+		List<SVGTSpan> spans = textOrSpan.getChildTSpans();
+		if (spans.size() > 0) {
+			processSpans(textOrSpan, spans);
+		} else {
+			Real2Array real2Array = (coords == null) ? null : Real2Array.createFromCoords(coords);
+			if (real2Array == null || s == null || real2Array.size() !=s.length()) {
+				LOG.debug("Cannot match array: "+coords);
+				real2Array = null;
+			} else {
+				processLeafSpanOrText(textOrSpan, s, id, real2Array);
+			}
+		}
+	}
+
+	/**
+	 * Cuts the string of a childless text or tspan at spaces and creates one new
+	 * tspan per non-blank piece.
+	 *
+	 * @param textOrSpan element being split; a tspan is detached, a text has its content cleared
+	 * @param s string content of the element
+	 * @param id id of the element; new tspans get id + "_" + start offset
+	 * @param real2Array one coordinate per character of s (the caller checks the sizes match)
+	 */
+	private void processLeafSpanOrText(SVGText textOrSpan, String s, String id, Real2Array real2Array) {
+		Integer index = null;
+		SVGText parent = null;
+		if (textOrSpan instanceof SVGTSpan) {
+			// a tspan is replaced: remember its position in the parent, then remove it
+			parent = (SVGText) textOrSpan.getParent();
+			index = parent.indexOf(textOrSpan);
+			textOrSpan.detach();
+		} else {
+			// a plain text becomes the container of the new tspans
+			parent = textOrSpan;
+			parent.setText(null);
+		}
+		SVGTSpan lastSpan = null;
+		// start offset of the piece currently being collected
+		int last = 0;
+		int l = s.length();
+		for (int i = 0; i < l; i++) {
+			// a piece ends at a space or at the final character
+			if (s.charAt(i) == EuclidConstants.C_SPACE || i == l-1) {
+				// at the final character move i past the end so that character is included;
+				// this also terminates the loop
+				// NOTE(review): if the final character is itself a space it ends up in the piece -- confirm
+				i = (i == l-1) ? l : i;
+				String ss = s.substring(last, i);
+				if (ss.trim().length() > 0) {
+					SVGTSpan tSpan = new SVGTSpan(real2Array.get(last), ss);
+					// id, COORDS and XY are set after the attribute copy
+					CMLUtil.copyAttributes(textOrSpan, tSpan);
+					tSpan.setId(id+"_"+last);
+					// presumably createSubArray takes an inclusive end index -- TODO confirm
+					Real2Array subArray = real2Array.createSubArray(last, i-1);
+					tSpan.addAttribute(new Attribute(Word.COORDS, subArray.toString()));
+					tSpan.setXY(subArray.get(0));
+					if (index == null) {
+						parent.appendChild(tSpan);
+					} else {
+						// keep the new tspans at the position of the removed one, in order
+						parent.insertChild(tSpan, index);
+						index++;
+					}
+					if (lastSpan != null) {
+						// link to the tspan created just before this one
+						tSpan.addAttribute(new Attribute(PREVIOUS, lastSpan.getId()));
+					}
+					lastSpan = tSpan;
+				}
+				last = i+1;
+			}
+		}
+	}
+	
+	/**
+	 * Clears the direct text content of the element and splits each of the given
+	 * child tspans at spaces.
+	 *
+	 * @param textOrSpan element owning the spans
+	 * @param spans child tspans to split
+	 */
+	private void processSpans(SVGText textOrSpan, List<SVGTSpan> spans) {
+		textOrSpan.setText(null);
+		for (SVGTSpan span : spans) {
+			splitAtSpaces(span);
+		}
+	}
+
+	/**
+	 * @param subSup when true, the no-argument mergeSubSup() merges sub/superscripts; when false it does nothing
+	 */
+	public void setSubSup(boolean subSup) {
+		this.subSup = subSup;
+	}
+
+	/**
+	 * @param removeNumericTSpans when true, interpretTypedNumber and interpretTypedNumberList
+	 *        remove the tspans of texts they recognise
+	 */
+	public void setRemoveNumericTSpans(boolean removeNumericTSpans) {
+		this.removeNumericTSpans = removeNumericTSpans;
+	}
+
+	/**
+	 * Stores the splitAtSpaces flag.
+	 * NOTE(review): none of the code visible here reads this flag; analyzeTexts calls
+	 * splitAtSpaces() regardless -- confirm where it is meant to take effect.
+	 */
+	public void setSplitAtSpaces(boolean splitAtSpaces) {
+		this.splitAtSpaces = splitAtSpaces;
+	}
+
+	/**
+	 * @param numericText text to read
+	 * @return the value of its NUMBER attribute as returned by getAttributeValue
+	 */
+	public static String getNumericValue(SVGText numericText) {
+		return numericText.getAttributeValue(NUMBER);
+	}
 	
 }

src/main/java/org/xmlcml/graphics/text/TypedNumber.java

+package org.xmlcml.graphics.text;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+import org.xmlcml.cml.base.CMLConstants;
+import org.xmlcml.euclid.Real;
+import org.xmlcml.graphics.svg.SVGTSpan;
+import org.xmlcml.graphics.svg.SVGText;
+import org.xmlcml.graphics.svg.SVGUtil;
+import org.xmlcml.graphics.text.SubSupAnalyzer.SubSup;
+
+public class TypedNumber {
+
+	private final static Logger LOG = Logger.getLogger(TypedNumber.class);
+	
+	Number number = null;
+	private List<Number> numberList = null;
+	private String dataType = null;
+
+	/** Creates a number either from the trimmed string value of a text without
+	 * child elements, or otherwise via createFromText(text).
+	 * In the second case number and dataType stay null when createFromText returns null.
+	 * 
+	 * @param text
+	 */
+	TypedNumber(SVGText text) {
+		if (text.getChildElements().size() > 0) {
+			TypedNumber fromChildren = createFromText(text);
+			if (fromChildren != null) {
+				this.number = fromChildren.number;
+				this.dataType = fromChildren.dataType;
+			}
+		} else {
+			createFromString(text.getValue().trim());
+		}
+	}
+
+	/**
+	 * Wraps a Double, with the data type set to CMLConstants.XSD_DOUBLE.
+	 *
+	 * @param dubble stored as the number without a null check
+	 */
+	public TypedNumber(Double dubble) {
+		this.number = dubble;
+		dataType = CMLConstants.XSD_DOUBLE;
+	}
+
+	/**
+	 * Creates the double value abscissa * 10^power, typed as CMLConstants.XSD_DOUBLE.
+	 *
+	 * @param abscissa factor multiplied by the power of ten
+	 * @param power exponent of ten
+	 */
+	public TypedNumber(Double abscissa, Integer power) {
+		double scale = Math.pow(10.0, power.doubleValue());
+		this.number = abscissa * scale;
+		this.dataType = CMLConstants.XSD_DOUBLE;
+	}
+
+	/**
+	 * Passes the string to createInteger and then to createDouble, in that order.
+	 * NOTE(review): both helpers are always called; presumably createDouble leaves an
+	 * already recognised integer untouched -- confirm against their implementation.
+	 *
+	 * @param value string to interpret
+	 */
+	private void createFromString(String value) {
+		createInteger(value);
+		createDouble(value);
+	}
+
+	/** create from SVGText 
+	 * may have textString value
+	 * 1 TSpan with value
+	 * 2 tSpans with exponential SUPERSCRIPT
+	 * @param text
+	 * @return
+	 */
+	public static TypedNumber createFromText(SVGText text) {
+		TypedNumber typedNumber = null;