Commits

petermr committed ed53baa

added xpath visitor

Comments (0)

Files changed (2)

src/main/java/org/xmlcml/cml/crystaleye/AbstractCrystaleyeVisitor.java

 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
 
 import nu.xom.Document;
 import nu.xom.Element;
 
 	protected byte[] outputBytes;
 	protected Map<String, String> parameterMap;
+	protected int timeout = Integer.MAX_VALUE;
+	
+	/**
+	 * Sets how long to wait for document creation before the worker thread
+	 * is abandoned.
+	 *
+	 * The value is handed to {@link Thread#join(long)}, so it is in
+	 * milliseconds and 0 means "wait forever".
+	 *
+	 * @param timeout maximum wait in milliseconds; must not be negative
+	 * @throws IllegalArgumentException if timeout is negative
+	 */
+	public void setTimeout(int timeout) {
+		// Thread.join(long) rejects negative values; fail here rather than
+		// after the worker thread has already been started.
+		if (timeout < 0) {
+			throw new IllegalArgumentException("timeout must be >= 0 ms: "+timeout);
+		}
+		this.timeout = timeout;
+	}
 
-	
 	/** Creates a visitor and invokes {@code setDefaults()} on it. */
 	public AbstractCrystaleyeVisitor() {
 		setDefaults();
 	}
 		if (inputBytes.length == 0) {
 			zeroByteOutput(outputFile);
 		} else {
-			Document document = createOutputDocument(inputFile);
+			Document document = null;
+			KillableThread thread = new KillableThread(this, inputFile);
+			thread.start();
+			try {
+				thread.join(timeout);
+			} catch (InterruptedException e) {
+				// ignore since killing thread
+			}
+			if (thread.isAlive()) {
+				System.err.println("killed potential runaway process after "+timeout/1000+" seconds");
+				thread.stop(); // deprecated but good enough for now
+				document = null;
+			} else {
+				document = thread.getDocument();
+			}
+//				document = this.createOutputDocument(inputFile);
 			if (document != null) {
 				addFileId(document, outputFile);
 				debugOutput(inputFile, outputFile, document);
 			}
 		}
 	}
+	
+	
 
 	public static void zeroByteOutput(File outputFile) {
 		FileUtils.deleteQuietly(outputFile);
 		zeroByteOutput(outputFile);
 //		System.err.println("Cannot create output file "+outputFile.getAbsolutePath());
 	}
-
+ 
 	protected void writeOutput(byte[] bytes, File outputFile) {
 		if (bytes != null) {
 			try {
 			parameterMap = new HashMap<String, String>();
 		}
 	}
+	
+	protected String getParameter(String name) {
+		ensureParameterMap();
+		return parameterMap.get(name);
+		
+	}
 
 	private void registerFileFilter(Element fileFilterElement) {
 		fileFilterMethod = fileFilterElement.getAttributeValue(METHOD);
 
 	
 }
+class KillableThread extends Thread {
+	
+	private File file;
+	private AbstractCrystaleyeVisitor visitor;
+	private Document document;
+	
+	public KillableThread(AbstractCrystaleyeVisitor visitor, File file) {
+		this.file = file;
+		this.visitor = visitor;
+	} 
+	
+	public void run() {
+		document = visitor.createOutputDocument(file);
+	}
+	
+	public Document getDocument() {
+		return document;
+	}
+	
 
+}
+

src/main/java/org/xmlcml/cml/crystaleye/XPathVisitor.java

+package org.xmlcml.cml.crystaleye;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Node;
+import nu.xom.Nodes;
+
+import org.apache.log4j.Logger;
+import org.xmlcml.cml.base.CMLUtil;
+
+
+/**
+ * Splits a document into components determined by an XPath expression and
+ * writes each selected element to its own file.
+ *
+ * Parameters read through {@code getParameter}:
+ * <ul>
+ * <li>XPATH - the XPath expression (required)</li>
+ * <li>XPATH_DIRECTORY - subdirectory, relative to the input file's parent
+ * directory, that receives the split files (optional)</li>
+ * <li>XPATH_PREFIX - prefix for the generated file names (optional)</li>
+ * </ul>
+ *
+ * @author pm286
+ */
+public class XPathVisitor extends AbstractCrystaleyeVisitor {
+	private static final String XPATH_NAME = "XPATH";
+	private static final String XPATH_DIRECTORY = "XPATH_DIRECTORY";
+	private static final String XPATH_PREFIX = "XPATH_PREFIX";
+	private static final String XML = ".xml";
+	private static final Logger LOG = Logger.getLogger(XPathVisitor.class);
+	private String directoryName = null;
+	private String prefix = "";
+	private String xPath;
+	private File outputDirectory = null;
+
+	public XPathVisitor() {
+	}
+
+	/**
+	 * Reads the XPATH, XPATH_DIRECTORY and XPATH_PREFIX parameters, prepares
+	 * the output directory and splits the file.
+	 *
+	 * @param xmlFile the XML file to split
+	 * @return always null; the output is the set of split files, not a
+	 *         single document
+	 * @throws RuntimeException if the XPATH parameter is missing, the output
+	 *         directory cannot be created, or a split file cannot be written
+	 */
+	@Override
+	public Document createOutputDocument(File xmlFile) {
+		xPath = getParameter(XPATH_NAME);
+		if (xPath == null) {
+			throw new RuntimeException("must provide XPATH parameter for XPathVisitor");
+		}
+		String s = getParameter(XPATH_DIRECTORY);
+		directoryName = (s == null) ? directoryName : s;
+		outputDirectory = xmlFile.getParentFile();
+		if (directoryName != null) {
+			outputDirectory = new File(outputDirectory, directoryName);
+			// mkdirs() also creates missing intermediate directories, so a
+			// nested directory name such as "a/b" works.
+			outputDirectory.mkdirs();
+			if (!outputDirectory.isDirectory()) {
+				throw new RuntimeException("Cannot create output directory "+outputDirectory.getAbsolutePath());
+			}
+		}
+
+		s = getParameter(XPATH_PREFIX);
+		prefix = (s == null) ? prefix : s;
+		return split(xmlFile);
+	}
+
+	/**
+	 * Parses the file, evaluates the XPath and writes every selected node.
+	 *
+	 * @param xmlFile the XML file to split
+	 * @return always null (outputDocument is never assigned)
+	 */
+	private Document split(File xmlFile) {
+		Document outputDocument = null;
+		Document inputDocument = CMLUtil.parseQuietlyToDocument(xmlFile);
+		// NOTE(review): parseQuietlyToDocument presumably returns null when
+		// the file cannot be parsed - confirm; guard against it here instead
+		// of failing with a NullPointerException.
+		if (inputDocument == null) {
+			System.err.println("Cannot parse "+xmlFile);
+			return outputDocument;
+		}
+		Nodes nodes = inputDocument.query(xPath);
+		if (nodes.size() == 0) {
+			System.err.println("No split nodes to write");
+		} else {
+			System.out.println("Writing nodes "+nodes.size());
+			for (int i = 0; i < nodes.size(); i++) {
+				try {
+					writeElement(nodes.get(i), i);
+				} catch (IOException e) {
+					throw new RuntimeException("Cannot write xpath split file ", e);
+				}
+			}
+		}
+
+		return outputDocument;
+	}
+
+	/**
+	 * Writes one selected node to its own file in the output directory.
+	 * Nodes that are not elements are skipped.
+	 *
+	 * @param node the selected node
+	 * @param i index of the node in the query result, used in the file name
+	 * @throws IOException if the file cannot be opened or written
+	 */
+	private void writeElement(Node node, int i) throws IOException {
+		if (node instanceof Element) {
+			String filename = null;
+			// NOTE(review): prefix is initialised to "" and only ever
+			// replaced by a non-null parameter value, so this id-based
+			// branch is currently unreachable. Left unchanged because
+			// enabling it would change the names of the files produced.
+			if (prefix == null) {
+				Nodes nodes = node.query("@id | @Id | @ID");
+				filename = (nodes.size() > 0) ? nodes.get(0).getValue()+"_" : "file_"+i;
+				filename = filename+XML;
+			} else {
+				filename = prefix+"_"+i+XML;
+			}
+			File outfile = new File(outputDirectory, filename);
+			// Close the stream explicitly: one file is opened per selected
+			// node, so leaving it to the garbage collector can exhaust
+			// file handles on large inputs.
+			FileOutputStream out = new FileOutputStream(outfile);
+			try {
+				CMLUtil.debug((Element) node, out, -1);
+			} finally {
+				out.close();
+			}
+		}
+	}
+
+}