1. Sara Magliacane
  2. recoprov

Commits

Sara Magliacane  committed 0d9633e

refactoring

  • Participants
  • Parent commits 590e578
  • Branches master

Comments (0)

Files changed (9)

File src/main/java/nl/vu/recoprov/CompletePipeline.java

View file
 	private PlagiarismCorpusSpecificFilter plagiarismCorpusSpecificFilter = new PlagiarismCorpusSpecificFilter();
 	private ProvDMtranslator ProvDMtranslator = new ProvDMtranslator();
 	private DropboxClient client = new DropboxClient();
+	private BackwardTemporalFilter backwardTemporalFilter = new BackwardTemporalFilter();
 	
 	public CompletePipeline() {
 		logger = LoggerFactory
 
 		pipeline.filterSignals(depGraph);
 
-		// System.out.println(depGraph);
-		// System.out.println(depGraph.getAttributes());
-
-		// System.out.println(depGraph.toCSVString());
-
 		pipeline.aggregateSignals(depGraph);
 
 		pipeline.writeToFile(depGraph);
 		return depGraph;
 	}
 
-	public void loadMetadaAndIndexes(DependencyGraph depGraph) {
+	public void loadMetadaAndIndexes(DependencyGraph depGraph) throws IOException {
 
 		TikaReader tika = new TikaReader(currentDir);
 		depGraph = tika.read(depGraph, params);
 
 	}
 
-	public void indexFiles(DependencyGraph depGraph){
-		indexFiles(depGraph, currentDir);
-	}
-	
-	public void indexFiles(DependencyGraph depGraph, String dir){
-		LuceneIndexer indexer = new LuceneIndexer(dir);
+	public void indexFiles(DependencyGraph depGraph) throws IOException{
+		LuceneIndexer indexer = new LuceneIndexer();
 		indexer.indexFiles(depGraph);
 	}
 	
 	public DependencyGraph computeSignals(DependencyGraph depGraph) {
 		// SIGNALS
-		depGraph = new LuceneSimilaritySignal().computeSignal(depGraph);
-		depGraph = new LuceneInverseSimilarity().computeSignal(depGraph);
+		new LuceneSimilaritySignal().computeSignal(depGraph);
+		new LuceneInverseSimilarity().computeSignal(depGraph);
 		// use lucene for better similarity - overlap of words
-		depGraph = new MetadataSimilaritySignal().computeSignal(depGraph);
-		depGraph = new MatchTitleInContentSignal().computeSignal(depGraph);
+		new MetadataSimilaritySignal().computeSignal(depGraph);
+		new MatchTitleInContentSignal().computeSignal(depGraph);
 
 		// TODO: compare nouns, verbs, named entities
 		// preprocessing get rid of the tags
 
 		// TODO: transitive reduction after transitive closure?
 
-		depGraph = new ImageSimilaritySignal().computeSignal(depGraph);
+		new ImageSimilaritySignal().computeSignal(depGraph);
 
 		// depGraph = new DiffSignal().computeSignal(depGraph);
 		return depGraph;
 
 	public void filterSignals(DependencyGraph depGraph) {
 		// FILTERS
-		depGraph = new BackwardTemporalFilter().filterSignals(depGraph);
+		backwardTemporalFilter.filterSignals(depGraph);
 
 	}
 	

File src/main/java/nl/vu/recoprov/ImageReader.java

View file
 					
 				}
 	    		
-				if(d.getMimeType().contains("pdf")){
-	    			
-					
-					input = readImagesFromPDF( input,  d,  depname,  docbuilder,  writer); 
+				if (d.getMimeType().contains("pdf")) {
+					input = readImagesFromPDF(input, d, depname, docbuilder,
+							writer);
 				}
-				
 	      	
 
 			} catch (Exception e) {

File src/main/java/nl/vu/recoprov/LuceneIndexer.java

View file
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
-import java.util.Set;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.baseclasses.DependencyNode;
 import nl.vu.recoprov.utils.ConfigurationDefaults;
 import nl.vu.recoprov.utils.CustomAnalyzer;
 import nl.vu.recoprov.utils.CustomFileReader;
-
 import org.apache.commons.io.FileUtils;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
 
 public class LuceneIndexer {
 
-	private String rootpath;
 	private Boolean cleanupIndex = false;
-	
-	public LuceneIndexer(String rootpath) {
-		this.rootpath = rootpath;
-	}
 
 	public void indexFiles(DependencyGraph input) throws IOException {
 
 	}
 	
 	public Document createLuceneDocument (DependencyNode node, StringBuffer content){
+		
 		Document doc = new Document();
 		doc.add(new StringField("name", node.getCompleteFilepath(),
 				Field.Store.YES));

File src/main/java/nl/vu/recoprov/ProvDMtranslator.java

View file
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.FileWriter;
+import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.LinkedList;
-
-import javax.xml.namespace.QName;
-
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.baseclasses.DependencyNode;
 import nl.vu.recoprov.baseclasses.DependencyGraph.LabelledEdge;
 import nl.vu.recoprov.signalaggregators.WeightedSumAggregator;
 import nl.vu.recoprov.signaldetectors.MetadataSimilaritySignal;
 import nl.vu.recoprov.utils.TransitiveClosure;
-
 import org.openprovenance.prov.dot.ProvToDot;
 import org.openprovenance.prov.xml.Activity;
 import org.openprovenance.prov.xml.Agent;
 import org.openprovenance.prov.xml.Document;
 import org.openprovenance.prov.xml.Entity;
 import org.openprovenance.prov.xml.EntityRef;
-import org.openprovenance.prov.xml.InternationalizedString;
 import org.openprovenance.prov.xml.ProvFactory;
 import org.openprovenance.prov.xml.SpecializationOf;
 import org.openprovenance.prov.xml.Statement;
 import org.openprovenance.prov.xml.WasDerivedFrom;
-//import org.openprovenance.prov.rdf.RdfConstructor;
-
-// prov to dot uses the XML representation
-
-//import org.openprovenance.prov.rdf.Entity;
-//import org.openprovenance.prov.rdf.Derivation;
-//import org.openprovenance.prov.rdf.Revision;
-//import org.openprovenance.prov.rdf.EntityInvolvement;
-//import org.openprovenance.prov.rdf.TimeInstant;
-
-/// track the time in which an entity was generated
-
-
-// further annotation is in Notes - an entity can be linked to a note by hasAnnotation
-
-// entity attributes - [attr1=val] prov:type="document'
-// derivation attributes - prov:type = "physical transform" wasDerivedFrom
-// types of derivation:
-// revision - newer and older   wasRevisionOf
-// quotation - quote(partial copy), original     wasQuotedFrom
-// original source  - derived (entity), source   hadOriginalSource
-
-// derivation is a particular form of Trace - tracedTo - entity, ancestor
-
-// specialization is a more constrained entity  specializationOf
-
-// attrib - prov:label, prov:type, prov:value (score)
-
 
 public class ProvDMtranslator {
 
-	private HashMap<Integer, Entity> listOfAvailableEntities = new HashMap<Integer, Entity> ();
-	private Collection<Statement> listOfAvailableRelations = new ArrayList<Statement> ();
+	private HashMap<Integer, Entity> listOfAvailableEntities;
+	private Collection<Statement> listOfAvailableRelations;
 	private boolean useTred = false;
+	private ProvFactory factory;
 	
-		public String translate(DependencyGraph input){
-			System.out.println("Got called...");
-			String result =  translate( input, "graph.gv");
-			System.out.println("Got someting back");
-			return result;
-		}
-	 
+	public String translate(DependencyGraph input) throws FileNotFoundException {
+		System.out.println("Got called...");
+		String result = translate(input, "graph.gv");
+		System.out.println("Got someting back");
+		return result;
+	}
 	 
-	public String translate(DependencyGraph input,String graphfilename) {
+	public String translate(DependencyGraph input, String graphfilename) throws FileNotFoundException {
+
 		System.out.println("Translate to PROVDM.");
-		ProvFactory factory = new ProvFactory();
-		Document container;
+		
+		factory = new ProvFactory();		
+		listOfAvailableEntities = new HashMap<Integer, Entity> ();
+		listOfAvailableRelations = new ArrayList<Statement> ();
 
 		for (String name: input.keySet()){
 			
 			// for each node take the edges
 			DependencyNode d = input.get(name);
 			
-			EntityRef originEntity = getEntityRefFromDependencyNode(factory, d);
-			
+			EntityRef originEntity = getEntityRefFromDependencyNode(factory, d);		
 			ArrayList<LabelledEdge> edgearray = input.getAllEdges(d.getLuceneDocNumber());
 			
 			if(edgearray == null)
 				
 			}	
 		}
-		
-		System.out.println("Going to build a dot file");
-		
-		ProvToDot provtodot = new ProvToDot();
-				
-		container = factory.newDocument( new LinkedList<Activity>(), listOfAvailableEntities.values(),   new LinkedList<Agent>(), listOfAvailableRelations);
-		try {
-			provtodot.convert(container, new File(graphfilename));
-			
-			//System.out.println(graphfilename);
-			
-			//Transitive reduction
-			if (useTred) {
-				String cmd = "tred " + graphfilename;
-				BufferedWriter fout = new BufferedWriter(new FileWriter("Tred"
-						+ graphfilename));
 
-				Runtime run = Runtime.getRuntime();
-				Process pr = run.exec(cmd);
-				pr.waitFor();
-				BufferedReader buf = new BufferedReader(new InputStreamReader(
-						pr.getInputStream()));
-				String line = "";
-				String out = "";
-				while ((line = buf.readLine()) != null) {
-					System.out.println(line);
-					out = out + line + "\n";
-					fout.write(line);
-				}
-
-				fout.flush();
-				fout.close();
-				buf.close();
-				pr.destroy();
 
+		// Transitive reduction
+		if (useTred)
+			try {
+				callTredOnDotFile(graphfilename);
+			} catch (IOException e) {
+				// TODO Auto-generated catch block
+				e.printStackTrace();
+			} catch (InterruptedException e) {
+				// TODO Auto-generated catch block
+				e.printStackTrace();
 			}
-			String svg = "didn't work";
-		    // svg = convertToSVG("graph2.gv");
-			
 
-			listOfAvailableEntities = new HashMap<Integer, Entity> ();
-			listOfAvailableRelations = new ArrayList<Statement> ();
-			
-			return svg;
+		convertToDot(graphfilename);
 
-		} catch (Exception e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
-	
-		
-		return "didn't work";
+		String svg = "didn't work";
+		// svg = convertToSVG("graph2.gv");
+
+		return svg;
 	}
 	
 	
-	public String convertToSVG(String filename) throws Exception
-	{
-		String cmd = "dot -Tsvg " + filename  ;
-		
+	public String convertToSVG(String filename) throws Exception {
+		String cmd = "dot -Tsvg " + filename;
+
 		Runtime run = Runtime.getRuntime();
 		Process pr = run.exec(cmd);
 		pr.waitFor();
-		BufferedReader buf = new BufferedReader(new InputStreamReader(pr.getInputStream())); 
-		String line = ""; 
+		BufferedReader buf = new BufferedReader(new InputStreamReader(
+				pr.getInputStream()));
+		String line = "";
 		String out = "";
-		while ((line=buf.readLine())!=null) { 
-				System.out.println(line); 
-				out = out + line + "\n";
-				
-		} 
+		while ((line = buf.readLine()) != null) {
+			System.out.println(line);
+			out = out + line + "\n";
+
+		}
 
 		buf.close();
 		return out;
 	}
-	
+
 	public  EntityRef getEntityRefFromDependencyNode(ProvFactory factory, DependencyNode d){
 		EntityRef ref = new EntityRef();
 		
 		ref.setRef(entity.getId());
 		return ref;
 	}
-		
+	
+	public void convertToDot(String graphfilename) throws FileNotFoundException {
+
+		System.out.println("Going to build a dot file");
+		ProvToDot provtodot = new ProvToDot();
+
+		Document container = factory.newDocument(new LinkedList<Activity>(),
+				listOfAvailableEntities.values(), new LinkedList<Agent>(),
+				listOfAvailableRelations);
+
+		provtodot.convert(container, new File(graphfilename));
+
+	}
+	
+	
+	public void callTredOnDotFile( String graphfilename) throws IOException, InterruptedException{
+	
+		String cmd = "tred " + graphfilename;
+		BufferedWriter fout = new BufferedWriter(new FileWriter("Tred"
+						+ graphfilename));
+		Runtime run = Runtime.getRuntime();
+		Process pr = run.exec(cmd);
+		pr.waitFor();
+		BufferedReader buf = new BufferedReader(new InputStreamReader(
+						pr.getInputStream()));
+		String line = "";
+		while ((line = buf.readLine()) != null) {
+			fout.write(line);
+		}
+		fout.flush();
+		fout.close();
+		buf.close();
+		pr.destroy();
+	}
 
 }

File src/main/java/nl/vu/recoprov/TikaReader.java

View file
 
 	}
 
-	private static void listAvailableMetaDataFields(Metadata metadata) {
-		for (int i = 0; i < metadata.names().length; i++) {
-			String name = metadata.names()[i];
-			System.out.println(name + " : " + metadata.get(name));
-		}
-	}
+//	private static void listAvailableMetaDataFields(Metadata metadata) {
+//		for (int i = 0; i < metadata.names().length; i++) {
+//			String name = metadata.names()[i];
+//			System.out.println(name + " : " + metadata.get(name));
+//		}
+//	}
 
 }

File src/main/java/nl/vu/recoprov/experiments/CorpusGeneratorBiomed.java

View file
 		
 
 		// System.out.println("### THIRD PHASE: Index the contents with Apache Lucene. \n");
-		pipeline.indexFiles(depGraph, dir);
+		pipeline.indexFiles(depGraph);
 		
 
 		createEntityFromDepGraph(factory, depGraph);

File src/main/java/nl/vu/recoprov/experiments/Experiment2.java

View file
 
 		try {
 			CorpusGeneratorBiomed.depGraph = pipeline.initDependencyGraph();
-			pipeline.indexFiles(CorpusGeneratorBiomed.depGraph,dir);
+			pipeline.indexFiles(CorpusGeneratorBiomed.depGraph);
 
 			
 		} catch (Exception e) {

File src/main/java/nl/vu/recoprov/experiments/Experiment3.java

View file
 
 	}
 
-	public static DependencyGraph createGraph() throws DropboxException {
+	public static DependencyGraph createGraph() throws DropboxException, IOException {
 
 		if (baselineGraph == null) {
 			baselineGraph = new DependencyGraph();

File src/main/java/nl/vu/recoprov/experiments/PROVReader.java

View file
 
 import java.io.File;
 import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.LinkedHashMap;
 	}
 	
 	
-	public DependencyGraph generatePANDepGraph() throws DropboxException {
+	public DependencyGraph generatePANDepGraph() throws DropboxException, IOException {
 
 		logger.info("Reading PROV description of folder: {} in JSON {}: ", dir, jsonfile);