Commits

Sara Magliacane  committed 3f088e0

cleaning up the code and removing hardcoded filesystem references.

  • Participants
  • Parent commits 47c0d96

Comments (0)

Files changed (2)

File src/nl/vu/recoprov/experiments/Experiment3.java

  * Plagiarism detection experiment using the PAN 2012 corpus
  */
 
-import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-
 import nl.vu.recoprov.CompletePipeline;
-import nl.vu.recoprov.LuceneIndexer;
 import nl.vu.recoprov.ProvDMtranslator;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.signalaggregators.WeightedSumAggregator;
-import nl.vu.recoprov.signaldetectors.DiffSignal;
-import nl.vu.recoprov.signaldetectors.ImageSimilaritySignal;
-import nl.vu.recoprov.signaldetectors.LuceneInverseSimilarity;
 import nl.vu.recoprov.signaldetectors.LuceneSimilaritySignal;
-import nl.vu.recoprov.signaldetectors.MatchTitleInContentSignal;
-import nl.vu.recoprov.signaldetectors.MetadataSimilaritySignal;
-import nl.vu.recoprov.signalfilters.BackwardTemporalFilter;
-import nl.vu.recoprov.signalfilters.TransitiveReductionFilter;
 import nl.vu.recoprov.utils.ConfigurationDefaults;
-import nl.vu.recoprov.utils.TransitiveClosure;
-
-import org.openprovenance.prov.xml.ProvFactory;
-
-import com.dropbox.client2.exception.DropboxException;
 		
 
 public class Experiment3 {
 	
-	private final static String dir = "/Users/saramagliacane/Documents/workspace/recoprov/pan12-detailed-comparison-training-corpus/";
+	private static String dirfile = "pan12-detailed-comparison-training-corpus/";
+	private static String jsonfile = "pan.json";
 	
 	private static DependencyGraph baselineGraph = null;
 	
 		if (baselineGraph == null){
 		
 			baselineGraph = new DependencyGraph();
-			CompletePipeline pipeline = new CompletePipeline(false, dir, ConfigurationDefaults.PLAGIARISMDETECTIONDIRS);
+			CompletePipeline pipeline = new CompletePipeline(false, dirfile, ConfigurationDefaults.PLAGIARISMDETECTIONDIRS);
 			
 			try {
 				baselineGraph = pipeline.initDependencyGraph();
 		}
 		
 		DependencyGraph copyOfBaseline = baselineGraph.copyGraph();
-		
-		
-		
 		return copyOfBaseline;
 	}
 	
 	
 	
 	public static void main(String[] args) throws Exception {
-
-		DependencyGraph depGraph =  PROVReader.generatePANDepGraph();
+		
+		File dir = new File(dirfile);
+		dirfile = dir.getAbsolutePath();
+		
+		File json = new File(jsonfile);
+		jsonfile = json.getAbsolutePath();
+		
+		PROVReader provreader = new PROVReader(dirfile, jsonfile);
+		DependencyGraph depGraph =  provreader.generatePANDepGraph();
 		System.out.println("Done with reference graph");
 
 		// Compare with Transitive Closure
 
 		// use only Lucene
 		DependencyGraph depGraph1 = createGraph();
+		System.out.println("Done with baseline graph");
 		depGraph1 = new LuceneSimilaritySignal().computeSignal(depGraph1);
-		depGraph1 = new BackwardTemporalFilter().filterSignals(depGraph1);
+		//depGraph1 = new BackwardTemporalFilter().filterSignals(depGraph1);
 		depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
 		
 		File logfile = new File("log" + System.currentTimeMillis() + ".txt");

File src/nl/vu/recoprov/experiments/PROVReader.java

 package nl.vu.recoprov.experiments;
 
-import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Date;
-import java.util.GregorianCalendar;
 import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
 
-import javax.xml.datatype.DatatypeConfigurationException;
-import javax.xml.datatype.DatatypeFactory;
-import javax.xml.datatype.XMLGregorianCalendar;
-import javax.xml.namespace.QName;
-
 import nl.vu.recoprov.CompletePipeline;
-import nl.vu.recoprov.LuceneIndexer;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.baseclasses.DependencyNode;
-import nl.vu.recoprov.baseclasses.DependencyGraph.LabelledEdge;
 import nl.vu.recoprov.signalaggregators.WeightedSumAggregator;
-import nl.vu.recoprov.signaldetectors.MetadataSimilaritySignal;
 import nl.vu.recoprov.utils.ConfigurationDefaults;
-import nl.vu.recoprov.utils.CustomFileReader;
-
 import org.openprovenance.prov.dot.ProvToDot;
 import org.openprovenance.prov.json.Converter;
-import org.openprovenance.prov.notation.PROV_NParser;
-import org.openprovenance.prov.notation.TreeTraversal;
-import org.openprovenance.prov.notation.Utility;
-import org.openprovenance.prov.notation.PROV_NParser.document_return;
 import org.openprovenance.prov.xml.Activity;
-import org.openprovenance.prov.xml.ActivityRef;
 import org.openprovenance.prov.xml.Agent;
-import org.openprovenance.prov.xml.AgentRef;
 import org.openprovenance.prov.xml.Document;
 import org.openprovenance.prov.xml.Entity;
-import org.openprovenance.prov.xml.EntityRef;
 import org.openprovenance.prov.xml.ProvFactory;
-import org.openprovenance.prov.xml.SpecializationOf;
 import org.openprovenance.prov.xml.Statement;
 import org.openprovenance.prov.xml.StatementOrBundle;
-import org.openprovenance.prov.xml.Used;
-import org.openprovenance.prov.xml.WasAssociatedWith;
 import org.openprovenance.prov.xml.WasDerivedFrom;
-import org.openprovenance.prov.xml.WasGeneratedBy;
-//import org.openprovenance.prov.rdf.RdfConstructor;
-
-
-import com.dropbox.client2.exception.DropboxException;
 
 
 public class PROVReader {
 	static int counter = 0;
 	
 	
-	private static DependencyGraph depGraph;
-	private final static String dir = "/Users/saramagliacane/Documents/workspace/recoprov/pan12-detailed-comparison-training-corpus/";
-	private final static String jsonfile = "/Users/saramagliacane/Documents/workspace/recoprov/pan.json";
+	private  DependencyGraph depGraph;
+	private  String dir ;
+	private  String jsonfile ;
 	
-	private final static String namespace = "{http://www.vu.nl/prov/pan/}";
 	private final static String suspiciousFolder = "susp/";
 	private final static String sourceFolder = "src/";
 	
-	public static void main(String args[]) throws Exception {
-		
-		
-		ProvFactory factory = initFactory();
 
-		// need those to initialize the dependency graph structure
-		CompletePipeline pipeline = new CompletePipeline(false, dir);
-		depGraph = pipeline.initDependencyGraph(); 
-		
-		// and get the lucene identifiers
-		depGraph = pipeline.indexFiles(depGraph);
-
-		createEntityFromDepGraph(factory, depGraph);
-		
-		readJSON(jsonfile, factory);
-		
-		//System.out.println(depGraph);
-		
-		convertToDot(factory);
-
-		
-		
+	public PROVReader(String dir, String jsonfile){
+		this.dir = dir;
+		this.jsonfile = jsonfile;
 		
+		System.out.println("Dir: "+ dir + " - json: "+ jsonfile);
 	}
 	
-	public static DependencyGraph generatePANDepGraph() throws Exception{
+	
+	public DependencyGraph generatePANDepGraph() throws Exception {
+
 		ProvFactory factory = initFactory();
 
 		// need those to initialize the dependency graph structure
 		CompletePipeline pipeline = new CompletePipeline(false, dir);
-		depGraph = pipeline.initDependencyGraph(); 
-		
+
+		depGraph = pipeline.initDependencyGraph();
+
 		// and get the lucene identifiers
-		depGraph = pipeline.indexFiles(depGraph);
+		depGraph = pipeline.loadMetadaAndIndexes(depGraph);
 
 		createEntityFromDepGraph(factory, depGraph);
-		
+
 		readJSON(jsonfile, factory);
-		
+
+		// convertToDot(factory);
+
 		return depGraph;
-		
+
 	}
 		
 		
 	
-	public static ProvFactory initFactory() {
+	public ProvFactory initFactory() {
 		ProvFactory factory = ProvFactory.getFactory();
 		Hashtable<String, String> namespace = new Hashtable<String, String>();	
 		namespace.put("_", "");
 	
 
 	
-public static void createEntityFromDepGraph(ProvFactory factory, DependencyGraph depGraph){
-	for (String name: depGraph.keySet()){
-		// create an entity for each node in the dependency graph
-		if(ConfigurationDefaults.ignoreFile(name))
-			continue;
-		// for each node take the edges
-		DependencyNode d = depGraph.get(name);
-		createEntity(factory, d);
-		
-		
+	public void createEntityFromDepGraph(ProvFactory factory,
+			DependencyGraph depGraph) {
+		for (String name : depGraph.keySet()) {
+			// create an entity for each node in the dependency graph
+			if (ConfigurationDefaults.ignoreFile(name))
+				continue;
+			// for each node take the edges
+			DependencyNode d = depGraph.get(name);
+			createEntity(factory, d);
+
+		}
 	}
-}
 
 
-public static void createEntity(ProvFactory factory, DependencyNode d){
+	public void createEntity(ProvFactory factory, DependencyNode d) {
 
+		String name = d.getCompleteFilepath();
+		// name = name.replace(dir, "");
+		String id = "" + d.getLuceneDocNumber();
 
-	String name = d.getCompleteFilepath();
-	//name = name.replace(dir, "");
-	String id = ""+d.getLuceneDocNumber();
-	
-	Entity entity = listOfAvailableEntities.get(name);
-	if(entity == null){
-		entity = factory.newEntity(name, id );
-		listOfAvailableEntities.put(name, entity);
-		
-	}		
+		Entity entity = listOfAvailableEntities.get(name);
+		if (entity == null) {
+			entity = factory.newEntity(name, id);
+			listOfAvailableEntities.put(name, entity);
 
-	
-}
+		}
+
+	}
 
 
 	
 
 
-public static void readJSON(String jsonfile, ProvFactory factory){
-	Converter conv = new Converter();
-	Document doc = new Document();
-	try{
-		doc = conv.readDocument(jsonfile);
-	}
-	catch(Exception e ){ 
-		//ignore}
-	}
-	List<StatementOrBundle> provlist = doc.getEntityAndActivityAndWasGeneratedBy();
-	for (StatementOrBundle s: provlist){
-		// assume are all statements
-//		if (s instanceof Entity){
-//			String entityName = ((Entity) s).getId().toString();
-//			entityName = entityName.replace(namespace, "");
-//			System.out.println("Entity: " + entityName);
-//		}
-//		
-//		if (s instanceof Activity){
-//			String actName = ((Activity) s).getId().toString();
-//			actName = actName.replace(namespace, "");
-//			System.out.println("Activity: " + actName);
-//		}
+	public void readJSON(String jsonfile, ProvFactory factory) {
+		Converter conv = new Converter();
+		Document doc = new Document();
+		File sourcedir = new File(dir, sourceFolder);
+		File suspdir = new File(dir, suspiciousFolder);
 		
-//		if (s instanceof WasGeneratedBy){
-//			String actName = ((WasGeneratedBy) s).getId().toString();
-//			actName = actName.replace(namespace, "");
-//			System.out.println("WasGeneratedBy: " + actName);
-//		}
-		
-		if (s instanceof WasDerivedFrom){
-			String used = ((WasDerivedFrom) s).getUsedEntity().getRef().getLocalPart();
-			String generated = ((WasDerivedFrom) s).getGeneratedEntity().getRef().getLocalPart();
-			//System.out.println("WasDerivedFrom: " + used + " ->"+ generated);
-			
-			depGraph.addEdge(depGraph.get(dir+sourceFolder+used), depGraph.get(dir+suspiciousFolder+generated),WeightedSumAggregator.FINAL_SCORE, 1.0);
+		try {
+			doc = conv.readDocument(jsonfile);
+		} catch (Exception e) {
+			// ignore
 		}
-		
+		List<StatementOrBundle> provlist = doc
+				.getEntityAndActivityAndWasGeneratedBy();
+		for (StatementOrBundle s : provlist) {
+			// assume they are all statements
+			// if (s instanceof Entity){
+			// String entityName = ((Entity) s).getId().toString();
+			// entityName = entityName.replace(namespace, "");
+			// System.out.println("Entity: " + entityName);
+			// }
+			//
+			// if (s instanceof Activity){
+			// String actName = ((Activity) s).getId().toString();
+			// actName = actName.replace(namespace, "");
+			// System.out.println("Activity: " + actName);
+			// }
+
+			// if (s instanceof WasGeneratedBy){
+			// String actName = ((WasGeneratedBy) s).getId().toString();
+			// actName = actName.replace(namespace, "");
+			// System.out.println("WasGeneratedBy: " + actName);
+			// }
+
+			if (s instanceof WasDerivedFrom) {
+				String used = ((WasDerivedFrom) s).getUsedEntity().getRef()
+						.getLocalPart();
+				String generated = ((WasDerivedFrom) s).getGeneratedEntity()
+						.getRef().getLocalPart();
+				// System.out.println("WasDerivedFrom: " + used + " ->"+
+				// generated);
+
+				File usedFile = new File(sourcedir, used);
+				File genFile = new File(suspdir, generated);
+				
+				depGraph.addEdge(depGraph.get(usedFile.getAbsolutePath()),
+						depGraph.get(genFile.getAbsolutePath()),
+						WeightedSumAggregator.FINAL_SCORE, 1.0);
+			}
+
+		}
+
 	}
-	
-}
 
 
 
 
-public static void convertToDot(ProvFactory factory){
+	public void convertToDot(ProvFactory factory) {
 
-	ProvToDot provtodot = new ProvToDot();
-	Document container;
-	container = factory.newDocument(listOfAvailableActivities.values(), listOfAvailableEntities.values(),  new LinkedList<Agent>(), new LinkedList<Statement>());
-	
-	try {
-		provtodot.convert(container, new File("graphCorpus.gv"));
-	} catch (FileNotFoundException e) {
-	
-		e.printStackTrace();
+		ProvToDot provtodot = new ProvToDot();
+		Document container;
+		container = factory.newDocument(listOfAvailableActivities.values(),
+				listOfAvailableEntities.values(), new LinkedList<Agent>(),
+				new LinkedList<Statement>());
+
+		try {
+		
+			provtodot.convert(container, new File("graphCorpus.gv"));
+		
+		} catch (FileNotFoundException e) {
+
+			e.printStackTrace();
+		}
 	}
-}
+