1. Sara Magliacane
  2. recoprov

Commits

Sara Magliacane  committed 58f9006

adding compression distance to experiments

  • Participants
  • Parent commits e254429
  • Branches master

Comments (0)

Files changed (2)

File src/main/java/nl/vu/recoprov/CompletePipeline.java

View file
  • Ignore whitespace
 import nl.vu.recoprov.ProvDMtranslator;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.signalaggregators.WeightedSumAggregator;
+import nl.vu.recoprov.signaldetectors.CompressionDistanceSignal;
 import nl.vu.recoprov.signaldetectors.ImageSimilaritySignal;
 import nl.vu.recoprov.signaldetectors.LuceneInverseSimilarity;
 import nl.vu.recoprov.signaldetectors.LuceneMoreLikeThisSignal;
 	private ImageSimilaritySignal imageSimilaritySignal = new ImageSimilaritySignal();
 	private LuceneMoreLikeThisSignal luceneMoreLikeThisSignal = new LuceneMoreLikeThisSignal();
 	private TopKEdges topKEdges = new TopKEdges();
+	private CompressionDistanceSignal compressionDistanceSignal = new CompressionDistanceSignal();
 	
 	private TikaReader tika;
 	
 	public void imageSimilaritySignal(DependencyGraph depGraph) { // Pipeline step: passes depGraph to the ImageSimilaritySignal detector held by this class.
 		imageSimilaritySignal.computeSignal(depGraph); // resolves to the field of the same name, not this method; NOTE(review): result is not returned here, presumably recorded on depGraph - confirm
 	}
+	
+	public void compressionDistanceSignal(DependencyGraph depGraph){ // Pipeline step: passes depGraph to the CompressionDistanceSignal detector held by this class.
+		compressionDistanceSignal.computeSignal(depGraph); // resolves to the field of the same name, not this method; NOTE(review): result is not returned here, presumably recorded on depGraph - confirm
+	}
 
 	public void filterBackWards(DependencyGraph depGraph) {
 		backwardTemporalFilter.filterSignals(depGraph);

File src/main/java/nl/vu/recoprov/experiments/Experiment3.java

View file
  • Ignore whitespace
 import java.io.IOException;
 import nl.vu.recoprov.CompletePipeline;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
+import nl.vu.recoprov.signaldetectors.CompressionDistanceSignal;
 import nl.vu.recoprov.utils.ConfigurationDefaults;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 		if (!checkInitialParameters()) {
 			System.exit(0);
 		}
+		
+		// experimenting to get perfect recall
+		ConfigurationDefaults.LUCENE_MAX_NUMBER_DOCS = 6000;
 
 		CompletePipeline pipeline = new CompletePipeline(false, dirfile);
 		FileWriter writer = createFileResultsWriter();
 		DependencyGraph depGraphLucene = createLuceneGraph(pipeline);
 		DependencyGraph depGraph = createReferenceGraph(pipeline);
 		DependencyGraph depGraphLuceneMore = createLuceneMoreGraph(pipeline);
-
 		
-
-		// useful for small experiments
-		double[] thresholds = { 0.01, 0.05, 0.1, 0.2 };
+		//add compression
+		DependencyGraph depGraphLuceneCompression = depGraphLucene
+				.copyGraph();
+		pipeline.compressionDistanceSignal(depGraphLuceneCompression);
+	
+		double[] thresholds = { 0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35,
+				0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9,
+				0.95 };
 
 		for (double threshold : thresholds) {
 			try {
 				pipeline.filterTextContainment(depGraph1);
 				pipeline.filterPlagiarismCorpus(depGraph1);
 				pipeline.aggregateSignals(depGraph1);
-				pipeline.filterTopKEdges(depGraph1);
 				writeResults(pipeline, depGraph, depGraph1,
 						"LucenePAN2Filters", writer);
 				
+				depGraph1 = depGraphLuceneCompression.copyGraph();
+				pipeline.filterTopKEdges(depGraph1, CompressionDistanceSignal.COMPRESSION_DISTANCE);
+				pipeline.filterPlagiarismCorpus(depGraph1);
+				pipeline.filterLuceneThreshold(depGraphLuceneThreshold);
+				pipeline.aggregateSignals(depGraph1);
+				writeResults(pipeline, depGraph, depGraph1,
+						"LuceneCompressionPANFilter", writer);
 				
 				
 				
 				throw e;
 			}
 		}
-		// trying with Lucene More Like This
-
-
-		//
-		// for (double threshold : thresholds) {
-		//
-		// ConfigurationDefaults.LUCENE_THRESHOLD = threshold;
-		//
-		// DependencyGraph depGraphLuceneMoreThreshold = depGraphLuceneMore
-		// .copyGraph();
-		// luceneThresholdFilter.filterSignals(depGraphLuceneMoreThreshold);
-		//
-		// DependencyGraph depGraph1 = depGraphLuceneMoreThreshold.copyGraph();
-		// aggregator.aggregateSignals(depGraph1);
-		// writeResults(depGraph1, "LuceneMoreLikeThis", writer);
-		//
-		// depGraph1 = depGraphLuceneMoreThreshold.copyGraph();
-		// textContainmentFilter.filterSignals(depGraph1);
-		// plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
-		// writeResults(depGraph1, "LuceneMoreLikeThisPAN2Filters", writer);
-		// }
-
+		
 		writer.flush();
 		writer.close();