Commits

Sara Magliacane committed 2ea7f12

Further refactoring and exception handling

Comments (0)

Files changed (8)

src/main/java/nl/vu/recoprov/CompletePipeline.java

 
 import java.io.BufferedWriter;
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.FileWriter;
 import java.io.IOException;
 
 	}
 
 
-	private void translateToPROVDM(DependencyGraph depGraph) {
+	private void translateToPROVDM(DependencyGraph depGraph) throws FileNotFoundException {
 		ProvDMtranslator.translate(depGraph);
 		
 	}

src/main/java/nl/vu/recoprov/LuceneIndexer.java

 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.SimpleFSDirectory;
 
+/**
+ * Create the Lucene index for the files in the DependencyGraph.
+ * @author saramagliacane
+ *
+ */
 
 public class LuceneIndexer {
 
 
 
 			
-	public void createIndex(DependencyGraph input) throws IOException {
+	private void createIndex(DependencyGraph input) throws IOException {
 		
 		int count = 0;
 
 
 	}
 
-	public void assignLuceneNumbers(DependencyGraph input) throws IOException {
+	private void assignLuceneNumbers(DependencyGraph input) throws IOException {
 
 		File indexDir = new File(ConfigurationDefaults.RELATIVE_INDEX_DIR);
 		FSDirectory store;
 	}
 	
 	
-	public StringBuffer readContentFile(DependencyNode node) {
+	private StringBuffer readContentFile(DependencyNode node) {
 		
 		StringBuffer content = new StringBuffer();
 
 		return content;
 	}
 	
-	public Document createLuceneDocument (DependencyNode node, StringBuffer content){
+	private Document createLuceneDocument (DependencyNode node, StringBuffer content){
 		
 		Document doc = new Document();
 		doc.add(new StringField("name", node.getCompleteFilepath(),
 		
 	}
 	
-	public IndexWriter createIndexWriter(FSDirectory store) throws IOException {
+	private IndexWriter createIndexWriter(FSDirectory store) throws IOException {
 
 		Analyzer analyzer = new CustomAnalyzer(
 				ConfigurationDefaults.LUCENE_VERSION);

src/main/java/nl/vu/recoprov/SearchCache.java

 import java.util.HashMap;
 import java.util.Map;
 
+/**
+ * A modified version of the SearchCache from the Dropbox SDK.
+ * It handles the caching of the user authorization.
+ *
+ */
+
 public class SearchCache
 {
  

src/main/java/nl/vu/recoprov/baseclasses/DependencyGraph.java

 		
 		}
 		
-
-
 			
 	}
 	

src/main/java/nl/vu/recoprov/baseclasses/DependencyNode.java

 
 	public void setLuceneDocNumber(int luceneDocNumber) {
 		this.recoMetadata.setLuceneDocNumber(luceneDocNumber);
-
-		// System.out.println("translation: " +luceneDocNumber + "-" +
-		// this.getCompleteFilepath());
-
 		depGraph.addTranslation(luceneDocNumber, this.getCompleteFilepath());
 	}
 
 	}
 	
 	public String toString() {
-		String temp = "##########################################";
-		temp += "\nMetadata: " + this.completefilepath + " \n";
+		StringBuffer temp = new StringBuffer("##########################################");
+		temp.append("\nMetadata: ");
+		temp.append(this.completefilepath);
+		temp.append(" \n");
 
 		for (String name : this.recoMetadata.names()) {
-			temp += name + ": " + this.recoMetadata.get(name) + "\n";
+			temp.append(name);
+			temp.append(": ");
+			temp.append(this.recoMetadata.get(name));
+			temp.append("\n");
 		}
 
 		if (this.content != null) {
-			temp += "Contentfilename: " + content;
+			temp.append("Contentfilename: ");
+			temp.append(content);
 		}
 
-		temp += "\n";
-		return temp;
+		temp.append("\n");
+		return temp.toString();
 	}
 
 

src/main/java/nl/vu/recoprov/baseclasses/RecoMetadata.java

 	public final static String DROPBOX_CLIENTMTIME = "dropbox-clientMtime";
 	public final static String DROPBOX_BYTES = "dropbox-bytes";
 	public final static String DROPBOX_SIZE = "dropbox-size";
+	public static final String DROPBOX_MODIFIED = "dropbox-modified";	
 	public final static String DROPBOX_MIMETYPE = "dropbox-mimeType";
 	public final static String FILESYSTEM_LASTMODIFIED = "filesystem-last-modified";
-	public final static String COMPRESSED_SIZE = "compressed-size";	
-	private static final String FILESYSTEM_DATELASTMODIFIED = "filesystem-dir-last-modified";
+	public final static String COMPRESSED_SIZE = "compressed-size";
+
 	
 	//dropbox
 	private Entry DropboxEntry = null;	
 	
 
 	
-//	private ContentType contentType;
-	
+//	private ContentType contentType;	
 //	private SemanticType semanticType;
 	
-	
 
 	public Entry getDropboxEntry() {
 		return DropboxEntry;
 	
 	public Date getCreationDate(){
 		if(this.get(this.CREATION_DATE) != null){
-			String date = this.get(this.CREATION_DATE);
-			
-			SimpleDateFormat formatter;
-			formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
-
-			Date val = null;
+			String date = this.get(this.CREATION_DATE);		
+			SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
+			Date val;
 			try {
 				val = formatter.parse(date);
 			} catch (ParseException e) {
-				// TODO Auto-generated catch block
 				e.printStackTrace();
+				return null;
 			}
-
-			return val;
-			
+			return val;		
 		}	
-		
 		return null;
 	}
 
 
-	public Date getModified()  {
-		
-		if (modified!= null)
+	public Date getModified() {
+
+		if (modified != null)
 			return modified;
 		else
 			return fsmodified;
 	
 	public void setModified( Date modified) {
 		this.modified = modified;
-		this.set(Property.externalDate("dropbox-modified"), this.modified);
+		this.set(Property.externalDate(DROPBOX_MODIFIED), this.modified);
 		
 	}
 
 			
 		}
 		
-		if (!this.imagePDMetadata.isEmpty() && !r.getImagePDMetadata().isEmpty()){
-			
-			for (String images: imagePDMetadata.keySet()){
-				PDMetadata thismetadata = getImagePDMetadata().get(images);
-				PDMetadata thatmetadata = r.getImagePDMetadata().get(images);
-				if (thismetadata.equals(thatmetadata))
-					System.out.println("************* Works!");
-			}
-		}
-
-		
 		return result;
 	}
 	
 
 		
 	}
-	
-
-	@Deprecated
-	public int numberOfEquals(RecoMetadata r){
-		int count = 0;
-		for (String name:this.names()){
-			if(r.get(name) == null)
-				continue;
-			if(this.get(name).equals(r.get(name)))
-				count++;
-		}
-		return count;
-	}
-	
-	
-	public String toString(){
-		String temp = "";
-		for (String name: this.names()){
-			temp+= name + ":" + this.get(name) + "\n";
-		}
-		return temp;
-	}
-
-	public String toCSVString(Set<String>listOfNames) {
-		String temp = getLuceneDocNumber()+ DependencyNode.DELIMITER; 
-		
-		for (String name: listOfNames){
-			if(name.equals(LUCENE_DOCID))
-				continue;
-			else if(this.get(name)!= null)
-				temp+= this.get(name) + DependencyNode.DELIMITER;
-			else
-				temp+= "" + DependencyNode.DELIMITER;
-		}
-		return temp;
-	}
 
 	public Set<String> getAttributes() {
 		Set<String> attrib = new TreeSet<String>();
 		Set<String> insertionOrderedSet = new LinkedHashSet<String>();
 		insertionOrderedSet.add(LUCENE_DOCID);
 		insertionOrderedSet.addAll(attrib);
-
-		
 		return insertionOrderedSet;
 	}
 
 			
 	
 	public String convertStreamToString(InputStream is) throws IOException {
-	    /*
-	     * To convert the InputStream to String we use the BufferedReader.readLine()
-	     * method. We iterate until the BufferedReader return null which means
-	     * there's no more data to read. Each line will appended to a StringBuilder
-	     * and returned as String.
-	     */
-	    if (is != null) {
-	        StringBuilder sb = new StringBuilder();
-	        String line;
+		if (is != null) {
+			StringBuilder sb = new StringBuilder();
+			String line;
+
+			try {
+				BufferedReader reader = new BufferedReader(
+						new InputStreamReader(is, "UTF-8"));
+				while ((line = reader.readLine()) != null) {
+					sb.append(line).append("\n");
+				}
+			} finally {
+				is.close();
+			}
+			return sb.toString();
+		} else {
+			return "";
+		}
+	}
 
-	        try {
-	            BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
-	            while ((line = reader.readLine()) != null) {
-	                sb.append(line).append("\n");
-	            }
-	        } finally {
-	            is.close();
-	        }
-	        return sb.toString();
-	    } else {       
-	        return "";
-	    }
+	/**
+	 * Renders every metadata entry as one "name:value" line.
+	 *
+	 * @return one line per name returned by names(), each terminated by "\n";
+	 *         an empty string when there are no names
+	 */
+	@Override
+	public String toString() {
+		// Local accumulator only, so the unsynchronized StringBuilder is enough
+		// (same choice as convertStreamToString in this class).
+		StringBuilder text = new StringBuilder();
+		for (String name : this.names()) {
+			text.append(name).append(":");
+			text.append(this.get(name)).append("\n");
+		}
+		return text.toString();
 	}
 
+	public String toCSVString(Set<String>listOfNames) {
+		StringBuffer temp = new StringBuffer(getLuceneDocNumber());
+		temp.append(DependencyNode.DELIMITER); 
+		
+		for (String name: listOfNames){
+			if(name.equals(LUCENE_DOCID)){
+				continue;
+			}
+			else if (this.get(name)!= null){
+				temp.append(this.get(name));
+				temp.append(DependencyNode.DELIMITER); 
+			}
+			else{
+				temp.append(DependencyNode.DELIMITER); 
+			}
+		}
+		return temp.toString();
+	}
 
 }

src/main/java/nl/vu/recoprov/experiments/Experiment3.java

  */
 
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.FileWriter;
 import java.io.IOException;
 import nl.vu.recoprov.CompletePipeline;
 	private static ProvDMtranslator provtranslator = new ProvDMtranslator();
 	private static Logger logger;
 
-	public static void main(String[] args) throws IOException, DropboxException {
+	public static void main(String[] args) throws Exception  {
 
 		logger = LoggerFactory
 				.getLogger("nl.vu.recoprov.experiments.Experiment3");
 		}
 		
 		String resultsfilename = "results" + System.currentTimeMillis() + ".txt";
-		FileWriter writer = createFileResultsWriter(resultsfilename);
-
-		DependencyGraph depGraphLucene = createGraph();
-		new LuceneSimilaritySignal().computeSignal(depGraphLucene);
-		logger.info("Lucene graph created.");
-
-		PROVReader provreader = new PROVReader(dirfile, jsonfile);
-		depGraph = provreader.generatePANDepGraph();
-		provtranslator.translate(depGraph, "graphCorpus.gv");
-		logger.info("Reference graph created.");
-
+		FileWriter writer;
+		try {
+			writer = createFileResultsWriter(resultsfilename);
+		} catch (IOException e) {
+			e.printStackTrace();
+			logger.error("Could not create results file writer {}", resultsfilename);
+			throw e;
+		}
 
+		DependencyGraph depGraphLucene;
+		try {
+			depGraphLucene = createGraph();
+		} catch (Exception e) {
+			e.printStackTrace();
+			logger.error("Could not create baseline graph.");
+			throw e;
+		}
+		
+		try {
+			new LuceneSimilaritySignal().computeSignal(depGraphLucene);
+			logger.info("Lucene graph created.");
+		} catch (Exception e) {
+			e.printStackTrace();
+			logger.error("Could not create Lucene graph.");
+			throw e;
+		}
+		
+		try {
+			PROVReader provreader = new PROVReader(dirfile, jsonfile);
+			depGraph = provreader.generatePANDepGraph();
+			provtranslator.translate(depGraph, "graphCorpus.gv");
+			logger.info("Reference graph created.");
+		} catch (Exception e) {
+			e.printStackTrace();
+			logger.error("Could not create reference graph.");
+			throw e;
+		}
+	
 		LuceneThresholdFilter luceneThresholdFilter = new LuceneThresholdFilter();
 		WeightedSumAggregator aggregator = new WeightedSumAggregator();
 		TextContainmentFilter textContainmentFilter = new TextContainmentFilter();
 		PlagiarismCorpusSpecificFilter plagiarismCorpusSpecificFilter = new PlagiarismCorpusSpecificFilter();
 
 		// useful for small experiments
-		double[] thresholds = { 0, 0.01, 0.05, 0.1, 0.2 };
-
-		// double[] thresholds = { 0.05};
+		double[] thresholds = { 0.01, 0.05, 0.1, 0.2 };
+		
 		for (double threshold : thresholds) {
-
-			ConfigurationDefaults.LUCENE_THRESHOLD = threshold;
-
-			DependencyGraph depGraphLuceneThreshold = depGraphLucene
-					.copyGraph();
-			luceneThresholdFilter.filterSignals(depGraphLuceneThreshold);
-
-			DependencyGraph depGraph1 = depGraphLuceneThreshold.copyGraph();
-			aggregator.aggregateSignals(depGraph1);
-			writeResults(depGraph1, "Lucene", writer);
-
-			depGraph1 = depGraphLuceneThreshold.copyGraph();
-			textContainmentFilter.filterSignals(depGraph1);
-			aggregator.aggregateSignals(depGraph1);
-			writeResults(depGraph1, "LuceneFilter", writer);
-
-			depGraph1 = depGraphLuceneThreshold.copyGraph();
-			plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
-			aggregator.aggregateSignals(depGraph1);
-			writeResults(depGraph1, "LucenePANFilter", writer);
-
-			depGraph1 = depGraphLuceneThreshold.copyGraph();
-			textContainmentFilter.filterSignals(depGraph1);
-			plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
-			aggregator.aggregateSignals(depGraph1);
-			writeResults(depGraph1, "LucenePAN2Filters", writer);
-
+			try {
+				// double[] thresholds = { 0.05};
+
+				ConfigurationDefaults.LUCENE_THRESHOLD = threshold;
+
+				DependencyGraph depGraphLuceneThreshold = depGraphLucene
+						.copyGraph();
+				luceneThresholdFilter.filterSignals(depGraphLuceneThreshold);
+
+				DependencyGraph depGraph1 = depGraphLuceneThreshold.copyGraph();
+				aggregator.aggregateSignals(depGraph1);
+				writeResults(depGraph1, "Lucene", writer);
+
+				depGraph1 = depGraphLuceneThreshold.copyGraph();
+				textContainmentFilter.filterSignals(depGraph1);
+				aggregator.aggregateSignals(depGraph1);
+				writeResults(depGraph1, "LuceneFilter", writer);
+
+				depGraph1 = depGraphLuceneThreshold.copyGraph();
+				plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
+				aggregator.aggregateSignals(depGraph1);
+				writeResults(depGraph1, "LucenePANFilter", writer);
+
+				depGraph1 = depGraphLuceneThreshold.copyGraph();
+				textContainmentFilter.filterSignals(depGraph1);
+				plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
+				aggregator.aggregateSignals(depGraph1);
+				writeResults(depGraph1, "LucenePAN2Filters", writer);
+
+			} catch (Exception e) {
+				e.printStackTrace();
+				logger.error("Error in the main loop, threshold {}", threshold);
+				throw e;
+			}
 		}
-		
-		
 		// trying with Lucene More Like This
 		
-		DependencyGraph depGraphLuceneMore = createGraph();
-		new LuceneMoreLikeThisSignal().computeSignal(depGraphLuceneMore);
-		logger.info("LuceneMoreLikeThis graph created.");
-		
-		for (double threshold : thresholds) {
-
-			ConfigurationDefaults.LUCENE_THRESHOLD = threshold;
-
-			DependencyGraph depGraphLuceneMoreThreshold = depGraphLuceneMore
-					.copyGraph();
-			luceneThresholdFilter.filterSignals(depGraphLuceneMoreThreshold);
-			
-			DependencyGraph depGraph1 = depGraphLuceneMoreThreshold.copyGraph();
-			aggregator.aggregateSignals(depGraph1);
-			writeResults(depGraph1, "LuceneMoreLikeThis", writer);
-
-			depGraph1 = depGraphLuceneMoreThreshold.copyGraph();
-			textContainmentFilter.filterSignals(depGraph1);
-			plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
-			writeResults(depGraph1, "LuceneMoreLikeThisPAN2Filters", writer);
-		}
+//		DependencyGraph depGraphLuceneMore = createGraph();
+//		new LuceneMoreLikeThisSignal().computeSignal(depGraphLuceneMore);
+//		logger.info("LuceneMoreLikeThis graph created.");
+//		
+//		for (double threshold : thresholds) {
+//
+//			ConfigurationDefaults.LUCENE_THRESHOLD = threshold;
+//
+//			DependencyGraph depGraphLuceneMoreThreshold = depGraphLuceneMore
+//					.copyGraph();
+//			luceneThresholdFilter.filterSignals(depGraphLuceneMoreThreshold);
+//			
+//			DependencyGraph depGraph1 = depGraphLuceneMoreThreshold.copyGraph();
+//			aggregator.aggregateSignals(depGraph1);
+//			writeResults(depGraph1, "LuceneMoreLikeThis", writer);
+//
+//			depGraph1 = depGraphLuceneMoreThreshold.copyGraph();
+//			textContainmentFilter.filterSignals(depGraph1);
+//			plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
+//			writeResults(depGraph1, "LuceneMoreLikeThisPAN2Filters", writer);
+//		}
 
 
 		writer.flush();
 	}
 
 	private static void writeResults(DependencyGraph predicted, String message,
-			FileWriter writer) {
+			FileWriter writer) throws FileNotFoundException {
 
 		String temp = depGraph.similarToGraph(predicted).toString();
 		try {

src/main/java/nl/vu/recoprov/signalaggregators/WeightedSumAggregator.java

 package nl.vu.recoprov.signalaggregators;
 
 import java.util.ArrayList;
-import java.util.Date;
 import java.util.HashMap;
-
 import nl.vu.recoprov.abstractclasses.SignalAggregator;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.baseclasses.DependencyNode;