Commits

Sara Magliacane committed 590e578

refactoring and adding logging

  • Participants
  • Parent commits fb7fe60

Comments (0)

Files changed (24)

 	<artifactId>gson</artifactId>
 	<version>2.2.2</version>
 </dependency>
-            
+            <dependency>
+	<groupId>ch.qos.logback</groupId>
+	<artifactId>logback-classic</artifactId>
+	<version>1.0.11</version>
+</dependency>
   	<dependency>
   		<groupId>org.eclipse.jetty.aggregate</groupId>
   		<artifactId>jetty-all-server</artifactId>
 	<version>0.4.1.5</version>
 </dependency>
 <dependency>
-	<groupId>ch.qos.logback</groupId>
-	<artifactId>logback-classic</artifactId>
-	<version>1.0.6</version>
-</dependency>
-<dependency>
 	<groupId>org.apache.sanselan</groupId>
 	<artifactId>sanselan</artifactId>
 	<version>0.97-incubator</version>

File src/main/java/nl/vu/recoprov/CompletePipeline.java

 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.dropbox.client2.exception.DropboxException;
+
 import nl.vu.recoprov.ProvDMtranslator;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.signalaggregators.WeightedSumAggregator;
 import nl.vu.recoprov.signaldetectors.MatchTitleInContentSignal;
 import nl.vu.recoprov.signaldetectors.MetadataSimilaritySignal;
 import nl.vu.recoprov.signalfilters.BackwardTemporalFilter;
+import nl.vu.recoprov.signalfilters.LuceneThresholdFilter;
+import nl.vu.recoprov.signalfilters.PlagiarismCorpusSpecificFilter;
+import nl.vu.recoprov.signalfilters.TextContainmentFilter;
 import nl.vu.recoprov.utils.ConfigurationDefaults;
 import nl.vu.recoprov.utils.ConfigurationReader;
 
 
 public class CompletePipeline {
 
-	private String currentDir;
+	private String currentDir = null;
 	private Boolean connectToInternet = true;
 	private String[] params = null;
+	private Logger logger;
 	
-	public CompletePipeline() {}
-
-	public CompletePipeline(Boolean online) {
-		this.connectToInternet = online;
+	private LuceneThresholdFilter luceneThresholdFilter = new LuceneThresholdFilter();
+	private WeightedSumAggregator aggregator = new WeightedSumAggregator();
+	private TextContainmentFilter textContainmentFilter = new TextContainmentFilter();
+	private PlagiarismCorpusSpecificFilter plagiarismCorpusSpecificFilter = new PlagiarismCorpusSpecificFilter();
+	private ProvDMtranslator ProvDMtranslator = new ProvDMtranslator();
+	private DropboxClient client = new DropboxClient();
+	
+	public CompletePipeline() {
+		logger = LoggerFactory
+				.getLogger("nl.vu.recoprov.CompletePipeline");
 	}
 
+
 	public CompletePipeline(Boolean online, String dir) {
-		this.connectToInternet = online;
-		this.currentDir = dir;
+		this (online, dir, null);
 	}
 	public CompletePipeline(Boolean online, String dir, String[] params) {
 		this.connectToInternet = online;
 		this.currentDir = dir;
 		this.params = params;
+		logger = LoggerFactory
+				.getLogger("nl.vu.recoprov.CompletePipeline");
 	}
 	
 	public static void main(String[] args) throws Exception {
 
 		depGraph = pipeline.initDependencyGraph();
 
-		depGraph = pipeline.loadMetadaAndIndexes(depGraph);
+		pipeline.loadMetadaAndIndexes(depGraph);
 
-		depGraph = pipeline.computeSignals(depGraph);
+		pipeline.computeSignals(depGraph);
 
-		depGraph = pipeline.filterSignals(depGraph);
+		pipeline.filterSignals(depGraph);
 
 		// System.out.println(depGraph);
 		// System.out.println(depGraph.getAttributes());
 
 		// System.out.println(depGraph.toCSVString());
 
-		depGraph = pipeline.aggregateSignals(depGraph);
-
-		System.out.println(depGraph);
+		pipeline.aggregateSignals(depGraph);
 
 		pipeline.writeToFile(depGraph);
 
 	}
 
 
-
 	private void translateToPROVDM(DependencyGraph depGraph) {
-		new ProvDMtranslator().translate(depGraph);
+		ProvDMtranslator.translate(depGraph);
 		
 	}
 
-	public DependencyGraph initDependencyGraph() throws Exception {
+	public DependencyGraph initDependencyGraph() throws DropboxException {
+		logger.info("Initializing DependencyGraph for directory: {}", currentDir);
 		DependencyGraph depGraph = new DependencyGraph();
-		DropboxClient client = new DropboxClient();
-
+		
 		if (connectToInternet) {
 			if (!client.isLinked()) {
 				client.linkToAccount();
 			}
-			// dir = dir.substring(dir.indexOf("Dropbox/")+7, dir.length());
-			depGraph = client.getAllRevs(depGraph, currentDir);
+			client.getAllRevs(currentDir, depGraph);
 			currentDir = ConfigurationDefaults.TEMPDIR;
 		} else {
-			depGraph = client.getAllRevsOffline(new File(currentDir), depGraph);
-		}
-		System.out.println("PHASE 1 completed: Initialization of the DependencyGraph");
-
+			client.getAllRevsOffline(new File(currentDir), depGraph);
+		}		
 		return depGraph;
 	}
 
-	public DependencyGraph loadMetadaAndIndexes(DependencyGraph depGraph) {
+	public void loadMetadaAndIndexes(DependencyGraph depGraph) {
 
-		// System.out.println("### SECOND PHASE: Analyze the contents with Apache Tika. \n");
 		TikaReader tika = new TikaReader(currentDir);
 		depGraph = tika.read(depGraph, params);
 
 		// TODO: stripping of tags
 
 		// System.out.println("### THIRD PHASE: Index the contents with Apache Lucene. \n");
-		depGraph = indexFiles(depGraph);
+		indexFiles(depGraph);
 
 		// EXTRACT IMAGES
 		depGraph = ImageReader.read(currentDir, depGraph);
 
-		return depGraph;
-
 	}
 
-	public DependencyGraph indexFiles(DependencyGraph depGraph){
-		return indexFiles(depGraph, currentDir);
+	public void indexFiles(DependencyGraph depGraph){
+		indexFiles(depGraph, currentDir);
 	}
 	
-	public DependencyGraph indexFiles(DependencyGraph depGraph, String dir){
+	public void indexFiles(DependencyGraph depGraph, String dir){
 		LuceneIndexer indexer = new LuceneIndexer(dir);
-		return indexer.indexFiles(depGraph);
+		indexer.indexFiles(depGraph);
 	}
 	
 	public DependencyGraph computeSignals(DependencyGraph depGraph) {
 		return depGraph;
 	}
 
-	public DependencyGraph filterSignals(DependencyGraph depGraph) {
+	public void filterSignals(DependencyGraph depGraph) {
 		// FILTERS
 		depGraph = new BackwardTemporalFilter().filterSignals(depGraph);
-		// Doesn't work completely, better the standard dot transitive reduction
-		// depGraph = new TransitiveReductionFilter().filterSignals(depGraph);
 
-		return depGraph;
 	}
 	
 	
-	public DependencyGraph aggregateSignals(DependencyGraph depGraph) {
-		depGraph = new WeightedSumAggregator().aggregateSignals(depGraph);
-		return depGraph;
+	public void aggregateSignals(DependencyGraph depGraph) {
+		aggregator.aggregateSignals(depGraph);
 	}
 
 	public void writeToFile(DependencyGraph depGraph){
 			writer.write(depGraph.toString());
 			writer.close();
 		} catch (IOException e) {
-			// TODO Auto-generated catch block
+			logger.error("Could not write to file: {}", ConfigurationDefaults.RESULTS);
 			e.printStackTrace();
 		}
 	}

File src/main/java/nl/vu/recoprov/DropboxClient.java

 	 * @return
 	 * @throws DropboxException
 	 */
-	public DependencyGraph getAllRevs(DependencyGraph depGraph)
+	public void getAllRevs(DependencyGraph depGraph)
 			throws DropboxException {
-		return getAllRevs(depGraph, "");
+		getAllRevs("", depGraph);
 	}
 
 	/**
 	 * @return
 	 * @throws DropboxException
 	 */
-	public DependencyGraph getAllRevs(DependencyGraph depGraph,
-			String dropboxFolder) throws DropboxException {
+	public void getAllRevs(String dropboxFolder, 
+			DependencyGraph depGraph) throws DropboxException {
 
 		// Load state.
 		State state = State.load(ConfigurationDefaults.STATE_FILE);
 
 		}
 
-		return depGraph;
 	}
 
 	private void copyToTempFolder(DropboxAPI client, List<Entry> result,
 		return revisions;
 	}
 
-	public Boolean isLinked() throws Exception {
+	public Boolean isLinked()  {
 
 		System.out.println("Checking linkage");
 		File f = new File(ConfigurationDefaults.STATE_FILE);

File src/main/java/nl/vu/recoprov/LuceneIndexer.java

 package nl.vu.recoprov;
 
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.Set;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 
 public class LuceneIndexer {
 
-
 	private String rootpath;
 	private Boolean cleanupIndex = false;
 	
 		this.rootpath = rootpath;
 	}
 
-	public DependencyGraph indexFiles(DependencyGraph input) {
-		
+	public void indexFiles(DependencyGraph input) throws IOException {
+
 		File indexDir = new File(ConfigurationDefaults.RELATIVE_INDEX_DIR);
-		
-		if (!indexDir.exists())
-			input = createIndex(input);
+
+		if (!indexDir.exists()) {
+			// if index directory doesn't exist, create the index
+			createIndex(input);
+		} 
 		else {
-			
-			if (cleanupIndex){
+			// the index directory exists, do we want to remove it?
+			if (cleanupIndex) {
 				try {
 					FileUtils.deleteDirectory(indexDir);
 				} catch (IOException e) {
 				}
 			}
 		}
-		
-
-		input = assignLuceneNumbers(input);
-		return input;
+		// now the index is created we have the lucene docid that we can assign
+		// to the nodes
+		assignLuceneNumbers(input);
 
 	}
 
 
 			
-	public DependencyGraph createIndex(DependencyGraph input) {
+	public void createIndex(DependencyGraph input) throws IOException {
+		
 		int count = 0;
 
 		File indexDir = new File(ConfigurationDefaults.RELATIVE_INDEX_DIR);
 		ConfigurationDefaults.INDEX_DIR = indexDir.getAbsolutePath();
+		
+		FSDirectory store = SimpleFSDirectory.open(indexDir);
+		IndexWriter writer = createIndexWriter(store);
 
+		for (String name : input.keySet()) {
 
-		try {
-			FSDirectory store = SimpleFSDirectory.open(indexDir);
+			if (ConfigurationDefaults.ignoreFile(name)) {
+				continue;
+			}
 
-			Analyzer analyzer = new CustomAnalyzer(ConfigurationDefaults.LUCENE_VERSION);
-//			Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
-//			fieldAnalyzers.put("raw-contents", new KeywordAnalyzer());
-//
-//			PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
-//					analyzer, fieldAnalyzers);
+			System.out.println(count++ + ": adding " + name);
 
-			IndexWriterConfig config = new IndexWriterConfig(ConfigurationDefaults.LUCENE_VERSION,
-					analyzer);
+			DependencyNode node = input.get(name);
+			StringBuffer content = readContentFile(node);
+			Document doc = createLuceneDocument(node, content);
+			writer.addDocument(doc);
 
-			IndexWriter writer = new IndexWriter(store, config);
-			Set<String> names = input.keySet();
+		}
 
-			for (String name : names) {
+		writer.close();
+		store.close();
 
-				if (ConfigurationDefaults.ignoreFile(name)) {
-					continue;
-				}
+	}
 
-				System.out.println(count + ": adding " + name);
-				count++;
+	public void assignLuceneNumbers(DependencyGraph input) throws IOException {
 
-				DependencyNode node = input.get(name);
-				StringBuffer content = new StringBuffer();
+		File indexDir = new File(ConfigurationDefaults.RELATIVE_INDEX_DIR);
+		FSDirectory store;
 
-				if ((node.getContent() != null)
-						&& (!node.getContent().isEmpty())) {
-					CustomFileReader contentFile = new CustomFileReader(
-							node.getContent());
+		store = SimpleFSDirectory.open(indexDir);
 
-					while (true) {
-						String line = contentFile.readLine();
-						if (line == null)
-							break;
-						content = new StringBuffer(content + line);
-					}
+		IndexReader reader = DirectoryReader.open(store);
+		IndexSearcher searcher = new IndexSearcher(reader);
+		int numdocs = reader.numDocs();
 
-					contentFile.close();
+		for (int i = 0; i < numdocs; i++) {
+			Document doc = searcher.doc(i);
+			String key = doc.getField("name").stringValue();
+			DependencyNode d = input.get(key);
 
-				}
+			d.setLuceneDocNumber(i);
+			input.put(d.getCompleteFilepath(), d);
+		}
 
-				Document doc = new Document();
-				doc.add(new StringField("name", node.getCompleteFilepath(),Field.Store.YES));
-				doc.add(new StringField("path", node.getCompleteFilepath(),
-						Field.Store.YES));
-				if ((node.getContent() != null)
-						&& (!node.getContent().isEmpty())) {
-					doc.add(new TextField("contents", new String(content),
-							Field.Store.YES));
-					doc.add(new TextField("raw-contents", new String(content),
-							Field.Store.YES));
-				} else {
-					System.out.println("No content in file "
-							+ node.getCompleteFilepath());
-				}
-				writer.addDocument(doc);
+		reader.close();
+		store.close();
 
+	}
+	
+	
+	public StringBuffer readContentFile(DependencyNode node) {
+		
+		StringBuffer content = new StringBuffer();
+
+		if ((node.getContent() != null) && (!node.getContent().isEmpty())) {
+			CustomFileReader contentFile;
+			try {
+				contentFile = new CustomFileReader(node.getContent());
+			} catch (FileNotFoundException e) {
+				System.out.println("Content file not found "
+						+ node.getContent());
+				e.printStackTrace();
+				return null;
 			}
 
-			writer.close();
-			store.close();
-
-			return input;
-
-		} catch (IOException e) {
-			System.out
-					.println("IOException: Indexing of files failed for directory: "
-							+ rootpath);
-			e.printStackTrace();
-
-			return input;
+			while (true) {
+				String line = contentFile.readLine();
+				if (line == null)
+					break;
+				content.append(line);
+			}
+			contentFile.close();
 		}
+		return content;
 	}
+	
+	/**
+	 * Builds the Lucene document that represents a node in the index.
+	 * The node's complete file path is stored in both the "name" and the
+	 * "path" field; the text goes into "contents" only when there is any.
+	 *
+	 * @param node    the node being indexed
+	 * @param content the text read for the node; may be null or empty, in
+	 *                which case no "contents" field is added
+	 * @return the document, always carrying the "name" and "path" fields
+	 */
+	public Document createLuceneDocument (DependencyNode node, StringBuffer content){
+		Document doc = new Document();
+		doc.add(new StringField("name", node.getCompleteFilepath(),
+				Field.Store.YES));
+		doc.add(new StringField("path", node.getCompleteFilepath(),
+				Field.Store.YES));
+		// Index the text only when there is some. The earlier test
+		// (length() == 0) was inverted: it stored empty text and reported
+		// every file that did have content as "No content".
+		if (content != null && content.length() > 0) {
+			doc.add(new TextField("contents", content.toString(),
+					Field.Store.YES));
+		}
+		else {
+			System.out.println("No content in file "
+					+ node.getCompleteFilepath());
+		}
+
+		return doc;
+	}
+	
+	public IndexWriter createIndexWriter(FSDirectory store) throws IOException {
 
-	public DependencyGraph assignLuceneNumbers(DependencyGraph input) {
-
-		File indexDir = new File(ConfigurationDefaults.RELATIVE_INDEX_DIR);
-		FSDirectory store;
-		try {
-			store = SimpleFSDirectory.open(indexDir);
-
-			IndexReader reader = DirectoryReader.open(store);
-			IndexSearcher searcher = new IndexSearcher(reader);
-			int numdocs = reader.numDocs();
-
-			for (int i = 0; i < numdocs; i++) {
-				Document doc = searcher.doc(i);
-				String key = doc.getField("name").stringValue();
-				DependencyNode d = input.get(key);
-
-				d.setLuceneDocNumber(i);
-				input.put(d.getCompleteFilepath(), d);
-			}
-
-			reader.close();
-			store.close();
+		Analyzer analyzer = new CustomAnalyzer(
+				ConfigurationDefaults.LUCENE_VERSION);
+		IndexWriterConfig config = new IndexWriterConfig(
+				ConfigurationDefaults.LUCENE_VERSION, analyzer);
 
-		} catch (IOException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
+		IndexWriter writer = new IndexWriter(store, config);
 
-		return input;
+		return writer;
 
 	}
 	
+
 }
 	
 

File src/main/java/nl/vu/recoprov/SearchCache.java

 package nl.vu.recoprov;
 
-import com.dropbox.client2.exception.DropboxException;
-import com.dropbox.client2.session.Session;
-import com.dropbox.client2.session.WebAuthSession;
 import com.dropbox.client2.session.AppKeyPair;
 import com.dropbox.client2.session.AccessTokenPair;
-import com.dropbox.client2.DropboxAPI;
-import com.dropbox.client2.DropboxAPI.DeltaEntry;
-
-
 import com.dropbox.client2.jsonextract.*;
-
-import nl.vu.recoprov.utils.ConfigurationDefaults;
-
 import org.json.simple.JSONArray;
 import org.json.simple.JSONObject;
 import org.json.simple.parser.JSONParser;
 import org.json.simple.parser.ParseException;
-
 import java.io.*;
-import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 
 public class SearchCache
 {
-    
-
-    public static void main(String[] args)
-        throws DropboxException
-    {
-        if (args.length == 0) {
-            printUsage(System.out);
-            throw die();
-        }
-
-        String command = args[0];
-        if (command.equals("link")) {
-            doLink(args);
-        }
-        else if (command.equals("update")) {
-            doUpdate(args);
-        }
-        else if (command.equals("find")) {
-            doFind(args);
-        }
-        else if (command.equals("reset")) {
-            doReset(args);
-        }
-        else {
-            System.err.println("ERROR: Unknown command: \"" + command + "\"");
-            System.err.println("Run with no arguments for help.");
-            throw die();
-        }
-    }
-    
-   
-    
-
-    private static void doLink(String[] args)
-        throws DropboxException
-    {
-        if (args.length != 3) {
-            throw die("ERROR: \"link\" takes exactly two arguments.");
-        }
-
-        AppKeyPair appKeyPair = new AppKeyPair(args[1], args[2]);
-        WebAuthSession was = new WebAuthSession(appKeyPair, Session.AccessType.APP_FOLDER);
-
-        // Make the user log in and authorize us.
-        WebAuthSession.WebAuthInfo info = was.getAuthInfo();
-        System.out.println("1. Go to: " + info.url);
-        System.out.println("2. Allow access to this app.");
-        System.out.println("3. Press ENTER.");
-
-        try {
-            while (System.in.read() != '\n') {}
-        }
-        catch (IOException ex) {
-            throw die("I/O error: " + ex.getMessage());
-        }
-
-        // This will fail if the user didn't visit the above URL and hit 'Allow'.
-        was.retrieveWebAccessToken(info.requestTokenPair);
-        AccessTokenPair accessToken = was.getAccessTokenPair();
-        System.out.println("Link successful.");
-
-        // Save state
-        State state = new State(appKeyPair, accessToken, new Content.Folder());
-        state.save(ConfigurationDefaults.STATE_FILE);
-    }
-    
-   
-
-    private static void doUpdate(String[] args)
-        throws DropboxException
-    {
-        int pageLimit;
-        if (args.length == 2) {
-            pageLimit = Integer.parseInt(args[1]);
-        }
-        else if (args.length == 1) {
-            pageLimit = -1;
-        }
-        else {
-            throw die("ERROR: \"update\" takes either zero or one arguments.");
-        }
-
-        // Load state.
-        State state = State.load(ConfigurationDefaults.STATE_FILE);
-
-        // Connect to Dropbox.
-        WebAuthSession session = new WebAuthSession(state.appKey, WebAuthSession.AccessType.APP_FOLDER);
-        session.setAccessTokenPair(state.accessToken);
-        DropboxAPI<?> client = new DropboxAPI<WebAuthSession>(session);
-
-        int pageNum = 0;
-        boolean changed = false;
-        String cursor = state.cursor;
-        while (pageLimit < 0 || (pageNum < pageLimit)) {
-            // Get /delta results from Dropbox
-            DropboxAPI.DeltaPage<DropboxAPI.Entry> page = client.delta(cursor);
-            pageNum++;
-            if (page.reset) {
-                state.tree.children.clear();
-                changed = true;
-            }
-            // Apply the entries one by one.
-            for (DeltaEntry<DropboxAPI.Entry> e : page.entries) {
-                applyDelta(state.tree, e);
-                changed = true;
-            }
-            cursor = page.cursor;
-            if (!page.hasMore) break;
-        }
-
-        // Save state.
-        if (changed) {
-            state.cursor = cursor;
-            state.save(ConfigurationDefaults.STATE_FILE);
-        }
-        else {
-            System.out.println("No updates.");
-        }
-    }
-
-    private static void printUsage(PrintStream out)
-    {
-        out.println("Usage:");
-        out.println("    ./run link <app-key> <secret>  Link a user's account to the given app.");
-        out.println("    ./run update                   Update cache to the latest on Dropbox.");
-        out.println("    ./run update <num>             Update cache, limit to <num> pages of updates.");
-        out.println("    ./run find <term>              Search cache for <term> (case-sensitive).");
-        out.println("    ./run find                     Display entire cache.");
-        out.println("    ./run reset                    Delete the cache.");
-    }
-
+ 
     private static RuntimeException die(String message)
     {
         System.err.println(message);
         return new RuntimeException();
     }
 
-    // ------------------------------------------------------------------------
-    // Apply delta entries to the tree.
-
-    private static void applyDelta(Content.Folder parent, DeltaEntry<DropboxAPI.Entry> e)
-    {
-        Path path = Path.parse(e.lcPath);
-        DropboxAPI.Entry md = e.metadata;
-
-        if (md != null) {
-            System.out.println("+ " + e.lcPath);
-            // Traverse down the tree until we find the parent of the entry we
-            // want to add.  Create any missing folders along the way.
-            for (String b : path.branch) {
-                Node n = getOrCreateChild(parent, b);
-                if (n.content instanceof Content.Folder) {
-                    parent = (Content.Folder) n.content;
-                } else {
-                    // No folder here, automatically create an empty one.
-                    n.content = parent = new Content.Folder();
-                }
-            }
-
-            // Create the file/folder here.
-            Node n = getOrCreateChild(parent, path.leaf);
-            n.path = md.path;  // Save the un-lower-cased path.
-            if (md.isDir) {
-                // Only create an empty folder if there isn't one there already.
-                if (!(n.content instanceof Content.Folder)) {
-                    n.content = new Content.Folder();
-                }
-            }
-            else {
-                n.content = new Content.File(md.size, md.modified);
-            }
-        }
-        else {
-            System.out.println("- " + e.lcPath);
-            // Traverse down the tree until we find the parent of the entry we
-            // want to delete.
-            boolean missingParent = false;
-            for (String b : path.branch) {
-                Node n = parent.children.get(b);
-                if (n != null && n.content instanceof Content.Folder) {
-                    parent = (Content.Folder) n.content;
-                } else {
-                    // If one of the parent folders is missing, then we're done.
-                    missingParent = true;
-                    break;
-                }
-            }
-
-            if (!missingParent) {
-                parent.children.remove(path.leaf);
-            }
-        }
-    }
-
-    private static Node getOrCreateChild(Content.Folder folder, String lowercaseName)
-    {
-        Node n = folder.children.get(lowercaseName);
-        if (n == null) {
-            folder.children.put(lowercaseName, n = new Node(null, null));
-        }
-        return n;
-    }
 
     /**
      * Represent a path as a list of ancestors and a leaf name.
         }
     }
 
-    // ------------------------------------------------------------------------
-    // Search through the tree.
-
-    private static void doFind(String[] args)
-        throws DropboxException
-    {
-        String term;
-        if (args.length == 1) {
-            term = "";
-        }
-        else if (args.length == 2) {
-            term = args[1];
-        }
-        else {
-            throw die("ERROR: \"find\" takes either zero or one arguments");
-        }
-
-        // Load cached state.
-        State state = State.load(ConfigurationDefaults.STATE_FILE);
-
-        ArrayList<String> results = new ArrayList<String>();
-        searchTree(results, state.tree, term);
-        for (String r : results) {
-            System.out.println(r);
-        }
-        if (results.isEmpty()) {
-            System.out.println("[No matches.]");
-        }
-    }
-
-    private static void searchTree(ArrayList<String> results, Content.Folder tree, String term)
-    {
-        for (Map.Entry<String,Node> child : tree.children.entrySet()) {
-            Node n = child.getValue();
-            String path = n.path;
-            if (path != null && path.contains(term)) {
-                if (n.content instanceof Content.Folder) {
-                    results.add(path);
-                }
-                else if (n.content instanceof Content.File) {
-                    Content.File f = (Content.File) n.content;
-                    results.add(path + " (" + f.size + ", " + f.lastModified + ")");
-                }
-                else {
-                    throw new AssertionError("bad type: " + n.content);
-                }
-            }
-            // Recurse on children.
-            if (n.content instanceof Content.Folder) {
-                Content.Folder f = (Content.Folder) n.content;
-                searchTree(results, f, term);
-            }
-        }
-    }
-
-    // ------------------------------------------------------------------------
-    // Reset state
-
-    private static void doReset(String[] args)
-        throws DropboxException
-    {
-        if (args.length != 1) {
-            throw die("ERROR: \"reset\" takes no arguments");
-        }
-
-        // Load state.
-        State state = State.load(ConfigurationDefaults.STATE_FILE);
-
-        // Clear state.
-        state.tree.children.clear();
-        state.cursor = null;
-
-        // Save state back.
-        state.save(ConfigurationDefaults.STATE_FILE);
-    }
 
     // ------------------------------------------------------------------------
     // State model (load+save to JSON)

File src/main/java/nl/vu/recoprov/TikaReader.java

 					continue;
 				}
 
-
 				if (ConfigurationDefaults.checkBlackList(filename, params)) {
 					continue;
 				}
 							new Date(f.getParentFile().lastModified()));
 				}
 
-				depNodeMap.put(filepath, d);
+				//depNodeMap.put(filepath, d);
 				//
 				// System.out.println("-----------------------------------------------------");
 				// System.out.println(filename);

File src/main/java/nl/vu/recoprov/abstractclasses/SignalFilterer.java

  * Abstract class for the signal filter phase
  */
 import nl.vu.recoprov.baseclasses.DependencyGraph;
+import nl.vu.recoprov.baseclasses.DependencyNode;
 
 public abstract class SignalFilterer {
 
-	public abstract DependencyGraph filterSignals(DependencyGraph input);
+	/**
+	 * Public entry point of the filter.
+	 * Given an input DependencyGraph, it returns the filtered version; this
+	 * implementation simply delegates to {@link #filterGraph(DependencyGraph)}.
+	 * @param input the graph to filter
+	 * @return whatever filterGraph returns for the given input
+	 */
+	public DependencyGraph filterSignals(DependencyGraph input){
+		return filterGraph(input);
+	}
+	
+	/**
+	 * Applies {@link #filterNode(DependencyGraph, DependencyNode)} to every
+	 * node stored in the graph, looked up by key, and hands back the same graph.
+	 * The value returned by filterNode is not used here.
+	 * @param input the graph whose nodes are visited
+	 * @return the same instance that was passed in
+	 */
+	public DependencyGraph filterGraph (DependencyGraph input){
+		for (String key : input.keySet()) {
+			filterNode(input, input.get(key));
+		}
+		return input;
+	}
+	
+	public DependencyGraph filterNode (DependencyGraph input, DependencyNode d){ // per-node step, called by filterGraph for each node
+		return input; // base implementation changes nothing and returns the graph it was given
+	}
 }

File src/main/java/nl/vu/recoprov/baseclasses/DependencyGraph.java

 		
 		Set<String> newAttributes = value.getAttributes();
 		attributes.addAll(newAttributes);
-
 		return value;
 	}
 	
 	public Set<String> getAttributes(){
 		return attributes;
 	}
-	
-//	public getSimilarities(){
-//		
-//	}
+
 
 	
 	public HashMap<Integer, ArrayList<LabelledEdge>>  getIncidencyMatrix(){

File src/main/java/nl/vu/recoprov/baseclasses/DependencyNode.java

 package nl.vu.recoprov.baseclasses;
 
-
-import java.util.LinkedHashMap;
-import java.util.Map;
 import java.util.Set;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.tika.metadata.Metadata;
 import com.dropbox.client2.DropboxAPI.Entry;
 
 
-public  class DependencyNode {
-	
-	//TODO: include into metadata
-	private String completefilepath;
+public class DependencyNode {
 
-	//private String directory;
-	
-	private DependencyGraph depGraph;
-	
 	public static final String DELIMITER = "&";
-	private long compressedSize = 0;
-
 
+	private String completefilepath;
+	private DependencyGraph depGraph;
 	private ScoreDoc[] luceneSimilarity = null;
-	//TODO: remove
-	@Deprecated
-	private Map<String, Map <DependencyNode,Float>> depNodeSimilarity= new LinkedHashMap<String,  Map<DependencyNode, Float>>();
-	
 	private String content = null;
-	
 	private RecoMetadata recoMetadata = new RecoMetadata();
 
-	public DependencyNode(DependencyGraph d){
+	public DependencyNode(DependencyGraph d) {
 		setDepGraph(d);
 	}
-	
-	
+
 	public String getCompleteFilepath() {
 		return completefilepath;
 	}
+
 	public void setCompleteFilepath(String filepath) {
-		
 		this.completefilepath = filepath;
-
-		
 	}
+
 	public RecoMetadata getMetadata() {
 		return this.recoMetadata;
 	}
+
 	public void setMetadata(Metadata metadata) {
 		recoMetadata.setTikaMetadata(metadata);
 	}
-	
+
 	public void addtoMetadata(String name, String value) {
 		this.recoMetadata.add(name, value);
 	}
-	
+
 	public Entry getDropboxEntry() {
 		return recoMetadata.getDropboxEntry();
 	}
+
 	public void setDropboxEntry(Entry dropboxEntry) {
 		recoMetadata.setDropboxEntry(dropboxEntry);
-
-		
 	}
-	
+
 	public String getDropboxFilepath() {
 		return recoMetadata.getDropboxPath();
 	}
 
+	public String toCSVString(Set<String> listOfNames) {
+		String temp = this.recoMetadata.toCSVString(listOfNames);
 
-	public String toString(){
-		String temp = "##########################################";
-		temp+="\nMetadata: "+ this.completefilepath + " \n";
-
-		for (String name: this.recoMetadata.names()){
-			temp+= name + ": " + this.recoMetadata.get(name) + "\n";
-		}
-	
-		
-		if (this.depNodeSimilarity!=null){
-
-			
-			for (String typeOfSignal: this.depNodeSimilarity.keySet()){
-				temp+="\n### " + typeOfSignal+ ":";
-				for (DependencyNode d: this.depNodeSimilarity.get(typeOfSignal).keySet()){
-					temp+= "\n " + d.getCompleteFilepath()+" " + this.depNodeSimilarity.get(typeOfSignal).get(d);
-				}
-			}
-
-
-		}
-		
-		if (this.content!= null){
-			temp+= "Contentfilename: " + content ;
-			//temp+="[first 100 chars]:" + content.substring(0,content.length()>100?100:content.length()) + "\n";
-		}
-	
-		
-		
-		temp+="\n";
-		
-		return temp; 
-	}
-	
-	
-	public String toCSVString(Set<String> listOfNames){
-		String temp = this.recoMetadata.toCSVString(listOfNames); 
-		
-		temp += this.completefilepath.replaceAll(DELIMITER, "_") + "\n";	
-		return temp; 
+		temp += this.completefilepath.replaceAll(DELIMITER, "_") + "\n";
+		return temp;
 	}
-	
-	public Set<String> getAttributes(){
-		
-		return recoMetadata.getAttributes();
-
 
+	public Set<String> getAttributes() {
+		return recoMetadata.getAttributes();
 	}
-	
-	
+
 	public String getContent() {
 		return content;
 	}
+
 	public void setContent(String content) {
 		this.content = content;
-		//this.contentFilename = this.completefilepath +".content";
-		
 	}
-	
-	public int getId(){
+
+	public int getId() {
 		return getLuceneDocNumber();
 	}
-	
+
 	public int getLuceneDocNumber() {
 		return this.recoMetadata.getLuceneDocNumber();
 	}
+
 	public void setLuceneDocNumber(int luceneDocNumber) {
 		this.recoMetadata.setLuceneDocNumber(luceneDocNumber);
-		
-		//System.out.println("translation: " +luceneDocNumber + "-" + this.getCompleteFilepath());
-		
+
+		// System.out.println("translation: " +luceneDocNumber + "-" +
+		// this.getCompleteFilepath());
+
 		depGraph.addTranslation(luceneDocNumber, this.getCompleteFilepath());
 	}
-	
-	
-	public void setDepGraph(DependencyGraph d){
+
+	public void setDepGraph(DependencyGraph d) {
 		depGraph = d;
 	}
-	
-	
+
 	public ScoreDoc[] getLuceneSimilarity() {
 		return luceneSimilarity;
 	}
+
 	public void setLuceneSimilarity(ScoreDoc[] luceneSimilarity) {
 		this.luceneSimilarity = luceneSimilarity;
 	}
-	
-	@Deprecated
-	public Map<DependencyNode,Float> getDepNodeSimilarity(String typeOfSignal) {
-		return depNodeSimilarity.get(typeOfSignal);
-	}
 
-	@Deprecated
-	public void addDepNodeSimilarity(String typeOfSignal, DependencyNode node, Float score) {
-		
-		Map <DependencyNode,Float> temp = depNodeSimilarity.get(typeOfSignal);
-		if (temp == null) {
-			temp = new LinkedHashMap<DependencyNode,Float>();
-		}
-		temp.put(node, score);
-		depNodeSimilarity.put(typeOfSignal, temp);
-	}
-	
 	public String getMimeType() {
 		return this.recoMetadata.getMimeType();
 	}
 
-	public long getSize(){
+	public long getSize() {
 		return this.recoMetadata.getSize();
 	}
 	
-	public DependencyNode copyInGraph(DependencyGraph depGraph){
+	/**
+	 * Reports the compressed size recorded for this node.
+	 *
+	 * @return the compressed size held by this node's metadata
+	 */
+	public long getCompressedSize() {
+		final long storedSize = recoMetadata.getCompressedSize();
+		return storedSize;
+	}
+
+	/**
+	 * Records the compressed size of this node by handing it to the node's
+	 * metadata.
+	 *
+	 * @param zipFile the compressed size to store
+	 */
+	public void setCompressedSize(long zipFile) {
+		recoMetadata.setCompressedSize(zipFile);
+	}
+
+	public DependencyNode copyInGraph(DependencyGraph depGraph) {
+
 		DependencyNode d = new DependencyNode(depGraph);
-		
 		d.setContent(this.getContent());
 		d.setCompleteFilepath(this.getCompleteFilepath());
 		d.setDropboxEntry(getDropboxEntry());
 		d.setLuceneSimilarity(getLuceneSimilarity());
 		d.setLuceneDocNumber(getLuceneDocNumber());
 		d.setMetadata(getMetadata());
-		
 		return d;
-		
-
-		
-	}
-	public long getCompressedSize() {
-		 return compressedSize;
-		
 	}
+	
+	/**
+	 * Builds a multi-line, human-readable dump of this node: a banner line,
+	 * the complete file path, one "name: value" line per metadata entry and,
+	 * when content is set, a trailing "Contentfilename" entry.
+	 *
+	 * @return the textual description, always terminated by a newline
+	 */
+	@Override
+	public String toString() {
+		// A StringBuilder avoids re-copying the growing text for every
+		// metadata entry, which repeated String += does.
+		StringBuilder description = new StringBuilder(
+				"##########################################");
+		description.append("\nMetadata: ").append(this.completefilepath)
+				.append(" \n");
+
+		for (String name : this.recoMetadata.names()) {
+			description.append(name).append(": ")
+					.append(this.recoMetadata.get(name)).append("\n");
+		}
+
+		if (this.content != null) {
+			description.append("Contentfilename: ").append(content);
+		}
+
+		description.append("\n");
+		return description.toString();
+	}
+
+
 }

File src/main/java/nl/vu/recoprov/baseclasses/RecoMetadata.java

 public class RecoMetadata extends Metadata{
 	
 	private static final long serialVersionUID = 1L;
-
 	public final static String LUCENE_DOCID = "lucene-docid";
-	
 	public final static String DROPBOX_PATH = "dropbox-path";
-	
+	public final static String DROPBOX_REVISION = "dropbox-revision";
+	public final static String DROPBOX_CLIENTMTIME = "dropbox-clientMtime";
+	public final static String DROPBOX_BYTES = "dropbox-bytes";
+	public final static String DROPBOX_SIZE = "dropbox-size";
+	public final static String DROPBOX_MIMETYPE = "dropbox-mimeType";
 	public final static String FILESYSTEM_LASTMODIFIED = "filesystem-last-modified";
-
+	public final static String COMPRESSED_SIZE = "compressed-size";	
 	private static final String FILESYSTEM_DATELASTMODIFIED = "filesystem-dir-last-modified";
 	
 	//dropbox
 	
 	private Set<String> images = null;
 	
+
+	
 //	private ContentType contentType;
 	
 //	private SemanticType semanticType;
 		
 		this.setModified(RESTUtility.parseDate(dropboxEntry.modified));
 		this.set(DROPBOX_PATH, dropboxEntry.path);
-		this.set("dropbox-revision", dropboxEntry.rev);
-		this.set(Property.externalDate("dropbox-clientMtime"), dropboxEntry.clientMtime);
-		this.set(Property.externalReal("dropbox-bytes"), (double) dropboxEntry.bytes);
-		this.set("dropbox-mimeType", dropboxEntry.mimeType);
-		this.set("dropbox-size", dropboxEntry.size);
+		this.set(DROPBOX_REVISION, dropboxEntry.rev);
+		this.set(Property.externalDate(DROPBOX_CLIENTMTIME), dropboxEntry.clientMtime);
+		this.set(Property.externalReal(DROPBOX_BYTES), (double) dropboxEntry.bytes);
+		this.set(DROPBOX_MIMETYPE, dropboxEntry.mimeType);
+		this.set(DROPBOX_SIZE, dropboxEntry.size);
 		
 	}
 	
 			return tikaMetadata.get(Metadata.CONTENT_TYPE);
 		}	
 		else if (this.DropboxEntry!= null){
-			return this.DropboxEntry.mimeType;
+			return this.get(DROPBOX_MIMETYPE);
 		}
 		else{
 			return "text/plain";
 	}
 	
 	public String getDropboxPath(){
-		return this.get("dropbox-path");
+		return this.get(DROPBOX_PATH);
 	}
 	
 	public String getRevision(){
-		return this.get("dropbox-revision");
+		return this.get(DROPBOX_REVISION);
 	}
 	
 
 	public void setLuceneDocNumber(int luceneDocNumber) {
-		this.set(Property.externalInteger(LUCENE_DOCID),luceneDocNumber );
+		this.set(Property.externalInteger(LUCENE_DOCID), luceneDocNumber );
 
 	}
 
 	public  HashMap<String, IImageMetadata> getIImageMetadata(){
 		return imageIImageMetadata;
 	}
+	
+	/**
+	 * Returns the compressed size stored under {@link #COMPRESSED_SIZE}.
+	 *
+	 * @return the stored value, or 0 when no integer value is available
+	 */
+	public long getCompressedSize() {
+		// getInt hands back a boxed Integer, which is null when the property
+		// has not been set; unboxing it directly would throw a
+		// NullPointerException, so fall back to 0 in that case.
+		Integer stored = this.getInt(Property.externalInteger(COMPRESSED_SIZE));
+		return stored == null ? 0L : stored.longValue();
+	}
+	
+	/**
+	 * Stores the compressed size under {@link #COMPRESSED_SIZE}.
+	 *
+	 * @param compressedSize the size to store; it must fit in an int because
+	 *            the value is read back through getInt
+	 * @throws IllegalArgumentException if the value is outside the int range
+	 */
+	public void setCompressedSize(long compressedSize) {
+		if (compressedSize < Integer.MIN_VALUE || compressedSize > Integer.MAX_VALUE) {
+			throw new IllegalArgumentException(
+					"compressed size out of int range: " + compressedSize);
+		}
+		// The explicit cast pins the call to the int overload of set, matching
+		// the integer property. NOTE(review): a bare long argument may bind to
+		// a different overload depending on the Tika version - confirm.
+		this.set(Property.externalInteger(COMPRESSED_SIZE), (int) compressedSize);
+	}
 			
 	
 	public String convertStreamToString(InputStream is) throws IOException {

File src/main/java/nl/vu/recoprov/experiments/CorpusGeneratorBiomed.java

 		
 
 		// System.out.println("### THIRD PHASE: Index the contents with Apache Lucene. \n");
-		depGraph = pipeline.indexFiles(depGraph, dir);
+		pipeline.indexFiles(depGraph, dir);
 		
 
 		createEntityFromDepGraph(factory, depGraph);

File src/main/java/nl/vu/recoprov/experiments/CorpusGeneratorProvDM.java

 		CompletePipeline pipeline = new CompletePipeline(false, dir);
 		
 		depGraph = pipeline.initDependencyGraph(); 
-		depGraph = pipeline.loadMetadaAndIndexes(depGraph);
+		pipeline.loadMetadaAndIndexes(depGraph);
 		createEntityFromDepGraph(factory, depGraph);
 		
 		manualAnnotation();

File src/main/java/nl/vu/recoprov/experiments/Experiment1.java

 
 		try {
 			CorpusGeneratorProvDM.depGraph = pipeline.initDependencyGraph();
-			CorpusGeneratorProvDM.depGraph = pipeline.indexFiles(CorpusGeneratorProvDM.depGraph);
+			pipeline.indexFiles(CorpusGeneratorProvDM.depGraph);
 					
 			
 		} catch (Exception e) {
 		
 		try {
 			depGraphDerived = pipeline.initDependencyGraph();
-			depGraphDerived = pipeline.loadMetadaAndIndexes(depGraphDerived);
+			pipeline.loadMetadaAndIndexes(depGraphDerived);
 		} catch (Exception e) {
 			// TODO Auto-generated catch block
 			e.printStackTrace();

File src/main/java/nl/vu/recoprov/experiments/Experiment2.java

 
 		try {
 			CorpusGeneratorBiomed.depGraph = pipeline.initDependencyGraph();
-			CorpusGeneratorBiomed.depGraph = pipeline.indexFiles(CorpusGeneratorBiomed.depGraph,dir);
+			pipeline.indexFiles(CorpusGeneratorBiomed.depGraph,dir);
 
 			
 		} catch (Exception e) {
 			
 			try {
 				baselineGraph = pipeline.initDependencyGraph();
-				baselineGraph = pipeline.loadMetadaAndIndexes(baselineGraph);
+				pipeline.loadMetadaAndIndexes(baselineGraph);
 			} catch (Exception e) {
 				// TODO Auto-generated catch block
 				e.printStackTrace();

File src/main/java/nl/vu/recoprov/experiments/Experiment3.java

 package nl.vu.recoprov.experiments;
+
 /**
  * Plagiarism detection experiment using the PAN 2012 corpus
  */
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
-
 import nl.vu.recoprov.CompletePipeline;
 import nl.vu.recoprov.ProvDMtranslator;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.signalfilters.TextContainmentFilter;
 import nl.vu.recoprov.utils.ConfigurationDefaults;
 import nl.vu.recoprov.utils.TransitiveClosure;
-		
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.dropbox.client2.exception.DropboxException;
 
 public class Experiment3 {
-	
+
 	private static String dirfile = "pan12-detailed-comparison-training-corpus/";
 	private static String jsonfile = "pan.json";
-	
+
 	private static DependencyGraph baselineGraph = null;
 	private static DependencyGraph depGraph = null;
-	private static ProvDMtranslator provtranslator = new  ProvDMtranslator();
-
-	
-	
-	
-	
-	public static void main(String[] args) throws Exception {
+	private static ProvDMtranslator provtranslator = new ProvDMtranslator();
+	private static Logger logger;
 
-		System.out.println("Experiment 3: Plagiarism Detection Corpus");
+	/**
+	 * Runs experiment 3 on the plagiarism detection corpus.
+	 *
+	 * After validating the corpus directory and JSON file, it builds a graph
+	 * from the Lucene similarity signal and a reference graph from the PROV
+	 * description. For every threshold it then filters and aggregates copies
+	 * of the signal graph in several filter combinations and writes each
+	 * comparison against the reference graph to a timestamped results file.
+	 * The same is repeated with the Lucene "more like this" signal.
+	 *
+	 * @param args not used
+	 * @throws IOException if the results file cannot be opened or closed
+	 * @throws DropboxException propagated from the graph construction
+	 */
+	public static void main(String[] args) throws IOException, DropboxException {
 
-		File dir = new File(dirfile);
-		dirfile = dir.getAbsolutePath();
+		logger = LoggerFactory
+				.getLogger("nl.vu.recoprov.experiments.Experiment3");
+		
+		if (!checkInitialParameters()){
+			// if anything is wrong with the two parameters, stop with a
+			// non-zero status so callers can tell the run did not happen
+			System.exit(1);
+		}
+		
+		String resultsfilename = "results" + System.currentTimeMillis() + ".txt";
+		FileWriter writer = createFileResultsWriter(resultsfilename);
 
-		File json = new File(jsonfile);
-		jsonfile = json.getAbsolutePath();
+		DependencyGraph depGraphLucene = createGraph();
+		new LuceneSimilaritySignal().computeSignal(depGraphLucene);
+		logger.info("Lucene graph created.");
 
 		PROVReader provreader = new PROVReader(dirfile, jsonfile);
-
-		File logfile = new File("log" + System.currentTimeMillis() + ".txt");
-		FileWriter writer = new FileWriter(logfile);
-
-		
-		DependencyGraph depGraphLucene = createGraph();
-		depGraphLucene = new LuceneSimilaritySignal().computeSignal(depGraphLucene);
-		
-		DependencyGraph depGraphLuceneMore = createGraph();
-		depGraphLuceneMore = new LuceneMoreLikeThisSignal().computeSignal(depGraphLuceneMore);
-		
 		depGraph = provreader.generatePANDepGraph();
 		provtranslator.translate(depGraph, "graphCorpus.gv");
-		System.out.println("Done with reference graph");
+		logger.info("Reference graph created.");
 
-		// Compare with Transitive Closure
-//		DependencyGraph depGraphT = new TransitiveClosure()
-//				.aggregateSignals(depGraph);
-//		provtranslator.translate(depGraphT, "graphCorpus_Trans.gv");
 
-		
+		LuceneThresholdFilter luceneThresholdFilter = new LuceneThresholdFilter();
+		WeightedSumAggregator aggregator = new WeightedSumAggregator();
+		TextContainmentFilter textContainmentFilter = new TextContainmentFilter();
+		PlagiarismCorpusSpecificFilter plagiarismCorpusSpecificFilter = new PlagiarismCorpusSpecificFilter();
+
 		// useful for small experiments
-		double[] thresholds = { 0, 0.01, 0.05, 0.1, 0.2};
+		double[] thresholds = { 0, 0.01, 0.05, 0.1, 0.2 };
 
-		//double[] thresholds = { 0.05};
+		// double[] thresholds = { 0.05};
 		for (double threshold : thresholds) {
 
 			ConfigurationDefaults.LUCENE_THRESHOLD = threshold;
-			DependencyGraph depGraphLuceneThreshold = depGraphLucene.copyGraph();
-			depGraphLuceneThreshold = new LuceneThresholdFilter().filterSignals(depGraphLuceneThreshold);
-			
-			
+
+			DependencyGraph depGraphLuceneThreshold = depGraphLucene
+					.copyGraph();
+			luceneThresholdFilter.filterSignals(depGraphLuceneThreshold);
+
 			DependencyGraph depGraph1 = depGraphLuceneThreshold.copyGraph();
-			depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
-			System.out.println("Done with baseline graph (Lucene)");
+			aggregator.aggregateSignals(depGraph1);
 			writeResults(depGraph1, "Lucene", writer);
 
-
 			depGraph1 = depGraphLuceneThreshold.copyGraph();
-			depGraph1 = new TextContainmentFilter().filterSignals(depGraph1);
-			depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
-			System.out.println("Done with Lucene + Filter");
+			textContainmentFilter.filterSignals(depGraph1);
+			aggregator.aggregateSignals(depGraph1);
 			writeResults(depGraph1, "LuceneFilter", writer);
 
 			depGraph1 = depGraphLuceneThreshold.copyGraph();
-			depGraph1 = new PlagiarismCorpusSpecificFilter().filterSignals(depGraph1);
-			depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
-			System.out.println("Done with Lucene + Domain Specific Filter");
+			plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
+			aggregator.aggregateSignals(depGraph1);
 			writeResults(depGraph1, "LucenePANFilter", writer);
-			
+
 			depGraph1 = depGraphLuceneThreshold.copyGraph();
-			depGraph1 = new TextContainmentFilter().filterSignals(depGraph1);
-			depGraph1 = new PlagiarismCorpusSpecificFilter().filterSignals(depGraph1);
-			depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
-			System.out.println("Done with Lucene + Filter + Domain Specific Filter");
+			textContainmentFilter.filterSignals(depGraph1);
+			plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
+			aggregator.aggregateSignals(depGraph1);
 			writeResults(depGraph1, "LucenePAN2Filters", writer);
+
+		}
+		
+		
+		// trying with Lucene More Like This
+		
+		DependencyGraph depGraphLuceneMore = createGraph();
+		new LuceneMoreLikeThisSignal().computeSignal(depGraphLuceneMore);
+		logger.info("LuceneMoreLikeThis graph created.");
+		
+		for (double threshold : thresholds) {
+
+			ConfigurationDefaults.LUCENE_THRESHOLD = threshold;
+
+			DependencyGraph depGraphLuceneMoreThreshold = depGraphLuceneMore
+					.copyGraph();
+			luceneThresholdFilter.filterSignals(depGraphLuceneMoreThreshold);
 			
-			
-			depGraphLuceneThreshold = depGraphLuceneMore.copyGraph();
-			depGraph1 = new LuceneThresholdFilter().filterSignals(depGraph1);
-			depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
-			System.out.println("Done with Lucene more like this");
+			DependencyGraph depGraph1 = depGraphLuceneMoreThreshold.copyGraph();
+			aggregator.aggregateSignals(depGraph1);
 			writeResults(depGraph1, "LuceneMoreLikeThis", writer);
 
-			
-			depGraph1 = depGraphLuceneMore.copyGraph();
-			depGraph1 = new LuceneThresholdFilter().filterSignals(depGraph1);
-			depGraph1 = new TextContainmentFilter().filterSignals(depGraph1);
-			depGraph1 = new PlagiarismCorpusSpecificFilter().filterSignals(depGraph1);
-			depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
-			System.out.println("Done with LuceneMoreLikeThis + Filter + Domain Specific Filter");
+			depGraph1 = depGraphLuceneMoreThreshold.copyGraph();
+			textContainmentFilter.filterSignals(depGraph1);
+			plagiarismCorpusSpecificFilter.filterSignals(depGraph1);
+			// aggregate like every other variant does before its results are
+			// written; the pre-refactoring code aggregated here as well
+			aggregator.aggregateSignals(depGraph1);
 			writeResults(depGraph1, "LuceneMoreLikeThisPAN2Filters", writer);
-			
-//			depGraph1 = createGraph();
-//			depGraph1 = new CompressionDistanceSignal().computeSignal(depGraph1);
-//			depGraph1 = new TextContainmentFilter().filterSignals(depGraph1);
-//			depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
-//			System.out.println("Done with compression distance + Filter");
-//			writeResults(depGraph1, "CompressionFilter", writer);
-//			
-//			depGraph1 = createGraph();
-//			depGraph1 = new CompressionDistanceSignal().computeSignal(depGraph1);
-//			depGraph1 = new LuceneSimilaritySignal().computeSignal(depGraph1);
-//			depGraph1 = new TextContainmentFilter().filterSignals(depGraph1);
-//			depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
-//			System.out.println("Done with compression distance + Lucene + Filter");
-//			writeResults(depGraph1, "CompressionLuceneFilter", writer);
+		}
 
 
-		}
-		
-		
 		writer.flush();
 		writer.close();
 
 	}
-	
-	private static void writeResults(DependencyGraph predicted, String message, FileWriter writer) throws IOException{
-		writer.append("\n\n"+ message +" \nLucene Threshold: " + ConfigurationDefaults.LUCENE_THRESHOLD + "\n");
+
+	/**
+	 * Compares a predicted graph with the reference graph and records the
+	 * outcome.
+	 *
+	 * The comparison text is appended to the results writer under a header
+	 * naming the experiment and the current Lucene threshold, and the
+	 * predicted graph is exported to "graph" + message + "_" + threshold +
+	 * ".gv". A failure to write is logged and does not stop the experiment.
+	 *
+	 * @param predicted the graph produced by the experiment variant
+	 * @param message label of the experiment variant, used in the results
+	 *            header, the log and the export file name
+	 * @param writer destination for the textual results
+	 */
+	private static void writeResults(DependencyGraph predicted, String message,
+			FileWriter writer) {
+
 		String temp = depGraph.similarToGraph(predicted).toString();
-		writer.append(temp);
-		
-		//writer.append("\n\n"+ predicted.toString() +" \n");
-		writer.flush();
+		try {
+			writer.append("\n\n" + message + " \nLucene Threshold: "
+					+ ConfigurationDefaults.LUCENE_THRESHOLD + "\n");
+			writer.append(temp);
+			writer.flush();
+		} catch (IOException e) {
+			// Passing the exception as the last argument lets the logger
+			// record the stack trace instead of printing it to stderr.
+			logger.error(
+					"Results file IOException (cannot write) for experiment {}",
+					message, e);
+		}
 		
-		provtranslator.translate(predicted, "graph"+message+"_"+ConfigurationDefaults.LUCENE_THRESHOLD+".gv");
-		System.out.println("Done with "+ message);
-	
+		logger.info("{} graph created, threshold {}", message, ConfigurationDefaults.LUCENE_THRESHOLD );
+
+		// writer.append("\n\n"+ predicted.toString() +" \n");
+
+		provtranslator.translate(predicted, "graph" + message + "_"
+				+ ConfigurationDefaults.LUCENE_THRESHOLD + ".gv");
+
+
 	}
-	
 
- 		
-	public static DependencyGraph createGraph(){
-		
-		if (baselineGraph == null){
-		
+	public static DependencyGraph createGraph() throws DropboxException {
+
+		if (baselineGraph == null) {
 			baselineGraph = new DependencyGraph();
-			CompletePipeline pipeline = new CompletePipeline(false, dirfile, ConfigurationDefaults.PLAGIARISMDETECTIONDIRS);
-			
-			try {
-				baselineGraph = pipeline.initDependencyGraph();
-				baselineGraph = pipeline.loadMetadaAndIndexes(baselineGraph);
-			} catch (Exception e) {
-				// TODO Auto-generated catch block
-				e.printStackTrace();
-			}
+			CompletePipeline pipeline = new CompletePipeline(false, dirfile,
+					ConfigurationDefaults.PLAGIARISMDETECTIONDIRS);
+
+			baselineGraph = pipeline.initDependencyGraph();
+			pipeline.loadMetadaAndIndexes(baselineGraph);
+			logger.info("Baseline graph created.");
 		}
-		
+
 		DependencyGraph copyOfBaseline = baselineGraph.copyGraph();
 		return copyOfBaseline;
 	}
+	
+	/**
+	 * Validates the corpus directory and the JSON file before the experiment
+	 * starts, replacing each configured path with its absolute form once it
+	 * has been accepted.
+	 *
+	 * @return true when both paths are usable, false after logging the first
+	 *         problem found
+	 */
+	public static Boolean checkInitialParameters() {
+		logger.info("Starting experiment 3: Plagiarism Detection Corpus");
+
+		final File corpusDir = new File(dirfile);
+		final boolean dirMissing = !corpusDir.exists();
+		if (dirMissing || !corpusDir.isDirectory()) {
+			logger.error(dirMissing ? "Directory doesn't exist {}"
+					: "Directory is not a directory {}", dirfile);
+			return Boolean.FALSE;
+		}
+		// The directory path is made absolute before the JSON file is examined.
+		dirfile = corpusDir.getAbsolutePath();
+
+		final File provJson = new File(jsonfile);
+		if (provJson.exists()) {
+			jsonfile = provJson.getAbsolutePath();
+			return Boolean.TRUE;
+		}
+
+		logger.error("json doesn't exist {}", jsonfile);
+		return Boolean.FALSE;
+	}
+	
+	/**
+	 * Opens a writer on the results file.
+	 *
+	 * @param resultsfilename path of the results file to create
+	 * @return a FileWriter on that file
+	 * @throws IOException if the file cannot be opened; the failure is logged
+	 *             before being rethrown
+	 */
+	public static FileWriter createFileResultsWriter(String resultsfilename) throws IOException {
+		try {
+			return new FileWriter(resultsfilename);
+		} catch (IOException e) {
+			// Passing the exception as the last argument lets the logger
+			// record the stack trace instead of printing it to stderr.
+			logger.error("Results file IOException (cannot open FileWriter) {}",
+					resultsfilename, e);
+			throw e;
+		}
+	}
 
 }

File src/main/java/nl/vu/recoprov/experiments/PROVReader.java

 import org.openprovenance.prov.xml.ProvFactory;
 import org.openprovenance.prov.xml.StatementOrBundle;
 import org.openprovenance.prov.xml.WasDerivedFrom;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Logger;
+
+
+import com.dropbox.client2.exception.DropboxException;
 
 
 public class PROVReader {
 
-	static HashMap<String, Entity> listOfAvailableEntities = new HashMap<String, Entity> ();
-	static HashMap<String, Object> listOfAvailableRelations =  new HashMap<String, Object> ();
-	static  HashMap<String,Activity> listOfAvailableActivities = new  LinkedHashMap<String, Activity> ();
-	static int counter = 0;
-	
+	private static HashMap<String, Entity> listOfAvailableEntities = new HashMap<String, Entity> ();
+	private static HashMap<String, Object> listOfAvailableRelations =  new HashMap<String, Object> ();
+	private static  HashMap<String,Activity> listOfAvailableActivities = new  LinkedHashMap<String, Activity> ();
+	private static int counter = 0;
+	private  Logger logger;
 	
-	private  DependencyGraph depGraph;
 	private  String dir ;
 	private  String jsonfile ;
 	private Document provdoc = new Document();
 	public PROVReader(String dir, String jsonfile){
 		this.dir = dir;
 		this.jsonfile = jsonfile;
+		logger = LoggerFactory
+				.getLogger("nl.vu.recoprov.experiments.PROVReader");
 	}
 	
 	
-	public DependencyGraph generatePANDepGraph() throws Exception {
+	public DependencyGraph generatePANDepGraph() throws DropboxException {
 
-		System.out.println("Reading PROV description of folder: "+ dir + " \nPROV contained in JSON: "+ jsonfile);
+		logger.info("Reading PROV description of folder: {} in JSON {}: ", dir, jsonfile);
 		
 		ProvFactory factory = initFactory();
 		CompletePipeline pipeline = new CompletePipeline(false, dir);
-
-		depGraph = pipeline.initDependencyGraph();
+		DependencyGraph depGraph = pipeline.initDependencyGraph();
 
 		// and get the lucene identifiers
-		depGraph = pipeline.indexFiles(depGraph);
-
+		pipeline.indexFiles(depGraph);
+		
 		createEntityFromDepGraph(factory, depGraph);
 
-		readJSON(jsonfile, factory);
+		readJSON(jsonfile, depGraph, factory);
 
 		//convertToDot(factory);
 
 
 	}
 		
-		
 	
 	public ProvFactory initFactory() {
 		ProvFactory factory = ProvFactory.getFactory();
 		Hashtable<String, String> namespace = new Hashtable<String, String>();	
 		namespace.put("_", "");
-		factory.setNamespaces(namespace);
-
-		
+		factory.setNamespaces(namespace);		
 		return factory;
 	}
 	
 
 	}
 	
-	public void readPROVN(String provnfile, ProvFactory factory) {
-		Converter conv = new Converter();
-
-		File sourcedir = new File(dir, sourceFolder);
-		File suspdir = new File(dir, suspiciousFolder);
-		
-		try {
-			provdoc = conv.readDocument(provnfile);
-		} catch (Exception e) {
-			// ignore}
-		}
-		List<StatementOrBundle> provlist = provdoc
-				.getEntityAndActivityAndWasGeneratedBy();
-		for (StatementOrBundle s : provlist) {
-			// assume are all statements
-			// if (s instanceof Entity){
-			// String entityName = ((Entity) s).getId().toString();
-			// entityName = entityName.replace(namespace, "");
-			// System.out.println("Entity: " + entityName);
-			// }
-			//
-			// if (s instanceof Activity){
-			// String actName = ((Activity) s).getId().toString();
-			// actName = actName.replace(namespace, "");
-			// System.out.println("Activity: " + actName);
-			// }
-
-			// if (s instanceof WasGeneratedBy){
-			// String actName = ((WasGeneratedBy) s).getId().toString();
-			// actName = actName.replace(namespace, "");
-			// System.out.println("WasGeneratedBy: " + actName);
-			// }
-
-			if (s instanceof WasDerivedFrom) {
-				String used = ((WasDerivedFrom) s).getUsedEntity().getRef()
-						.getLocalPart();
-				String generated = ((WasDerivedFrom) s).getGeneratedEntity()
-						.getRef().getLocalPart();
-				// System.out.println("WasDerivedFrom: " + used + " ->"+
-				// generated);
-
-				File usedFile = new File(sourcedir, used);
-				File genFile = new File(suspdir, generated);
-				
-				
-				depGraph.addEdge(depGraph.get(genFile.getAbsolutePath()),
-						depGraph.get(usedFile.getAbsolutePath()),
-						WeightedSumAggregator.FINAL_SCORE, 1.0);
-			}
-
-		}
-
-	}
 
 
 
-
-	
-
-
-	public void readJSON(String jsonfile, ProvFactory factory) {
+	public void readJSON(String jsonfile, DependencyGraph depGraph, ProvFactory factory) {
 		Converter conv = new Converter();
 
 		File sourcedir = new File(dir, sourceFolder);
 //				listOfAvailableEntities.values(), new LinkedList<Agent>(),
 //				new LinkedList<Statement>());
 
-		try {
-		
+		try {	
 			provtodot.convert(provdoc, new File("graphCorpus.gv"));
-		
 		} catch (FileNotFoundException e) {
-
 			e.printStackTrace();
 		}
 	}
 
 
+}	
+
+	
+/*********************************************************************	
+//	 Old code
+	
+	
+	
+	
+//	public void readPROVN(String provnfile, DependencyGraph depGraph, ProvFactory factory) {
+//		Converter conv = new Converter();
+//
+//		File sourcedir = new File(dir, sourceFolder);
+//		File suspdir = new File(dir, suspiciousFolder);
+//		
+//		try {
+//			provdoc = conv.readDocument(provnfile);
+//		} catch (Exception e) {
+//			// ignore}
+//		}
+//		List<StatementOrBundle> provlist = provdoc
+//				.getEntityAndActivityAndWasGeneratedBy();
+//		for (StatementOrBundle s : provlist) {
+//			// assume are all statements
+//			// if (s instanceof Entity){
+//			// String entityName = ((Entity) s).getId().toString();
+//			// entityName = entityName.replace(namespace, "");
+//			// System.out.println("Entity: " + entityName);
+//			// }