Commits

Sara Magliacane committed f332a67

Updating to the new versions of Lucene (4.2.1) and LIRE (1.0.1_SNAPSHOT)

Comments (0)

Files changed (63)

lib/JOpenSurf.jar

Binary file added.

lib/commons-math3-3.0.jar

Binary file added.

lib/net.semanticmetadata.lire_1.0.0_SNAPSHOT.jar

Binary file removed.

lib/net.semanticmetadata.lire_1.0.1_SNAPSHOT.jar

Binary file added.
 	<version>2.2.2</version>
 </dependency>
             
-<dependency>
-	<groupId>net.semanticmetadata</groupId>
-	<artifactId>lire</artifactId>
-	<version>1.0</version>
-</dependency>
   	<dependency>
   		<groupId>org.eclipse.jetty.aggregate</groupId>
   		<artifactId>jetty-all-server</artifactId>
   	<dependency>
 	<groupId>org.apache.lucene</groupId>
 	<artifactId>lucene-core</artifactId>
-	<version>3.6.0</version>
+	<version>4.2.1</version>
+</dependency>
+<dependency>
+	<groupId>org.apache.lucene</groupId>
+	<artifactId>lucene-analyzers-common</artifactId>
+	<version>4.2.1</version>
+</dependency>
+            <dependency>
+	<groupId>org.apache.lucene</groupId>
+	<artifactId>lucene-queries</artifactId>
+	<version>4.2.1</version>
+</dependency>
+<dependency>
+	<groupId>org.apache.lucene</groupId>
+	<artifactId>lucene-queryparser</artifactId>
+	<version>4.2.1</version>
 </dependency>
 <dependency>
 	<groupId>org.apache.commons</groupId>
   		<artifactId>simmetrics</artifactId>
   		<version>1.6.2</version>
   	</dependency>
-  	<dependency>
-	<groupId>org.apache.solr</groupId>
-	<artifactId>solr-core</artifactId>
-	<version>3.6.0</version>
-</dependency>
 <dependency>
 	<groupId>org.jvnet.jaxb2_commons</groupId>
 	<artifactId>jaxb2-basics-runtime</artifactId>
   		  <dependency>
   <groupId>net.semanticmetadata</groupId>
   <artifactId>lire</artifactId>
-  <version>1.0.0_SNAPSHOT</version>
+  <version>1.0.1_SNAPSHOT</version>
 </dependency>
 <dependency>
   <groupId>org.openprovenance.prov</groupId>

repo/com/dropbox/client2/1.3.0_SNAPSHOT/maven-metadata-local.xml

     <snapshot>
       <localCopy>true</localCopy>
     </snapshot>
-    <lastUpdated>20130417183624</lastUpdated>
+    <lastUpdated>20130422220623</lastUpdated>
     <snapshotVersions>
       <snapshotVersion>
         <extension>jar</extension>
         <value>1.3.0_SNAPSHOT</value>
-        <updated>20130417183624</updated>
+        <updated>20130422220623</updated>
       </snapshotVersion>
       <snapshotVersion>
         <extension>pom</extension>

repo/com/dropbox/client2/1.3.0_SNAPSHOT/maven-metadata-local.xml.md5

-361715afe97aef4def1c574ea18f704a
+53369e698e9682aed8bcc18729350e0d

repo/com/dropbox/client2/1.3.0_SNAPSHOT/maven-metadata-local.xml.sha1

-5e835b7e7fdd0d375619cd0a09180efda80b0302
+d5cbfc599db61e04366ef132ba93d23f1aaaf98f

repo/com/dropbox/client2/maven-metadata-local.xml

     <versions>
       <version>1.3.0_SNAPSHOT</version>
     </versions>
-    <lastUpdated>20130417183624</lastUpdated>
+    <lastUpdated>20130422220623</lastUpdated>
   </versioning>
 </metadata>

repo/com/dropbox/client2/maven-metadata-local.xml.md5

-da6e435d01fc5b3407b2316825aa1f8b
+1f70c1dd92c787921dea5e2577aa619e

repo/com/dropbox/client2/maven-metadata-local.xml.sha1

-edfd1320ebcc0dd056051fc94327a6995dd391a3
+8118cd0a7a77deeb7d18290ae598c6eaedd227c9

repo/net/semanticmetadata/lire/1.0.0_SNAPSHOT/lire-1.0.0_SNAPSHOT.jar

Binary file removed.

repo/net/semanticmetadata/lire/1.0.0_SNAPSHOT/lire-1.0.0_SNAPSHOT.jar.md5

-8464cf908b343e02f4753b007e3a69ab

repo/net/semanticmetadata/lire/1.0.0_SNAPSHOT/lire-1.0.0_SNAPSHOT.jar.sha1

-20074e383fbbc09f2e0c5d2898458c28e55a65d2

repo/net/semanticmetadata/lire/1.0.0_SNAPSHOT/lire-1.0.0_SNAPSHOT.pom

-<?xml version="1.0" encoding="UTF-8"?>
-<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-  <modelVersion>4.0.0</modelVersion>
-  <groupId>net.semanticmetadata</groupId>
-  <artifactId>lire</artifactId>
-  <version>1.0.0_SNAPSHOT</version>
-  <description>POM was created from install:install-file</description>
-</project>

repo/net/semanticmetadata/lire/1.0.0_SNAPSHOT/lire-1.0.0_SNAPSHOT.pom.md5

-447a97c158e99a9652286233c0bc7069

repo/net/semanticmetadata/lire/1.0.0_SNAPSHOT/lire-1.0.0_SNAPSHOT.pom.sha1

-125904ba5ef465cee83f2cd21394f2f1fc54cc21

repo/net/semanticmetadata/lire/1.0.0_SNAPSHOT/maven-metadata-local.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<metadata modelVersion="1.1.0">
-  <groupId>net.semanticmetadata</groupId>
-  <artifactId>lire</artifactId>
-  <version>1.0.0_SNAPSHOT</version>
-  <versioning>
-    <snapshot>
-      <localCopy>true</localCopy>
-    </snapshot>
-    <lastUpdated>20130417183626</lastUpdated>
-    <snapshotVersions>
-      <snapshotVersion>
-        <extension>jar</extension>
-        <value>1.0.0_SNAPSHOT</value>
-        <updated>20130417183626</updated>
-      </snapshotVersion>
-      <snapshotVersion>
-        <extension>pom</extension>
-        <value>1.0.0_SNAPSHOT</value>
-        <updated>20130417183626</updated>
-      </snapshotVersion>
-    </snapshotVersions>
-  </versioning>
-</metadata>

repo/net/semanticmetadata/lire/1.0.0_SNAPSHOT/maven-metadata-local.xml.md5

-47fe7f9f9016949d1cadf0508e700578

repo/net/semanticmetadata/lire/1.0.0_SNAPSHOT/maven-metadata-local.xml.sha1

-d620a321867201411d1027add32d969c3803beb9

repo/net/semanticmetadata/lire/1.0.1_SNAPSHOT/lire-1.0.1_SNAPSHOT.jar

Binary file added.

repo/net/semanticmetadata/lire/1.0.1_SNAPSHOT/lire-1.0.1_SNAPSHOT.jar.md5

+811b9f5417e078cae97f8df908442dde

repo/net/semanticmetadata/lire/1.0.1_SNAPSHOT/lire-1.0.1_SNAPSHOT.jar.sha1

+5caf0fe1a4bbd243eb37be94ce6e5f6de48cb130

repo/net/semanticmetadata/lire/1.0.1_SNAPSHOT/lire-1.0.1_SNAPSHOT.pom

+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>net.semanticmetadata</groupId>
+  <artifactId>lire</artifactId>
+  <version>1.0.1_SNAPSHOT</version>
+  <description>POM was created from install:install-file</description>
+</project>

repo/net/semanticmetadata/lire/1.0.1_SNAPSHOT/lire-1.0.1_SNAPSHOT.pom.md5

+d49a8bec44a0cb0f9119f7a87629d61d

repo/net/semanticmetadata/lire/1.0.1_SNAPSHOT/lire-1.0.1_SNAPSHOT.pom.sha1

+a72b01b54ccfb04dc0d03d44a65e3a4f5cf75877

repo/net/semanticmetadata/lire/1.0.1_SNAPSHOT/maven-metadata-local.xml

+<?xml version="1.0" encoding="UTF-8"?>
+<metadata modelVersion="1.1.0">
+  <groupId>net.semanticmetadata</groupId>
+  <artifactId>lire</artifactId>
+  <version>1.0.1_SNAPSHOT</version>
+  <versioning>
+    <snapshot>
+      <localCopy>true</localCopy>
+    </snapshot>
+    <lastUpdated>20130422220626</lastUpdated>
+    <snapshotVersions>
+      <snapshotVersion>
+        <extension>jar</extension>
+        <value>1.0.1_SNAPSHOT</value>
+        <updated>20130422220626</updated>
+      </snapshotVersion>
+      <snapshotVersion>
+        <extension>pom</extension>
+        <value>1.0.1_SNAPSHOT</value>
+        <updated>20130422191357</updated>
+      </snapshotVersion>
+    </snapshotVersions>
+  </versioning>
+</metadata>

repo/net/semanticmetadata/lire/1.0.1_SNAPSHOT/maven-metadata-local.xml.md5

+b4af05d1479879a5f501837fa9cfb4c9

repo/net/semanticmetadata/lire/1.0.1_SNAPSHOT/maven-metadata-local.xml.sha1

+20d5c50995c549ca7a64947f9ba53c01de7c1ba7

repo/net/semanticmetadata/lire/maven-metadata-local.xml

   <versioning>
     <versions>
       <version>1.0.0_SNAPSHOT</version>
+      <version>1.0.1_SNAPSHOT</version>
     </versions>
-    <lastUpdated>20130417183626</lastUpdated>
+    <lastUpdated>20130422220626</lastUpdated>
   </versioning>
 </metadata>

repo/net/semanticmetadata/lire/maven-metadata-local.xml.md5

-e62178991d076acc0b0c0b8e60dd05f0
+d01fba975e1498933fc1a170f098074a

repo/net/semanticmetadata/lire/maven-metadata-local.xml.sha1

-81acc4332bbd411e3096405a8f7c387d25f89ee5
+405861c142b4bee2216febb4d930b97a25def5f0

repo/org/openprovenance/prov/dot/0.2.1_SNAPSHOT/maven-metadata-local.xml

     <snapshot>
       <localCopy>true</localCopy>
     </snapshot>
-    <lastUpdated>20130417183628</lastUpdated>
+    <lastUpdated>20130422220628</lastUpdated>
     <snapshotVersions>
       <snapshotVersion>
         <extension>jar</extension>
         <value>0.2.1_SNAPSHOT</value>
-        <updated>20130417183628</updated>
+        <updated>20130422220628</updated>
       </snapshotVersion>
       <snapshotVersion>
         <extension>pom</extension>

repo/org/openprovenance/prov/dot/0.2.1_SNAPSHOT/maven-metadata-local.xml.md5

-a88e162b85bae5fb45bf46192b360766
+019c6713920971f5b2567d3de45e0862

repo/org/openprovenance/prov/dot/0.2.1_SNAPSHOT/maven-metadata-local.xml.sha1

-662b8a2d7dafaf3deb9df7c49cc5c096e9d3698a
+dd8ead65d8b1a8a38dbb3f3bbdebea9c4f55449a

repo/org/openprovenance/prov/dot/maven-metadata-local.xml

     <versions>
       <version>0.2.1_SNAPSHOT</version>
     </versions>
-    <lastUpdated>20130417183628</lastUpdated>
+    <lastUpdated>20130422220628</lastUpdated>
   </versioning>
 </metadata>

repo/org/openprovenance/prov/dot/maven-metadata-local.xml.md5

-464a4a8029faa4f921329523bf41ad70
+e903c77c2e1ee8f7b46f6c7ed62889e7

repo/org/openprovenance/prov/dot/maven-metadata-local.xml.sha1

-43daa84566a110c9b5b45299bc28651bea04fc63
+8440a3b74b67e6049412545a78421746c4017374

repo/org/openprovenance/prov/interoperability/0.2.1_SNAPSHOT/maven-metadata-local.xml

     <snapshot>
       <localCopy>true</localCopy>
     </snapshot>
-    <lastUpdated>20130417183630</lastUpdated>
+    <lastUpdated>20130422220630</lastUpdated>
     <snapshotVersions>
       <snapshotVersion>
         <extension>jar</extension>
         <value>0.2.1_SNAPSHOT</value>
-        <updated>20130417183630</updated>
+        <updated>20130422220630</updated>
       </snapshotVersion>
       <snapshotVersion>
         <extension>pom</extension>

repo/org/openprovenance/prov/interoperability/0.2.1_SNAPSHOT/maven-metadata-local.xml.md5

-4b3bc461eb8c8c99b47ad8975a2ddb39
+0210fc83501a4622861588f2147f66d8

repo/org/openprovenance/prov/interoperability/0.2.1_SNAPSHOT/maven-metadata-local.xml.sha1

-7ab757b29593426c0b4eef65c7cad1aa6911198e
+c9db3426d8b974ed8ba2eb59513970f24d74eff2

repo/org/openprovenance/prov/interoperability/maven-metadata-local.xml

     <versions>
       <version>0.2.1_SNAPSHOT</version>
     </versions>
-    <lastUpdated>20130417183630</lastUpdated>
+    <lastUpdated>20130422220630</lastUpdated>
   </versioning>
 </metadata>

repo/org/openprovenance/prov/interoperability/maven-metadata-local.xml.md5

-c6abf630854693a7ff89012c7816cfba
+e09a919cb6ab11959b31389f0734efbd

repo/org/openprovenance/prov/interoperability/maven-metadata-local.xml.sha1

-8b9e5d9a5e13a7c973a0bf8e13a91c65691b0f8b
+255897582e2d783047e1513f99d1cfbc9e2edd10

repo/org/openprovenance/prov/json/0.2.1_SNAPSHOT/maven-metadata-local.xml

     <snapshot>
       <localCopy>true</localCopy>
     </snapshot>
-    <lastUpdated>20130417183631</lastUpdated>
+    <lastUpdated>20130422220632</lastUpdated>
     <snapshotVersions>
       <snapshotVersion>
         <extension>jar</extension>
         <value>0.2.1_SNAPSHOT</value>
-        <updated>20130417183631</updated>
+        <updated>20130422220632</updated>
       </snapshotVersion>
       <snapshotVersion>
         <extension>pom</extension>

repo/org/openprovenance/prov/json/0.2.1_SNAPSHOT/maven-metadata-local.xml.md5

-705ec4c6d72e36becbb8f682555e66e5
+2c796935a08835ff539da8111b04c55b

repo/org/openprovenance/prov/json/0.2.1_SNAPSHOT/maven-metadata-local.xml.sha1

-d10712a8eb3e6de67d973f9d038b4ad5525ad1b6
+fb086e8f1317da4807c81c16ebd051d14da27d8d

repo/org/openprovenance/prov/json/maven-metadata-local.xml

     <versions>
       <version>0.2.1_SNAPSHOT</version>
     </versions>
-    <lastUpdated>20130417183631</lastUpdated>
+    <lastUpdated>20130422220632</lastUpdated>
   </versioning>
 </metadata>

repo/org/openprovenance/prov/json/maven-metadata-local.xml.md5

-1d619c2d1e5facb3cdc4575c411cc855
+60f91aef5737d5754d39a1001b5a0c56

repo/org/openprovenance/prov/json/maven-metadata-local.xml.sha1

-482b7d116073266890221f746bc3ec308031fc46
+5a193071d92556050972b0be6830cd653935534f

repo/org/openprovenance/prov/xml/0.2.1_SNAPSHOT/maven-metadata-local.xml

     <snapshot>
       <localCopy>true</localCopy>
     </snapshot>
-    <lastUpdated>20130417183633</lastUpdated>
+    <lastUpdated>20130422220634</lastUpdated>
     <snapshotVersions>
       <snapshotVersion>
         <extension>jar</extension>
         <value>0.2.1_SNAPSHOT</value>
-        <updated>20130417183633</updated>
+        <updated>20130422220634</updated>
       </snapshotVersion>
       <snapshotVersion>
         <extension>pom</extension>

repo/org/openprovenance/prov/xml/0.2.1_SNAPSHOT/maven-metadata-local.xml.md5

-1e7c461b860a946355d4684261179c38
+2d35c17dacd765a004a1cc72212ef976

repo/org/openprovenance/prov/xml/0.2.1_SNAPSHOT/maven-metadata-local.xml.sha1

-e1260ea71f6323157b5441c59490a9b44a0443e3
+cb97f233fbe5d00d1abb29da85c888fee7e284c5

repo/org/openprovenance/prov/xml/maven-metadata-local.xml

     <versions>
       <version>0.2.1_SNAPSHOT</version>
     </versions>
-    <lastUpdated>20130417183633</lastUpdated>
+    <lastUpdated>20130422220634</lastUpdated>
   </versioning>
 </metadata>

repo/org/openprovenance/prov/xml/maven-metadata-local.xml.md5

-380ae768941ec85c12bdb184de14624f
+fc0a93bf127408d28da89d96c26b0f52

repo/org/openprovenance/prov/xml/maven-metadata-local.xml.sha1

-3dba994b4301344d99545dd6aa4b3f2371c30b08
+75b877f3158eb976affac1dfec91e9d386eb0df7

src/nl/vu/recoprov/ImageReader.java

 
 import org.apache.commons.io.FileUtils;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.CorruptIndexException;
 			e2.printStackTrace();
 		}
 		
-		Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_35);
-		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer );
+		Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_42);
+		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, analyzer );
 		
 		IndexWriter writer = null;
 		try {

src/nl/vu/recoprov/LuceneIndexer.java

 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.baseclasses.DependencyNode;
 import nl.vu.recoprov.utils.ConfigurationDefaults;
-import nl.vu.recoprov.utils.CustomAnalyzer;
 import nl.vu.recoprov.utils.CustomFileReader;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
-import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 		try {
 			FSDirectory store = SimpleFSDirectory.open(indexDir);
 
-			CustomAnalyzer analyzer = new CustomAnalyzer(Version.LUCENE_35);
-			Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
-			fieldAnalyzers.put("raw-contents", new KeywordAnalyzer());
+			Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_42);
+//			Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
+//			fieldAnalyzers.put("raw-contents", new KeywordAnalyzer());
+//
+//			PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
+//					analyzer, fieldAnalyzers);
 
-			PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
-					analyzer, fieldAnalyzers);
-
-			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
-					wrapper);
+			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42,
+					analyzer);
 
 			IndexWriter writer = new IndexWriter(store, config);
 			Set<String> names = input.keySet();
 
 			for (int i = 0; i < numdocs; i++) {
 				Document doc = searcher.doc(i);
-				String key = doc.getFieldable("name").stringValue();
+				String key = doc.getField("name").stringValue();
 				DependencyNode d = input.get(key);
 
 				d.setLuceneDocNumber(i);
 				input.put(d.getCompleteFilepath(), d);
 			}
 
-			searcher.close();
 			reader.close();
 			store.close();
 

src/nl/vu/recoprov/experiments/Experiment3.java

 import nl.vu.recoprov.ProvDMtranslator;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.signalaggregators.WeightedSumAggregator;
+import nl.vu.recoprov.signaldetectors.LuceneInverseSimilarity;
 import nl.vu.recoprov.signaldetectors.LuceneSimilaritySignal;
 import nl.vu.recoprov.utils.ConfigurationDefaults;
 		
 		DependencyGraph depGraph1 = createGraph();
 		System.out.println("Done with baseline graph");
 		depGraph1 = new LuceneSimilaritySignal().computeSignal(depGraph1);
+		//depGraph1 = new LuceneInverseSimilarity().computeSignal(depGraph1);
 		//depGraph1 = new BackwardTemporalFilter().filterSignals(depGraph1);
 		depGraph1 = new WeightedSumAggregator().aggregateSignals(depGraph1);
 		
 		File logfile = new File("log" + System.currentTimeMillis() + ".txt");
 		FileWriter writer = new FileWriter(logfile);
 		
+		writer.append("Original \n" + depGraph + "\n \n \n");
+		
 		writer.append("\n\nLucene - Filter: \n");
 		String temp = depGraph.similarToGraph(depGraph1).toString();
 		writer.append(temp);
 		
-		writer.append("\n" + depGraph1.toBooleanArray() + "\n");
+		writer.append("\n\n \n Predicted\n" + depGraph1 + "\n");
 		
 		writer.flush();
 		new ProvDMtranslator()

src/nl/vu/recoprov/experiments/PROVReader.java

 				File usedFile = new File(sourcedir, used);
 				File genFile = new File(suspdir, generated);
 				
-				depGraph.addEdge(depGraph.get(usedFile.getAbsolutePath()),
-						depGraph.get(genFile.getAbsolutePath()),
+				depGraph.addEdge(depGraph.get(genFile.getAbsolutePath()),
+						depGraph.get(usedFile.getAbsolutePath()),
 						WeightedSumAggregator.FINAL_SCORE, 1.0);
 			}
 

src/nl/vu/recoprov/signaldetectors/LuceneSimilaritySignal.java

 import java.util.TreeMap;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermFreqVector;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.SimpleFSDirectory;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
 
 import nl.vu.recoprov.LuceneIndexer;
 import nl.vu.recoprov.abstractclasses.SignalDetector;
 import nl.vu.recoprov.baseclasses.DependencyGraph;
 import nl.vu.recoprov.baseclasses.DependencyNode;
 import nl.vu.recoprov.utils.ConfigurationDefaults;
-import nl.vu.recoprov.utils.CustomAnalyzer;
+
 
 public class LuceneSimilaritySignal extends SignalDetector {
 
 	public static final String LUCENE_SIMILARITY = "lucene-similarity";
 	public static final String LUCENE_INVERSE_SIMILARITY = LuceneSimilaritySignal.LUCENE_SIMILARITY+ "_inverse";
-	public static double LUCENE_THRESHOLD = 0.1;
-
+	public static double LUCENE_THRESHOLD = 0.01;
+	public static int LUCENE_MAX_NUMBER_DOCS = 100;
+	
 	public DependencyGraph computeSignal(DependencyGraph input) {
 
 		try {
 			// indexDir.mkdir();
 
 			FSDirectory store = SimpleFSDirectory.open(indexDir);
-			IndexReader reader = IndexReader.open(store);
+			IndexReader reader = DirectoryReader.open(store);
 			IndexSearcher searcher = new IndexSearcher(reader);
 			int numdocs = reader.numDocs();
 
 			for (int i = 0; i < numdocs; i++) {
-				TermFreqVector[] tfvs = reader.getTermFreqVectors(i);
+				Terms tfvs = reader.getTermVector(i, "contents");
 				ScoreDoc[] hits = new ScoreDoc[0];
-
+				String querystring = "";
+				String term = null;
+				
 				if (tfvs == null) {
 					System.out.println("No terms vector found for doc " + i);
 				} else {
-					for (TermFreqVector tfv : tfvs) {
-						String fieldname = tfv.getField();
-						
-						if (!fieldname.equals("contents"))
-							continue;
-						
-						
-						String[] terms = tfv.getTerms();
-						int[] frequencies = tfv.getTermFrequencies();
+					TermsEnum tenum = tfvs.iterator(null);
+					
 
-						String querystring = "";
 
-						int k = -1;
-						Map<String, Integer> termVector = new TreeMap<String, Integer>();
-						for (String term : terms) {
-							k = k + 1;
-							if (ignoreTerm(term)) {
-								continue;
-							}
-							term = modifyTerm(term);
-
-							termVector.put(term, frequencies[k]);
+					while ( tenum.next() != null) {
+						
+						term = tenum.term().utf8ToString();
+						
+						if (querystring == ""){
+							querystring += term;
 						}
-
-						// TODO: order by most frequent ones
-
-						for (String term : termVector.keySet()) {
-							if (querystring.equals("")) {
-								querystring = term;
-								continue;
-							}
+						else{
 							querystring = querystring + " OR " + term;
 						}
+						
+					}
+
+					hits = searchForString(searcher, querystring);
 
-						hits = searchForString(searcher, querystring);
 
-					}
 				}
 
 				Document doc = searcher.doc(i);
-				String key = doc.getFieldable("name").stringValue();
+				String key = doc.getField("name").stringValue();
 				DependencyNode d = input.get(key);
 				d.setLuceneSimilarity(hits);
 				// probably unnecessary
 			String queryString) throws IOException, ParseException {
 
 		return searchForString("contents", searcher, queryString,
-				new CustomAnalyzer(Version.LUCENE_35));
+				new EnglishAnalyzer(Version.LUCENE_42));
 
 	}
 	
 
 	public static ScoreDoc[] searchForString(String fieldname,
 			IndexSearcher searcher, String queryString, Analyzer analyzer)
-			throws IOException, ParseException {
+			throws IOException, ParseException{
 		
 		if (queryString.equals(""))
 			return new ScoreDoc[0];
 
-		BooleanQuery.setMaxClauseCount(100000);
+		BooleanQuery.setMaxClauseCount(1000000);
 
-		QueryParser parser = new QueryParser(Version.LUCENE_35, fieldname,
+		QueryParser parser = new QueryParser(Version.LUCENE_42, fieldname,
 				analyzer);
 		Query query = parser.parse(queryString);
 
 		// System.out.println(queryString);
 
-		TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
+		TopScoreDocCollector collector = TopScoreDocCollector.create(LUCENE_MAX_NUMBER_DOCS, true);
 		searcher.search(query, collector);
 		ScoreDoc[] hits = collector.topDocs().scoreDocs;
 		return hits;
 			return new ScoreDoc[0];
 
 		
-		QueryParser parser = new QueryParser(Version.LUCENE_35, fieldname,
+		QueryParser parser = new QueryParser(Version.LUCENE_42, fieldname,
 				analyzer);
 		Query query = parser.parse(queryString);
 

src/nl/vu/recoprov/signaldetectors/MatchTitleInContentSignal.java

 package nl.vu.recoprov.signaldetectors;
 
 import java.io.File;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
+
+
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.IndexSearcher;
 				
 				//better to have an original index (no strange changes)
 				
-				ScoreDoc[] hits = LuceneSimilaritySignal.searchForPhrase("contents", searcher, key, new WhitespaceAnalyzer());
+				ScoreDoc[] hits = null;
+				//hits = LuceneSimilaritySignal.searchForPhrase("contents", searcher, key, new EnglishAnalyzer());
 
 				
 				

src/nl/vu/recoprov/utils/CustomAnalyzer.java

-package nl.vu.recoprov.utils;
-
-
-/*
- *  Modified version of StandardAnalyzer (cannot extend since it's final and wrapping it is a tedious job).
- *  Includes Porter Stemmer
- */
-
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.standard.StandardFilter;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
-import org.apache.solr.analysis.TrimFilterFactory;
-import org.apache.solr.analysis.WordDelimiterFilterFactory;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Set;
-
-/**
- * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
- * LowerCaseFilter} and {@link StopFilter}, using a list of
- * English stop words.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating PorterAnalyzer:
- * <ul>
- *   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
- *        from their combining characters. If you use a previous version number,
- *        you get the exact broken behavior for backwards compatibility.
- *   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
- *        and StopFilter correctly handles Unicode 4.0 supplementary characters
- *        in stopwords.  {@link ClassicTokenizer} and {@link ClassicAnalyzer} 
- *        are the pre-3.1 implementations of StandardTokenizer and
- *        PorterAnalyzer.
- *   <li> As of 2.9, StopFilter preserves position increments
- *   <li> As of 2.4, Tokens incorrectly identified as acronyms
- *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
- * </ul>
- */
-public final class CustomAnalyzer extends StopwordAnalyzerBase {
-
-  /** Default maximum allowed token length */
-  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
-  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
-  /**
-   * Specifies whether deprecated acronyms should be replaced with HOST type.
-   * See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
-   */
-  private final boolean replaceInvalidAcronym;
-
-  /** An unmodifiable set containing some common English words that are usually not
-  useful for searching. */
-  public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; 
-
-  /** Builds an analyzer with the given stop words.
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopWords stop words */
-  public CustomAnalyzer(Version matchVersion, Set<?> stopWords) {
-    super(matchVersion, stopWords);
-    replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
-  }
-
-  /** Builds an analyzer with the default stop words ({@link
-   * #STOP_WORDS_SET}).
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   */
-  public CustomAnalyzer(Version matchVersion) {
-    this(matchVersion, STOP_WORDS_SET);
-  }
-
-  /** Builds an analyzer with the stop words from the given file.
-   * @see WordlistLoader#getWordSet(Reader, Version)
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopwords File to read stop words from */
-  public CustomAnalyzer(Version matchVersion, File stopwords) throws IOException {
-    this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords,
-        IOUtils.CHARSET_UTF_8), matchVersion));
-  }
-
-  /** Builds an analyzer with the stop words from the given reader.
-   * @see WordlistLoader#getWordSet(Reader, Version)
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopwords Reader to read stop words from */
-  public CustomAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
-    this(matchVersion, WordlistLoader.getWordSet(stopwords, matchVersion));
-  }
-
-  /**
-   * Set maximum allowed token length.  If a token is seen
-   * that exceeds this length then it is discarded.  This
-   * setting only takes effect the next time tokenStream or
-   * reusableTokenStream is called.
-   */
-  public void setMaxTokenLength(int length) {
-    maxTokenLength = length;
-  }
-    
-  /**
-   * @see #setMaxTokenLength
-   */
-  public int getMaxTokenLength() {
-    return maxTokenLength;
-  }
-
-  @Override
-  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
-    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
-    src.setMaxTokenLength(maxTokenLength);
-
-    
- 
-    TokenStream tok = new StandardFilter(matchVersion, src);
-    
-    // TODO: make it better - only difference - remove short or too long tokens
-    tok = new ASCIIFoldingFilter(tok);
-    tok = (new TrimFilterFactory()).create(tok);
-    tok = new LengthFilter(true,  tok, 3,255);
-    
-    tok = new LowerCaseFilter(matchVersion, tok);
-    tok = new StopFilter(matchVersion, tok, stopwords);
-    // only difference
-    //tok = new PorterStemFilter(tok);
-    
-    tok = (new WordDelimiterFilterFactory()).create(tok);
-    
-    return new TokenStreamComponents(src, tok) {
-      @Override
-      protected boolean reset(final Reader reader) throws IOException {
-        src.setMaxTokenLength(CustomAnalyzer.this.maxTokenLength);
-        return super.reset(reader);
-      }
-    };
-  }
-}