Commits

Scott Farquhar committed 744cfc1

Created AbstractHitCollector superclass for both HitCollectors and removed OneDimensionalTermHitCollector.

Comments (0)

Files changed (6)

src/main/java/com/atlassian/lucene/stats/AbstractHitCollector.java

+package com.atlassian.lucene.stats;
+
+import com.atlassian.lucene.stats.cache.ReaderCache;
+import com.atlassian.lucene.stats.util.Supplier;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.HitCollector;
+
+import java.io.IOException;
+import java.util.Collection;
+
+/**
+ * Common base for the statistics hit collectors in this package.
+ * <p/>
+ * Holds the {@link ReaderCache} and the optional aggregation field, and provides the shared helpers for loading the
+ * per-document term arrays and for turning the terms stored for one document into a single numeric value.
+ */
+public abstract class AbstractHitCollector extends HitCollector {
+    /** Cache through which the per-document term arrays are obtained. */
+    protected final ReaderCache readerCache;
+
+    /**
+     * Converts a stored term into the value to aggregate. Subclasses pass {@code null} here when they do not
+     * aggregate by a field, so this may be {@code null}.
+     */
+    protected final LuceneFieldSorter aggregateField;
+
+    /**
+     * @param readerCache cache used by {@link #getDocToValueTerms(String, IndexReader)}
+     * @param aggregateField field used by {@link #getValue(Collection)}; may be {@code null}
+     */
+    public AbstractHitCollector(final ReaderCache readerCache, final LuceneFieldSorter aggregateField) {
+        this.readerCache = readerCache;
+        this.aggregateField = aggregateField;
+    }
+
+    /**
+     * Obtains, through the reader cache, the terms stored in the given field for each document of the reader.
+     * <p/>
+     * The supplier handed to the cache loads the terms with {@link JiraLuceneFieldFinder#getMatches}.
+     * NOTE(review): presumably the cache only invokes the supplier on a miss - confirm against ReaderCache.
+     *
+     * @param documentConstant name of the indexed field whose terms are wanted
+     * @param indexReader reader to load the terms from; also passed to the cache as part of the lookup
+     * @return array of term collections as returned by the cache; subclasses index it with the document number
+     * passed to {@code collect}
+     * @throws RuntimeIOException if reading the index fails with an {@link IOException}
+     */
+    protected Collection<String>[] getDocToValueTerms(final String documentConstant, final IndexReader indexReader)
+    {
+        return readerCache.get(indexReader, documentConstant, new Supplier<Collection<String>[]>()
+        {
+            public Collection<String>[] get()
+            {
+                try
+                {
+                    return JiraLuceneFieldFinder.getInstance().getMatches(indexReader, documentConstant);
+                }
+                catch (IOException e)
+                {
+                    // Supplier.get() declares no checked exception, so wrap and keep the cause.
+                    throw new RuntimeIOException(e);
+                }
+            }
+        });
+    }
+
+    /**
+     * Resolves the single numeric statistic value stored for a document.
+     *
+     * @param values the terms stored for one document in the aggregate field; may be {@code null}
+     * @return {@code null} if {@code values} is {@code null} or empty; {@code 0} if the aggregate field converts the
+     * term to {@code null}; otherwise the converted {@link Number}
+     * @throws IllegalArgumentException if more than one value is stored, or the converted value is not a
+     * {@link Number}
+     * @throws IllegalStateException if a value has to be converted but no aggregate field was supplied
+     */
+    protected Number getValue(final Collection<?> values)
+    {
+        if (values == null || values.isEmpty())
+        {
+            return null;
+        }
+        if (values.size() > 1)
+        {
+            throw new IllegalArgumentException("More than one value stored for statistic \"" + values.toString() + "\".");
+        }
+        // Subclasses may construct this class with a null aggregate field; fail with a clear message
+        // instead of a bare NullPointerException when a value actually needs converting.
+        if (aggregateField == null)
+        {
+            throw new IllegalStateException("Cannot resolve a statistic value because no aggregate field was supplied.");
+        }
+
+        final Object o = aggregateField.getValueFromLuceneField((String) values.iterator().next());
+        if (o == null)
+        {
+            return 0;
+        }
+        if (o instanceof Number)
+        {
+            return (Number) o;
+        }
+        throw new IllegalArgumentException("Value returned for statistic from " + aggregateField.getClass().getName() + " \"" + (o.getClass().getName()) + "\".  Expected \"java.lang.Number\"");
+    }
+}

src/main/java/com/atlassian/lucene/stats/OneDimensionalObjectHitCollector.java

 package com.atlassian.lucene.stats;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.HitCollector;
+import com.atlassian.lucene.stats.cache.ReaderCache;
 
-import java.io.IOException;
+
 import java.util.Collection;
 import java.util.Map;
 
  * of runtime conversion / translation (eg a StatisticsMapper that groups dates by Month, or groups users by email
  * domain name).
  */
-public class OneDimensionalObjectHitCollector extends HitCollector
-{
+public class OneDimensionalObjectHitCollector extends AbstractHitCollector {
     private StatisticsMapper<Object> statisticsMapper;
     private final Map<Object, Integer> result;
-    private Collection<String>[] docToTerms;
+    private Collection<String>[] docToValueTerms;
 
-    public OneDimensionalObjectHitCollector(StatisticsMapper<Object> statisticsMapper, Map<Object, Integer> result, IndexReader indexReader)
+    public OneDimensionalObjectHitCollector(final StatisticsMapper<Object> statisticsMapper, Map<Object, Integer> result, final IndexReader indexReader, final ReaderCache readerCache)
     {
+        super(readerCache, null);
         this.result = result;
         this.statisticsMapper = statisticsMapper;
-        try
-        {
-            docToTerms = JiraLuceneFieldFinder.getInstance().getMatches(indexReader, statisticsMapper.getDocumentConstant());
-        }
-        catch (IOException e)
-        {
-            //ignore
-        }
+        docToValueTerms = getDocToValueTerms(statisticsMapper.getDocumentConstant(),  indexReader);
     }
 
+
     public void collect(int i, float v)
     {
-        adjustMapForValues(result, docToTerms[i]);
+        adjustMapForValues(result, docToValueTerms[i]);
     }
 
     private void adjustMapForValues(Map<Object, Integer> map, Collection<String> terms)
         {
             return;
         }
+
         for (String term : terms)
         {
             Object object = statisticsMapper.getValueFromLuceneField(term);

src/main/java/com/atlassian/lucene/stats/OneDimensionalTermHitCollector.java

-package com.atlassian.lucene.stats;
-
-import com.atlassian.lucene.stats.cache.ReaderCache;
-import com.atlassian.lucene.stats.util.Supplier;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.HitCollector;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Map;
-
-/**
- * A HitCollector that creates a doc -> term mapping.  This is useful for collecting documents where there are a
- * limited number of terms.  The caching also ensures that if multiple searches sort on the same terms, the doc -> term
- * mapping is maintained.
- * <p/>
- * This HitCollector can be quite memory intensive, however the cache is stored with a weak reference, so it will
- * be garbage collected.
- * //todo - fix this!!!  Use everything from TwoDimensionalTermHitCollector
- */
-public class OneDimensionalTermHitCollector extends HitCollector {
-    private Collection<String>[] docToTerms;
-    private final Map<String, Integer> result;
-
-
-//    /**
-//     * Records the number of times the {@link #collect(int, float)} method was called. The method
-//     * should be called once for each issue.
-//     */
-//    private long hitCount = 0;
-
-    public OneDimensionalTermHitCollector(final String fieldId, final Map<String, Integer> result,
-                                          final IndexReader indexReader,
-                                          final ReaderCache readerCache) {
-        this.result = result;
-
-        this.docToTerms = readerCache.get(indexReader, fieldId, new Supplier<Collection<String>[]>() {
-            public Collection<String>[] get() {
-                try {
-                    return JiraLuceneFieldFinder.getInstance().getMatches(indexReader, fieldId);
-                } catch (IOException e) {
-                    throw new RuntimeIOException(e);
-                }
-            }
-        });
-    }
-
-
-    public void collect(int i, float v) {
-//        ++hitCount;
-        Collection<String> terms = docToTerms[i];
-        if (terms == null) {
-            incrementCount(null, result);
-        } else {
-            for (String term : terms) {
-                incrementCount(term, result);
-            }
-        }
-    }
-
-
-//    /**
-//     * Returns the number of times the {@link #collect(int, float)} method was called.
-//     * This should return the number of unique issues that was matched during a search.
-//     * @return number of times the {@link #collect(int, float)} method was called.
-//     */
-//    public long getHitCount()
-//    {
-//        return hitCount;
-//    }
-
-
-    private void incrementCount(final String key, final Map<String, Integer> map) {
-        Integer count = map.get(key);
-
-        if (count == null) {
-            count = 1;
-        } else {
-            count++;
-        }
-        map.put(key, count);
-    }
-
-}

src/main/java/com/atlassian/lucene/stats/TwoDimensionalTermHitCollector.java

 package com.atlassian.lucene.stats;
 
 import com.atlassian.lucene.stats.cache.ReaderCache;
-import com.atlassian.lucene.stats.util.Supplier;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.HitCollector;
 
-import java.io.IOException;
 import java.util.Collection;
 
 /**
  *
  * @since v3.11
  */
-public class TwoDimensionalTermHitCollector extends HitCollector
+public class TwoDimensionalTermHitCollector extends AbstractHitCollector
 {
     private final TwoDimensionalStatsMap statsMap;
-    private final ReaderCache readerCache;
-    private final LuceneFieldSorter aggregateField;
     private final Collection[] docToXTerms;
     private final Collection[] docToYTerms;
     private final Collection[] docToValueTerms;
 
 
-    public TwoDimensionalTermHitCollector(final TwoDimensionalStatsMap statsMap, final IndexReader indexReader,
-            final ReaderCache readerCache)
+    public TwoDimensionalTermHitCollector(final TwoDimensionalStatsMap statsMap, final IndexReader indexReader, final ReaderCache readerCache)
     {
         this(statsMap, indexReader, readerCache, null);
     }
             final ReaderCache readerCache,
             final LuceneFieldSorter aggregateField)
     {
+        super(readerCache, aggregateField);
         this.statsMap = statsMap;
-        this.readerCache = readerCache;
-        this.aggregateField = aggregateField;
         this.docToXTerms = getDocToXTerms(indexReader, statsMap);
         this.docToYTerms = getDocToYTerms(indexReader, statsMap);
         // this will be null if there we are not aggregating by a field
         return value != null ? value.intValue() : 0;
     }
 
-    private Number getValue(Collection values)
-    {
-        if (values == null || values.isEmpty())
-        {
-            return null;
-        }
-        else if (values.size() > 1)
-        {
-            throw new IllegalArgumentException("More than one value stored for statistic \"" + values.toString() + "\".");
-        }
-        else
-        {
-            Object o = aggregateField.getValueFromLuceneField((String) values.iterator().next());
-
-            if (o == null)
-                return 0;
-
-            if (o instanceof Number)
-            {
-                return (Number) o;
-            }
-            else
-            {
-                throw new IllegalArgumentException("Value returned for statistic from " + aggregateField.getClass().getName() + " \"" + (o.getClass().getName()) + "\".  Expected \"java.lang.Number\"");
-            }
-        }
-    }
-
     private Collection[] getDocToXTerms(IndexReader indexReader, TwoDimensionalStatsMap statsMap)
     {
         return getDocToValueTerms(statsMap.getxAxisMapper().getDocumentConstant(), indexReader);
         }
         return null;
     }
-
-    private Collection<String>[] getDocToValueTerms(final String documentConstant, final IndexReader indexReader)
-    {
-        return readerCache.get(indexReader, documentConstant, new Supplier<Collection<String>[]>()
-        {
-            public Collection<String>[] get()
-            {
-                try
-                {
-                    return JiraLuceneFieldFinder.getInstance().getMatches(indexReader, documentConstant);
-                }
-                catch (IOException e)
-                {
-                    throw new RuntimeIOException(e);
-                }
-            }
-        });
-    }
-
-
 }

src/test/java/com/atlassian/lucene/stats/TestOneDimensionalTermHitCollector.java

-package com.atlassian.lucene.stats;
-
-import com.atlassian.lucene.stats.cache.DefaultReaderCache;
-import com.atlassian.lucene.stats.util.EasyList;
-import org.junit.Test;
-import static org.junit.Assert.*;
-import static org.junit.Assert.assertEquals;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Searcher;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class TestOneDimensionalTermHitCollector extends AbstractHitCollectorTestCase
-{
-    private static final String FIELD = "field";
-    private Map result = new HashMap();
-    private OneDimensionalTermHitCollector hitCollector;
-
-    private final Document doc = new Document();
-    private final Document doc2 = new Document();
-    private final Document doc3 = new Document();
-
-    /**
-     * Test that the OneDimensionalTermHitCollector tracks the number of hits
-     * @throws java.io.IOException if unable to index to RAM
-     */
-    @Test
-    public void testOneDimensionalTermHitCollectorHitCount() throws IOException
-    {
-//        index(doc, FIELD, "1");
-//        index(doc, "project", "10000");
-//        index(doc, "issuetype", "1");
-//        index(doc2, FIELD, "22");
-//        index(doc2, "project", "10000");
-//        index(doc2, "issuetype", "1");
-//        index(doc3, FIELD, "333");
-//        index(doc3, "project", "10000");
-//        index(doc3, "issuetype", "1");
-//
-//        List docsList = EasyList.build();
-//        collectStats(docsList);
-//        assertEquals(0, hitCollector.getHitCount());
-//        assertEquals(0, hitCollector.getIrrelevantCount());
-//
-//        docsList = EasyList.build(doc);
-//        collectStats(docsList);
-//        assertEquals(1, hitCollector.getHitCount());
-//        assertEquals(0, hitCollector.getIrrelevantCount());
-//
-//        docsList = EasyList.build(doc, doc2);
-//        collectStats(docsList);
-//        assertEquals(2, hitCollector.getHitCount());
-//        assertEquals(0, hitCollector.getIrrelevantCount());
-//
-//        docsList = EasyList.build(doc, doc2, doc3);
-//        collectStats(docsList);
-//        assertEquals(3, hitCollector.getHitCount());
-//        assertEquals(0, hitCollector.getIrrelevantCount());
-//
-//        docsList = EasyList.build(doc, doc, doc2, doc3, doc, doc3);
-//        collectStats(docsList);
-//        assertEquals(6, hitCollector.getHitCount());
-//        assertEquals(0, hitCollector.getIrrelevantCount());
-    }
-
-
-
-    private void collectStats(Collection docs) throws IOException
-    {
-        final IndexReader reader = writeToRAM(docs);
-
-        hitCollector = new OneDimensionalTermHitCollector(FIELD, result, reader, new DefaultReaderCache());
-
-        Searcher searcher = new IndexSearcher(reader);
-        Query query = new MatchAllDocsQuery();
-        searcher.search(query, hitCollector);
-    }
-
-
-
-}

src/test/java/com/atlassian/lucene/stats/TestOneObjectTermHitCollector.java

+package com.atlassian.lucene.stats;
+
+import com.atlassian.lucene.stats.cache.DefaultReaderCache;
+import com.atlassian.lucene.stats.util.EasyList;
+import org.junit.Test;
+import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Searcher;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Placeholder test case for {@link OneDimensionalObjectHitCollector}.
+ * <p/>
+ * All test logic in this class is currently commented out, so the single test method passes without
+ * asserting anything.
+ */
+public class TestOneObjectTermHitCollector extends AbstractHitCollectorTestCase
+{
+    private static final String FIELD = "field";
+    private Map result = new HashMap();
+    private OneDimensionalObjectHitCollector hitCollector;
+
+    private final Document doc = new Document();
+    private final Document doc2 = new Document();
+    private final Document doc3 = new Document();
+
+    /**
+     * Intended to check that the hit collector tracks the number of hits.
+     * <p/>
+     * The body is entirely commented out, so this test currently verifies nothing.
+     * NOTE(review): the disabled assertions call getHitCount() and getIrrelevantCount(); confirm that
+     * OneDimensionalObjectHitCollector offers these before re-enabling.
+     *
+     * @throws java.io.IOException if unable to index to RAM
+     */
+    @Test
+    public void testOneDimensionalTermHitCollectorHitCount() throws IOException
+    {
+//        index(doc, FIELD, "1");
+//        index(doc, "project", "10000");
+//        index(doc, "issuetype", "1");
+//        index(doc2, FIELD, "22");
+//        index(doc2, "project", "10000");
+//        index(doc2, "issuetype", "1");
+//        index(doc3, FIELD, "333");
+//        index(doc3, "project", "10000");
+//        index(doc3, "issuetype", "1");
+//
+//        List docsList = EasyList.build();
+//        collectStats(docsList);
+//        assertEquals(0, hitCollector.getHitCount());
+//        assertEquals(0, hitCollector.getIrrelevantCount());
+//
+//        docsList = EasyList.build(doc);
+//        collectStats(docsList);
+//        assertEquals(1, hitCollector.getHitCount());
+//        assertEquals(0, hitCollector.getIrrelevantCount());
+//
+//        docsList = EasyList.build(doc, doc2);
+//        collectStats(docsList);
+//        assertEquals(2, hitCollector.getHitCount());
+//        assertEquals(0, hitCollector.getIrrelevantCount());
+//
+//        docsList = EasyList.build(doc, doc2, doc3);
+//        collectStats(docsList);
+//        assertEquals(3, hitCollector.getHitCount());
+//        assertEquals(0, hitCollector.getIrrelevantCount());
+//
+//        docsList = EasyList.build(doc, doc, doc2, doc3, doc, doc3);
+//        collectStats(docsList);
+//        assertEquals(6, hitCollector.getHitCount());
+//        assertEquals(0, hitCollector.getIrrelevantCount());
+    }
+
+
+
+    /**
+     * Intended to write the given documents to an in-memory index and run a match-all search through the
+     * hit collector. Currently a no-op because the body is commented out.
+     * NOTE(review): the disabled constructor call passes FIELD (a String) as its first argument - verify it
+     * against the current OneDimensionalObjectHitCollector constructor before re-enabling.
+     *
+     * @param docs documents that would be indexed
+     * @throws IOException declared for the disabled indexing/search code
+     */
+    private void collectStats(Collection docs) throws IOException
+    {
+//        final IndexReader reader = writeToRAM(docs);
+//
+//        hitCollector = new OneDimensionalObjectHitCollector(FIELD, result, reader, new DefaultReaderCache());
+//
+//        Searcher searcher = new IndexSearcher(reader);
+//        Query query = new MatchAllDocsQuery();
+//        searcher.search(query, hitCollector);
+    }
+
+
+
+}