Commits

Anonymous committed bd2e9c4
  • Participants
  • Parent commits aabc33f

Comments (0)

Files changed (9)

File encuestame-business/src/main/java/org/encuestame/business/search/ReIndexAttachmentsJob.java

+/*
+ ************************************************************************************
+ * Copyright (C) 2001-2011 encuestame: system online surveys Copyright (C) 2009
+ * encuestame Development Team.
+ * Licensed under the Apache Software License version 2.0
+ * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to  in writing,  software  distributed
+ * under the License is distributed  on  an  "AS IS"  BASIS,  WITHOUT  WARRANTIES  OR
+ * CONDITIONS OF ANY KIND, either  express  or  implied.  See  the  License  for  the
+ * specific language governing permissions and limitations under the License.
+ ************************************************************************************
+ */
 package org.encuestame.business.search;
 
 import java.io.File;
 import java.util.ArrayList;
 import java.util.List;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.encuestame.business.setup.DirectorySetupOperations;
-import org.encuestame.core.cron.IndexRebuilder;
-import org.encuestame.core.cron.ReIndexJob;
 import org.encuestame.search.IndexerManager;
 
+/**
+ * ReIndex Attachments Job.
+ * @author Morales, Diana Paola paolaATencuestame.org
+ * @since Mar 23, 2011
+ */
 public class ReIndexAttachmentsJob {
 
     /**

File encuestame-business/src/main/java/org/encuestame/business/service/SearchService.java

-/*
- ************************************************************************************
- * Copyright (C) 2001-2009 encuestame: system online surveys Copyright (C) 2009
- * encuestame Development Team.
- * Licensed under the Apache Software License version 2.0
- * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to  in writing,  software  distributed
- * under the License is distributed  on  an  "AS IS"  BASIS,  WITHOUT  WARRANTIES  OR
- * CONDITIONS OF ANY KIND, either  express  or  implied.  See  the  License  for  the
- * specific language governing permissions and limitations under the License.
- ************************************************************************************
- */
-package org.encuestame.business.service;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Searcher;
-import org.apache.lucene.search.TopScoreDocCollector;
-import org.apache.lucene.util.Version;
-import org.encuestame.business.service.imp.ISearchService;
-import org.encuestame.persistence.domain.survey.Poll;
-import org.encuestame.search.utils.Search;
-
-/**
- * Search Service.
- * @author Morales, Diana Paola paola AT encuestame.org
- * @since February 09, 2011
- * @version $Id$
- */
-public class SearchService extends AbstractIndexService  implements ISearchService {
-
-    private Search search;
-
-    public void indexDocument(final String indexDirPath, final String indexDirStore) throws IOException{
-        search.getIndexer(indexDirPath);
-
-    }
-
-    public void searchContent(final String dirPath){
-
-    }
-
-    public void IndexPoll(Poll poll, String path) throws IOException{
-
-    }
-
-    /**************************************************************************/
-
-
-
-    public List<Poll> searchPolls(String searchString, String path, String fieldName,
-            int results) throws IOException, ParseException {
-        List<Poll> pollList = new ArrayList<Poll>();
-        //IndexReader reader = IndexReader.open(path, true);
-        //IndexReader reader = IndexReader.open(path);
-        //Searcher searcher = new IndexSearcher(reader);
-        Searcher searcher = null;
-        QueryParser qp = new QueryParser(Version.LUCENE_29, fieldName,
-                new StandardAnalyzer(Version.LUCENE_29));
-        Query query = qp.parse(searchString);
-        ScoreDoc[] docs = searcher.search(query, results).scoreDocs;
-
-        for (int i = 0; i < docs.length; i++) {
-            Document doc = searcher.doc(docs[i].doc);
-            Poll articlePoll = new Poll();
-            articlePoll.setPollId(1L);
-            articlePoll.setName(doc.getField("TITLE").stringValue());
-            pollList.add(articlePoll);
-        }
-        return pollList;
-    }
-
-
-    public Map<String, Object> searchPollPaginateResults(String searchString,
-            String path, String fieldName, int page, int results)
-            throws IOException, ParseException {
-        Map<String, Object> map = new HashMap<String, Object>();
-        List<Poll> articles = new ArrayList<Poll>();
-        //Searcher searcher = new IndexSearcher(IndexReader.open(path));
-        //IndexReader reader = IndexReader.open(path);
-        //Searcher searcher = new IndexSearcher(reader);
-        Searcher searcher = null;
-        QueryParser qp = new QueryParser(Version.LUCENE_29, fieldName,
-                new StandardAnalyzer(Version.LUCENE_29));
-        Query query = qp.parse(searchString);
-        TopScoreDocCollector collector = TopScoreDocCollector.create(page * results, true);
-        searcher.search(query, collector);
-        ScoreDoc[] docs = collector.topDocs().scoreDocs;
-        map.put("resultados", collector.getTotalHits());
-        map.put("articulos", articles);
-        int startResult = (page - 1) * results;
-        if (startResult > docs.length) {
-            return map;
-        }
-        int end = Math.min(docs.length, startResult + results);
-
-        for (int i = startResult; i < end; i++) {
-            Document doc = searcher.doc(docs[i].doc);
-            Poll article = new Poll();
-            article.setPollId(1L);
-            article.setName(doc.getField("POLLNAME").stringValue());
-            articles.add(article);
-        }
-        return map;
-    }
-
-
-    public List<Poll> searchArticle(String searchString, String path, String fieldName,
-            int results) throws IOException, ParseException {
-        List<Poll> articles = new ArrayList<Poll>();
-        //IndexReader reader = IndexReader.open(path);
-        //Searcher searcher = new IndexSearcher(reader);
-        Searcher searcher = null;
-        QueryParser qp = new QueryParser(Version.LUCENE_29, fieldName,
-                new StandardAnalyzer(Version.LUCENE_29));
-        Query query = qp.parse(searchString);
-        ScoreDoc[] docs = searcher.search(query, results).scoreDocs;
-
-        for (int i = 0; i < docs.length; i++) {
-            Document doc = searcher.doc(docs[i].doc);
-            Poll article = new Poll();
-            article.setPollId(1L);
-            article.setName((doc.getField("POLLNAME").stringValue()));
-            articles.add(article);
-        }
-        return articles;
-    }
-
-    public void searchArticleWithPagination(String path)
-        throws IOException, ParseException {
-        SearchService searcher = new SearchService();
-        Map<String, Object> map = searcher.searchPollPaginateResults("articulo", path, "POLLNAME", 1, 2);
-        List<Poll> articles = (List<Poll>) map.get("articulos");
-        System.out.println("Búsqueda finalizada, resultados: " + articles.size() + " de " + map.get("resultados"));
-        for (Poll articlePoll : articles) {
-        System.out.println("ID " + articlePoll.getPollId());
-        System.out.println("NAME POLL " + articlePoll.getName());
-        }
-    }
-
-
-    private Document generateDocumentFromPoll(Poll poll) {
-        Document doc = new Document();
-        doc.add(new Field("ID", String.valueOf(poll.getPollId()),
-                Field.Store.YES, Field.Index.NO));
-        doc.add(new Field("POLLNAME", poll.getName(),
-                Field.Store.YES, Field.Index.ANALYZED));
-        return doc;
-    }
-
-    public void indexPoll(Poll poll, String path) throws IOException {
-        IndexWriter writer = search.getIndexer(path);
-         try {
-            Document doc = generateDocumentFromPoll(poll);
-            writer.addDocument(doc);
-            writer.commit();
-            writer.optimize();
-        } finally {
-            writer.close();
-        }
-    }
-
-
-}

File encuestame-search/src/main/java/org/encuestame/search/IndexerManager.java

+/*
+ ************************************************************************************
+ * Copyright (C) 2001-2011 encuestame: system online surveys Copyright (C) 2009
+ * encuestame Development Team.
+ * Licensed under the Apache Software License version 2.0
+ * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to  in writing,  software  distributed
+ * under the License is distributed  on  an  "AS IS"  BASIS,  WITHOUT  WARRANTIES  OR
+ * CONDITIONS OF ANY KIND, either  express  or  implied.  See  the  License  for  the
+ * specific language governing permissions and limitations under the License.
+ ************************************************************************************
+ */
 package org.encuestame.search;
 
 import java.io.File;
-import java.io.FileFilter;
-import java.io.FileReader;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.Version;
-
+import org.apache.poi.POIXMLException;
+import org.encuestame.search.utils.SearchUtils;
 
 /**
- * Directory Operations.
- * @author Morales Diana, Juan juanATencuestame.org
- * @since Mar 20, 2011
+ * Indexer Manager.
+ * @author Morales, Diana Paola paolaATencuestame.org
+ * @since Mar 23, 2011
  */
 public class IndexerManager {
 
-
+    /** Log. **/
     private static final Log log = LogFactory.getLog(IndexerManager.class);
 
+    /** Directory file list to index. **/
     private List<File> directoriesToIndex = new ArrayList<File>();
 
+    /** Directory index path. **/
     private String indexesLocation;
+
+    /** Index writer. **/
     private IndexWriter indexWriter;
 
     /**
-    *
+    * Indexer files.
     * @param files
     * @throws Exception
     */
     }
 
     /**
+     * Initialize index process.
+     * Opens the index writer once, indexes every directory registered in
+     * directoriesToIndex and closes the writer exactly once at the end.
+     * The writer is closed after the whole loop rather than after each
+     * directory, so later directories are not indexed against a closed writer,
+     * and it is also released when there is nothing to index or indexing fails.
+     * @throws Exception if the writer cannot be opened or a directory cannot be indexed
+     */
+     public void initializeIndex() throws Exception {
+         log.debug("Initialize");
+         this.startIndexWriter();
+         try {
+             for (File file : this.directoriesToIndex) {
+                 final long start = System.currentTimeMillis();
+                 // index() returns the writer's document count after this directory.
+                 final int numIndexed = this.index(file);
+                 final long end = System.currentTimeMillis();
+                 log.debug("Indexing " + numIndexed + " files took " + (end - start)
+                         + " milliseconds");
+             }
+         } finally {
+             // Close once for the whole run, including the empty-list and failure paths.
+             this.close();
+         }
+     }
+
+    /**
+    * Index Writer.
     * @throws IOException
     * @throws LockObtainFailedException
     * @throws CorruptIndexException
         final Directory dir = FSDirectory.open(new java.io.File(
                 this.indexesLocation));
         this.indexWriter = new IndexWriter(dir, new StandardAnalyzer(
-                Version.LUCENE_29), true, IndexWriter.MaxFieldLength.UNLIMITED);
+                Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
     }
 
     /**
-    *
-    * @param dataDir
-    * @param filter
-    * @return
-    * @throws Exception
-    */
-    public int index(final File dataDir, FileFilter filter) throws Exception {
-        log.debug("Index file is directory: "+dataDir.isDirectory());
+     * Read Files in Attachment Directory.
+     * @param dataDir Attachment Directory
+     * @return number of documents held by the index writer after indexing this directory
+     * @throws Exception
+     */
+    public int index(final File dataDir) throws Exception {
+        log.debug("Index file is directory: " + dataDir.isDirectory());
         File[] files = dataDir.listFiles();
         for (File f : files) {
-            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
-                    && (filter == null || filter.accept(f))) {
-                this.indexFile(f);
+            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()) {
+                indexFile(f); // Write documents in Index
             }
         }
         return this.indexWriter.numDocs();
     }
 
-    /**
-    *
-    * @param f
-    * @return
-    * @throws Exception
-    */
-    protected Document getDocument(File f) throws Exception {
-        Document doc = new Document();
-        doc.add(new Field("contents", new FileReader(f))); // Index file
-                                                            // contents
-        doc.add(new Field("filename", f.getName(), Field.Store.YES,
-                Field.Index.NOT_ANALYZED)); // Index File Name.
-        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES,
-                Field.Index.NOT_ANALYZED)); // Index Full Path.
-        return doc;
-    }
 
-    /**
-    *
-    * @param f
+   /**
+    * Retrieve Document to Index.
+    * @param file {@link File}
+    * @param ext Filename extension
+    * @return {@link Document}
+    * @throws POIXMLException
     * @throws Exception
     */
-    private void indexFile(File f) throws Exception {
-        log.debug("Indexing " + f.getCanonicalPath());
-        Document doc = getDocument(f);
-        log.debug("Adding document..."+doc);
-        this.indexWriter.addDocument(doc); // Add Document to Lucene Index.
+    public Document getDocument(final File file, final String ext)
+            throws POIXMLException, Exception {
+        log.debug("get Document extension " + ext);
+        // Match the extension case-insensitively so that "REPORT.PDF" is parsed
+        // like "report.pdf" instead of being silently skipped.
+        if ("docx".equalsIgnoreCase(ext)) {
+            return SearchUtils.createWordDocument(file);
+        }
+        if ("xls".equalsIgnoreCase(ext)) {
+            return SearchUtils.createSpreadsheetsDocument(file);
+        }
+        if ("pdf".equalsIgnoreCase(ext)) {
+            return SearchUtils.createPdfDocument(file);
+        }
+        if ("txt".equalsIgnoreCase(ext)) {
+            return SearchUtils.createTextDocument(file);
+        }
+        // Unsupported or null extension: return null, which indexFile reports
+        // with a warning and does not add to the index.
+        return null;
     }
 
     /**
-    * Initialize index process.
-    *
+    * Adding Document to Index Directory.
+    * @param file {@link File}
     * @throws Exception
     */
-    public void initializeIndex() throws Exception {
-        log.debug("Initialize");
-        this.startIndexWriter();
-        for (File file : this.directoriesToIndex) {
-            long start = System.currentTimeMillis();
-            int numIndexed;
-            try {
-                numIndexed = this.index(file, new TextFilesFilter());
-            } finally {
-                this.close();
-            }
-            long end = System.currentTimeMillis();
-            log.debug("Indexing " + numIndexed + " files took "
-                    + (end - start) + " milliseconds");
+    private void indexFile(final File file) throws Exception {
+        log.debug("Indexing " + file.getCanonicalPath());
+        // Pick the parser from the extension of the file name.
+        final String extension = SearchUtils.getExtension(file.getName());
+        final Document document = this.getDocument(file, extension);
+        log.debug("Adding document..." + document);
+        if (document != null) {
+            // Add Document to Lucene Index.
+            this.indexWriter.addDocument(document);
+        } else {
+            // No document was produced for this file: report it and skip it.
+            log.warn("Document is null for this file: "+file.getAbsolutePath());
         }
     }
 
     /**
     * Close Lucene IndexWriter.
-    *
     * @throws IOException
     */
     public void close() throws IOException {
         this.indexWriter.close();
     }
-
-    private static class TextFilesFilter implements FileFilter {
-        public boolean accept(File path) {
-            return path.getName().toLowerCase().endsWith(".txt");
-        }
-    }
 }

File encuestame-search/src/main/java/org/encuestame/search/main/PollIndexer.java

-/*
- ************************************************************************************
- * Copyright (C) 2001-2009 encuestame: system online surveys Copyright (C) 2009
- * encuestame Development Team.
- * Licensed under the Apache Software License version 2.0
- * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to  in writing,  software  distributed
- * under the License is distributed  on  an  "AS IS"  BASIS,  WITHOUT  WARRANTIES  OR
- * CONDITIONS OF ANY KIND, either  express  or  implied.  See  the  License  for  the
- * specific language governing permissions and limitations under the License.
- ************************************************************************************
- */
-package org.encuestame.search.main;
-
-import java.io.IOException;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexWriter;
-import org.encuestame.persistence.domain.survey.Poll;
-import org.encuestame.search.utils.Search;
-
-/**
- * Poll Indexer.
- * @author Morales, Diana Paola paola AT encuestame.org
- * @since February 23, 2011
- * @version $Id$
- */
-public class PollIndexer {
-    private Search search;
-
-
-    private Document generateDocumentFromPoll(Poll poll) {
-        Document doc = new Document();
-        doc.add(new Field("ID", String.valueOf(poll.getPollId()),
-                Field.Store.YES, Field.Index.NO));
-        doc.add(new Field("POLLNAME", poll.getName(),
-                Field.Store.YES, Field.Index.ANALYZED));
-        return doc;
-    }
-
-    public void indexPoll(Poll poll, String path) throws IOException {
-        IndexWriter writer = search.getIndexer(path);
-         try {
-            Document doc = generateDocumentFromPoll(poll);
-            writer.addDocument(doc);
-            writer.commit();
-            writer.optimize();
-        } finally {
-            writer.close();
-        }
-    }
-
-
-}

File encuestame-search/src/main/java/org/encuestame/search/utils/DocumentFilesFilter.java

+/*
+ ************************************************************************************
+ * Copyright (C) 2001-2011 encuestame: system online surveys Copyright (C) 2009
+ * encuestame Development Team.
+ * Licensed under the Apache Software License version 2.0
+ * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to  in writing,  software  distributed
+ * under the License is distributed  on  an  "AS IS"  BASIS,  WITHOUT  WARRANTIES  OR
+ * CONDITIONS OF ANY KIND, either  express  or  implied.  See  the  License  for  the
+ * specific language governing permissions and limitations under the License.
+ ************************************************************************************
+ */
+package org.encuestame.search.utils;
+
+import java.io.File;
+import java.io.FileFilter;
+
+/**
+ * Document Files Filter.
+ * @author Morales, Diana Paola paolaATencuestame.org
+ * @since Mar 23, 2011
+ */
+public class DocumentFilesFilter implements FileFilter {
+
+    /**
+     * Accept only files whose lower-cased name ends with ".txt".
+     * @param path candidate file
+     * @return true when the lower-cased file name ends with ".txt"
+     */
+    public boolean accept(final File path) {
+        final String lowerCaseName = path.getName().toLowerCase();
+        return lowerCaseName.endsWith(".txt");
+    }
+}

File encuestame-search/src/main/java/org/encuestame/search/utils/DocumentType.java

-/*
- ************************************************************************************
- * Copyright (C) 2001-2009 encuestame: system online surveys Copyright (C) 2009
- * encuestame Development Team.
- * Licensed under the Apache Software License version 2.0
- * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to  in writing,  software  distributed
- * under the License is distributed  on  an  "AS IS"  BASIS,  WITHOUT  WARRANTIES  OR
- * CONDITIONS OF ANY KIND, either  express  or  implied.  See  the  License  for  the
- * specific language governing permissions and limitations under the License.
- ************************************************************************************
- */
-package org.encuestame.search.utils;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.poi.POIXMLException;
-import org.apache.poi.hssf.usermodel.HSSFCell;
-import org.apache.poi.hssf.usermodel.HSSFRichTextString;
-import org.apache.poi.hssf.usermodel.HSSFRow;
-import org.apache.poi.hssf.usermodel.HSSFSheet;
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.ss.usermodel.Cell;
-import org.apache.poi.ss.usermodel.Row;
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
-import org.pdfbox.cos.COSDocument;
-import org.pdfbox.pdfparser.PDFParser;
-import org.pdfbox.pdmodel.PDDocument;
-import org.pdfbox.pdmodel.PDDocumentInformation;
-import org.pdfbox.util.PDFTextStripper;
-
-/**
- * Document Type.
- * @author Morales, Diana Paola paola AT encuestame.org
- * @since February 16, 2011
- * @version $Id$
- */
-public class DocumentType {
-
-
-    /**
-     * Parse Spreadsheets.
-     * @param file
-     * @return
-     * @throws FileNotFoundException
-     */
-
-    public Document parseSpreadsheetsDocument(File file) throws Exception{
-        InputStream is = new FileInputStream (file);
-        StringBuilder contents = new StringBuilder();
-        POIFSFileSystem fileSystem = new POIFSFileSystem(is);
-        HSSFWorkbook workBook = new HSSFWorkbook(fileSystem);
-        for (int i =0; i<workBook.getNumberOfSheets();i++) {
-           HSSFSheet sheet = workBook.getSheetAt(i);
-           Iterator<Row> rows = sheet.rowIterator();
-           while (rows.hasNext()) {
-            HSSFRow row = (HSSFRow)rows.next();
-            //Display the row number
-            System.out.println(row.getRowNum());
-            Iterator<Cell> cells=row.cellIterator();
-            while(cells.hasNext()) {
-                HSSFCell cell = (HSSFCell) cells.next();
-                //Display the cell number of the current Row
-
-                switch(cell.getCellType()) {
-                    case HSSFCell.CELL_TYPE_NUMERIC : {
-                        System.out.println(String.valueOf(cell.getNumericCellValue()));
-                        contents.append(String.valueOf(cell.getNumericCellValue())).append(" ");
-                    break;
-                    }
-
-                    case HSSFCell.CELL_TYPE_STRING : {
-                        HSSFRichTextString richTextString =cell.getRichStringCellValue();
-                        System.out.println(richTextString.toString());
-                        contents.append(richTextString.toString()).append(" ");
-                        break;
-                    }
-
-                    case HSSFCell.CELL_TYPE_BOOLEAN : {
-                        contents.append(String.valueOf(cell.getBooleanCellValue())).append(" ");
-                    break;
-                    }
-                }
-            }
-        }
-       }
-       Document doc = new Document();
-       doc.add(new Field("content",contents.toString(),Field.Store.YES,Field.Index.ANALYZED));
-       System.out.println(contents.toString());
-       return doc;
-       }
-
-    /**
-     * Parse PDF Document.
-     * @param file
-     * @return
-     * @throws Exception
-     */
-    public Document parsePDFDocument(File file) throws Exception {
-        InputStream is = new FileInputStream (file);
-        COSDocument cosDoc = null;
-        cosDoc = parseDocument(is);
-        PDDocument pdDoc = new PDDocument(cosDoc);
-        String docText = null;
-        PDFTextStripper stripper = new PDFTextStripper();
-        docText = stripper.getText(pdDoc);
-        Document doc = new Document();
-        if (StringUtils.isNotEmpty(docText)) {
-        doc.add(new Field("content", docText,Field.Store.NO,Field.Index.ANALYZED));
-        }
-        // extract PDF document's meta-data
-
-        PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
-        String author = docInfo.getAuthor();
-        String title = docInfo.getTitle();
-        String keywords = docInfo.getKeywords();
-        String summary = docInfo.getSubject();
-        if (StringUtils.isNotEmpty(author)) {
-
-        doc.add(new Field("author", author,Field.Store.YES,Field.Index.NOT_ANALYZED));
-        }
-        if (StringUtils.isNotEmpty(title)) {
-        doc.add(new Field("title", title ,Field.Store.YES,Field.Index.ANALYZED));
-        }
-        if (StringUtils.isNotEmpty(keywords)) {
-
-        doc.add(new Field("keywords", keywords, Field.Store.YES,Field.Index.ANALYZED));
-        }
-        if (StringUtils.isNotEmpty(summary)) {
-        doc.add(new Field("summary", summary,Field.Store.YES,Field.Index.ANALYZED));
-        }
-
-        return doc;
-        }
-
-        private static COSDocument parseDocument(InputStream is) throws IOException {
-        PDFParser parser= null;
-        parser = new PDFParser(is);
-
-        parser.parse();
-        return parser.getDocument();
-        }
-
-    /**
-     * Parse D
-     * @param file
-     * @return
-     * @throws POIXMLException
-     * @throws Exception
-     */
-    public Document parseDocumentWord(File file) throws POIXMLException, Exception{
-        InputStream is = new FileInputStream (file);
-        String bodyText = null;
-        StringBuilder content = new StringBuilder();
-        try {
-            XWPFDocument wd = new XWPFDocument(is);
-            XWPFWordExtractor wde = new XWPFWordExtractor(wd);
-            bodyText = wde.getText();
-        }catch(Exception e) {
-            e.printStackTrace();
-        }
-        Document doc = new Document();
-        if(!bodyText.equals("") && bodyText != null) {
-            doc.add(new Field("content",bodyText,Field.Store.NO,Field.Index.ANALYZED));
-        }
-        return doc;
-    }
-
-}

File encuestame-search/src/main/java/org/encuestame/search/utils/Search.java

-/*
- ************************************************************************************
- * Copyright (C) 2001-2009 encuestame: system online surveys Copyright (C) 2009
- * encuestame Development Team.
- * Licensed under the Apache Software License version 2.0
- * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to  in writing,  software  distributed
- * under the License is distributed  on  an  "AS IS"  BASIS,  WITHOUT  WARRANTIES  OR
- * CONDITIONS OF ANY KIND, either  express  or  implied.  See  the  License  for  the
- * specific language governing permissions and limitations under the License.
- ************************************************************************************
- */
-package org.encuestame.search.utils;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.Version;
-import org.apache.poi.POIXMLException;
-
-/**
- * Search Service.
- * @author Morales, Diana Paola paola AT encuestame.org
- * @since February 09, 2011
- * @version $Id$
- */
-public class Search {
-
-    /**
-    * {@link IndexWriter} write in the Lucene Index.
-    */
-    private IndexWriter writer;
-
-    private DocumentType docType;
-
-    private Analyzer analyzer;
-
-    /**
-     * Get Index Directory.
-     * @param indexDirPath Index Directory Path.
-     * @throws IOException
-     */
-    public IndexWriter getIndexer(String indexDirPath) throws IOException{
-        Directory dir = FSDirectory.open(new File(indexDirPath));
-        this.writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29) ,
-                true, IndexWriter.MaxFieldLength.UNLIMITED);
-        return writer;
-    }
-
-    /**
-     * Read Files in Index Store.
-     * @param dataDir Index Store path.
-     * @param filter
-     * @return Num Documents read and indexed
-     * @throws Exception
-     */
-    public int index(String dataDir) throws Exception {
-        File[] files = new File(dataDir).listFiles();
-        for (File f : files) {
-            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()) {
-                indexFile(f);
-            }
-        }
-        return writer.numDocs();
-
-    }
-
-    private void indexFile(File f) throws Exception {
-        System.out.println("Indexing " + f.getCanonicalPath());
-     //   Document doc = getDocument(f);
-      //  writer.addDocument(doc); // Add Document to Lucene Index.
-    }
-
-    /**
-     * Get Filename extension.
-     * @param path FilePath.
-     * @return filename extension
-     */
-    public String getExtension(String path) {
-            final String ext = path.substring(path.lastIndexOf('.')+1);
-            System.out.println("PATH ------> "+ path);
-            System.out.println("EXT ------> "+ ext);
-          return ext;
-      }
-
-    /**
-     * Get Document to Index.
-     * @param file
-     * @param ext
-     * @return
-     * @throws POIXMLException
-     * @throws Exception
-     */
-    public Document getDocument(File file, String ext) throws POIXMLException, Exception {
-        Document doc = null;
-        if("docx".equals(ext)){
-            doc = docType.parseDocumentWord(file);
-        }
-        else if ("xls".equals(ext)){
-            doc = docType.parseSpreadsheetsDocument(file);
-        }
-        else if ("pdf".equals(ext)){
-            doc = docType.parsePDFDocument(file);
-        } else {
-            throw new IllegalArgumentException("not type defined");
-        }
-        return doc;
-
-    }
-
-    public void searchContent(String indexDir, String queryString, int valueHits) throws IOException,
-                        ParseException { // Queries the "content" field of the index stored at indexDir and prints the "fullpath" of each hit.
-        Directory dir = FSDirectory.open(new File(indexDir)); // Open Index
-        IndexSearcher is = new IndexSearcher(dir, true); // second argument true: opened read-only
-        QueryParser parser = new QueryParser(Version.LUCENE_29, "content",
-                            new StandardAnalyzer(Version.LUCENE_29));
-        Query query = parser.parse(queryString); // Parse Query
-        TopDocs hits = is.search(query, valueHits); // Search Index, valueHits caps the number of results
-
-        for (ScoreDoc scoreDoc : hits.scoreDocs) {
-            Document doc = is.doc(scoreDoc.doc); // Retrieving matching document.
-            System.out.println(doc.get("fullpath"));
-        }
-        is.close(); // Close IndexSearcher. NOTE(review): not reached when parse or search throws.
-        }
-
-
-    private Analyzer getAnalyzer() { // Builds a new StandardAnalyzer configured with Version.LUCENE_29 on every call.
-        return new StandardAnalyzer(Version.LUCENE_29);
-    }
-
-
-    /**
-     * Index Writer Close.
-     * Delegates to the close method of the writer field.
-     * @throws IOException propagated from the writer when closing fails
-     */
-    public void close() throws IOException {
-        writer.close();
-    }
-
-    /**
-    * Get the index writer held by this instance.
-    * @return the writer
-    */
-    public IndexWriter getWriter() {
-        return writer;
-    }
-
-    /**
-    * Replace the index writer held by this instance.
-    * @param writer the writer to set
-    */
-    public void setWriter(IndexWriter writer) {
-        this.writer = writer;
-    }
-
-    /**
-    * Replace the analyzer held by this instance.
-    * @param analyzer the analyzer to set
-    */
-    public void setAnalyzer(Analyzer analyzer) {
-        this.analyzer = analyzer;
-    }
-}

File encuestame-search/src/main/java/org/encuestame/search/utils/SearchUtils.java

+/*
+ ************************************************************************************
+ * Copyright (C) 2001-2011 encuestame: system online surveys Copyright (C) 2009
+ * encuestame Development Team.
+ * Licensed under the Apache Software License version 2.0
+ * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to  in writing,  software  distributed
+ * under the License is distributed  on  an  "AS IS"  BASIS,  WITHOUT  WARRANTIES  OR
+ * CONDITIONS OF ANY KIND, either  express  or  implied.  See  the  License  for  the
+ * specific language governing permissions and limitations under the License.
+ ************************************************************************************
+ */
+package org.encuestame.search.utils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.poi.POIXMLException;
+import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFRichTextString;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.pdfbox.cos.COSDocument;
+import org.pdfbox.pdfparser.PDFParser;
+import org.pdfbox.pdmodel.PDDocument;
+import org.pdfbox.pdmodel.PDDocumentInformation;
+import org.pdfbox.util.PDFTextStripper;
+
+/**
+ * Search Utils.
+ * @author Morales, Diana Paola paolaATencuestame.org
+ * @since Mar 23, 2011
+ */
+public class SearchUtils {
+
+    /**
+    * Log
+    */
+    private static final Log log = LogFactory.getLog(SearchUtils.class);
+
+    /**
+     * Get the filename extension of a path.
+     * <p>
+     * Only the last path segment is inspected, so a dot inside a directory name
+     * is never mistaken for an extension separator.
+     * @param path full name (or full path) of the file, must not be null
+     * @return the text after the last '.' of the file name, or an empty string
+     *         when the file name contains no '.'
+     */
+    public static String getExtension(final String path) {
+        // Accept both separator styles, the path may come from any platform.
+        final int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
+        final int lastDot = path.lastIndexOf('.');
+        // A dot located before the last separator belongs to a directory name.
+        final String ext = lastDot > lastSeparator ? path.substring(lastDot + 1) : "";
+        log.debug("Path file " + path);
+        log.debug("Ext file " + ext);
+        return ext;
+    }
+
+    /**
+     * Parse the raw content of a PDF.
+     * @param is stream holding the PDF content
+     * @return the {@link COSDocument} produced by the parser
+     * @throws IOException declared by the parser calls
+     */
+    public static COSDocument parseDocument(final InputStream is) throws IOException {
+        final PDFParser pdfParser = new PDFParser(is);
+        pdfParser.parse();
+        return pdfParser.getDocument();
+    }
+
+    /**
+     * Create a Lucene document from a PDF file.
+     * <p>
+     * When the PDF yields text, the fields "content", "fullpath" and "filename"
+     * are added. The meta-data fields "author", "title", "keywords" and
+     * "summary" are added whenever the corresponding value is not empty.
+     * The file stream and the PDF document are always closed before returning.
+     * @param file {@link File} PDF file to read
+     * @return {@link Document} holding the fields described above
+     * @throws Exception if the file cannot be opened, parsed or closed
+     */
+    public static Document createPdfDocument(final File file) throws Exception {
+        final Document doc = new Document();
+        final InputStream is = new FileInputStream(file);
+        try {
+            final PDDocument pdDoc = new PDDocument(parseDocument(is));
+            try {
+                final String docText = new PDFTextStripper().getText(pdDoc);
+                log.debug("PDF Doc Text "+docText.length());
+                if (StringUtils.isNotEmpty(docText)) {
+                    doc.add(new Field("content", docText, Field.Store.NO,
+                            Field.Index.ANALYZED));
+                    // Index Full Path.
+                    doc.add(new Field("fullpath", file.getCanonicalPath(),
+                            Field.Store.YES, Field.Index.NOT_ANALYZED));
+                    doc.add(new Field("filename", file.getName(), Field.Store.YES,
+                            Field.Index.NOT_ANALYZED));
+                }
+                // Extract the PDF meta-data while the document is still open.
+                final PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
+                final String author = docInfo.getAuthor();
+                final String title = docInfo.getTitle();
+                final String keywords = docInfo.getKeywords();
+                final String summary = docInfo.getSubject();
+                if (StringUtils.isNotEmpty(author)) {
+                    doc.add(new Field("author", author, Field.Store.YES,
+                            Field.Index.NOT_ANALYZED));
+                }
+                if (StringUtils.isNotEmpty(title)) {
+                    doc.add(new Field("title", title, Field.Store.YES,
+                            Field.Index.ANALYZED));
+                }
+                if (StringUtils.isNotEmpty(keywords)) {
+                    doc.add(new Field("keywords", keywords, Field.Store.YES,
+                            Field.Index.ANALYZED));
+                }
+                if (StringUtils.isNotEmpty(summary)) {
+                    doc.add(new Field("summary", summary, Field.Store.YES,
+                            Field.Index.ANALYZED));
+                }
+            } finally {
+                // Releases the resources held by the parsed PDF.
+                pdDoc.close();
+            }
+        } finally {
+            // Closing twice is harmless should the parser already have closed it.
+            is.close();
+        }
+        return doc;
+    }
+
+    /**
+     * Create a Lucene document from a Word (docx) file.
+     * <p>
+     * The fields "content", "filename" and "fullpath" are added when text could
+     * be extracted. When the extraction fails the error is logged and an empty
+     * document (no fields) is returned. The file stream is always closed.
+     * <p>
+     * NOTE(review): "filename" and "fullpath" are not stored here, while the PDF
+     * and text builders store them; confirm whether that difference is intended.
+     * @param file {@link File} Word file to read
+     * @return {@link Document}, without fields when no text was extracted
+     * @throws POIXMLException kept in the signature for existing callers
+     * @throws Exception if the file cannot be opened or the stream cannot be closed
+     */
+    public static Document createWordDocument(final File file) throws POIXMLException,
+            Exception {
+        String bodyText = null;
+        final InputStream is = new FileInputStream(file);
+        try {
+            final XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(is));
+            bodyText = extractor.getText();
+        } catch (Exception e) {
+            // Best effort: an unreadable file must not abort the caller.
+            log.error("Unable to extract text from Word document " + file.getName(), e);
+        } finally {
+            is.close();
+        }
+        final Document doc = new Document();
+        // Null-safe check; bodyText stays null when the extraction failed.
+        if (StringUtils.isNotEmpty(bodyText)) {
+            doc.add(new Field("content", bodyText, Field.Store.NO,
+                    Field.Index.ANALYZED));
+            doc.add(new Field("filename", file.getName(), Field.Store.NO,
+                    Field.Index.ANALYZED));
+            doc.add(new Field("fullpath", file.getCanonicalPath(), Field.Store.NO,
+                    Field.Index.ANALYZED));
+        }
+        return doc;
+    }
+
+    /**
+     * Create a Lucene document from a spreadsheet (xls) file.
+     * <p>
+     * The values of all numeric, string and boolean cells of every sheet are
+     * concatenated, separated by a blank, into the stored "content" field.
+     * Cells of any other type are skipped. The file stream is always closed.
+     * <p>
+     * NOTE(review): unlike the PDF and text builders, no "filename" or
+     * "fullpath" field is added here; confirm whether that is intended.
+     * @param file Spreadsheet {@link File}.
+     * @return {@link Document} holding the "content" field
+     * @throws Exception if the file cannot be opened, read or closed
+     */
+    public static Document createSpreadsheetsDocument(final File file) throws Exception {
+        log.debug("FileName Excel: " + file.getName());
+        final StringBuilder contents = new StringBuilder();
+        final InputStream is = new FileInputStream(file);
+        try {
+            final HSSFWorkbook workBook = new HSSFWorkbook(new POIFSFileSystem(is));
+            for (int i = 0; i < workBook.getNumberOfSheets(); i++) {
+                final HSSFSheet sheet = workBook.getSheetAt(i);
+                final Iterator<Row> rows = sheet.rowIterator();
+                while (rows.hasNext()) {
+                    final HSSFRow row = (HSSFRow) rows.next();
+                    final Iterator<Cell> cells = row.cellIterator();
+                    while (cells.hasNext()) {
+                        final HSSFCell cell = (HSSFCell) cells.next();
+                        switch (cell.getCellType()) {
+                        case HSSFCell.CELL_TYPE_NUMERIC: {
+                            contents.append(
+                                    String.valueOf(cell.getNumericCellValue()))
+                                    .append(" ");
+                            break;
+                        }
+
+                        case HSSFCell.CELL_TYPE_STRING: {
+                            final HSSFRichTextString richTextString = cell
+                                    .getRichStringCellValue();
+                            contents.append(richTextString.toString()).append(" ");
+                            break;
+                        }
+
+                        case HSSFCell.CELL_TYPE_BOOLEAN: {
+                            contents.append(
+                                    String.valueOf(cell.getBooleanCellValue()))
+                                    .append(" ");
+                            break;
+                        }
+
+                        default:
+                            // Other cell types carry no text to index.
+                            break;
+                        }
+                    }
+                }
+            }
+        } finally {
+            is.close();
+        }
+        final Document doc = new Document();
+        doc.add(new Field("content", contents.toString(), Field.Store.YES,
+                Field.Index.ANALYZED));
+        log.debug("Content Spreadsheets " + contents.toString());
+        return doc;
+    }
+
+    /**
+     * Create a Lucene document from a plain text file.
+     * <p>
+     * The file content is indexed under "content", the same field name used by
+     * the PDF, Word and spreadsheet builders, so one query covers every type.
+     * The file name and the canonical path are stored without analysis.
+     * <p>
+     * The reader handed to the field is consumed by Lucene when the document is
+     * added to the index, therefore it must not be closed here.
+     * @param file Text File.
+     * @return {@link Document} holding "content", "filename" and "fullpath"
+     * @throws Exception if the file cannot be opened or its path resolved
+     */
+    public static Document createTextDocument(final File file) throws Exception {
+        final Document doc = new Document();
+        // Index file contents.
+        doc.add(new Field("content", new FileReader(file)));
+        // Index File Name.
+        doc.add(new Field("filename", file.getName(), Field.Store.YES,
+                Field.Index.NOT_ANALYZED));
+        // Index Full Path.
+        doc.add(new Field("fullpath", file.getCanonicalPath(), Field.Store.YES,
+                Field.Index.NOT_ANALYZED));
+        return doc;
+    }
+}

File encuestame-search/src/test/java/org/encuestame/search/TestSearch.java

 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
 import org.encuestame.search.main.TestUtil;
+import org.junit.Ignore;
 
 /**
  * Test Service Search.
  * @since February 09, 2011
  * @version $Id: $
  */
+@Ignore
 public class TestSearch extends TestCase {
     final String queryString = "indexing";
     final String indexDir = "src/main/resources/Indexer2";