Commits

Sam Adams committed 7f23e57 Draft

Added pubserver archiver

  • Participants
  • Parent commits 7143e0d

Comments (0)

Files changed (7)

File applications/pom.xml

             <version>0.4-SNAPSHOT</version>
         </dependency>
 
+        <dependency>
+            <groupId>uk.ac.cam.ch.wwmm</groupId>
+            <artifactId>archiver</artifactId>
+            <version>0.4-SNAPSHOT</version>
+        </dependency>
+
     </dependencies>
 
     <build>

File applications/src/main/java/wwmm/pubcrawler/main/CrawlerApplication.java

 import wwmm.pubcrawler.inject.HttpFetcherModule;
 import wwmm.pubcrawler.inject.MongoRepositoryModule;
 import wwmm.pubcrawler.inject.PubcrawlerModule;
+import wwmm.pubserver.archiver.ArchiveProcessor;
+import wwmm.pubserver.archiver.PubserverArchiverModule;
 
 import java.net.UnknownHostException;
 import java.util.concurrent.ExecutorService;
             new PubcrawlerModule(),
             new HttpFetcherModule(httpdb),
             new MongoRepositoryModule(pubdb, taskdb),
+            new PubserverArchiverModule(),
             getPublisherModule()
         );
 
-        final ExecutorService executorService = Executors.newSingleThreadExecutor();
+        final ExecutorService taskExecutor = Executors.newSingleThreadExecutor();
+        final ExecutorService archiveExecutor = Executors.newSingleThreadExecutor();
+
+        final ArchiveProcessor archiveProcessor = injector.getInstance(ArchiveProcessor.class);
+        archiveExecutor.submit(archiveProcessor);
 
         final Class<? extends Runnable> seederType = getSeederType();
         final Runnable seedRunner = injector.getInstance(seederType);
-        executorService.submit(seedRunner);
+        taskExecutor.submit(seedRunner);
 
         final Runnable resumeRunner = injector.getInstance(ResumeTask.class);
-        executorService.submit(resumeRunner);
+        taskExecutor.submit(resumeRunner);
 
         final CrawlerExecutor crawlRunner = injector.getInstance(CrawlerExecutor.class);
-        executorService.submit(crawlRunner);
+        taskExecutor.submit(crawlRunner);
 
         // Stop accepting new tasks; shutdown() does not block, so already-submitted tasks (including CrawlRunner) still run to completion
-        executorService.shutdown();
+        taskExecutor.shutdown();
     }
 
     protected abstract Module getPublisherModule();

File core/src/main/java/wwmm/pubcrawler/processors/ArticleProcessor.java

 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import wwmm.pubcrawler.archivers.ArticleArchiver;
+import wwmm.pubcrawler.archivers.Archiver;
 import wwmm.pubcrawler.crawlers.ArticleHandler;
 import wwmm.pubcrawler.crawlers.ResourceProcessor;
 import wwmm.pubcrawler.model.Article;
 
     private static final Logger LOG = LoggerFactory.getLogger(ArticleProcessor.class);
 
-    private final ArticleArchiver articleArchiver;
+    private final Archiver<Article> articleArchiver;
     private final ArticleHandler articleHandler;
     private final ArticleParserFactory<Resource> parserFactory;
 
     @Inject
-    public ArticleProcessor(final ArticleArchiver articleArchiver, final ArticleHandler articleHandler, final ArticleParserFactory<Resource> parserFactory) {
+    public ArticleProcessor(final Archiver<Article> articleArchiver, final ArticleHandler articleHandler, final ArticleParserFactory<Resource> parserFactory) {
         this.articleArchiver = articleArchiver;
         this.articleHandler = articleHandler;
         this.parserFactory = parserFactory;

File core/src/main/java/wwmm/pubcrawler/processors/IssueTocProcessor.java

 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import wwmm.pubcrawler.archivers.ArticleArchiver;
-import wwmm.pubcrawler.archivers.IssueArchiver;
+import wwmm.pubcrawler.archivers.Archiver;
 import wwmm.pubcrawler.crawlers.ArticleHandler;
 import wwmm.pubcrawler.crawlers.IssueHandler;
 import wwmm.pubcrawler.crawlers.ResourceProcessor;
 
     private static final Logger LOG = LoggerFactory.getLogger(IssueTocProcessor.class);
 
-    private final IssueArchiver issueArchiver;
+    private final Archiver<Issue> issueArchiver;
+    private final Archiver<Article> articleArchiver;
     private final IssueHandler issueHandler;
-    private final ArticleArchiver articleArchiver;
     private final ArticleHandler articleHandler;
     private final IssueTocParserFactory<Resource> parserFactory;
 
     @Inject
-    public IssueTocProcessor(final IssueArchiver issueArchiver, final IssueHandler issueHandler, final ArticleArchiver articleArchiver, final ArticleHandler articleHandler, final IssueTocParserFactory<Resource> parserFactory) {
+    public IssueTocProcessor(final Archiver<Issue> issueArchiver, final IssueHandler issueHandler, final Archiver<Article> articleArchiver, final ArticleHandler articleHandler, final IssueTocParserFactory<Resource> parserFactory) {
         this.issueArchiver = issueArchiver;
         this.issueHandler = issueHandler;
         this.articleArchiver = articleArchiver;

File core/src/main/java/wwmm/pubcrawler/processors/PublicationListProcessor.java

 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import wwmm.pubcrawler.archivers.JournalArchiver;
+import wwmm.pubcrawler.archivers.Archiver;
 import wwmm.pubcrawler.crawlers.JournalHandler;
 import wwmm.pubcrawler.crawlers.ResourceProcessor;
 import wwmm.pubcrawler.model.Journal;
     private static final Logger LOG = LoggerFactory.getLogger(PublicationListProcessor.class);
 
     private final PublicationListParserFactory<Resource> parserFactory;
-    private final JournalArchiver journalArchiver;
+    private final Archiver<Journal> journalArchiver;
     private final JournalHandler journalHandler;
 
     @Inject
-    public PublicationListProcessor(final PublicationListParserFactory<Resource> parserFactory, final JournalArchiver journalArchiver, final JournalHandler journalHandler) {
+    public PublicationListProcessor(final PublicationListParserFactory<Resource> parserFactory, final Archiver<Journal> journalArchiver, final JournalHandler journalHandler) {
         this.parserFactory = parserFactory;
         this.journalArchiver = journalArchiver;
         this.journalHandler = journalHandler;

File publishers/springer/src/main/java/wwmm/pubcrawler/crawlers/springer/SpringerPublicationListProcessor.java

 package wwmm.pubcrawler.crawlers.springer;
 
-import wwmm.pubcrawler.archivers.JournalArchiver;
+import wwmm.pubcrawler.archivers.Archiver;
 import wwmm.pubcrawler.crawler.Task;
 import wwmm.pubcrawler.crawler.TaskQueue;
 import wwmm.pubcrawler.crawlers.JournalHandler;
 import wwmm.pubcrawler.crawlers.ResourceProcessor;
-import wwmm.pubcrawler.crawlers.springer.SpringerPublicationListCrawlTaskData;
-import wwmm.pubcrawler.crawlers.springer.SpringerPublicationListCrawlTaskFactory;
-import wwmm.pubcrawler.crawlers.springer.SpringerPublicationListParserFactory;
 import wwmm.pubcrawler.crawlers.springer.parsers.SpringerPublicationListParser;
 import wwmm.pubcrawler.http.DocumentResource;
 import wwmm.pubcrawler.model.Journal;
 
     private final SpringerPublicationListParserFactory publicationListParserFactory;
     private final SpringerPublicationListCrawlTaskFactory issueIndexCrawlTaskFactory;
-    private final JournalArchiver journalArchiver;
+    private final Archiver<Journal> journalArchiver;
     private final JournalHandler journalHandler;
     private final TaskQueue taskQueue;
 
     @Inject
-    public SpringerPublicationListProcessor(final SpringerPublicationListParserFactory publicationListParserFactory, final SpringerPublicationListCrawlTaskFactory issueIndexCrawlTaskFactory, final JournalArchiver journalArchiver, final JournalHandler journalHandler, final TaskQueue taskQueue) {
+    public SpringerPublicationListProcessor(final SpringerPublicationListParserFactory publicationListParserFactory, final SpringerPublicationListCrawlTaskFactory issueIndexCrawlTaskFactory, final Archiver<Journal> journalArchiver, final JournalHandler journalHandler, final TaskQueue taskQueue) {
         this.publicationListParserFactory = publicationListParserFactory;
         this.issueIndexCrawlTaskFactory = issueIndexCrawlTaskFactory;
         this.journalArchiver = journalArchiver;

File test-utils/src/main/java/wwmm/pubcrawler/MockRepositoryModule.java

 import com.google.inject.AbstractModule;
 import com.google.inject.Provides;
 import org.mockito.Mockito;
+import wwmm.pubcrawler.archivers.Archiver;
 import wwmm.pubcrawler.archivers.ArticleArchiver;
 import wwmm.pubcrawler.archivers.IssueArchiver;
 import wwmm.pubcrawler.archivers.JournalArchiver;
+import wwmm.pubcrawler.model.Article;
+import wwmm.pubcrawler.model.Issue;
+import wwmm.pubcrawler.model.Journal;
 
 /**
  * @author Sam Adams
     }
     
     @Provides
-    public JournalArchiver provideJournalArchiver() {
+    public Archiver<Journal> provideJournalArchiver() {
         return Mockito.mock(JournalArchiver.class);
     }
 
     @Provides
-    public IssueArchiver provideIssueArchiver() {
+    public Archiver<Issue> provideIssueArchiver() {
         return Mockito.mock(IssueArchiver.class);
     }
 
     @Provides
-    public ArticleArchiver providesArticleArchiver() {
+    public Archiver<Article> providesArticleArchiver() {
         return Mockito.mock(ArticleArchiver.class);
     }