Commits

Anonymous committed 715329d

Maintenance: updated the ARC/WARC modules to the latest version of JWAT.

Comments (0)

Files changed (7)

config/messages/arc_messages.properties

 #   Message templates for class org.jhove2.module.format.arc.ArcModule
 # ##############################################################################
 #
-org.jhove2.module.format.arc.ArcModule.missing = \
-        Missing ARC header fields {0}
-org.jhove2.module.format.arc.ArcModule.invalid = \
-        Invalid {0}: {1}
-org.jhove2.module.format.arc.ArcModule.warning = \
-        Warning: {0}
+#org.jhove2.module.format.arc.ArcModule.missing = \
+#        Missing ARC header fields {0}
+#org.jhove2.module.format.arc.ArcModule.invalid = \
+#        Invalid {0}: {1}
+#org.jhove2.module.format.arc.ArcModule.warning = \
+#        Warning: {0}
+
+# Keys use the ArcModule class name: the module builds each message id as
+# getClass().getName() + '.' + <lower-cased diagnosis type>.
+org.jhove2.module.format.arc.ArcModule.duplicate = \
+        Duplicate {0}: ''{1}''
+org.jhove2.module.format.arc.ArcModule.empty = \
+        Empty {0}
+
+org.jhove2.module.format.arc.ArcModule.error_expected = \
+        Empty {0}, expected ''{1}''
+
+org.jhove2.module.format.arc.ArcModule.invalid = \
+        Invalid {0}
+
+org.jhove2.module.format.arc.ArcModule.invalid_data = \
+        Invalid {0}, ''{1}''
+
+org.jhove2.module.format.arc.ArcModule.invalid_encoding = \
+        Invalid encoded {0}, ''{1}'' - encoding: ''{2}''
+
+org.jhove2.module.format.arc.ArcModule.invalid_expected = \
+        Invalid {0}, ''{1}'' - expected: ''{2}''
+
+org.jhove2.module.format.arc.ArcModule.recommended = \
+        Invalid {0}: ''{1}''
+
+org.jhove2.module.format.arc.ArcModule.required_invalid = \
+        Invalid {0}: ''{1}''
+
+org.jhove2.module.format.arc.ArcModule.reserved = \
+        Invalid {0}: ''{1}''
+
+org.jhove2.module.format.arc.ArcModule.undesired_data = \
+        Invalid {0}: ''{1}''
+
+org.jhove2.module.format.arc.ArcModule.unknown = \
+        Invalid {0}: ''{1}''
+
 #
 org.jhove2.module.format.arc.ArcModule.characterizationError = \
         Fatal characterization error: {0}

config/messages/warc_messages.properties

 #   Message templates for class org.jhove2.module.format.warc.WarcModule
 # ##############################################################################
 #
-org.jhove2.module.format.warc.WarcModule.missing = \
-        Missing ARC header fields {0}
+#org.jhove2.module.format.warc.WarcModule.missing = \
+#        Missing ARC header fields {0}
+#org.jhove2.module.format.warc.WarcModule.invalid = \
+#        Invalid {0}: {1}
+#org.jhove2.module.format.warc.WarcModule.warning = \
+#        Warning: {0}
+
+org.jhove2.module.format.warc.WarcModule.duplicate = \
+        Duplicate {0}: ''{1}''
+org.jhove2.module.format.warc.WarcModule.empty = \
+        Empty {0}
+
+org.jhove2.module.format.warc.WarcModule.error_expected = \
+        Empty {0}, expected ''{1}''
+
 org.jhove2.module.format.warc.WarcModule.invalid = \
-        Invalid {0}: {1}
-org.jhove2.module.format.warc.WarcModule.warning = \
-        Warning: {0}
+        Invalid {0}
 
-org.jhove2.module.format.warc.WarcModule.empty = \
-        Empty ARC header field {0}
-org.jhove2.module.format.warc.WarcModule.duplicate = \
-        Invalid {0}: {1}
+org.jhove2.module.format.warc.WarcModule.invalid_data = \
+        Invalid {0}, ''{1}''
+
+org.jhove2.module.format.warc.WarcModule.invalid_encoding = \
+        Invalid encoded {0}, value: ''{1}'', encoding: ''{2}''
+
+org.jhove2.module.format.warc.WarcModule.invalid_expected = \
+        Invalid {0}, value: ''{1}'', expected: ''{2}''
+
+org.jhove2.module.format.warc.WarcModule.recommended = \
+        Invalid {0}: ''{1}''
+
+org.jhove2.module.format.warc.WarcModule.required_invalid = \
+        Invalid {0}: ''{1}''
+
+org.jhove2.module.format.warc.WarcModule.reserved = \
+        Invalid {0}: ''{1}''
+
+org.jhove2.module.format.warc.WarcModule.undesired_data = \
+        Invalid {0}: ''{1}''
+
 org.jhove2.module.format.warc.WarcModule.unknown = \
-        Invalid {0}: {1}
-org.jhove2.module.format.warc.WarcModule.wanted = \
-        Invalid {0}: {1}
-org.jhove2.module.format.warc.WarcModule.unwanted = \
-        Invalid {0}: {1}
-org.jhove2.module.format.warc.WarcModule.recommended = \
-        Invalid {0}: {1}
+        Invalid {0}: ''{1}''
 
 #
 org.jhove2.module.format.warc.WarcModule.characterizationError = \
 		<dependency>
 			<groupId>org.jwat</groupId>
 			<artifactId>jwat-gzip</artifactId>
-			<version>0.8.0-SNAPSHOT</version>
+			<version>0.8.1-SNAPSHOT</version>
 		</dependency>
 		<dependency>
 			<groupId>org.jwat</groupId>
 			<artifactId>jwat-arc</artifactId>
-			<version>0.8.0-SNAPSHOT</version>
+			<version>0.8.1-SNAPSHOT</version>
 		</dependency>
 		<dependency>
 			<groupId>org.jwat</groupId>
 			<artifactId>jwat-warc</artifactId>
-			<version>0.8.0-SNAPSHOT</version>
+			<version>0.8.1-SNAPSHOT</version>
 		</dependency>
 	</dependencies>
 	<licenses>

src/main/java/org/jhove2/module/format/arc/ArcModule.java

 import org.jwat.arc.ArcReaderFactory;
 import org.jwat.arc.ArcRecord;
 import org.jwat.arc.ArcRecordBase;
-import org.jwat.arc.ArcValidationError;
 import org.jwat.arc.ArcVersionBlock;
+import org.jwat.common.Diagnosis;
 import org.jwat.common.HttpResponse;
 import org.jwat.common.Payload;
 
  * JHOVE2 ARC module. This class is mostly a JHOVE2 wrapper that uses
  * the JWAT package for the actual ARC validation.
  *
- * @author lbihanic, selghissassi
+ * @author lbihanic, selghissassi, nicl
  */
 @Persistent
 public class ArcModule extends BaseFormatModule implements Validator {
                  * First record. (Unless the parent modules are not correct!)
                  */
                 mod = parentSrc.addModule(this);
-                parseRecordsCompressed(jhove2, sourceFactory, source, reader, true);
+                // TODO offset
+                parseRecordsCompressed(jhove2, sourceFactory, source, reader, -1L, true);
             } else {
-                arcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, false);
+            	// TODO offset
+                arcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, -1L, false);
                 // Validity
                 if (arcMod.isValid != Validity.False) {
                     if (reader.isCompliant()) {
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
     protected void parseRecordsCompressed(JHOVE2 jhove2, SourceFactory sourceFactory,
-    		Source parentSource, ArcReader reader, boolean bReadVersion)
+    		Source parentSource, ArcReader reader, Long offset, boolean bReadVersion)
     				throws EOFException, IOException, JHOVE2Exception {
         ArcVersionBlock versionBlock;
         ArcRecord record;
             parentSource.setIsAggregate(true);
             InputStream in = parentSource.getInputStream();
             if (bReadVersion) {
-                versionBlock = reader.getVersionBlock(in);
+                versionBlock = reader.getVersionBlockFrom(in, offset);
                 if (versionBlock != null) {
                     processVersionBlock(jhove2, sourceFactory, parentSource, versionBlock);
                 }
             /*
              * Loop through available records.
              */
-            while ((record = reader.getNextRecordFrom(in, 8192, 0)) != null) {
+            while ((record = reader.getNextRecordFrom(in, offset, 8192)) != null) {
                 processRecord(jhove2, sourceFactory, parentSource, record);
             }
         } else {
         if (!record.isValid()) {
         }
         */
-        if (record.hasErrors()) {
+        if (record.diagnostics.hasErrors()) {
             // Report errors on source object.
-           for (ArcValidationError e : record.getValidationErrors()) {
+           for (Diagnosis d : record.diagnostics.getErrors()) {
                src.addMessage(newValidityError(jhove2,Message.Severity.ERROR,
-                                               e.error.toString(),e.field,e.value));
+            		   d.type.toString().toLowerCase(), d.getMessageArgs()));
                //updateMap(e.error.toString() + '-' + e.field, this.errors);
            }
         }
-        if (record.hasWarnings()) {
+        if (record.diagnostics.hasWarnings()) {
             // Report warnings on source object.
-            for (String warning : record.getWarnings()) {
+            for (Diagnosis d : record.diagnostics.getWarnings()) {
                 src.addMessage(newValidityError(jhove2,Message.Severity.WARNING,
-                                                "warning",warning));
+                		d.type.toString().toLowerCase(), d.getMessageArgs()));
             }
          }
     }
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
     private Message newValidityError(JHOVE2 jhove2, Severity severity, String id,
-                                     Object... params) throws JHOVE2Exception {
+                                     Object[] messageArgs) throws JHOVE2Exception {
     	return new Message(severity, Message.Context.OBJECT,
-    					   this.getClass().getName() + '.' + id, params,
+    					   this.getClass().getName() + '.' + id, messageArgs,
     					   jhove2.getConfigInfo());
     }
 

src/main/java/org/jhove2/module/format/gzip/GzipModule.java

  * file format specification version 4.3) and supports multiple member
  * GZIP files.</p>
  *
- * @author lbihanic, selghissassi
+ * @author lbihanic, selghissassi, nicl
  */
 @Persistent
 public class GzipModule extends BaseFormatModule implements Validator {
 
         instanceId = autoIncId.get();
         // This is done because it is not persisted immediately.
-        // I need it in recursive calls and not when the gzip module exits.
+        // It is needed in recursive calls and not when the gzip module exits.
         // Each time jhove2 looks up an existing module it actually
         // instantiates a new class and loads the persisted values. 
         // So a version with the correct instanceId exists on the call stack
         // but every time someone requests it a new one is created and
-        // populated with persisted data.
+        // populated with persisted data. Epic fail!
         getModuleAccessor().persistModule(this);
         synchronized (gzipMap) {
             gzipMap.put(instanceId, this);

src/main/java/org/jhove2/module/format/warc/WarcModule.java

 import org.jhove2.module.format.gzip.GzipModule;
 import org.jhove2.module.format.warc.properties.WarcRecordData;
 import org.jhove2.persist.FormatModuleAccessor;
+import org.jwat.common.Diagnosis;
 import org.jwat.common.HttpResponse;
 import org.jwat.common.Payload;
 import org.jwat.warc.WarcReader;
 import org.jwat.warc.WarcReaderFactory;
 import org.jwat.warc.WarcRecord;
-import org.jwat.warc.WarcValidationError;
 
 import com.sleepycat.persist.model.Persistent;
 
                  * First record. (Unless the parent modules are not correct!)
                  */
                 mod = parentSrc.addModule(this);
-                parseRecordsCompressed(jhove2, sourceFactory, source, reader);
+                // TODO offset
+                parseRecordsCompressed(jhove2, sourceFactory, source, reader, -1L);
             } else {
-                warcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader);
+            	// TODO offset
+                warcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, -1L);
                 // Validity
                 if (warcMod.isValid != Validity.False) {
                     if (reader.isCompliant()) {
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
     protected void parseRecordsCompressed(JHOVE2 jhove2, SourceFactory sourceFactory,
-    		Source parentSource, WarcReader reader)
+    		Source parentSource, WarcReader reader, Long offset)
     				throws EOFException, IOException, JHOVE2Exception {
         WarcRecord record;
         // Ensure a WARC reader could be instantiated.
              * Loop through available records.
              */
             InputStream in = parentSource.getInputStream();
-            while ((record = reader.getNextRecordFrom(in, 8192)) != null) {
+            while ((record = reader.getNextRecordFrom(in, offset, 8192)) != null) {
                 processRecord(jhove2, sourceFactory, parentSource, record);
             }
         } else {
         if (!record.isValid()) {
         }
         */
-        if (record.hasErrors()) {
+        if (record.diagnostics.hasErrors()) {
             // Report errors on source object.
-           for (WarcValidationError e : record.getValidationErrors()) {
-               src.addMessage(newValidityError(jhove2,Message.Severity.ERROR,
-                                               e.error.toString(),e.field,e.value));
+           for (Diagnosis d : record.diagnostics.getErrors()) {
+               src.addMessage(newValidityError(jhove2, Message.Severity.ERROR,
+            		   d.type.toString().toLowerCase(), d.getMessageArgs()));
                //updateMap(e.error.toString() + '-' + e.field, this.errors);
            }
         }
-        /*
-        if (record.hasWarnings()) {
+        if (record.diagnostics.hasWarnings()) {
             // Report warnings on source object.
-            for (String warning : record.getWarnings()) {
-                src.addMessage(newValidityError(jhove2,Message.Severity.WARNING,
-                                                "warning",warning));
+            for (Diagnosis d : record.diagnostics.getWarnings()) {
+                src.addMessage(newValidityError(jhove2, Message.Severity.WARNING,
+                		d.type.toString().toLowerCase(), d.getMessageArgs()));
             }
          }
-         */
     }
 
     /**
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
     private Message newValidityError(JHOVE2 jhove2, Severity severity, String id,
-                                     Object... params) throws JHOVE2Exception {
+                                     Object[] messageArgs) throws JHOVE2Exception {
     	return new Message(severity, Message.Context.OBJECT,
-    					   this.getClass().getName() + '.' + id, params,
+    					   this.getClass().getName() + '.' + id, messageArgs,
     					   jhove2.getConfigInfo());
     }
 

src/main/java/org/jhove2/module/format/warc/properties/WarcRecordData.java

      * @param record parsed WARC record
      */
     public WarcRecordData(WarcRecord record) {
-        startOffset = record.getOffset();
+        startOffset = record.getStartOffset();
         consumed = record.getConsumed();
         this.warcType = record.warcTypeStr;
         this.warcFilename = record.warcFilename;