Anonymous committed c4e929c

Fixed offset of GZip'ed ARC/WARC record.

Comments (0)

Files changed (5)

src/main/java/org/jhove2/app/util/FeatureConfigurationUtil.java

 	 * @return String containing path to file
 	 * @throws JHOVE2Exception if file is not found or ClassLoader throws exception
 	 */
-	public static String getFilePathFromClasspath(String fileName, String fileDescription)throws JHOVE2Exception {
+	public static String getFilePathFromClasspath(String fileName, String fileDescription) throws JHOVE2Exception {
 	    URI fileURI = null;
 	    try {
 	        fileURI = ClassLoader.getSystemResource(fileName).toURI();

src/main/java/org/jhove2/module/format/arc/ArcModule.java

 import org.jhove2.core.format.FormatIdentification;
 import org.jhove2.core.format.FormatIdentification.Confidence;
 import org.jhove2.core.io.Input;
+import org.jhove2.core.reportable.Reportable;
 import org.jhove2.core.source.Source;
 import org.jhove2.core.source.SourceFactory;
 import org.jhove2.module.Module;
 import org.jhove2.module.format.Validator;
 import org.jhove2.module.format.arc.properties.ArcRecordData;
 import org.jhove2.module.format.gzip.GzipModule;
+import org.jhove2.module.format.gzip.GzipModule.GZipOffsetProperty;
 import org.jhove2.persist.FormatModuleAccessor;
 import org.jwat.arc.ArcReader;
 import org.jwat.arc.ArcReaderFactory;
          */
         ArcReader reader = null;
         if (gzipMod != null) {
-            // This should probably be changed according to success reading VersionBlock.
+        	// Obtain GZip startOffset from dummy property.
+        	long offset = -1;
+        	List<Reportable> gzipProps = source.getExtraProperties();
+        	Reportable prop;
+        	int i = 0;
+        	while (i<gzipProps.size()) {
+        		prop = gzipProps.get(i);
+        		if (prop instanceof GZipOffsetProperty) {
+        			offset = ((GZipOffsetProperty)prop).offset;
+        			gzipProps.remove(i);
+        			// Persist the source after removing the dummy offset property.
+        			source.getSourceAccessor().persistSource(source);
+        		}
+        		else {
+        			++i;
+        		}
+        	}
+        	// This should probably be changed according to success reading VersionBlock.
             gzipMod.presumptiveFormat = new FormatIdentification(format.getIdentifier(), Confidence.Tentative);
             /*
              * GZip compressed.
                  * First record. (Unless the parent modules are not correct!)
                  */
                 mod = parentSrc.addModule(this);
-                // TODO offset, when gzipmodule is refactored
-                parseRecordsCompressed(jhove2, sourceFactory, source, reader, -1L, true);
+                parseRecordsCompressed(jhove2, sourceFactory, source, reader, offset, true);
             } else {
-                // TODO offset, when gzipmodule is refactored
-                arcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, -1L, false);
+                arcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, offset, false);
                 // Validity
                 if (arcMod.isValid != Validity.False) {
                     if (reader.isCompliant()) {

src/main/java/org/jhove2/module/format/gzip/GzipModule.java

 import org.jhove2.core.format.Format;
 import org.jhove2.core.format.FormatIdentification;
 import org.jhove2.core.io.Input;
+import org.jhove2.core.reportable.AbstractReportable;
 import org.jhove2.core.source.Source;
 import org.jhove2.core.source.SourceFactory;
 import org.jhove2.module.format.BaseFormatModule;
     @NotPersistent
     public transient Object reader;
 
+    @Persistent
+    public static class GZipOffsetProperty extends AbstractReportable { // Dummy property that carries a GZip entry's start offset to the ARC/WARC modules via the source's extra properties.
+        public long offset; // Start offset of the GZip entry this property was created for.
+        public GZipOffsetProperty() { // No-arg constructor; offset keeps its default of 0. NOTE(review): presumably required by the persistence layer - confirm.
+		}
+        public GZipOffsetProperty(long offset) { // Creates the property holding the given offset.
+        	this.offset = offset;
+		}
+    }
+
     /**
      * Presumptive format used to identify subsequent ARC/WARC records which are
      * not identified by the identifier module.
                     }
 
                     if (recurse) {
+                    	// expose offset to ARC/WARC modules.
+                    	src.addExtraProperties(new GZipOffsetProperty(gzipEntry.getStartOffset()));
                         characterizeMember(jhove2, src);
                     }
                     src.close();

src/main/java/org/jhove2/module/format/warc/WarcModule.java

 import org.jhove2.core.format.FormatIdentification;
 import org.jhove2.core.format.FormatIdentification.Confidence;
 import org.jhove2.core.io.Input;
+import org.jhove2.core.reportable.Reportable;
 import org.jhove2.core.source.Source;
 import org.jhove2.core.source.SourceFactory;
 import org.jhove2.module.Module;
 import org.jhove2.module.format.BaseFormatModule;
 import org.jhove2.module.format.Validator;
 import org.jhove2.module.format.gzip.GzipModule;
+import org.jhove2.module.format.gzip.GzipModule.GZipOffsetProperty;
 import org.jhove2.module.format.warc.properties.WarcRecordData;
 import org.jhove2.persist.FormatModuleAccessor;
 import org.jwat.common.Diagnosis;
          */
         WarcReader reader = null;
         if (gzipMod != null) {
+        	// Obtain GZip startOffset from dummy property.
+        	long offset = -1;
+        	List<Reportable> gzipProps = source.getExtraProperties();
+        	Reportable prop;
+        	int i = 0;
+        	while (i<gzipProps.size()) {
+        		prop = gzipProps.get(i);
+        		if (prop instanceof GZipOffsetProperty) {
+        			offset = ((GZipOffsetProperty)prop).offset;
+        			gzipProps.remove(i);
+        			// Persist the source after removing the dummy offset property.
+        			source.getSourceAccessor().persistSource(source);
+        		}
+        		else {
+        			++i;
+        		}
+        	}
+        	// Better safe than sorry.
+            gzipMod.presumptiveFormat = new FormatIdentification(format.getIdentifier(), Confidence.Tentative);
             /*
              * GZip compressed.
              */
                  * First record. (Unless the parent modules are not correct!)
                  */
                 mod = parentSrc.addModule(this);
-                // TODO offset, when gzipmodule is refactored
-                parseRecordsCompressed(jhove2, sourceFactory, source, reader, -1L);
+                parseRecordsCompressed(jhove2, sourceFactory, source, reader, offset);
             } else {
-                // TODO offset, when gzipmodule is refactored
-                warcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, -1L);
+                warcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, offset);
                 // Validity
                 if (warcMod.isValid != Validity.False) {
                     if (reader.isCompliant()) {

src/test/resources/config/filepaths-config.xml

 	        value="examples/wave/"/>
 	</bean>
 	
-	<bean id="warcDirBasePath" class="java.lang.String" >
-		<constructor-arg type="java.lang.String" 
-	        value="examples/warc/"/>
-	</bean>
-
-	
 	<bean id="xmlDirBasePath" class="java.lang.String" >
 		<constructor-arg type="java.lang.String" 
 	        value="examples/xml/"/>
 	</bean>
 	
+	<bean id="arcDirBasePath" class="java.lang.String" >
+		<constructor-arg type="java.lang.String" 
+	        value="examples/arc/"/>
+	</bean>
+	
+	<bean id="gzipDirBasePath" class="java.lang.String" >
+		<constructor-arg type="java.lang.String" 
+	        value="examples/gzip/"/>
+	</bean>
+	
+	<bean id="warcDirBasePath" class="java.lang.String" >
+		<constructor-arg type="java.lang.String" 
+	        value="examples/warc/"/>
+	</bean>
+	
 </beans>
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.