Commits

Anonymous committed 16b4ba0

Cleanup and review changes.

  • Participants
  • Parent commits 9717c2d

Comments (0)

Files changed (18)

File config/spring/jhove2-framework-config.xml

 
 	<!-- BnF (Bibliothèque nationale de France) agent bean -->
 	<bean id="BnFAgent" class="org.jhove2.core.Agent" scope="singleton">
-		<constructor-arg value="Bibliothèque nationale de France"
-		                 type="java.lang.String"/>
-		<constructor-arg value="Corporate"
-		                 type="org.jhove2.core.Agent$Type"/>
+		<constructor-arg type="java.lang.String" value="Bibliothéque nationale de France"/>
+		<constructor-arg type="org.jhove2.core.Agent$Type" value="Corporate"/>
 		<property name="URI" value="http://www.bnf.fr/"/>
 	</bean>
 
+	<!-- Netarkivet agent bean -->
+	<bean id="NetarkivetAgent" class="org.jhove2.core.Agent" scope="singleton">
+		<constructor-arg type="java.lang.String" value="Netarkivet"/>
+		<constructor-arg type="org.jhove2.core.Agent$Type" value="Corporate"/>
+		<property name="URI" value="http://netarkivet.dk/"/>
+	</bean>
+
 	<bean id="DisplayVisibility" class="org.jhove2.config.spring.PropertiesFactoryBean">
 		<property name="propertyFileBaseName" value="displayer"/>
 	</bean>

File config/spring/module/format/arc/jhove2-arc-config.xml

 	<constructor-arg ref="FormatModuleAccessor"/>
 	<property name="developers">
 	    <list value-type="org.jhove2.core.Agent">
+	      <ref bean="NetarkivetAgent"/>
 	      <ref bean="BnFAgent"/>
 	    </list>
 	</property>

File config/spring/module/format/gzip/jhove2-gzip-config.xml

 		<constructor-arg ref="FormatModuleAccessor"/>
 		<property name="developers">
 			<list value-type="org.jhove2.core.Agent">
+				<ref bean="NetarkivetAgent"/>
 				<ref bean="BnFAgent"/>
 			</list>
 		</property>

File config/spring/module/format/warc/jhove2-warc-config.xml

     <constructor-arg ref="FormatModuleAccessor"/>
     <property name="developers">
       <list value-type="org.jhove2.core.Agent">
-	<ref bean="BnFAgent"/>
+        <ref bean="NetarkivetAgent"/>
+        <ref bean="BnFAgent"/>
       </list>
     </property>
     <property name="recurse" value="false"/>

File src/main/java/org/jhove2/module/format/arc/ArcModule.java

 import org.jhove2.module.Module;
 import org.jhove2.module.format.BaseFormatModule;
 import org.jhove2.module.format.Validator;
-import org.jhove2.module.format.Validator.Coverage;
 import org.jhove2.module.format.arc.properties.ArcRecordData;
 import org.jhove2.module.format.gzip.GzipModule;
 import org.jhove2.persist.FormatModuleAccessor;
     private static transient Map<String, FormatIdentification> jhove2Ids = null;
 
     /** Validation status. */
-    private volatile Validity isValid;
+    private Validity isValid;
 
     /** Used protocols. */
     private Map<String, Integer> protocols =
     //                            new ConcurrentHashMap<String,AtomicInteger>();
 
     /** The number of ARC records. */
-    //private AtomicInteger arcRecordNumber = new AtomicInteger(0);
     private int arcRecordNumber;
 
     /** The name of the ARC file. */
     /**
      * Instantiate a new <code>ArcModule</code> instance.
      * This constructor is used by the Spring framework.
-     * @param format ARC format.
+     * @param format Jhove2 Format used by this module to handle ARC
      * @param formatModuleAccessor FormatModuleAccessor to manage access to Format Profiles
      */
     public ArcModule(Format format,
      * @param  param    ARC source input
      * @return number of consumed bytes parsed
      * @throws IOException If an I/O exception is raised reading the source unit
-     * @throws JHOVE2Exception
+     * @throws JHOVE2Exception if a serious error hinders correct module execution
      * @see org.jhove2.module.format.FormatModule#parse(org.jhove2.core.JHOVE2,
      *      org.jhove2.core.source.Source, org.jhove2.core.io.Input)
      */
     public long parse(JHOVE2 jhove2,Source source, Input input)
                         throws IOException, EOFException, JHOVE2Exception {
         /*
-         * Cache Content-Types to J2 FormatIdentifications.
+         * Cache Content-Types to JHove2 FormatIdentifications.
          */
         if (jhove2Ids == null) {
             Map<String,String> ids =
             jhove2Ids = Collections.unmodifiableSortedMap(idsTemp);
         }
         /*
+         * SourceFactory for later use.
+         */
+        //Invocation cfg = jhove2.getInvocation();
+        SourceFactory sourceFactory = jhove2.getSourceFactory();
+        if (sourceFactory == null) {
+            throw new JHOVE2Exception("INTERNAL ERROR - JHOVE2 SourceFactory is null");
+        }
+        /*
          * Module init.
          */
         long consumed = 0L;
         isValid = Validity.Undetermined;
+        // No effect unless read methods on the input object are called.
         input.setByteOrder(ByteOrder.LITTLE_ENDIAN);
-
-        // Reset state.
-        arcFileName = null;
-        arcFileSize = -1L;
-        //this.arcRecordNumber.set(0);
-        //this.errors.clear();
-        //this.protocols.clear();
-
         /*
          * Module context.
          */
                 mod = parentMods.get(i);
                 if (mod instanceof GzipModule) {
                     gzipMod = (GzipModule)mod;
+                    // Look up the GZipModule which is on the call stack.
+                    // Required since the JHove2 lookup returns a new instance
+                    // populated with persisted values and not the instance on
+                    // the call stack.
                     gzipMod = GzipModule.gzipMap.get(gzipMod.instanceId);
                 }
                 if (mod instanceof ArcModule) {
+                	// The same goes for the ArcModule except we do not need
+                	// any transient fields here.
                     arcMod = (ArcModule)mod;
                 }
             }
         }
 
-        //Invocation cfg = jhove2.getInvocation();
-
         ArcReader reader = null;
         if (gzipMod != null) {
             // This should probably be changed according to success reading VersionBlock.
             reader = (ArcReader)gzipMod.reader;
             if (reader == null) {
                 reader = ArcReaderFactory.getReaderUncompressed();
-                reader.setBlockDigestEnabled(bComputeBlockDigest);
-                reader.setPayloadDigestEnabled(bComputePayloadDigest);
-                try {
-                    reader.setBlockDigestAlgorithm(blockDigestAlgorithm);
-                    reader.setPayloadDigestAlgorithm(payloadDigestAlgorithm);
-                } catch (NoSuchAlgorithmException e) {
-                    // TODO Auto-generated catch block
-                    e.printStackTrace();
-                }
-                reader.setBlockDigestEncoding(blockDigestEncoding);
-                reader.setPayloadDigestEncoding(payloadDigestEncoding);
+                setDigestOptions(reader);
                 gzipMod.reader = reader;
             }
             if (arcMod == null) {
                  * First record. (Unless the parent modules are not correct!)
                  */
                 mod = parentSrc.addModule(this);
-                parseRecordsCompressed(jhove2, source, reader, true);
-            }
-            else {
-                arcMod.parseRecordsCompressed(jhove2, source, reader, false);
+                parseRecordsCompressed(jhove2, sourceFactory, source, reader, true);
+            } else {
+                arcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, false);
                 // Validity
                 if (arcMod.isValid != Validity.False) {
                     if (reader.isCompliant()) {
                         arcMod.isValid = Validity.True;
-                    }
-                    else {
+                    } else {
                         arcMod.isValid = Validity.False;
                     }
                 }
                 arcMod = (ArcModule)arcMod.getModuleAccessor().persistModule(arcMod);
-                // Remove WarcModule from source instance since we added one to the parent source.
-                /*
-                List<Module> sourceMods = source.getModules();
-                Iterator<Module> iter = sourceMods.iterator();
-                while (iter.hasNext()) {
-                    mod = iter.next();
-                    if (mod instanceof ArcModule) {
-                        iter.remove();
-                    }
-                }
-                */
+                // Remove ArcModule from source instance since we added one to the parent source.
                 this.setParentSourceId(null);
                 source = source.getSourceAccessor().persistSource(source);
             }
             consumed = reader.getConsumed();
-        }
-        else {
+        } else {
             /*
              * Not GZip compressed.
              */
             reader = ArcReaderFactory.getReaderUncompressed(source.getInputStream(), 8192);
-            reader.setBlockDigestEnabled(bComputeBlockDigest);
-            reader.setPayloadDigestEnabled(bComputePayloadDigest);
-            try {
-                reader.setBlockDigestAlgorithm(blockDigestAlgorithm);
-                reader.setPayloadDigestAlgorithm(payloadDigestAlgorithm);
-            } catch (NoSuchAlgorithmException e) {
-                // TODO Auto-generated catch block
-                e.printStackTrace();
-            }
-            reader.setBlockDigestEncoding(blockDigestEncoding);
-            reader.setPayloadDigestEncoding(payloadDigestEncoding);
-            parseRecordsUncompressed(jhove2, source, reader, true);
+            setDigestOptions(reader);
+            parseRecordsUncompressed(jhove2, sourceFactory, source, reader, true);
             reader.close();
             consumed = reader.getConsumed();
             /*
             if (isValid != Validity.False) {
                 if (reader.isCompliant()) {
                     isValid = Validity.True;
-                }
-                else {
+                } else {
                     isValid = Validity.False;
                 }
             }
     }
 
     /**
+     * Set digest options for ARC reader.
+     * @param reader ARC reader instance
+     */
+    protected void setDigestOptions(ArcReader reader) throws JHOVE2Exception {
+        reader.setBlockDigestEnabled(bComputeBlockDigest);
+        reader.setPayloadDigestEnabled(bComputePayloadDigest);
+        try {
+            reader.setBlockDigestAlgorithm(blockDigestAlgorithm);
+        } catch (NoSuchAlgorithmException e) {
+        	throw new JHOVE2Exception("Invalid block digest algorithm: " + blockDigestAlgorithm);
+        }
+        try {
+            reader.setPayloadDigestAlgorithm(payloadDigestAlgorithm);
+        } catch (NoSuchAlgorithmException e) {
+        	throw new JHOVE2Exception("Invalid payload digest algorithm: " + payloadDigestAlgorithm);
+        }
+        reader.setBlockDigestEncoding(blockDigestEncoding);
+        reader.setPayloadDigestEncoding(payloadDigestEncoding);
+    }
+
+    /**
      * Parse ARC records that are not encased in GZip entries. Parsing should
      * be straightforward with all records accessible through the same
      * source. The version block is only read if the reader was initialized
      * during this module call.
      * @param jhove2 the JHove2 characterization context
-     * @param source ARC source unit
+     * @param sourceFactory JHove2 source factory
+     * @param parentSource ARC source unit
      * @param reader ARC reader used to parse records
-     * @param bReadVersion read version block or go straight to parsing records
+     * @param bReadVersion read version block first or go straight to parsing records
      * @throws EOFException if EOF occurs prematurely
-     * @throws IOException if an error occurs while processing
+     * @throws IOException if an IO error occurs while processing
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
-    protected void parseRecordsUncompressed(JHOVE2 jhove2, Source source, ArcReader reader, boolean bReadVersion)
-            throws EOFException, IOException, JHOVE2Exception {
+    protected void parseRecordsUncompressed(JHOVE2 jhove2, SourceFactory sourceFactory,
+    		Source parentSource, ArcReader reader, boolean bReadVersion)
+    				throws EOFException, IOException, JHOVE2Exception {
         ArcVersionBlock versionBlock;
         ArcRecord record;
 
         // Ensure an ARC reader could be instantiated.
         if (reader != null) {
-            source.setIsAggregate(true);
+            parentSource.setIsAggregate(true);
             if (bReadVersion) {
                 versionBlock = reader.getVersionBlock();
                 if (versionBlock != null) {
-                    processVersionBlock(jhove2, source, versionBlock);
+                    processVersionBlock(jhove2, sourceFactory, parentSource, versionBlock);
                 }
             }
             /*
              * Loop through available records.
              */
             while ((record = reader.getNextRecord()) != null) {
-                processRecord(jhove2, source, record);
+                processRecord(jhove2, sourceFactory, parentSource, record);
             }
-        }
-        else {
-            // No WARC reader. Oh the horror!
+        } else {
+            throw new JHOVE2Exception("ArcReader is null");
         }
     }
 
      * The version block is only read if the reader was initialized
      * during this module call.
      * @param jhove2 the JHove2 characterization context
-     * @param source ARC source unit
+     * @param sourceFactory JHove2 source factory
+     * @param parentSource ARC source unit
      * @param reader ARC reader used to parse records
-     * @param bReadVersion read version block or go straight to parsing records
+     * @param bReadVersion read version block first or go straight to parsing records
      * @throws EOFException if EOF occurs prematurely
-     * @throws IOException if an error occurs while processing
+     * @throws IOException if an IO error occurs while processing
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
-    protected void parseRecordsCompressed(JHOVE2 jhove2, Source source, ArcReader reader, boolean bReadVersion)
-            throws EOFException, IOException, JHOVE2Exception {
+    protected void parseRecordsCompressed(JHOVE2 jhove2, SourceFactory sourceFactory,
+    		Source parentSource, ArcReader reader, boolean bReadVersion)
+    				throws EOFException, IOException, JHOVE2Exception {
         ArcVersionBlock versionBlock;
         ArcRecord record;
 
         // Ensure an ARC reader could be instantiated.
         if (reader != null) {
-            source.setIsAggregate(true);
-            InputStream in = source.getInputStream();
+            parentSource.setIsAggregate(true);
+            InputStream in = parentSource.getInputStream();
             if (bReadVersion) {
                 versionBlock = reader.getVersionBlock(in);
                 if (versionBlock != null) {
-                    processVersionBlock(jhove2, source, versionBlock);
+                    processVersionBlock(jhove2, sourceFactory, parentSource, versionBlock);
                 }
             }
             /*
              * Loop through available records.
              */
             while ((record = reader.getNextRecordFrom(in, 8192, 0)) != null) {
-                processRecord(jhove2, source, record);
+                processRecord(jhove2, sourceFactory, parentSource, record);
             }
-        }
-        else {
-            // No WARC reader. Oh the horror!
+        } else {
+            throw new JHOVE2Exception("ArcReader is null");
         }
     }
 
      * to the supplied input source. Relevant reportable properties are added
      * to the <code>ArcRecordSource</code>.
      * @param jhove2 the JHove2 characterization context
-     * @param source ARC source unit
+     * @param sourceFactory JHove2 source factory
+     * @param parentSource ARC source unit
      * @param versionBlock ARC version block
      * @throws EOFException if EOF occurs prematurely
-     * @throws IOException if an error occurs while processing
+     * @throws IOException if an IO error occurs while processing
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
-    protected void processVersionBlock(JHOVE2 jhove2, Source source, ArcVersionBlock versionBlock)
-                            throws EOFException, IOException, JHOVE2Exception {
-        SourceFactory factory = jhove2.getSourceFactory();
-        if (factory == null) {
-            throw new JHOVE2Exception("JHOVE2 SourceFactory is null");
-        }
-
+    protected void processVersionBlock(JHOVE2 jhove2, SourceFactory sourceFactory,
+    		Source parentSource, ArcVersionBlock versionBlock)
+    				throws EOFException, IOException, JHOVE2Exception {
         ArcRecordData recordData;
 
         /*
          * Arc Record Source.
          */
         Source versionBlockSrc = new ArcRecordSource();
-        versionBlockSrc.setSourceAccessor(factory.createSourceAccessor(versionBlockSrc));
-        versionBlockSrc = source.addChildSource(versionBlockSrc);
+        versionBlockSrc.setSourceAccessor(sourceFactory.createSourceAccessor(versionBlockSrc));
+        versionBlockSrc = parentSource.addChildSource(versionBlockSrc);
         ++arcRecordNumber;
         /*
          * Properties.
         recordData = new ArcRecordData(versionBlock);
         versionBlockSrc.addExtraProperties(recordData.getArcRecordBaseProperties());
         versionBlockSrc.addExtraProperties(recordData.getArcVersionBlockProperties());
-        // Update protocol map.
+        // Update protocol statistics.
         if (recordData.protocol != null) {
             int number = 1;
             if (protocols.containsKey(recordData.protocol)) {
      * http response. The content-type is added as a presumptive format on the
      * embedded source.
      * @param jhove2 the JHove2 characterization context
-     * @param source ARC source unit
+     * @param sourceFactory JHove2 source factory
+     * @param parentSource ARC source unit
      * @param record ARC record from ARC reader
      * @throws EOFException if EOF occurs prematurely
-     * @throws IOException if an error occurs while processing
+     * @throws IOException if an IO error occurs while processing
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
-    protected void processRecord(JHOVE2 jhove2, Source source, ArcRecord record)
-                            throws EOFException, IOException, JHOVE2Exception {
-        SourceFactory factory = jhove2.getSourceFactory();
-        if (factory == null) {
-            throw new JHOVE2Exception("JHOVE2 SourceFactory is null");
-        }
-
+    protected void processRecord(JHOVE2 jhove2, SourceFactory sourceFactory,
+    		Source parentSource, ArcRecord record)
+    				throws EOFException, IOException, JHOVE2Exception {
         Payload payload;
         HttpResponse httpResponse;
         InputStream payload_stream;
          * Arc Record Source.
          */
         Source recordSrc = new ArcRecordSource();
-        recordSrc.setSourceAccessor(factory.createSourceAccessor(recordSrc));
-        recordSrc = source.addChildSource(recordSrc);
+        recordSrc.setSourceAccessor(sourceFactory.createSourceAccessor(recordSrc));
+        recordSrc = parentSource.addChildSource(recordSrc);
         ++arcRecordNumber;
         /*
          * Prepare payload.
             httpResponse = payload.getHttpResponse();
             if (httpResponse == null) {
                 payload_stream = payload.getInputStream();
-            }
-            else {
+            } else {
                 contentType = httpResponse.getProtocolContentType();
                 payload_stream = httpResponse.getPayloadInputStream();
             }
         }
         /*
-         * Presumptive format.
+         * Decide on Jhove2 format from contentType information.
          */
         if (contentType != null) {
             int idx = contentType.indexOf(';');
          * Characterize payload.
          */
         if (recurse && payload_stream != null) {
-            Source payloadSrc = factory.getSource(jhove2, payload_stream, name, null);        // properties
-            if (payloadSrc != null) {
-                payloadSrc = recordSrc.addChildSource(payloadSrc);
-                // Add presumptive format based on content-type.
-                if(formatId != null){
-                    payloadSrc = payloadSrc.addPresumptiveFormat(formatId);
-                }
-                /* Make sure to close the Input after
-                 * characterization is completed.
-                 */
-                Input src_input = payloadSrc.getInput(jhove2);
-                try {
-                    payloadSrc = jhove2.characterize(payloadSrc, src_input);
-                }
-                finally {
-                    if (src_input != null) {
-                        src_input.close();
-                    }
-                }
-            }
+        	characterizePayload(jhove2, sourceFactory, recordSrc, payload_stream, formatId);
         }
         if (payload_stream != null) {
             payload_stream.close();
         recordData = new ArcRecordData(record);
         recordSrc.addExtraProperties(recordData.getArcRecordBaseProperties());
         recordSrc.addExtraProperties(recordData.getArcRecordProperties());
-        // Update protocol map.
+        // Update protocol statistics.
         if (recordData.protocol != null) {
             int number = 1;
             if (protocols.containsKey(recordData.protocol)) {
     }
 
     /**
+     * Process an ARC record payload, recursively if configured to do so.
+     * @param jhove2 the JHove2 characterization context
+     * @param sourceFactory JHove2 source factory
+     * @param recordSrc ARC record source unit
+     * @param payload_stream payload inputstream
+     * @param formatId JHove2 format identification based on contentType
+     * @throws EOFException if EOF occurs prematurely
+     * @throws IOException if an IO error occurs while processing
+     * @throws JHOVE2Exception if a serious problem needs to be reported
+     */
+    protected void characterizePayload(JHOVE2 jhove2, SourceFactory sourceFactory,
+    		Source recordSrc, InputStream payload_stream, FormatIdentification formatId)
+    				throws EOFException, IOException, JHOVE2Exception {
+        // Not all properties are ready yet, they are added as extras.
+        Source payloadSrc = sourceFactory.getSource(jhove2, payload_stream, name, null);
+        if (payloadSrc != null) {
+            payloadSrc = recordSrc.addChildSource(payloadSrc);
+            // Add presumptive format based on content-type.
+            if(formatId != null){
+                payloadSrc = payloadSrc.addPresumptiveFormat(formatId);
+            }
+            /* Make sure to close the Input after
+             * characterization is completed.
+             */
+            Input src_input = payloadSrc.getInput(jhove2);
+            try {
+                payloadSrc = jhove2.characterize(payloadSrc, src_input);
+            } finally {
+                if (src_input != null) {
+                    src_input.close();
+                }
+            }
+        }
+    }
+
+    /**
      * Checks ARC record validity and reports validation errors.
      * @param src ARC source unit
      * @param record the ARC record to characterize.
      * @param jhove2 the JHove2 characterization context.
-     * @throws JHOVE2Exception
-     * @throws IOException
+     * @throws IOException if an IO error occurs while processing
+     * @throws JHOVE2Exception if a serious problem needs to be reported
      */
     private void checkRecordValidity(Source src, ArcRecordBase record,
                         JHOVE2 jhove2) throws JHOVE2Exception, IOException {
      * @param id the configuration property relative name.
      * @param params the values to add in the message
      * @return the new localized message
-     * @throws JHOVE2Exception
+     * @throws JHOVE2Exception if a serious problem needs to be reported
      */
     private Message newValidityError(JHOVE2 jhove2,Severity severity,String id,
                                      Object... params)throws JHOVE2Exception {
     //------------------------------------------------------------------------
 
     /**
-     * Validates the ARC file.
+     * Validates the ARC file, which in this case amounts to returning the
+     * result since validation has already been done.
      * @param  jhove2   the JHove2 characterization context.
      * @param  source   ARC file source unit.
      * @param  input    ARC file source input.
         this.recurse = recurse;
     }
 
+    /**
+     * Enable or disable block digest computation.
+     * @param bComputeBlockDigest block digest computation toggle
+     */
     public void setComputeBlockDigest(boolean bComputeBlockDigest) {
         this.bComputeBlockDigest = bComputeBlockDigest;
     }
 
+    /**
+     * Set the block digest algorithm to be used in case no digest is present
+     * in the ARC header.
+     * @param blockDigestAlgorithm block digest algorithm
+     */
     public void setBlockDigestAlgorithm(String blockDigestAlgorithm) {
         this.blockDigestAlgorithm = blockDigestAlgorithm;
     }
 
+    /**
+     * Set the block digest encoding scheme to be used in case no digest
+     * is present in the ARC header.
+     * @param blockDigestEncoding block digest encoding scheme
+     */
     public void setBlockDigestEncoding(String blockDigestEncoding) {
         this.blockDigestEncoding = blockDigestEncoding;
     }
 
+    /**
+     * Enable or disable payload digest computation.
+     * @param bComputePayloadDigest payload digest computation toggle
+     */
     public void setComputePayloadDigest(boolean bComputePayloadDigest) {
         this.bComputePayloadDigest = bComputePayloadDigest;
     }
 
+    /**
+     * Set the payload digest algorithm to be used in case no digest is present
+     * in the ARC header.
+     * @param payloadDigestAlgorithm payload digest algorithm
+     */
     public void setPayloadDigestAlgorithm(String payloadDigestAlgorithm) {
         this.payloadDigestAlgorithm = payloadDigestAlgorithm;
     }
 
+    /**
+     * Set the payload digest encoding scheme to be used in case no digest
+     * is present in the ARC header.
+     * @param payloadDigestEncoding payload digest encoding scheme
+     */
     public void setPayloadDigestEncoding(String payloadDigestEncoding) {
         this.payloadDigestEncoding = payloadDigestEncoding;
     }

File src/main/java/org/jhove2/module/format/arc/properties/ArcRecordData.java

 @Persistent
 public class ArcRecordData {
 
-    public Long startOffset;
-    public Long consumed;
+    protected Long startOffset;
+    protected Long consumed;
 
-    public String url;
+    protected String url;
     public String protocol;
-    public String ipAddress;
-    public String ipVersion;
-    public String archiveDate;
-    public String rawArchiveDate;
-    public String contentType;
-    public String length;
-    public String resultCode;
-    public String checksum;
-    public String location;
-    public String offset;
-    public String filename;
-    public Boolean bHasPayload;
-    public String payloadLength;
-    public boolean bIsNonCompliant;
+    protected String ipAddress;
+    protected String ipVersion;
+    protected String archiveDate;
+    protected String rawArchiveDate;
+    protected String contentType;
+    protected String length;
+    protected String resultCode;
+    protected String checksum;
+    protected String location;
+    protected String offset;
+    protected String filename;
+    protected Boolean bHasPayload;
+    protected String payloadLength;
+    protected boolean bIsNonCompliant;
 
-    public String computedBlockDigest;
-    public String computedBlockDigestAlgorithm;
-    public String computedBlockDigestEncoding;
-    public String computedPayloadDigest;
-    public String computedPayloadDigestAlgorithm;
-    public String computedPayloadDigestEncoding;
+    protected String computedBlockDigest;
+    protected String computedBlockDigestAlgorithm;
+    protected String computedBlockDigestEncoding;
+    protected String computedPayloadDigest;
+    protected String computedPayloadDigestAlgorithm;
+    protected String computedPayloadDigestEncoding;
 
-    public String versionNumber;
-    public String reserved;
-    public String originCode;
+    protected String versionNumber;
+    protected String reserved;
+    protected String originCode;
 
-    public String protocolResultCode;
-    public String protocolVersion;
-    public String protocolContentType;
-    public String protocolServer;
+    protected String protocolResultCode;
+    protected String protocolVersion;
+    protected String protocolContentType;
+    protected String protocolServer;
 
     /** WARC <code>DateFormat</code> as specified by the WARC ISO standard. */
-    public transient DateFormat warcDateFormat = WarcDateParser.getWarcDateFormat();
+    protected transient DateFormat warcDateFormat = WarcDateParser.getWarcDateFormat();
 
     /**
      * Constructor required by the persistence layer.
      * and arc record classes.
      * @param record record containing common data
      */
-    public void populateArcRecordBase(ArcRecordBase record) {
+    protected void populateArcRecordBase(ArcRecordBase record) {
         startOffset = record.getOffset();
         consumed = record.getConsumed();
         url = record.recUrl;
             if (record.inetAddress.getAddress().length == 4) {
                 ipVersion = "4";
             }
-            else {
+            else if (record.inetAddress.getAddress().length == 16) {
                 ipVersion = "6";
             }
         }
         Payload payload = record.getPayload();
         if (payload != null) {
             // TODO Verify meaning of ObjectSize in JHove2 ARC specs
+        	// payloadLength is reported back as ObjectSize in the Jhove2 specs
             HttpResponse httpResponse = payload.getHttpResponse();
             if (httpResponse != null) {
                 payloadLength = Long.toString(httpResponse.getPayloadLength());
 
     /**
      * Returns a persistent reportable arc version block property instance.
-     * @return
+     * @return a persistent reportable arc version block property instance
      */
     public AbstractReportable getArcVersionBlockProperties() {
         return new ArcVersionBlockProperties(this);
 
     /**
      * Returns a persistent reportable arc record property instance.
-     * @return
+     * @return a persistent reportable arc record property instance
      */
     public AbstractReportable getArcRecordProperties() {
         return new ArcRecordProperties(this);

File src/main/java/org/jhove2/module/format/gzip/GzipModule.java

     public static final Coverage COVERAGE = Coverage.Selective;
 
     /** Validation status. */
-    protected Validity isValid;
+    private Validity isValid;
 
     /** Number of members compressed with the deflate compression method. */
-    //private final AtomicLong deflateMemberCount = new AtomicLong(0L);
-    private Long deflateMemberCount = new Long(0L);
+    private long deflateMemberCount = 0;
+
     /** Number of non-valid members. */
-    //private final AtomicLong invalidMembers = new AtomicLong(0L);
-    private Long invalidMembers = new Long(0L);
+    private long invalidMembers = 0;
+
     /** Validation error messages. */
     //private final Collection<Message> validationMessages =
     //                                    new ConcurrentLinkedQueue<Message>();
     /** Thread pool size for parallel characterization of GZip members. */
     //private int nThreads = 0;
 
-    public static transient Map<Integer, GzipModule> gzipMap = new TreeMap<Integer, GzipModule>();
+    public static final transient Map<Integer, GzipModule> gzipMap = new TreeMap<Integer, GzipModule>();
 
-    public static transient AutoIncrement index = new AutoIncrement();
+    public static final transient AutoIncrement index = new AutoIncrement();
 
     public Integer instanceId;
 
     public GzipModule(Format format,
             FormatModuleAccessor formatModuleAccessor) {
         super(VERSION, RELEASE, RIGHTS, format, formatModuleAccessor);
-        this.isValid = Validity.Undetermined;
+        isValid = Validity.Undetermined;
     }
 
     /**
         // Reset state.
         //this.deflateMemberCount.set(0L);
         //this.invalidMembers.set(0L);
-        this.deflateMemberCount = 0L;
-        this.invalidMembers = 0L;
-        this.validationMessages.clear();
-        this.isValid = Validity.Undetermined;
-        this.wovenFormatParser = null;
-        final boolean doRecurse = this.recurse;
+        deflateMemberCount = 0L;
+        invalidMembers = 0L;
+        validationMessages.clear();
+        isValid = Validity.Undetermined;
+        wovenFormatParser = null;
+        final boolean doRecurse = recurse;
 
         // In GZip format, least-significant bytes come first.
         input.setByteOrder(ByteOrder.LITTLE_ENDIAN);
                     new BufferedInputStream(source.getInputStream(), 8192));
 
         instanceId = index.get();
-        getModuleAccessor().persistModule(this);        // Oh My God...!
+        // Persist this module explicitly now: it is otherwise not persisted
+        // immediately, and the persisted state is needed during recursive
+        // calls rather than only when the gzip module exits.
+        // Each time jhove2 looks up an existing module it actually
+        // instantiates a new instance and loads the persisted values.
+        // So a version with the correct instanceId exists on the call stack,
+        // but every time someone requests it a new one is created and
+        // populated with persisted data.
+        getModuleAccessor().persistModule(this);
         synchronized (gzipMap) {
             gzipMap.put(instanceId, this);
         }
                             // Set parent module.
                             //src.setParentModule(this);
                             // Characterize member content.
-                            this.characterizeMember(jhove2, src);
+                            characterizeMember(jhove2, src);
                         }
                         else {
                             /*
                             else {
                             */
                                 // Sync. characterization in current thread.
-                                this.characterizeMember(jhove2, src);
+                                characterizeMember(jhove2, src);
                             /*
                             }
                             */
                     if (e.getCompressionMethod().getValue() ==
                                                     GzipInputStream.DEFLATE) {
                         //this.deflateMemberCount.incrementAndGet();
-                        ++this.deflateMemberCount;
+                        ++deflateMemberCount;
                     }
                     // Check member validity.
                     if (! e.isValid()) {
                         //this.invalidMembers.incrementAndGet();
-                        ++this.invalidMembers;
-                        this.isValid = Validity.False;
+                        ++invalidMembers;
+                        isValid = Validity.False;
                         // Report errors on child source object.
-                        this.reportValidationErrors(e, src, jhove2);
+                        reportValidationErrors(e, src, jhove2);
                     }
                 }
             }
-            if (this.isValid == Validity.Undetermined) {
+            if (isValid == Validity.Undetermined) {
                 // No invalid members found and EOF reached without
                 // any exception being thrown => Source is valid.
-                this.isValid = Validity.True;
+                isValid = Validity.True;
             }
         }
         catch (IOException e) {
-            this.handleError(e, jhove2, gz.getOffset());
+            handleError(e, jhove2, gz.getOffset());
             if (! ((e instanceof EOFException) && (gz.getOffset() != 0L))) {
                 // Not an EOF error occurring before the very first entry.
                 throw e;
     {
         Input input = source.getInput(jhove2);
         try {
-            if (this.wovenFormatParser != null) {
+            if (wovenFormatParser != null) {
                 // Start timer.
                 TimerInfo timer = source.getTimerInfo();
                 timer.setStartTime();
                     source.setDeleteTempFileOnClose(jhove2.getInvocation()
                             .getDeleteTempFilesOnClose());
                     // Woven format => Delegate content handling.
-                    this.wovenFormatParser.parse(jhove2, source, input);
+                    wovenFormatParser.parse(jhove2, source, input);
                 }
                 finally {
                     // Delete temp. files and compute processing duration.
     private void handleError(Exception e, JHOVE2 jhove2, long offset)
     {
         try {
-            this.isValid = Validity.False;
-            this.validationMessages.add(this.newValidityError(jhove2,
+            isValid = Validity.False;
+            validationMessages.add(newValidityError(jhove2,
                     "invalidGzipFile", Long.valueOf(offset), e));
         }
         catch (JHOVE2Exception ex) {
     public Validity validate(JHOVE2 jhove2, Source source, Input input)
         throws JHOVE2Exception
     {
-        return this.isValid();
+        return isValid();
     }
 
     /** Get validation coverage.
     @Override
     public Validity isValid()
     {
-        return this.isValid;
+        return isValid;
     }
 
     //------------------------------------------------------------------------
                         value = "Number of members compressed with the deflate compression method")
     public long getNumDeflateMembers() {
         //return this.deflateMemberCount.get();
-        return this.deflateMemberCount;
+        return deflateMemberCount;
     }
 
     /**
     @ReportableProperty(order = 2, value = "Number of non-valid members")
     public long getNumInvalidMembers() {
         //return this.invalidMembers.get();
-        return this.invalidMembers;
+        return invalidMembers;
     }
 
     /**
     public Collection<Message> getValidationMessages() {
         // Return null if the list is empty to prevent the displayer
         // from rendering this property.
-        return (this.validationMessages.isEmpty())? null:
-                    Collections.unmodifiableCollection(this.validationMessages);
+        return (validationMessages.isEmpty())? null:
+                    Collections.unmodifiableCollection(validationMessages);
     }
 
     //------------------------------------------------------------------------
      *         to <code>true</code>.
      */
     public boolean getRecurse() {
-        return this.recurse;
+        return recurse;
     }
 
     /*

File src/main/java/org/jhove2/module/format/warc/WarcModule.java

     /** Validation status. */
     private Validity isValid;
 
-    /** Number of record seen by this instance. */
+    /** Number of records seen by this instance. */
     private int warcRecordNumber;
 
-    /** WARC filename. */
+    /** WARC filename, from metadata record, if present. */
     private String warcFileName;
 
-    /** WARC file size. */
+    /** WARC file size, whole file. */
     private long warcFileSize;
 
-    /** File version. */
-    private String fileVersion;
+    /** File version, null unless all records have the same version. */
+    private String warcFileVersion;
 
     /**
      * Instantiate a new <code>WarcModule</code> instance.
      * This constructor is used by the Spring framework.
-     * @param format WARC format
+     * @param format Jhove2 Format used by this module to handle WARC
      * @param formatModuleAccessor FormatModuleAccessor to manage access to Format Profiles
      */
     public WarcModule(Format format,
      * @return number of consumed bytes parsed
      * @throws EOFException If End-of-File is reached reading the source unit
      * @throws IOException If an I/O exception is raised reading the source unit
-     * @throws JHOVE2Exception
+     * @throws JHOVE2Exception if a serious error hinders correct module execution
      * @see org.jhove2.module.format.FormatModule#parse(org.jhove2.core.JHOVE2,
      *      org.jhove2.core.source.Source, org.jhove2.core.io.Input)
      */
     public long parse(JHOVE2 jhove2, Source source, Input input)
                             throws IOException, EOFException, JHOVE2Exception {
         /*
-         * Cache Content-Types to J2 FormatIdentifications.
+         * Cache Content-Types to JHove2 FormatIdentifications.
          */
         if (jhove2Ids == null) {
             Map<String,String> ids =
             jhove2Ids = Collections.unmodifiableSortedMap(idsTemp);
         }
         /*
+         * SourceFactory for later use.
+         */
+        //Invocation cfg = jhove2.getInvocation();
+        SourceFactory sourceFactory = jhove2.getSourceFactory();
+        if (sourceFactory == null) {
+            throw new JHOVE2Exception("INTERNAL ERROR - JHOVE2 SourceFactory is null");
+        }
+        /*
          * Module init.
          */
         long consumed = 0L;
         isValid = Validity.Undetermined;
+        // No effect unless read methods on the input object are called.
         input.setByteOrder(ByteOrder.LITTLE_ENDIAN);
         /*
          * Module context.
                 mod = parentMods.get(i);
                 if (mod instanceof GzipModule) {
                     gzipMod = (GzipModule)mod;
+                    // Look up the GZipModule which is on the call stack.
+                    // Required since the JHove2 lookup returns a new instance
+                    // populated with persisted values and not the instance on
+                    // the call stack.
                     gzipMod = GzipModule.gzipMap.get(gzipMod.instanceId);
                 }
                 if (mod instanceof WarcModule) {
+                	// The same goes for the WarcModule except we do not need
+                	// any transient fields here.
                     warcMod = (WarcModule)mod;
                 }
             }
         }
-
-        //Invocation cfg = jhove2.getInvocation();
-
+        /*
+         * Read some WARC records.
+         */
         WarcReader reader = null;
         if (gzipMod != null) {
             /*
             reader = (WarcReader)gzipMod.reader;
             if (reader == null) {
                 reader = WarcReaderFactory.getReaderUncompressed();
-                reader.setBlockDigestEnabled(bComputeBlockDigest);
-                reader.setPayloadDigestEnabled(bComputePayloadDigest);
-                try {
-                    reader.setBlockDigestAlgorithm(blockDigestAlgorithm);
-                    reader.setPayloadDigestAlgorithm(payloadDigestAlgorithm);
-                } catch (NoSuchAlgorithmException e) {
-                    // TODO Auto-generated catch block
-                    e.printStackTrace();
-                }
-                reader.setBlockDigestEncoding(blockDigestEncoding);
-                reader.setPayloadDigestEncoding(payloadDigestEncoding);
+                setDigestOptions(reader);
                 gzipMod.reader = reader;
             }
             if (warcMod == null) {
                  * First record. (Unless the parent modules are not correct!)
                  */
                 mod = parentSrc.addModule(this);
-                parseRecordsCompressed(jhove2, source, reader);
-            }
-            else {
-                warcMod.parseRecordsCompressed(jhove2, source, reader);
+                parseRecordsCompressed(jhove2, sourceFactory, source, reader);
+            } else {
+                warcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader);
                 // Validity
                 if (warcMod.isValid != Validity.False) {
                     if (reader.isCompliant()) {
                         warcMod.isValid = Validity.True;
-                    }
-                    else {
+                    } else {
                         warcMod.isValid = Validity.False;
                     }
                 }
                 warcMod = (WarcModule)warcMod.getModuleAccessor().persistModule(warcMod);
                 // Remove WarcModule from source instance since we added one to the parent source.
-                /*
-                List<Module> sourceMods = source.getModules();
-                Iterator<Module> iter = sourceMods.iterator();
-                while (iter.hasNext()) {
-                    mod = iter.next();
-                    if (mod instanceof WarcModule) {
-                        iter.remove();
-                    }
-                }
-                */
                 this.setParentSourceId(null);
                 source = source.getSourceAccessor().persistSource(source);
             }
              * Not GZip compressed.
              */
             reader = WarcReaderFactory.getReaderUncompressed(source.getInputStream(), 8192);
-            reader.setBlockDigestEnabled(bComputeBlockDigest);
-            reader.setPayloadDigestEnabled(bComputePayloadDigest);
-            try {
-                reader.setBlockDigestAlgorithm(blockDigestAlgorithm);
-                reader.setPayloadDigestAlgorithm(payloadDigestAlgorithm);
-            } catch (NoSuchAlgorithmException e) {
-                // TODO Auto-generated catch block
-                e.printStackTrace();
-            }
-            reader.setBlockDigestEncoding(blockDigestEncoding);
-            reader.setPayloadDigestEncoding(payloadDigestEncoding);
-            parseRecordsUncompressed(jhove2, source, reader);
+            setDigestOptions(reader);
+            parseRecordsUncompressed(jhove2, sourceFactory, source, reader);
             reader.close();
             consumed = reader.getConsumed();
             /*
             if (isValid != Validity.False) {
                 if (reader.isCompliant()) {
                     isValid = Validity.True;
-                }
-                else {
+                } else {
                     isValid = Validity.False;
                 }
             }
     }
 
     /**
+     * Set digest options for WARC reader.
+     * @param reader WARC reader instance
+     */
+    protected void setDigestOptions(WarcReader reader) throws JHOVE2Exception {
+        reader.setBlockDigestEnabled(bComputeBlockDigest);
+        reader.setPayloadDigestEnabled(bComputePayloadDigest);
+        try {
+            reader.setBlockDigestAlgorithm(blockDigestAlgorithm);
+        } catch (NoSuchAlgorithmException e) {
+        	throw new JHOVE2Exception("Invalid block digest algorithm: " + blockDigestAlgorithm);
+        }
+        try {
+            reader.setPayloadDigestAlgorithm(payloadDigestAlgorithm);
+        } catch (NoSuchAlgorithmException e) {
+        	throw new JHOVE2Exception("Invalid payload digest algorithm: " + payloadDigestAlgorithm);
+        }
+        reader.setBlockDigestEncoding(blockDigestEncoding);
+        reader.setPayloadDigestEncoding(payloadDigestEncoding);
+    }
+
+    /**
      * Parse WARC records that are not encased in GZip entries. Parsing
      * should be straightforward with all records accessible through the same
      * source.
      * @param jhove2 the JHove2 characterization context
-     * @param source WARC source unit
+     * @param sourceFactory JHove2 source factory
+     * @param parentSource WARC source unit
      * @param reader WARC reader used to parse records
      * @throws EOFException if EOF occurs prematurely
-     * @throws IOException if an error occurs while processing
+     * @throws IOException if an IO error occurs while processing
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
-    protected void parseRecordsUncompressed(JHOVE2 jhove2, Source source, WarcReader reader)
-            throws EOFException, IOException, JHOVE2Exception {
+    protected void parseRecordsUncompressed(JHOVE2 jhove2, SourceFactory sourceFactory,
+    		Source parentSource, WarcReader reader)
+    				throws EOFException, IOException, JHOVE2Exception {
         WarcRecord record;
         // Ensure a WARC reader could be instantiated.
         if (reader != null) {
-            source.setIsAggregate(true);
+            parentSource.setIsAggregate(true);
             /*
             * Loop through available records.
             */
             while ((record = reader.getNextRecord()) != null) {
-                processRecord(jhove2, source, record);
+                processRecord(jhove2, sourceFactory, parentSource, record);
             }
-        }
-        else {
-            // No WARC reader. Oh the horror!
+        } else {
+            throw new JHOVE2Exception("WarcReader is null");
         }
     }
 
      * a GZip module instance. Since each record will presumably be parsed from
      * a different source, alternative methods in the WARC reader will be used.
      * @param jhove2 the JHove2 characterization context
-     * @param source WARC source unit
+     * @param sourceFactory JHove2 source factory
+     * @param parentSource WARC source unit
      * @param reader WARC reader used to parse records
      * @throws EOFException if EOF occurs prematurely
-     * @throws IOException if an error occurs while processing
+     * @throws IOException if an IO error occurs while processing
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
-    protected void parseRecordsCompressed(JHOVE2 jhove2, Source source, WarcReader reader)
-                            throws EOFException, IOException, JHOVE2Exception {
+    protected void parseRecordsCompressed(JHOVE2 jhove2, SourceFactory sourceFactory,
+    		Source parentSource, WarcReader reader)
+    				throws EOFException, IOException, JHOVE2Exception {
         WarcRecord record;
         // Ensure a WARC reader could be instantiated.
         if (reader != null) {
-            source.setIsAggregate(true);
+            parentSource.setIsAggregate(true);
             /*
              * Loop through available records.
              */
-            InputStream in = source.getInputStream();
+            InputStream in = parentSource.getInputStream();
             while ((record = reader.getNextRecordFrom(in, 8192)) != null) {
-                processRecord(jhove2, source, record);
+                processRecord(jhove2, sourceFactory, parentSource, record);
             }
-        }
-        else {
-            // No WARC reader. Oh the horror!
+        } else {
+            throw new JHOVE2Exception("WarcReader is null");
         }
     }
 
      * http response. The content-type is added as a presumptive format on the
      * embedded source.
      * @param jhove2 the JHove2 characterization context
-     * @param source WARC source unit
+     * @param sourceFactory JHove2 source factory
+     * @param parentSource WARC source unit
      * @param record WARC record from WARC reader
      * @throws EOFException if EOF occurs prematurely
-     * @throws IOException if an error occurs while processing
+     * @throws IOException if an IO error occurs while processing
      * @throws JHOVE2Exception if a serious problem needs to be reported
      */
-    protected void processRecord(JHOVE2 jhove2, Source source, WarcRecord record)
-                            throws EOFException, IOException, JHOVE2Exception {
-        SourceFactory factory = jhove2.getSourceFactory();
-        if (factory == null) {
-            throw new JHOVE2Exception("JHOVE2 SourceFactory is null");
-        }
-
+    protected void processRecord(JHOVE2 jhove2, SourceFactory sourceFactory,
+    		Source parentSource, WarcRecord record) throws EOFException, IOException, JHOVE2Exception {
         Payload payload;
         HttpResponse httpResponse;
         InputStream payload_stream;
          * Warc Record Source.
          */
         Source recordSrc = new WarcRecordSource();
-        recordSrc.setSourceAccessor(factory.createSourceAccessor(recordSrc));
-        recordSrc = source.addChildSource(recordSrc);
+        recordSrc.setSourceAccessor(sourceFactory.createSourceAccessor(recordSrc));
+        recordSrc = parentSource.addChildSource(recordSrc);
         ++warcRecordNumber;
         /*
          * Prepare payload.
             httpResponse = payload.getHttpResponse();
             if (httpResponse == null) {
                 payload_stream = payload.getInputStream();
-            }
-            else {
+            } else {
                 contentType = httpResponse.getProtocolContentType();
                 payload_stream = httpResponse.getPayloadInputStream();
             }
         }
         /*
-         * Presumptive format.
+         * Decide on Jhove2 format from contentType information.
          */
         if (contentType != null) {
             int idx = contentType.indexOf(';');
          * Characterize payload.
          */
         if (recurse && payload_stream != null) {
-            // Not all properties are ready yet, they are added as extras.
-            Source payloadSrc = factory.getSource(jhove2, payload_stream, name, null);
-            if (payloadSrc != null) {
-                payloadSrc = recordSrc.addChildSource(payloadSrc);
-                // Add presumptive format based on content-type.
-                if(formatId != null){
-                    payloadSrc = payloadSrc.addPresumptiveFormat(formatId);
-                }
-                /* Make sure to close the Input after
-                 * characterization is completed.
-                 */
-                Input src_input = payloadSrc.getInput(jhove2);
-                try {
-                    payloadSrc = jhove2.characterize(payloadSrc, src_input);
-                }
-                finally {
-                    if (src_input != null) {
-                        src_input.close();
-                    }
-                }
-                payloadSrc.close();
-            }
+        	characterizePayload(jhove2, sourceFactory, recordSrc, payload_stream, formatId);
         }
         if (payload_stream != null) {
             payload_stream.close();
     }
 
     /**
+     * Process a WARC record payload, recursively if configured to do so.
+     * @param jhove2 the JHove2 characterization context
+     * @param sourceFactory JHove2 source factory
+     * @param recordSrc WARC record source unit
+     * @param payload_stream payload inputstream
+     * @param formatId JHove2 format identification based on contentType
+     * @throws EOFException if EOF occurs prematurely
+     * @throws IOException if an IO error occurs while processing
+     * @throws JHOVE2Exception if a serious problem needs to be reported
+     */
+    protected void characterizePayload(JHOVE2 jhove2, SourceFactory sourceFactory,
+    		Source recordSrc, InputStream payload_stream, FormatIdentification formatId)
+    				throws EOFException, IOException, JHOVE2Exception {
+        // Not all properties are ready yet, they are added as extras.
+        Source payloadSrc = sourceFactory.getSource(jhove2, payload_stream, name, null);
+        if (payloadSrc != null) {
+            payloadSrc = recordSrc.addChildSource(payloadSrc);
+            // Add presumptive format based on content-type.
+            if(formatId != null){
+                payloadSrc = payloadSrc.addPresumptiveFormat(formatId);
+            }
+            /* Make sure to close the Input after
+             * characterization is completed.
+             */
+            Input src_input = payloadSrc.getInput(jhove2);
+            try {
+                payloadSrc = jhove2.characterize(payloadSrc, src_input);
+            } finally {
+                if (src_input != null) {
+                    src_input.close();
+                }
+            }
+            payloadSrc.close();
+        }
+    }
+
+    /**
      * Checks WARC record validity and reports validation errors.
      * @param src WARC source unit
      * @param record the WARC record to characterize.
      * @param jhove2 the JHove2 characterization context.
-     * @throws JHOVE2Exception
-     * @throws IOException
+     * @throws IOException if an IO error occurs while processing
+     * @throws JHOVE2Exception if a serious problem needs to be reported
      */
     private void checkRecordValidity(Source src, WarcRecord record,
                         JHOVE2 jhove2) throws JHOVE2Exception, IOException {
      * @param id the configuration property relative name.
      * @param params the values to add in the message
      * @return the new localized message
-     * @throws JHOVE2Exception
+     * @throws JHOVE2Exception if a serious problem needs to be reported
      */
     private Message newValidityError(JHOVE2 jhove2,Severity severity,String id,
                                      Object... params)throws JHOVE2Exception {
     //------------------------------------------------------------------------
 
     /**
-     * Validate the Zip file.
+     * Validate the WARC file, which in this case amounts to returning the
+     * result since validation has already been done.
      * @param jhove2 JHOVE2 framework object
      * @param source WARC file source unit
      * @param input  WARC file source input
      */
     @ReportableProperty(order=5, value="File version")
     public String getFileVersion() {
-        return fileVersion;
+        return warcFileVersion;
     }
 
-//    /** Get Zip file entries.
-//     * @return Zip file entries
-//     */
-//    @ReportableProperty(order=1, value="Zip file entries")
-//    public List<ZipFileEntry> getZipFileEntries() {
-//        return this.entries;
-//    }
-
     //------------------------------------------------------------------------
     // Specific implementation
     //------------------------------------------------------------------------
         this.recurse = recurse;
     }
 
+    /**
+     * Enable or disable block digest computation.
+     * @param bComputeBlockDigest block digest computation toggle
+     */
     public void setComputeBlockDigest(boolean bComputeBlockDigest) {
         this.bComputeBlockDigest = bComputeBlockDigest;
     }
 
+    /**
+     * Set the block digest algorithm to be used in case no digest is present
+     * in the WARC header.
+     * @param blockDigestAlgorithm block digest algorithm
+     */
     public void setBlockDigestAlgorithm(String blockDigestAlgorithm) {
         this.blockDigestAlgorithm = blockDigestAlgorithm;
     }
 
+    /**
+     * Set the block digest encoding scheme to be used in case no digest
+     * is present in the WARC header.
+     * @param blockDigestEncoding block digest encoding scheme
+     */
     public void setBlockDigestEncoding(String blockDigestEncoding) {
         this.blockDigestEncoding = blockDigestEncoding;
     }
 
+    /**
+     * Enable or disable payload digest computation.
+     * @param bComputePayloadDigest payload digest computation toggle
+     */
     public void setComputePayloadDigest(boolean bComputePayloadDigest) {
         this.bComputePayloadDigest = bComputePayloadDigest;
     }
 
+    /**
+     * Set the payload digest algorithm to be used in case no digest is present
+     * in the WARC header.
+     * @param payloadDigestAlgorithm payload digest algorithm
+     */
     public void setPayloadDigestAlgorithm(String payloadDigestAlgorithm) {
         this.payloadDigestAlgorithm = payloadDigestAlgorithm;
     }
 
+    /**
+     * Set the payload digest encoding scheme to be used in case no digest
+     * is present in the WARC header.
+     * @param payloadDigestEncoding payload digest encoding scheme
+     */
     public void setPayloadDigestEncoding(String payloadDigestEncoding) {
         this.payloadDigestEncoding = payloadDigestEncoding;
     }

File src/main/java/org/jhove2/module/format/warc/properties/WarcContinuationProperties.java

 
     /**
      * Construct WARC continuation property instance with the supplied data.
-     * @param record WARC continuation property data
+     * @param record WARC record data
      */
     public WarcContinuationProperties(WarcRecordData record) {
         this.record = record;

File src/main/java/org/jhove2/module/format/warc/properties/WarcConversionProperties.java

 
     /**
      * Construct WARC conversion property instance with the supplied data.
-     * @param record WARC conversion property data
+     * @param record WARC record data
      */
     public WarcConversionProperties(WarcRecordData record) {
         this.record = record;

File src/main/java/org/jhove2/module/format/warc/properties/WarcMetadataProperties.java

 
     /**
      * Construct WARC metadata property instance with the supplied data.
-     * @param record WARC metadata property data
+     * @param record WARC record data
      */
     public WarcMetadataProperties(WarcRecordData record) {
         this.record = record;

File src/main/java/org/jhove2/module/format/warc/properties/WarcRecordBaseProperties.java

 
     /**
      * Construct WARC record base property instance with the supplied data.
-     * @param record WARC record base property data
+     * @param record WARC record data
      */
     public WarcRecordBaseProperties(WarcRecordData record) {
         this.record = record;

File src/main/java/org/jhove2/module/format/warc/properties/WarcRecordData.java

  * Since the WARC reader is not persistent its data must be moved to a simpler
  * data class which can be persisted instead.
  *
+ * Note: Some populate methods currently do not include any functionality.
+ * However, they are included for forward compatibility in case the ISO
+ * standard changes and extra properties are required.
+ *
  * @author nicl
  */
 @Persistent
 public class WarcRecordData {
 
-    public Long startOffset;
-    public Long consumed;
+    protected Long startOffset;
+    protected Long consumed;
 
-    public String warcType;
-    public String warcFilename;
-    public String warcRecordId;
-    public String warcDate;
-    public String contentLength;
-    public String contentType;
-    public String warcTruncated;
-    public String warcIpAddress;
-    public List<String> warcConcurrentToList;
-    public String warcRefersTo;
-    public String warcTargetUri;
-    public String warcWarcinfoId;
-    public String warcIdentifiedPayloadType;
-    public String warcProfile;
-    public String warcSegmentNumber;
-    public String warcSegmentOriginId;
-    public String warcSegmentTotalLength;
+    protected String warcType;
+    protected String warcFilename;
+    protected String warcRecordId;
+    protected String warcDate;
+    protected String contentLength;
+    protected String contentType;
+    protected String warcTruncated;
+    protected String warcIpAddress;
+    protected List<String> warcConcurrentToList;
+    protected String warcRefersTo;
+    protected String warcTargetUri;
+    protected String warcWarcinfoId;
+    protected String warcIdentifiedPayloadType;
+    protected String warcProfile;
+    protected String warcSegmentNumber;
+    protected String warcSegmentOriginId;
+    protected String warcSegmentTotalLength;
 
-    public String warcBlockDigest;
-    public String warcBlockDigestAlgorithm;
-    public String warcBlockDigestEncoding;
-    public String warcPayloadDigest;
-    public String warcPayloadDigestAlgorithm;
-    public String warcPayloadDigestEncoding;
+    protected String warcBlockDigest;
+    protected String warcBlockDigestAlgorithm;
+    protected String warcBlockDigestEncoding;
+    protected String warcPayloadDigest;
+    protected String warcPayloadDigestAlgorithm;
+    protected String warcPayloadDigestEncoding;
 
-    public String computedBlockDigest;
-    public String computedBlockDigestAlgorithm;
-    public String computedBlockDigestEncoding;
-    public String computedPayloadDigest;
-    public String computedPayloadDigestAlgorithm;
-    public String computedPayloadDigestEncoding;
+    protected String computedBlockDigest;
+    protected String computedBlockDigestAlgorithm;
+    protected String computedBlockDigestEncoding;
+    protected String computedPayloadDigest;
+    protected String computedPayloadDigestAlgorithm;
+    protected String computedPayloadDigestEncoding;
 
-    public String recordIdScheme;
+    protected String recordIdScheme;
 
-    public Boolean bIsNonCompliant;
-    public Boolean isValidBlockDigest;
-    public Boolean isValidPayloadDigest;
+    protected Boolean bIsNonCompliant;
+    protected Boolean isValidBlockDigest;
+    protected Boolean isValidPayloadDigest;
 
-    public Boolean bHasPayload;
-    public String payloadLength;
+    protected Boolean bHasPayload;
+    protected String payloadLength;
 
-    public String ipVersion;
+    protected String ipVersion;
 
-    public String resultCode;
-    public String protocolVersion;
-    public String protocolContentType;
-    public String protocolServer;
-    public String protocolUserAgent;
+    protected String resultCode;
+    protected String protocolVersion;
+    protected String protocolContentType;
+    protected String protocolServer;
+    protected String protocolUserAgent;
 
     /**
      * Constructor required by the persistence layer.
          */
         bHasPayload = record.hasPayload();
         Payload payload = record.getPayload();
+        HeaderLine headerLine;
         if (payload != null) {
             // TODO Verify meaning of JHove2 WARC draft
             HttpResponse httpResponse = payload.getHttpResponse();
             if (httpResponse != null) {
                 payloadLength = Long.toString(httpResponse.getPayloadLength());
+                // TODO Fix this when request and response are separate objects.
+                protocolVersion = httpResponse.protocolVersion;
+                resultCode = httpResponse.resultCode;
+                protocolContentType = httpResponse.contentType;
+                headerLine = httpResponse.getHeader("server");
+                if (headerLine != null && headerLine.value != null) {
+                    protocolServer = headerLine.value;
+                }
+            	// TODO HttpRequest not supported yet in JWAT
+                protocolVersion = httpResponse.protocolVersion;
+                headerLine = httpResponse.getHeader("user-agent");
+                if (headerLine != null && headerLine.value != null) {
+                    protocolUserAgent = headerLine.value;
+                }
             }
             else {
                 payloadLength = Long.toString(payload.getTotalLength());;
             }
         }
-    }
-
-    /**
-     * Populate object with WarcInfo record type specific data which is to be
-     * reported back as properties.
-     * @return this object with populated data for WarcInfo properties
-     */
-    public WarcRecordData populateWarcinfo(WarcRecord record) {
-        return this;
-    }
-
-    /**
-     * Populate object with Response record type specific data which is to be
-     * reported back as properties.
-     * @return this object with populated data for Response properties
-     */
-    public WarcRecordData populateResponse(WarcRecord record) {
+        /*
+         * IpVersion, common for several record properties.
+         */
         if (record.warcInetAddress != null) {
             if (record.warcInetAddress.getAddress().length == 4) {
                 ipVersion = "4";
                 ipVersion = "6";
             }
         }
-
-        Payload payload = record.getPayload();
-
-        if (payload != null) {
-            HttpResponse httpResponse = payload.getHttpResponse();
-            HeaderLine headerLine;
-            if (httpResponse != null) {
-                resultCode = httpResponse.resultCode;
-                protocolVersion = httpResponse.protocolVersion;
-                protocolContentType = httpResponse.contentType;
-                headerLine = httpResponse.getHeader("server");
-                if (headerLine != null && headerLine.value != null) {
-                    protocolServer = headerLine.value;
-                }
-            }
-        }
-        return this;
-    }
-
-    /**
-     * Populate object with Resource record type specific data which is to be
-     * reported back as properties.
-     * @return this object with populated data for Resource properties
-     */
-    public WarcRecordData populateResource(WarcRecord record) {
-        if (record.warcInetAddress != null) {
-            if (record.warcInetAddress.getAddress().length == 4) {
-                ipVersion = "4";
-            }
-            else {
-                ipVersion = "6";
-            }
-        }
-        return this;
-    }
-
-    /**
-     * Populate object with Request record type specific data which is to be
-     * reported back as properties.
-     * @return this object with populated data for Request properties
-     */
-    public WarcRecordData populateRequest(WarcRecord record) {
-        if (record.warcInetAddress != null) {
-            if (record.warcInetAddress.getAddress().length == 4) {
-                ipVersion = "4";
-            }
-            else {
-                ipVersion = "6";
-            }
-        }
-
-        Payload payload = record.getPayload();
-
-        if (payload != null) {
-            HttpResponse httpResponse = payload.getHttpResponse();
-            HeaderLine headerLine;
-            if (httpResponse != null) {
-            	// TODO HttpRequest not supported yet in JWAT
-                protocolVersion = httpResponse.protocolVersion;
-                headerLine = httpResponse.getHeader("user-agent");
-                if (headerLine != null && headerLine.value != null) {
-                    protocolUserAgent = headerLine.value;
-                }
-            }
-        }
-        return this;
-    }
-
-    /**
-     * Populate object with Metadata record type specific data which is to be
-     * reported back as properties.
-     * @return this object with populated data for Metadata properties