Commits

Anonymous committed 16eef5b

The GZip, ARC and WARC modules now delete temp files according to the configuration.
Added missing XSLDisplay module and util classes.

  • Participants
  • Parent commits 5684542

Comments (0)

Files changed (7)

File src/main/java/org/jhove2/module/display/XSLDisplayer.java

+/**
+ * JHOVE2 - Next-generation architecture for format-aware characterization
+ *
+ * Copyright (c) 2009 by The Regents of the University of California,
+ * Ithaka Harbors, Inc., and The Board of Trustees of the Leland Stanford
+ * Junior University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * o Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * o Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * o Neither the name of the University of California/California Digital
+ *   Library, Ithaka Harbors/Portico, or Stanford University, nor the names of
+ *   its contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.jhove2.module.display;
+
+import java.io.File;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import javax.xml.transform.Templates;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.sax.SAXTransformerFactory;
+import javax.xml.transform.sax.TransformerHandler;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.stream.StreamSource;
+
+import org.jhove2.annotation.ReportableProperty;
+import org.jhove2.persist.ModuleAccessor;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+import com.sleepycat.persist.model.Persistent;
+
+/**
+ * An extension of the {@link XMLDisplayer} that pipelines the XML
+ * output directly into an XSLT stylesheet.
+ * <p>
+ * The display methods emit SAX events to a TrAX
+ * {@link TransformerHandler}, which writes the result of the
+ * transformation to the stream handed to {@link #startDisplay}. When no
+ * stylesheet has been configured the identity transformation is used.</p>
+ * <p>
+ * This displayer shall be configured (in JHOVE2 Spring configuration)
+ * with scope <code>prototype</code> as a new TrAX Transformer object
+ * shall be allocated for each new Reportable to display.</p>
+ *
+ * @author lbihanic
+ */
+@Persistent
+public class XSLDisplayer extends XMLDisplayer {
+
+    /** The SAX transformer factory, created on first use. */
+    private static SAXTransformerFactory stf;
+    /** The cache of compiled XSLT stylesheets, keyed by absolute file path. */
+    private static final ConcurrentMap<String,Templates> stylesheets =
+                                    new ConcurrentHashMap<String, Templates>();
+
+    private static final Logger log =
+                            Logger.getLogger(XSLDisplayer.class.getName());
+
+    /** The XSLT stylesheet file, <code>null</code> if none was set. */
+    File styleSheetFile;
+    /** The XSLT stylesheet to apply, as a compiled TrAX Templates object. */
+    transient Templates stylesheet = null;
+    /** The XSLT processor as a SAX ContentHandler. */
+    transient ContentHandler out = null;
+
+    /**
+     * Instantiate a new <code>XSLDisplayer</code> without a
+     * persistence manager.
+     */
+    public XSLDisplayer() {
+        this(null);
+    }
+
+    /**
+     * Instantiate a new <code>XSLDisplayer</code>.
+     * @param moduleAccessor
+     *            Displayer persistence manager
+     */
+    public XSLDisplayer(ModuleAccessor moduleAccessor) {
+        super(moduleAccessor);
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public void startDisplay(PrintStream out, int level) {
+        // The handler must be installed first: declaration() and
+        // startTag() both send their SAX events to this.out.
+        this.out = this.newTransformer(out);
+        this.declaration(out);
+        this.startTag(out, level, ELEROOT);
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public void endDisplay(PrintStream out, int level) {
+        try {
+            // Closing the root element is done inside the try block so
+            // that the handler is released even if it fails.
+            this.endTag(out, level, ELEROOT);
+            this.out.endPrefixMapping(XSI);
+            this.out.endDocument();
+        }
+        catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+        finally {
+            this.out = null;
+        }
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public void declaration(PrintStream out) {
+        try {
+            this.out.startDocument();
+            this.out.startPrefixMapping(XSI, XSI_URI);
+        }
+        catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public void startTag(PrintStream out, int level, String name) {
+        // Use String interning to limit memory consumption.
+        final String tagName = name.intern();
+        if (log.isLoggable(Level.FINE)) {
+            // This is an opening tag: trace it as such.
+            log.fine("<" + tagName + '>');
+        }
+        try {
+            this.out.startElement(this.uri, tagName, tagName,
+                                  new AttributesImpl());
+        }
+        catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * @throws IllegalArgumentException if <code>attrs</code> does not
+     *         hold an even number of items (name/value pairs).
+     */
+    @Override
+    public void startTag(PrintStream out, int level, String name,
+                                                     String... attrs) {
+        if ((attrs.length % 2) != 0) {
+            // Fail up front with a clear message rather than with an
+            // ArrayIndexOutOfBoundsException on the dangling name.
+            throw new IllegalArgumentException(
+                    "Attributes must be name/value pairs, got "
+                    + attrs.length + " item(s) for <" + name + '>');
+        }
+        // Use String interning to limit memory consumption.
+        final String tagName = name.intern();
+        final boolean trace = log.isLoggable(Level.FINE);
+        final StringBuilder buf = (trace)?
+                new StringBuilder(256).append('<').append(tagName): null;
+
+        AttributesImpl atts = new AttributesImpl();
+        for (int i = 0; i < attrs.length; i += 2) {
+            String attrName  = attrs[i].intern();
+            String attrValue = attrs[i+1].intern();
+            atts.addAttribute("", attrName, attrName, "CDATA", attrValue);
+            if (trace) {
+                buf.append(' ').append(attrName)
+                   .append("=\"").append(attrValue).append('"');
+            }
+        }
+        if (trace) {
+            log.fine(buf.append('>').toString());
+        }
+        try {
+            this.out.startElement(this.uri, tagName, tagName, atts);
+        }
+        catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public void endTag(PrintStream out, int level, String name) {
+        // Use String interning to limit memory footprint.
+        final String tagName = name.intern();
+        if (log.isLoggable(Level.FINE)) {
+            log.fine("</" + tagName + '>');
+        }
+        try {
+            this.out.endElement(this.uri, tagName, tagName);
+        }
+        catch (SAXException e) {
+            throw new RuntimeException("Error end tag <" + tagName + ">.", e);
+        }
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public void tag(PrintStream out, int level, String name, String content) {
+        // Use String interning to limit memory footprint.
+        final String tagName = name.intern();
+        if (log.isLoggable(Level.FINE)) {
+            log.fine("<" + tagName + '>' + content + "</" + tagName + '>');
+        }
+        try {
+            this.out.startElement(this.uri, tagName, tagName,
+                                  new AttributesImpl());
+            // A null or empty content yields an empty element.
+            if ((content != null) && (content.length() != 0)) {
+                char[] ch = content.toCharArray();
+                this.out.characters(ch, 0, ch.length);
+            }
+            this.out.endElement(this.uri, tagName, tagName);
+        }
+        catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Create a new TrAX Transformer object to apply the
+     * {@link #setStylesheet specified XSLT stylesheet} to the XML
+     * output of this displayer.
+     * @param  out   the stream where to output the result of the
+     *               transformation.
+     * @return the SAX ContentHandler object of the XSLT processor
+     *         to which direct the SAX events of the XML output.
+     * @throws RuntimeException if the stylesheet cannot be loaded or
+     *         the transformer cannot be created.
+     */
+    protected ContentHandler newTransformer(OutputStream out) {
+        SAXTransformerFactory factory = getTransformerFactory();
+        try {
+            // The compiled stylesheet is transient: reload it when only
+            // the file reference is available.
+            if (this.stylesheet == null && this.styleSheetFile != null) {
+                loadStyleSheet();
+            }
+
+            // Get a new transformer, using the identity stylesheet
+            // if none was installed.
+            TransformerHandler h = (this.stylesheet != null)?
+                                factory.newTransformerHandler(this.stylesheet):
+                                factory.newTransformerHandler();
+            h.setResult(new StreamResult(out));
+            return h;
+        }
+        catch (TransformerException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * <i>[Dependency injection]</i> Sets the XSLT stylesheet to apply
+     * when outputting XML. Any stylesheet set by a previous call is
+     * replaced.
+     * @param  f   the XSLT stylesheet file, never <code>null</code>.
+     *             The identity transformation is used only when this
+     *             method is never called.
+     *
+     * @throws IllegalArgumentException if <code>f</code> is
+     *         <code>null</code>.
+     * @throws TransformerException if any error occurred while parsing
+     *         the stylesheet.
+     */
+    public void setStylesheet(File f) throws TransformerException {
+        if (f == null) {
+            throw new IllegalArgumentException("f");
+        }
+        this.styleSheetFile = f;
+        // Drop the templates of a previously set file: loadStyleSheet()
+        // is a no-op as long as a compiled stylesheet is present.
+        this.stylesheet = null;
+        loadStyleSheet();
+    }
+
+    /**
+     * Loads the compiled form of the configured stylesheet file, taking
+     * it from the shared cache when the file was already compiled.
+     * Does nothing if a stylesheet is already loaded or if no stylesheet
+     * file was set. Cached entries are keyed by absolute path and are
+     * never refreshed.
+     *
+     * @throws TransformerException if any error occurred while parsing
+     *         the stylesheet.
+     */
+    public void loadStyleSheet() throws TransformerException {
+        if (this.stylesheet != null) return;
+        if (this.styleSheetFile == null) return;
+
+        String styleSheetPath = this.styleSheetFile.getAbsolutePath();
+        Templates t = stylesheets.get(styleSheetPath);
+        if (t == null) {
+            Templates compiled = getTransformerFactory().newTemplates(
+                                    new StreamSource(this.styleSheetFile));
+            // Another thread may have compiled the same file meanwhile:
+            // keep whichever instance reached the cache first.
+            Templates cached = stylesheets.putIfAbsent(styleSheetPath,
+                                                       compiled);
+            t = (cached != null)? cached: compiled;
+        }
+        this.stylesheet = t;
+    }
+
+    /**
+     * Returns the singleton instance of {@link SAXTransformerFactory}.
+     * The method is synchronized so that concurrent first calls create
+     * a single factory.
+     * @return the singleton instance of the SAX transformer factory
+     *         configured at the JVM level.
+     */
+    private static synchronized SAXTransformerFactory getTransformerFactory() {
+        if (stf == null) {
+            stf = (SAXTransformerFactory)(TransformerFactory.newInstance());
+        }
+        return stf;
+    }
+
+    /**
+     * Describes the configured stylesheet.
+     * @return the absolute path of the stylesheet file, followed by
+     *         <code>"stylesheet"</code> when the stylesheet is loaded
+     *         or <code>"null stylesheet"</code> when it is not; only
+     *         the latter when no stylesheet file was set.
+     */
+    @ReportableProperty(order = 1, value = "StyleSheet file")
+    public String getStylesheetFile() {
+        String status = (this.stylesheet == null)? "null stylesheet":
+                                                   "stylesheet";
+        if (this.styleSheetFile == null) {
+            // No file configured: there is no path to report.
+            return status;
+        }
+        return this.styleSheetFile.getAbsolutePath() + ' ' + status;
+    }
+
+}

File src/main/java/org/jhove2/module/display/util/ContainerElement.java

+/**
+ * JHOVE2 - Next-generation architecture for format-aware characterization
+ *
+ * Copyright (c) 2009 by The Regents of the University of California,
+ * Ithaka Harbors, Inc., and The Board of Trustees of the Leland Stanford
+ * Junior University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * o Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * o Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * o Neither the name of the University of California/California Digital
+ *   Library, Ithaka Harbors/Portico, or Stanford University, nor the names of
+ *   its contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.jhove2.module.display.util;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.jhove2.module.display.util.ContainerMDWrapper.AttributeName;
+
+/**
+ *  ContainerElement
+ *
+ */
+public final class ContainerElement
+{
+	private Map<AttributeName,Object> attributes = new LinkedHashMap<AttributeName,Object>();
+	
+	private final String elementName;
+	
+	private final String value;
+	
+	/**
+	 * Creates a new ContainerElement
+	 */
+	public ContainerElement(){ this(null, null); }
+	public ContainerElement(String name){ this(name, null); }
+	public ContainerElement(String name, String value){ this.elementName = name; this.value = value; }
+	
+	/**
+	 * Gets attributes
+	 * @return <code>Map</code>
+	 */
+	public Map<AttributeName,Object> getAttributes()
+	{
+		return this.attributes;
+	}
+	
+	@Override
+	public String toString() {
+		return this.toString(null);
+	}
+
+	public String toString(String prefix)
+	{
+		StringBuilder stringElements = new StringBuilder();
+		stringElements.append("<");
+		if ((prefix != null) && (prefix.length() != 0)) {
+			stringElements.append(prefix).append(':');
+		}
+		stringElements.append(this.elementName);
+			
+			
+		for(Entry<AttributeName,Object> e : attributes.entrySet() )
+		{
+			AttributeName name = e.getKey();
+			Object v = e.getValue();
+			if( v != null && (! "".equals(v)))
+			{
+				stringElements.append(" ");
+				stringElements.append(name.key);
+				stringElements.append("=");
+				stringElements.append("\"");
+				stringElements.append(v.toString());
+				stringElements.append("\"");
+			}
+		}
+			
+		if( this.value != null)
+		{
+			stringElements.append(">");
+			stringElements.append(this.value);
+			stringElements.append("</");
+			if ((prefix != null) && (prefix.length() != 0)) {
+				stringElements.append(prefix).append(':');
+			}
+			stringElements.append(this.elementName).append(">");
+		}else{
+			stringElements.append("/>");
+		}		
+		return stringElements.toString();
+	}
+}

File src/main/java/org/jhove2/module/display/util/ContainerMDWrapper.java

+/**
+ * JHOVE2 - Next-generation architecture for format-aware characterization
+ *
+ * Copyright (c) 2009 by The Regents of the University of California,
+ * Ithaka Harbors, Inc., and The Board of Trustees of the Leland Stanford
+ * Junior University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * o Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * o Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * o Neither the name of the University of California/California Digital
+ *   Library, Ithaka Harbors/Portico, or Stanford University, nor the names of
+ *   its contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.jhove2.module.display.util;
+
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.FieldPosition;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jdom.JDOMException;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+
+/**
+ * A wrapper for the data needed to build the containerMD section
+ * of the manifest. Data are gathered during the parse of the JHove2
+ * characterization result analysis and aggregated.
+ */
+public final class ContainerMDWrapper
+{	
+	public static final  String CONTAINER_PREFIX = "containerMD";
+
+	public static final  String CONTAINER_URI = "http://bibnum.bnf.fr/ns/containerMD-v1";
+
+	public static final DateFormat rawDateFormat = new MtSafeDateFormat("yyyyMMddHHmmss");
+
+	public static final DateFormat dateFormat = new MtSafeDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
+
+    private static Logger log = Logger.getLogger(ContainerMDWrapper.class.getName());
+
+	private final static Pattern hostExtractor = Pattern.compile("^([a-zA-Z]*):/{0,3}([.[^/#?:]]*)(?:.*)");
+	private final static long MIN_VALID_DATE = 19700101000000L;
+
+	/** ArcRecordSource handlers */
+	public Map<String,ContainerElement> encodings;
+	public Map<String,ContainerElement> formats;
+	public Map<String,ContainerElement> declaredMimeTypes;
+	public Map<String,ContainerElement> hosts;
+	public Map<String,ContainerElement> responses;
+
+	private long firstDateTime = -1L;
+	private long lastDateTime  = -1L;
+
+	private long minimumSize   = Long.MAX_VALUE;
+	private long maximumSize   = 0L;
+	private long globalSize	   = 0L;
+
+	/** Permitted attributes for the containerMD elements */
+	protected enum AttributeName
+	{
+		NUMBER("number"), 
+		NAME("name"),
+		SIZE("size"), 
+		TYPE("type"),
+		METHOD("method"),
+		ORDER("order"),
+		PROTOCOL_NAME("protocolName"),
+		PROTOCOL_VERSION("protocolVersion"), 
+		GLOBALSIZE("globalSize") ;
+		
+		public final String key;
+		
+		private AttributeName(String key) {
+			this.key = key;
+		}
+
+		@Override
+		public String toString() {
+			return this.key;
+		}
+	}
+
+	/**
+	 * Creates a new ContainerMDWrapper instance.
+	 */
+	public ContainerMDWrapper()
+	{
+		/** Initialization */
+		this.formats = new HashMap<String,ContainerElement>();
+		this.declaredMimeTypes = new HashMap<String,ContainerElement>();
+		this.encodings = new HashMap<String,ContainerElement>();
+		this.hosts     = new HashMap<String,ContainerElement>();
+		this.responses = new HashMap<String,ContainerElement>();
+		log.log(Level.FINE, "{}|ContainerMDWrapper 0x{}: new",
+		          new Object[] { 
+                    Thread.currentThread().getName(),
+                    Integer.toHexString(System.identityHashCode(this)) 
+                  });
+	}
+	
+	public void addEntry(String sourceName, long size, String dateTime,
+			             String format, String mimeType,
+			             String protocolVersion, String codeResponse) {
+	        log.log(Level.FINEST, "addEntry: {} ({}, {}, {})",
+	                  new Object[] { sourceName, Long.valueOf(size),
+	                                 dateTime, mimeType });
+		if (size >= 0L) {
+		    this.setMaximumSize(size);
+		    this.setMinimumSize(size);
+		    this.setGlobalSize(size);
+		}
+		if (! isBlank(dateTime)) {
+		    try {
+		        long l = Long.parseLong(dateTime);
+		        this.setFirstDateTime(l);
+		        this.setLastDateTime(l);
+		    }
+		    catch (Exception e) {
+		        log.log(Level.WARNING, "Invalid ARC entry date ({}) for {}",
+		                 new Object[] { dateTime, sourceName });
+		        /* Ignore... */
+		    }
+		}
+		Matcher m = hostExtractor.matcher(sourceName);
+		if (m.matches()) {
+			String protocol = m.group(1);
+			String hostName = m.group(2);
+
+			this.handleHost(hostName, size);
+			this.handleResponse(protocolVersion, protocol, codeResponse,size);
+		}
+		if ((mimeType != null) && (mimeType.length() != 0)) {
+			this.handleDeclaredMimeType(mimeType,size);
+		}
+		if ((format != null) && (format.length() != 0)) {
+			this.handleFormat(format, size);
+		}
+	}
+
+	/**
+	 * Checks whether hosts have been handled.
+	 * @return true | false
+	 */
+	public boolean hasHosts()
+	{
+		return !hosts.isEmpty();
+	}
+	
+	/**
+	 * Checks whether responses have been handled.
+	 * @return true | false
+	 */
+	public boolean hasResponses()
+	{
+		return !responses.isEmpty();
+	}
+	
+	/**
+	 * Checks whether encodings have been handled.
+	 * @return true | false
+	 */
+	public boolean hasEncodings()
+	{
+		return !encodings.isEmpty();
+	}
+	
+	/**
+	 * Checks whether formats have been handled.
+	 * @return true | false
+	 */
+	public boolean hasFormats() {
+		return !formats.isEmpty();
+	}
+	
+	/**
+	 * Checks whether declared mimeTypes have been handled.
+	 * @return true | false
+	 */
+	public boolean hasDeclaredMimeTypes() {
+		return !declaredMimeTypes.isEmpty();
+	}
+	
+	/**
+	 * Sets maximum size
+	 * @param size
+	 */
+	public void setMaximumSize(long size) {
+		if (this.maximumSize < size) { 
+			this.maximumSize = size;
+		}
+	}
+
+	/**
+	 * Gets maximumSize
+	 * @return <code>String</code>
+	 */
+	public String getMaximumSize() {
+		return Long.toString(this.maximumSize);
+	}
+
+	/**
+	 * Sets minimum size
+	 * @param size
+	 */
+	public void setMinimumSize(long size) {
+		if ((size >= 0L) && (this.minimumSize > size)) { 
+			this.minimumSize = size ;
+		}
+	}
+
+	/**
+	 * Sets the global size
+	 * @param size
+	 */
+	public void setGlobalSize(long size) {
+	    if (size >= 0L) {
+	        this.globalSize += size;
+	    }
+	}
+	/**
+	 * Gets the global size
+	 * @return <code>long</code>
+	 */
+	public long getGlobalSize() {
+		return this.globalSize;
+	}
+
+	/**
+	 * Gets minimumSize
+	 * @return <code>String</code>
+	 */
+	public String getMinimumSize() {
+	    log.log(Level.FINE, "{}|{}", 
+            new Object[] { Thread.currentThread().getName(), this });
+		return Long.toString(
+		    (this.minimumSize == Long.MAX_VALUE)? 0L: this.minimumSize);
+	}
+
+	/**
+	 * Sets firstLastTime
+	 * @param dateTime
+	 */
+	public void setFirstDateTime(long dateTime) {
+		if ((firstDateTime == -1L) || (firstDateTime > dateTime)) {
+			firstDateTime = dateTime;
+		}
+	}
+
+	/**
+	 * Gets firstDateTime
+	 * @return <code>String</code>
+	 * @throws ParseException
+	 */
+	public String getFirstDateTime() throws ParseException {
+		return formatDateTime(longToDate(this.firstDateTime));
+	}
+
+	/**
+	 * Sets lastDateTime
+	 * @param dateTime
+	 */
+	public void setLastDateTime(long dateTime) {
+		if (lastDateTime < dateTime) {
+			lastDateTime = dateTime;
+		}
+	}
+
+	/**
+	 * Gets lastDateTime
+	 * @return <code>String</code>
+	 * @throws ParseException
+	 */
+	public String getLastDateTime() throws ParseException {
+		return formatDateTime(longToDate(this.lastDateTime));
+	}
+
+	/**
+	 * Returns containerMD encoding elements formatted into XML.
+	 * @return <code>String</code>
+	 */
+	public String getEncodings() throws JDOMException, IOException
+	{
+		return toXml(this.encodings.values());
+	}
+	
+	/**
+	 * Returns containerMD declared mimeTypes elements formatted into XML.
+	 * @return <code>String</code>
+	 */
+	public String getDeclaredMimeTypes() throws JDOMException, IOException
+	{
+		return toXml(this.declaredMimeTypes.values());
+	}
+	
+	/**
+	 * Returns containerMD format elements formatted into XML.
+	 * @return <code>String</code>
+	 */
+	public String getFormats() throws JDOMException, IOException {
+		return toXml(this.formats.values());
+	}
+	
+	/**
+	 * Returns containerMD host elements formatted into XML.
+	 * @return <code>String</code>
+	 */
+	public String getHosts() throws JDOMException, IOException
+	{
+		return toXml(this.hosts.values());
+	}
+	
+	/**
+	 * Returns containerMD response elements formatted into XML.
+	 * @return <code>String</code>
+	 */
+	public String getResponses() throws JDOMException, IOException
+	{
+		return toXml(this.responses.values());
+	}
+	
+	/**
+	 * Formats a given long "yyyyMMddHHmmss" into a date 
+	 * @param date
+	 * @return Long
+	 */
+	protected Date longToDate(long date) throws ParseException {
+	    Date d = null;
+	    if (date >= MIN_VALID_DATE) {
+		d = rawDateFormat.parse(String.valueOf(date));
+	    }
+	    return d;
+	}
+	
+	/**
+	 * Formats a given date into a long "yyyyMMddHHmmss"
+	 * @param date
+	 * @return Long
+	 */
+	protected Long dateToLong(Date date) throws ParseException
+	{
+		String stringDate = rawDateFormat.format( date );
+		return Long.valueOf(stringDate);
+	}
+
+	/**
+	 * Formats a given date into "yyyy-MM-dd'T'HH:mm:ss'Z'".
+	 * 
+	 * @param DateTime
+	 * @return <code>String</code>
+	 * @throws ParseException
+	 */
+	protected String formatDateTime(Date date) throws ParseException {
+	    if (date == null) {
+	        throw new IllegalArgumentException("Invalid date: " + date);
+	    }
+	    return dateFormat.format(date);
+	}
+
+	public String toXml(Collection<ContainerElement> elts) 
+	                                    throws JDOMException, IOException {
+		StringBuilder stringElements = new StringBuilder();
+		for (ContainerElement e : elts) {
+			stringElements.append( e.toString(CONTAINER_PREFIX) );
+		}
+		return stringElements.toString();
+	}
+
+	/**
+	 * Handles distinct encodings
+	 * @param encoding
+	 */
+	public void handleEncoding(String type, String method)
+	{
+		if( !encodings.containsKey( method ))
+		{
+			ContainerElement container = new ContainerElement("encoding");
+
+			container.getAttributes().put(AttributeName.TYPE, type);
+			container.getAttributes().put(AttributeName.METHOD, method);
+			container.getAttributes().put(AttributeName.ORDER,
+			                              Integer.valueOf(encodings.size()+1));
+			encodings.put(method,container);
+		}
+	}
+	
+	/**
+	 * Handles distinct formats
+	 * @param format
+	 * @param size
+	 */
+	public void handleFormat(String format, long size)
+	{
+		ContainerElement container = formats.get( format );
+		if( container != null )
+		{
+			Map<AttributeName,Object> attrs = container.getAttributes();
+			((AtomicInteger)attrs.get(AttributeName.NUMBER)).incrementAndGet();
+			((AtomicLong)attrs.get(AttributeName.GLOBALSIZE)).addAndGet(size);
+		}else {
+			container =  new ContainerElement("format");
+
+			container.getAttributes().put(AttributeName.NAME, format);
+			container.getAttributes().put(AttributeName.NUMBER, new AtomicInteger(1));
+			container.getAttributes().put(AttributeName.GLOBALSIZE, new AtomicLong(size));
+
+			formats.put(format, container );
+		}
+	}
+	
+	/**
+	 * Handles distinct declared mimeTypes
+	 * @param mimeType
+	 */
+	public void handleDeclaredMimeType(String mimeType,long size)
+	{
+		ContainerElement container = declaredMimeTypes.get(mimeType);
+		if( container != null )
+		{
+			Map<AttributeName,Object> attrs = container.getAttributes();
+			((AtomicInteger)attrs.get(AttributeName.NUMBER)).incrementAndGet();
+			((AtomicLong)attrs.get(AttributeName.GLOBALSIZE)).addAndGet(size);
+		}else{
+			container =  new ContainerElement("declaredMimeType", mimeType);
+			
+			container.getAttributes().put(AttributeName.NUMBER, new AtomicInteger(1));
+			container.getAttributes().put(AttributeName.GLOBALSIZE, new AtomicLong(size));
+			declaredMimeTypes.put(mimeType, container );
+		}		
+	}
+	
+	/**
+	 * Handles distinct hosts
+	 * @param host
+	 * @param size
+	 */
+	public void handleHost(String host, long size)
+	{
+		ContainerElement container = hosts.get(host);
+		if( container != null )
+		{
+			Map<AttributeName,Object> attrs = container.getAttributes();
+			((AtomicInteger)attrs.get(AttributeName.NUMBER)).incrementAndGet();
+			((AtomicLong)attrs.get(AttributeName.GLOBALSIZE)).addAndGet(size);
+		}else{
+			container =  new ContainerElement("host", host);
+			
+			container.getAttributes().put(AttributeName.NUMBER, new AtomicInteger(1));
+			container.getAttributes().put(AttributeName.GLOBALSIZE, new AtomicLong(size));
+			hosts.put(host, container );
+		}
+	}
+	
+	/**
+	 * Handles distinct response
+	 * @param protocolVersion
+	 * @param protocolName
+	 * @param codeResponse
+	 */
+	public void handleResponse(String protocolVersion, String protocolName, String codeResponse, long size)
+	{
+		String key = protocolName + '|' + protocolVersion + '|' + codeResponse;
+		ContainerElement container = responses.get(key);
+		if( container != null )
+		{
+			Map<AttributeName,Object> attrs = container.getAttributes();
+			((AtomicInteger)attrs.get(AttributeName.NUMBER)).incrementAndGet();
+			((AtomicLong)attrs.get(AttributeName.GLOBALSIZE)).addAndGet(size);
+		}else{
+			container =  new ContainerElement("response", codeResponse);		
+			container.getAttributes().put(AttributeName.NUMBER, new AtomicInteger(1));
+			container.getAttributes().put(AttributeName.PROTOCOL_NAME, protocolName );
+			container.getAttributes().put(AttributeName.PROTOCOL_VERSION, protocolVersion );		
+			container.getAttributes().put(AttributeName.GLOBALSIZE, new AtomicLong(size));
+			responses.put(key, container );
+		}
+	}
+
+    @Override
+    public String toString() {
+        // The identity hash code (in hex) distinguishes instances in the output;
+        // the collected statistics follow as a comma-separated name=value list.
+        final String identity = Integer.toHexString(System.identityHashCode(this));
+        final StringBuilder text = new StringBuilder(512);
+        text.append("ContainerMDWrapper 0x").append(identity)
+            .append(" { minimumSize=").append(this.minimumSize)
+            .append(", maximumSize=").append(this.maximumSize)
+            .append(", globalSize=").append(this.globalSize)
+            .append(", firstDateTime=").append(this.firstDateTime)
+            .append(", lastDateTime=").append(this.lastDateTime)
+            .append(", encodings=").append(this.encodings)
+            .append(", MIME types=").append(this.declaredMimeTypes)
+            .append(", formats=").append(this.formats)
+            .append(", hosts=").append(this.hosts)
+            .append(", responses=").append(this.responses)
+            .append(" }");
+        return text.toString();
+    }
+
+    /**
+     * Tells whether a string carries no visible content, i.e. whether it
+     * is <code>null</code>, empty or made up solely of whitespace
+     * characters.
+     *
+     * @param  s   the string to examine, may be <code>null</code>.
+     *
+     * @return <code>true</code> if the string is <code>null</code>,
+     *         empty ("") or contains only whitespace characters;
+     *         <code>false</code> otherwise.
+     */
+    public static boolean isBlank(String s) {
+        if (s == null) {
+            return true;
+        }
+        // Once trimmed, a whitespace-only string has no characters left.
+        return s.trim().length() == 0;
+    }
+
+    /*
+     * Date formatter that can be shared between threads for parsing and
+     * formatting: parse(String) and format(Date, StringBuffer, FieldPosition)
+     * are serialized on the formatter instance. Parsing is strict
+     * (non-lenient) and the time zone is UTC.
+     */
+    private final static class MtSafeDateFormat extends SimpleDateFormat {
+        /**
+         * UID.
+         */
+        private static final long serialVersionUID = -8797209035403605920L;
+
+        public MtSafeDateFormat(String pattern) {
+            super(pattern);
+            setLenient(false);
+            setTimeZone(TimeZone.getTimeZone("UTC"));
+        }
+
+        @Override
+        public Date parse(String source) throws ParseException {
+            // Hold the instance lock for the whole parse.
+            synchronized (this) {
+                return super.parse(source);
+            }
+        }
+
+        @Override
+        public StringBuffer format(Date date,
+                                   StringBuffer toAppendTo,
+                                   FieldPosition pos) {
+            // Same lock as parse(String): formatting and parsing never overlap.
+            synchronized (this) {
+                return super.format(date, toAppendTo, pos);
+            }
+        }
+    }
+}

File src/main/java/org/jhove2/module/format/arc/ArcModule.java

 
         contentType = record.header.contentTypeStr;
         /*
-         * Arc Record Source.
+         * ARC Record Source.
          */
         Source recordSrc = new ArcRecordSource();
         recordSrc.setSourceAccessor(sourceFactory.createSourceAccessor(recordSrc));
+        recordSrc.setDeleteTempFileOnClose(jhove2.getInvocation().getDeleteTempFilesOnClose());
         recordSrc = parentSource.addChildSource(recordSrc);
         ++recordNumber;
         /*
         /*
          * Characterize payload.
          */
-        if (recurse && payload_stream != null && !record.hasEmptyPayload()) {
+        if (recurse && payload_stream != null && !record.hasPseudoEmptyPayload()) {
             characterizePayload(jhove2, sourceFactory, recordSrc, payload_stream, formatId);
         }
         if (payload_stream != null) {
          * Report errors.
          */
         reportValidationErrors(recordSrc, record, jhove2);
+        recordSrc.close();
     }
 
     protected void updateProtocols(ArcRecordData recordData) {
             Source recordSrc, InputStream payload_stream, FormatIdentification formatId)
                     throws EOFException, IOException, JHOVE2Exception {
         // Not all properties are ready yet, they are added as extras.
+    	String name = null;
         Source payloadSrc = sourceFactory.getSource(jhove2, payload_stream, name, null);
         if (payloadSrc != null) {
+        	payloadSrc.setDeleteTempFileOnClose(jhove2.getInvocation().getDeleteTempFilesOnClose());
             payloadSrc = recordSrc.addChildSource(payloadSrc);
             // Add presumptive format based on content-type.
             if(formatId != null){
                 if (src_input != null) {
                     src_input.close();
                 }
+                payloadSrc.close();
             }
         }
     }

File src/main/java/org/jhove2/module/format/arc/properties/ArcRecordData.java

         /*
          * Payload.
          */
-        bHasPayload = record.hasPayload() && !record.hasEmptyPayload();
+        bHasPayload = record.hasPayload() && !record.hasPseudoEmptyPayload();
         Payload payload = record.getPayload();
         if (payload != null) {
         	// payloadLength is reported back as ObjectSize in the Jhove2 specs

File src/main/java/org/jhove2/module/format/gzip/GzipModule.java

     public long parse(final JHOVE2 jhove2, Source source, Input input)
         throws EOFException, IOException, JHOVE2Exception {
         /*
-        // Check for parallel characterization mode.
-        ExecutorService threadPool = null;
-        if (this.nThreads > 1) {
-            threadPool = Executors.newFixedThreadPool(this.nThreads);
-        }
-        */
-        /*
          * Module init.
          */
         long consumed = 0L;
-        //this.deflateMemberCount.set(0L);
-        //this.invalidMembers.set(0L);
         deflateMemberCount = 0L;
         invalidMembers = 0L;
         validationMessages.clear();
         isValid = Validity.Undetermined;
-        //wovenFormatParser = null;
 
         // In GZip format, least-significant bytes come first.
         input.setByteOrder(ByteOrder.LITTLE_ENDIAN);
             int memberCount = 0;
             while ((gzipEntry = gzipReader.getNextEntry()) != null) {
                 // Wrap found member in a JHove2 Source object.
-                /*
-                final GzipMemberSource src = (GzipMemberSource)
-                        (SourceFactory.getSource(cfg.getTempPrefix(),
-                            cfg.getTempSuffix(), cfg.getBufferSize(), e,
-                            (doRecurse)? gz.getEntryInputStream(): null));
-                */
                 InputStream stream = gzipEntry.getInputStream();
                 String name = gzipEntry.fname;
                 Source src = factory.getSource(jhove2, stream, name, null);
                 if (src != null) {
+                    src.setDeleteTempFileOnClose(jhove2.getInvocation().getDeleteTempFilesOnClose());
                     memberCount++;
                     // Attach member to parent source.
                     source.addChildSource(src);
-
                     if (presumptiveFormat != null) {
                         src.addPresumptiveFormat(presumptiveFormat);
                     }
 
                     if (recurse) {
-                        // Characterize member data.
-                        if (memberCount == 1) {
-                            // First member: Check for woven format.
-                            // Set parent module.
-                            //src.setParentModule(this);
-                            // Characterize member content.
-                            characterizeMember(jhove2, src);
-                        }
-                        else {
-                            /*
-                            // All members but the first: characterize content.
-                            if (threadPool != null) {
-                                // Submit to thread pool for asynchronous
-                                // parallel execution.
-                                final long offset = gz.getOffset();
-                                threadPool.execute(new Runnable() {
-                                    public void run() {
-                                        try {
-                                            characterizeMember(jhove2, src);
-                                        }
-                                        catch (Exception e) {
-                                            handleError(e, jhove2, offset);
-                                        }
-                                    }
-                                });
-                                // Let executor threads a chance to run...
-                                // Thread.yield();
-                            }
-                            else {
-                            */
-                                // Sync. characterization in current thread.
-                                characterizeMember(jhove2, src);
-                            /*
-                            }
-                            */
-                        }
+                        characterizeMember(jhove2, src);
                     }
                     src.close();
                 }
                 gzipReader.close();
             }
             catch (Exception e) { /* Ignore... */ }
-
-            /*
-            // Shutdown thread pool (if any).
-            if (threadPool != null) {
-                threadPool.shutdown();
-                // Wait for completion of all characterization tasks.
-                boolean shutdownComplete = false;
-                do {
-                    try {
-                        threadPool.awaitTermination(2L, TimeUnit.HOURS);
-                        shutdownComplete = true;
-                    }
-                    catch (InterruptedException e) { /* Ignore... */ /*}
-                }
-                while (! shutdownComplete);
-            }
-            */
         }
         /*
          * Cleanup.
             throws JHOVE2Exception, IOException {
         Input input = source.getInput(jhove2);
         try {
-        	/*
-            if (wovenFormatParser != null) {
-                // Start timer.
-                TimerInfo timer = source.getTimerInfo();
-                timer.setStartTime();
-                try {
-                    // Update statistics.
-                    jhove2.getSourceCounter().incrementSourceCounter(source);
-                    // Configure temporary files deletion.
-                    source.setDeleteTempFileOnClose(jhove2.getInvocation()
-                            .getDeleteTempFilesOnClose());
-                    // Woven format => Delegate content handling.
-                    wovenFormatParser.parse(jhove2, source, input);
-                }
-                finally {
-                    // Delete temp. files and compute processing duration.
-                    source.close();
-                    timer.setEndTime();
-                }
-            }
-            else {
-                */
-                // Directly characterize content.
-                jhove2.characterize(source, input);
-                /*
-            }
-            */
+        	jhove2.characterize(source, input);
         }
         finally {
             // Make sure all file descriptors are properly closed.
         return recurse;
     }
 
-    /*
-    public void setParallelCharacterization(int level) {
-        if (level < 0) {
-            level = 0;
-        }
-        this.nThreads = level;
-    }
-    */
-
 }

File src/main/java/org/jhove2/module/format/warc/WarcModule.java

 
         contentType = record.header.contentTypeStr;
         /*
-         * Warc Record Source.
+         * WARC Record Source.
          */
         Source recordSrc = new WarcRecordSource();
         recordSrc.setSourceAccessor(sourceFactory.createSourceAccessor(recordSrc));
+        recordSrc.setDeleteTempFileOnClose(jhove2.getInvocation().getDeleteTempFilesOnClose());
         recordSrc = parentSource.addChildSource(recordSrc);
         ++warcRecordNumber;
         /*
             Source recordSrc, InputStream payload_stream, FormatIdentification formatId)
                     throws EOFException, IOException, JHOVE2Exception {
         // Not all properties are ready yet, they are added as extras.
+    	String name = null;
         Source payloadSrc = sourceFactory.getSource(jhove2, payload_stream, name, null);
         if (payloadSrc != null) {
+        	payloadSrc.setDeleteTempFileOnClose(jhove2.getInvocation().getDeleteTempFilesOnClose());
             payloadSrc = recordSrc.addChildSource(payloadSrc);
             // Add presumptive format based on content-type.
             if(formatId != null){