Stephen Abrams committed ced3d43

Partial fix for Zip characterization

Comments (0)

Files changed (4)

src/main/java/org/jhove2/core/source/SourceFactoryUtil.java

 			source = new ZipDirectorySource(entry);
 		}
 		else {
+		    /* Recover the filename from the pathname. Although the path
+		     * separator always should be a forward slash (/), in practice a
+		     * backward slash (\) may be found.
+		     */
+		    String name = entry.getName();
+		    int in = name.lastIndexOf('/');
+		    if (in < 0) {
+		        in = name.lastIndexOf('\\');
+		    }
+		    if (in > -1) {
+		        name = name.substring(in+1);
+		    }
 	        InputStream stream = zip.getInputStream(entry);
-			source = new ZipFileSource(jhove2, entry, stream);
+			source = new ZipFileSource(jhove2, entry, stream, name);
 	        stream.close();
 		}
 		source.setSourceAccessor(sourceFactory.createSourceAccessor(source));
             source.setSourceAccessor(sourceFactory.createSourceAccessor(source));
             Iterator<String> iter = pathNames.iterator();
             while (iter.hasNext()) {
-                Source src = null;
                 String name = iter.next();
-                src = SourceFactoryUtil.getSource(jhove2, name, sourceFactory);
+                Source src = SourceFactoryUtil.getSource(jhove2, name, sourceFactory);
                 src = ((FileSetSource)source).addChildSource(src);
             }
         }

src/main/java/org/jhove2/core/source/ZipDirectorySource.java

 
 package org.jhove2.core.source;
 
-import java.io.File;
 import java.util.Date;
 import java.util.zip.ZipEntry;
 
 
 		this.isAggregate = true;
 		this.path = entry.getName();
-		/* Delete trailing slash (/), if found. */
-		int in = this.path.lastIndexOf(File.separator);
+		/* Delete trailing slash from path name, if necessary. Although this
+		 * always should be a forward slash (/), in practice a backward slash
+		 * (\) may be found.
+		 */
+		int in = this.path.lastIndexOf('/');
+		if (in < 0) {
+		    in = this.path.lastIndexOf('\\');
+		}
 		if (in == this.path.length() - 1) {
 			this.path = this.path.substring(0, in);
 		}

src/main/java/org/jhove2/core/source/ZipFileSource.java

 	}
 	/**
 	 * Instantiate a new <code>ZipFileSource</code>.
-	 * @param tmpDirectory Temporary directory
-     * @param tmpPrefix Temporary file prefix
-     * @param tmpSuffix Temporary file suffix
-     * @param bufferSize Buffer size 
+	 * @param jhove2 JHOVE2 framework object 
+     * @param entry
+     *            Zip file entry
 	 * @param stream
 	 *            Input stream for the Zip file entry
-	 * @param entry
-	 *            Zip file entry
+	 * @param name
+	 *            File name
 	 * @throws IOException
 	 */
-	protected ZipFileSource(JHOVE2 jhove2, ZipEntry entry, InputStream stream)
+	protected ZipFileSource(JHOVE2 jhove2, ZipEntry entry, InputStream stream, String name)
 		throws IOException
 	{
-		super(jhove2, stream, entry.getName());
+		super(jhove2, stream, name);
 		this.path = entry.getName();
 		this.size = entry.getSize();
 		this.lastModified = new Date(entry.getTime());

src/main/java/org/jhove2/module/format/zip/ZipModule.java

 import java.nio.ByteOrder;
 import java.util.ArrayList;
 import java.util.Enumeration;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 import org.jhove2.core.JHOVE2Exception;
 import org.jhove2.core.format.Format;
 import org.jhove2.core.io.Input;
+import org.jhove2.core.source.FileSetSource;
 import org.jhove2.core.source.Source;
 import org.jhove2.core.source.SourceFactory;
 import org.jhove2.module.format.BaseFormatModule;
 	                    	factory.getSource(jhove2, zip, entry);
 	                    if (src != null) {
 	                        String key = entry.getName();
-	                        /* Remove trailing slash. */
+	                        /* Remove trailing slash. Although this always
+	                         * should be a forward slash (/), in practice a
+	                         * backward slash (\) may be found. */
 	                        int len = key.length() - 1;
-	                        if (key.charAt(len) == '/') {
+	                        char ch = key.charAt(len);
+	                        if (ch == '/') {
+	                            key = key.substring(0, len);
+	                        }
+	                        else if (ch == '\\') {
 	                            key = key.substring(0, len);
 	                        }
 	                        map.put(key, src);
 	                ZipEntry entry = en.nextElement();
 	                String name = entry.getName();
 	                if (entry.isDirectory()) {
+                        /* Remove trailing slash. Although this always should
+                         * be a forward slash (/), in practice a backward
+                         * slash (\) may be found. */
 	                    int len = name.length() - 1;
-	                    if (name.charAt(len) == '/') {
+	                    char ch = name.charAt(len);
+	                    if (ch == '/') {
+	                        name = name.substring(0, len);
+	                    }
+	                    else if (ch == '\\') {
 	                        name = name.substring(0, len);
 	                    }
 	                    /* Get the source unit from the map. */
 	                    Source src = map.get(name);
 	                    if (src != null) {
+	                        /* Make sure to close the Input after
+	                         * characterization is completed.
+	                         */
 	                        Input inpt = src.getInput(jhove2);
 	                        try {
 	                            src = jhove2.characterize(src, inpt);
 	                            }
 	                        }
 	                        
+	                        /* Check if the pathname includes a directory
+	                         * component. Although the path separator always
+	                         * should be a forward slash (/), in practice a
+	                         * backward slash (\) may be found.
+	                         */
 	                        int in = name.lastIndexOf('/');
+	                        if (in < 0) {
+	                            in = name.lastIndexOf('\\');
+	                        }
 	                        if (in > -1 && in < name.length() - 1) {
 	                            /*
 	                             * Directory is a child of a Zip directory
 	                    Source src =
 	                    	factory.getSource(jhove2, zip, entry);
 	                    if (src != null) {
-	                        Input inpt = src.getInput(jhove2);
-	                        try {
-	                            src = jhove2.characterize(src, inpt);
+	                        /* Some FileSources may be children of a FileSetSource
+	                         * if the filename includes a directory that is not
+	                         * a Zip entry.  These FileSources should not be
+	                         * characterized now, since we'll characterize the
+	                         * FileSet later, including its children.
+	                         */
+	                        boolean hasFileSetParent = false;
+
+	                        /* Check if the pathname includes a directory
+	                         * component. Although the path separator always
+	                         * should be a forward slash (/), in practice a
+	                         * backward slash (\) may be found.
+	                         */
+	                        int in = name.lastIndexOf('/');
+	                        if (in < 0) {
+	                            in = name.lastIndexOf('\\');
 	                        }
-	                        finally {
-	                            if (inpt != null) {
-	                                inpt.close();
-	                            }
-	                        }
-
-	                        int in = name.lastIndexOf('/');
 	                        if (in < 0) {
 	                            /* File is a child of the Zip file. */
 	                        	src = source.addChildSource(src);
 	                            String key = name.substring(0, in);
 	                            Source parent = map.get(key);
 	                            if (parent != null) {
+	                                if (parent instanceof FileSetSource) {
+	                                    hasFileSetParent = true;
+	                                }
 	                            	src = parent.addChildSource(src);
 	                            }
+	                            else {
+	                                hasFileSetParent = true;
+	                                /* The filename includes a parent directory,
+	                                 * but that directory is not a Zip entry.
+	                                 * Create a FileSetSource, a child of the
+	                                 * Zip file and the parent of this file, to
+	                                 * represent the directory. 
+	                                 */
+	                                parent = jhove2.getSourceFactory().getFileSetSource();
+	                                parent = source.addChildSource(parent);
+                                    map.put(key, parent);
+	                                src = parent.addChildSource(src);
+	                            }
 	                        }
+	                        /* Only characterize sources that are not children
+	                         * of a FileSetSource.
+	                         */
+	                        if (!hasFileSetParent) {
+	                            /* Make sure to close the Input after
+	                             * characterization is completed.
+	                             */
+	                            Input inpt = src.getInput(jhove2);
+	                            try {
+	                                src = jhove2.characterize(src, inpt);
+	                            }
+	                            finally {
+	                                if (inpt != null) {
+	                                    inpt.close();
+	                                }
+                                }
+                            }
 	                    }
 	                }
 	            }
+	            /* Make sure to characterize any FileSets that were created to
+	             * represent non-Zip-entry directories.
+	             */
+	            Set<String> set = map.keySet();
+	            Iterator<String> iter = set.iterator();
+	            while (iter.hasNext()) {
+	                String key = iter.next();
+	                Source src = map.get(key);
+	                if (src != null && src instanceof FileSetSource) {
+                        /* Make sure to close the Input after
+                         * characterization is completed.
+                         */
+	                    Input inpt = src.getInput(jhove2);
+                        try {
+                            src = jhove2.characterize(src, inpt);
+                        }
+                        finally {
+                            if (inpt != null) {
+                                inpt.close();
+                            }
+                        }
+	                }
+	            }
 	        }  
 	        finally {
 	            zip.close();
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.