Commits

Anonymous committed 29f4fb6

hachoir-parser/image: vastly improve exif parser, closing issue #26

Comments (0)

Files changed (2)

hachoir-parser/hachoir_parser/image/exif.py

 """
-EXIF metadata parser (can be found in a JPEG picture for example)
+EXIF metadata parser; also parses TIFF file headers.
 
-Author: Victor Stinner
+Author: Victor Stinner, Robert Xiao
+
+References:
+- Exif 2.2 Specification (JEITA CP-3451)
+    http://www.exif.org/Exif2-2.PDF
+- TIFF 6.0 Specification
+    http://partners.adobe.com/public/developer/en/tiff/TIFF6.pdf
 """
 
-from hachoir_core.field import (FieldSet, ParserError,
+from hachoir_core.field import (FieldSet, SeekableFieldSet, ParserError,
     UInt8, UInt16, UInt32,
-    Int32, Enum, String,
-    Bytes, SubFile,
-    NullBytes, createPaddingField)
+    Int8, Int16, Int32,
+    Float32, Float64,
+    Enum, String, Bytes, SubFile,
+    NullBits, NullBytes, createPaddingField)
 from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN, NETWORK_ENDIAN
 from hachoir_core.text_handler import textHandler, hexadecimal
 from hachoir_core.tools import createDict
 
-MAX_COUNT = 1000
+MAX_COUNT = 1000 # maximum number of array entries in an IFD entry (not enforced for Bytes-derived value types: strings, UNDEFINED data and unknown types)
 
 def rationalFactory(class_name, size, field_class):
     class Rational(FieldSet):
 RationalInt32 = rationalFactory("RationalInt32", 64, Int32)
 RationalUInt32 = rationalFactory("RationalUInt32", 64, UInt32)
 
+class ASCIIString(String):
+    def __init__(self, parent, name, nbytes, description=None, strip=' \0', charset='ISO-8859-1', *args, **kwargs):
+        String.__init__(self, parent, name, nbytes, description, strip, charset, *args, **kwargs)
+
+class IFDTag(UInt16):
+    def getTag(self):
+        return self.parent.TAG_INFO.get(self.value, (hex(self.value), ""))
+    def createDisplay(self):
+        return self.getTag()[0]
+
 class BasicIFDEntry(FieldSet):
     TYPE_BYTE = 0
     TYPE_UNDEFINED = 7
     TYPE_SIGNED_RATIONAL = 10
     TYPE_INFO = {
          1: (UInt8, "BYTE (8 bits)"),
-         2: (String, "ASCII (8 bits)"),
+         2: (ASCIIString, "ASCII (8 bits)"),
          3: (UInt16, "SHORT (16 bits)"),
          4: (UInt32, "LONG (32 bits)"),
          5: (RationalUInt32, "RATIONAL (2x LONG, 64 bits)"),
+         6: (Int8, "SBYTE (8 bits)"),
          7: (Bytes, "UNDEFINED (8 bits)"),
-         9: (Int32, "SIGNED LONG (32 bits)"),
-        10: (RationalInt32, "SRATIONAL (2x SIGNED LONGs, 64 bits)"),
+         8: (Int16, "SSHORT (16 bits)"),
+         9: (Int32, "SLONG (32 bits)"),
+        10: (RationalInt32, "SRATIONAL (2x SLONG, 64 bits)"),
+        11: (Float32, "FLOAT (32 bits)"),
+        12: (Float64, "DOUBLE (64 bits)"),
     }
     ENTRY_FORMAT = createDict(TYPE_INFO, 0)
     TYPE_NAME = createDict(TYPE_INFO, 1)
+    TAG_INFO = {}
 
     def createFields(self):
-        yield Enum(textHandler(UInt16(self, "tag", "Tag"), hexadecimal), self.TAG_NAME)
-        yield Enum(textHandler(UInt16(self, "type", "Type"), hexadecimal), self.TYPE_NAME)
+        yield IFDTag(self, "tag", "Tag")
+        yield Enum(UInt16(self, "type", "Type"), self.TYPE_NAME)
+        self.value_cls = self.ENTRY_FORMAT.get(self['type'].value, Bytes)
+        if issubclass(self.value_cls, Bytes):
+            self.value_size = 8
+        else:
+            self.value_size = self.value_cls.static_size
         yield UInt32(self, "count", "Count")
-        if self["type"].value not in (self.TYPE_BYTE, self.TYPE_UNDEFINED) \
-        and  MAX_COUNT < self["count"].value:
+
+        if not issubclass(self.value_cls, Bytes) \
+          and self["count"].value > MAX_COUNT:
             raise ParserError("EXIF: Invalid count value (%s)" % self["count"].value)
-        value_size, array_size = self.getSizes()
 
-        # Get offset/value
-        if not value_size:
+        count = self['count'].value
+        totalsize = self.value_size * count
+        if count == 0:
             yield NullBytes(self, "padding", 4)
-        elif value_size <= 32:
-            if 1 < array_size:
-                name = "value[]"
+        elif totalsize <= 32:
+            name = "value"
+            if issubclass(self.value_cls, Bytes):
+                yield self.value_cls(self, name, count)
             else:
-                name = "value"
-            kw = {}
-            cls = self.value_cls
-            if cls is String:
-                args = (self, name, value_size/8, "Value")
-                kw["strip"] = " \0"
-                kw["charset"] = "ISO-8859-1"
-            elif cls is Bytes:
-                args = (self, name, value_size/8, "Value")
-            else:
-                args = (self, name, "Value")
-            for index in xrange(array_size):
-                yield cls(*args, **kw)
-
-            size = array_size * value_size
-            if size < 32:
-                yield NullBytes(self, "padding", (32-size)//8)
+                if count > 1:
+                    name += "[]"
+                for i in xrange(count):
+                    yield self.value_cls(self, name)
+            if totalsize < 32:
+                yield NullBits(self, "padding", 32-totalsize)
         else:
             yield UInt32(self, "offset", "Value offset")
 
-    def getSizes(self):
-        """
-        Returns (value_size, array_size): value_size in bits and
-        array_size in number of items.
-        """
-        # Create format
-        self.value_cls = self.ENTRY_FORMAT.get(self["type"].value, Bytes)
+    def createValue(self):
+        if "value" in self:
+            return self['value'].value
+        return None
 
-        # Set size
-        count = self["count"].value
-        if self.value_cls in (String, Bytes):
-            return 8 * count, 1
-        else:
-            return self.value_cls.static_size * count, count
+    def createDescription(self):
+        return "Entry: "+self["tag"].getTag()[1]
 
-class ExifEntry(BasicIFDEntry):
-    OFFSET_JPEG_SOI = 0x0201
+class IFDEntry(BasicIFDEntry):
     EXIF_IFD_POINTER = 0x8769
+    GPS_IFD_POINTER = 0x8825
+    INTEROP_IFD_POINTER = 0xA005
 
-    TAG_WIDTH = 0xA002
-    TAG_HEIGHT = 0xA003
-
-    TAG_GPS_LATITUDE_REF = 0x0001
-    TAG_GPS_LATITUDE = 0x0002
-    TAG_GPS_LONGITUDE_REF = 0x0003
-    TAG_GPS_LONGITUDE = 0x0004
-    TAG_GPS_ALTITUDE_REF = 0x0005
-    TAG_GPS_ALTITUDE = 0x0006
-    TAG_GPS_TIMESTAMP = 0x0007
-    TAG_GPS_DATESTAMP = 0x001d
-
-    TAG_IMG_TITLE = 0x010e
-    TAG_FILE_TIMESTAMP = 0x0132
-    TAG_SOFTWARE = 0x0131
-    TAG_CAMERA_MODEL = 0x0110
-    TAG_CAMERA_MANUFACTURER = 0x010f
-    TAG_ORIENTATION = 0x0112
-    TAG_EXPOSURE = 0x829A
-    TAG_FOCAL = 0x829D
-    TAG_BRIGHTNESS = 0x9203
-    TAG_APERTURE = 0x9205
-    TAG_USER_COMMENT = 0x9286
-
-    TAG_NAME = {
-        # GPS
-        0x0000: "GPS version ID",
-        0x0001: "GPS latitude ref",
-        0x0002: "GPS latitude",
-        0x0003: "GPS longitude ref",
-        0x0004: "GPS longitude",
-        0x0005: "GPS altitude ref",
-        0x0006: "GPS altitude",
-        0x0007: "GPS timestamp",
-        0x0008: "GPS satellites",
-        0x0009: "GPS status",
-        0x000a: "GPS measure mode",
-        0x000b: "GPS DOP",
-        0x000c: "GPS speed ref",
-        0x000d: "GPS speed",
-        0x000e: "GPS track ref",
-        0x000f: "GPS track",
-        0x0010: "GPS img direction ref",
-        0x0011: "GPS img direction",
-        0x0012: "GPS map datum",
-        0x0013: "GPS dest latitude ref",
-        0x0014: "GPS dest latitude",
-        0x0015: "GPS dest longitude ref",
-        0x0016: "GPS dest longitude",
-        0x0017: "GPS dest bearing ref",
-        0x0018: "GPS dest bearing",
-        0x0019: "GPS dest distance ref",
-        0x001a: "GPS dest distance",
-        0x001b: "GPS processing method",
-        0x001c: "GPS area information",
-        0x001d: "GPS datestamp",
-        0x001e: "GPS differential",
-
-        0x0100: "Image width",
-        0x0101: "Image height",
-        0x0102: "Number of bits per component",
-        0x0103: "Compression scheme",
-        0x0106: "Pixel composition",
-        TAG_ORIENTATION: "Orientation of image",
-        0x0115: "Number of components",
-        0x011C: "Image data arrangement",
-        0x0212: "Subsampling ratio Y to C",
-        0x0213: "Y and C positioning",
-        0x011A: "Image resolution width direction",
-        0x011B: "Image resolution in height direction",
-        0x0128: "Unit of X and Y resolution",
-
-        0x0111: "Image data location",
-        0x0116: "Number of rows per strip",
-        0x0117: "Bytes per compressed strip",
-        0x0201: "Offset to JPEG SOI",
-        0x0202: "Bytes of JPEG data",
-
-        0x012D: "Transfer function",
-        0x013E: "White point chromaticity",
-        0x013F: "Chromaticities of primaries",
-        0x0211: "Color space transformation matrix coefficients",
-        0x0214: "Pair of blank and white reference values",
-
-        TAG_FILE_TIMESTAMP: "File change date and time",
-        TAG_IMG_TITLE: "Image title",
-        TAG_CAMERA_MANUFACTURER: "Camera (Image input equipment) manufacturer",
-        TAG_CAMERA_MODEL: "Camera (Input input equipment) model",
-        TAG_SOFTWARE: "Software",
-        0x013B: "File change date and time",
-        0x8298: "Copyright holder",
-        0x8769: "Exif IFD Pointer",
-
-        TAG_EXPOSURE: "Exposure time",
-        TAG_FOCAL: "F number",
-        0x8822: "Exposure program",
-        0x8824: "Spectral sensitivity",
-        0x8827: "ISO speed rating",
-        0x8828: "Optoelectric conversion factor OECF",
-        0x9201: "Shutter speed",
-        0x9202: "Aperture",
-        TAG_BRIGHTNESS: "Brightness",
-        0x9204: "Exposure bias",
-        TAG_APERTURE: "Maximum lens aperture",
-        0x9206: "Subject distance",
-        0x9207: "Metering mode",
-        0x9208: "Light source",
-        0x9209: "Flash",
-        0x920A: "Lens focal length",
-        0x9214: "Subject area",
-        0xA20B: "Flash energy",
-        0xA20C: "Spatial frequency response",
-        0xA20E: "Focal plane X resolution",
-        0xA20F: "Focal plane Y resolution",
-        0xA210: "Focal plane resolution unit",
-        0xA214: "Subject location",
-        0xA215: "Exposure index",
-        0xA217: "Sensing method",
-        0xA300: "File source",
-        0xA301: "Scene type",
-        0xA302: "CFA pattern",
-        0xA401: "Custom image processing",
-        0xA402: "Exposure mode",
-        0xA403: "White balance",
-        0xA404: "Digital zoom ratio",
-        0xA405: "Focal length in 35 mm film",
-        0xA406: "Scene capture type",
-        0xA407: "Gain control",
-        0xA408: "Contrast",
-
-        0x9000: "Exif version",
-        0xA000: "Supported Flashpix version",
-        0xA001: "Color space information",
-        0x9101: "Meaning of each component",
-        0x9102: "Image compression mode",
-        TAG_WIDTH: "Valid image width",
-        TAG_HEIGHT: "Valid image height",
-        0x927C: "Manufacturer notes",
-        TAG_USER_COMMENT: "User comments",
-        0xA004: "Related audio file",
-        0x9003: "Date and time of original data generation",
-        0x9004: "Date and time of digital data generation",
-        0x9290: "DateTime subseconds",
-        0x9291: "DateTimeOriginal subseconds",
-        0x9292: "DateTimeDigitized subseconds",
-        0xA420: "Unique image ID",
-        0xA005: "Interoperability IFD Pointer"
+    TAG_INFO = {
+        # image data structure
+        0x0100: ("ImageWidth", "Image width"),
+        0x0101: ("ImageLength", "Image height"),
+        0x0102: ("BitsPerSample", "Number of bits per component"),
+        0x0103: ("Compression", "Compression scheme"),
+        0x0106: ("PhotometricInterpretation", "Pixel composition"),
+        0x0112: ("Orientation", "Orientation of image"),
+        0x0115: ("SamplesPerPixel", "Number of components"),
+        0x011C: ("PlanarConfiguration", "Image data arrangement"),
+        0x0212: ("YCbCrSubSampling", "Subsampling ratio of Y to C"),
+        0x0213: ("YCbCrPositioning", "Y and C positioning"),
+        0x011A: ("XResolution", "Image resolution in width direction"),
+        0x011B: ("YResolution", "Image resolution in height direction"),
+        0x0128: ("ResolutionUnit", "Unit of X and Y resolution"),
+        # recording offset
+        0x0111: ("StripOffsets", "Image data location"),
+        0x0116: ("RowsPerStrip", "Number of rows per strip"),
+        0x0117: ("StripByteCounts", "Bytes per compressed strip"),
+        0x0201: ("JPEGInterchangeFormat", "Offset to JPEG SOI"),
+        0x0202: ("JPEGInterchangeFormatLength", "Bytes of JPEG data"),
+        # image data characteristics
+        0x012D: ("TransferFunction", "Transfer function"),
+        0x013E: ("WhitePoint", "White point chromaticity"),
+        0x013F: ("PrimaryChromaticities", "Chromaticities of primaries"),
+        0x0211: ("YCbCrCoefficients", "Color space transformation matrix coefficients"),
+        0x0214: ("ReferenceBlackWhite", "Pair of black and white reference values"),
+        # other tags
+        0x0132: ("DateTime", "File change date and time"),
+        0x010E: ("ImageDescription", "Image title"),
+        0x010F: ("Make", "Image input equipment manufacturer"),
+        0x0110: ("Model", "Image input equipment model"),
+        0x0131: ("Software", "Software used"),
+        0x013B: ("Artist", "Person who created the image"),
+        0x8298: ("Copyright", "Copyright holder"),
+        # TIFF-specific tags
+        0x00FE: ("NewSubfileType", "NewSubfileType"),
+        0x00FF: ("SubfileType", "SubfileType"),
+        0x0107: ("Threshholding", "Threshholding"),
+        0x0108: ("CellWidth", "CellWidth"),
+        0x0109: ("CellLength", "CellLength"),
+        0x010A: ("FillOrder", "FillOrder"),
+        0x010D: ("DocumentName", "DocumentName"),
+        0x0118: ("MinSampleValue", "MinSampleValue"),
+        0x0119: ("MaxSampleValue", "MaxSampleValue"),
+        0x011D: ("PageName", "PageName"),
+        0x011E: ("XPosition", "XPosition"),
+        0x011F: ("YPosition", "YPosition"),
+        0x0120: ("FreeOffsets", "FreeOffsets"),
+        0x0121: ("FreeByteCounts", "FreeByteCounts"),
+        0x0122: ("GrayResponseUnit", "GrayResponseUnit"),
+        0x0123: ("GrayResponseCurve", "GrayResponseCurve"),
+        0x0124: ("T4Options", "T4Options"),
+        0x0125: ("T6Options", "T6Options"),
+        0x0129: ("PageNumber", "PageNumber"),
+        0x013C: ("HostComputer", "HostComputer"),
+        0x013D: ("Predictor", "Predictor"),
+        0x0140: ("ColorMap", "ColorMap"),
+        0x0141: ("HalftoneHints", "HalftoneHints"),
+        0x0142: ("TileWidth", "TileWidth"),
+        0x0143: ("TileLength", "TileLength"),
+        0x0144: ("TileOffsets", "TileOffsets"),
+        0x0145: ("TileByteCounts", "TileByteCounts"),
+        0x014C: ("InkSet", "InkSet"),
+        0x014D: ("InkNames", "InkNames"),
+        0x014E: ("NumberOfInks", "NumberOfInks"),
+        0x0150: ("DotRange", "DotRange"),
+        0x0151: ("TargetPrinter", "TargetPrinter"),
+        0x0152: ("ExtraSamples", "ExtraSamples"),
+        0x0153: ("SampleFormat", "SampleFormat"),
+        0x0154: ("SMinSampleValue", "SMinSampleValue"),
+        0x0155: ("SMaxSampleValue", "SMaxSampleValue"),
+        0x0156: ("TransferRange", "TransferRange"),
+        0x0200: ("JPEGProc", "JPEGProc"),
+        0x0203: ("JPEGRestartInterval", "JPEGRestartInterval"),
+        0x0205: ("JPEGLosslessPredictors", "JPEGLosslessPredictors"),
+        0x0206: ("JPEGPointTransforms", "JPEGPointTransforms"),
+        0x0207: ("JPEGQTables", "JPEGQTables"),
+        0x0208: ("JPEGDCTables", "JPEGDCTables"),
+        0x0209: ("JPEGACTables", "JPEGACTables"),
+        # IFD pointers
+        EXIF_IFD_POINTER: ("IFDExif", "Exif IFD Pointer"),
+        GPS_IFD_POINTER: ("IFDGPS", "GPS IFD Pointer"),
+        INTEROP_IFD_POINTER: ("IFDInterop", "Interoperability IFD Pointer"),
     }
 
-    def createDescription(self):
-        return "Entry: %s" % self["tag"].display
+class ExifIFDEntry(BasicIFDEntry):
+    TAG_INFO = {
+        # version
+        0x9000: ("ExifVersion", "Exif version"),
+        0xA000: ("FlashpixVersion", "Supported Flashpix version"),
+        # image data characteristics
+        0xA001: ("ColorSpace", "Color space information"),
+        # image configuration
+        0x9101: ("ComponentsConfiguration", "Meaning of each component"),
+        0x9102: ("CompressedBitsPerPixel", "Image compression mode"),
+        0xA002: ("PixelXDimension", "Valid image width"),
+        0xA003: ("PixelYDimension", "Valid image height"),
+        # user information
+        0x927C: ("MakerNote", "Manufacturer notes"),
+        0x9286: ("UserComment", "User comments"),
+        # related file information
+        0xA004: ("RelatedSoundFile", "Related audio file"),
+        # date and time
+        0x9003: ("DateTimeOriginal", "Date and time of original data generation"),
+        0x9004: ("DateTimeDigitized", "Date and time of digital data generation"),
+        0x9290: ("SubSecTime", "DateTime subseconds"),
+        0x9291: ("SubSecTimeOriginal", "DateTimeOriginal subseconds"),
+        0x9292: ("SubSecTimeDigitized", "DateTimeDigitized subseconds"),
+        # picture-taking conditions
+        0x829A: ("ExposureTime", "Exposure time"),
+        0x829D: ("FNumber", "F number"),
+        0x8822: ("ExposureProgram", "Exposure program"),
+        0x8824: ("SpectralSensitivity", "Spectral sensitivity"),
+        0x8827: ("ISOSpeedRatings", "ISO speed rating"),
+        0x8828: ("OECF", "Optoelectric conversion factor"),
+        0x9201: ("ShutterSpeedValue", "Shutter speed"),
+        0x9202: ("ApertureValue", "Aperture"),
+        0x9203: ("BrightnessValue", "Brightness"),
+        0x9204: ("ExposureBiasValue", "Exposure bias"),
+        0x9205: ("MaxApertureValue", "Maximum lens aperture"),
+        0x9206: ("SubjectDistance", "Subject distance"),
+        0x9207: ("MeteringMode", "Metering mode"),
+        0x9208: ("LightSource", "Light source"),
+        0x9209: ("Flash", "Flash"),
+        0x920A: ("FocalLength", "Lens focal length"),
+        0x9214: ("SubjectArea", "Subject area"),
+        0xA20B: ("FlashEnergy", "Flash energy"),
+        0xA20C: ("SpatialFrequencyResponse", "Spatial frequency response"),
+        0xA20E: ("FocalPlaneXResolution", "Focal plane X resolution"),
+        0xA20F: ("FocalPlaneYResolution", "Focal plane Y resolution"),
+        0xA210: ("FocalPlaneResolutionUnit", "Focal plane resolution unit"),
+        0xA214: ("SubjectLocation", "Subject location"),
+        0xA215: ("ExposureIndex", "Exposure index"),
+        0xA217: ("SensingMethod", "Sensing method"),
+        0xA300: ("FileSource", "File source"),
+        0xA301: ("SceneType", "Scene type"),
+        0xA302: ("CFAPattern", "CFA pattern"),
+        0xA401: ("CustomRendered", "Custom image processing"),
+        0xA402: ("ExposureMode", "Exposure mode"),
+        0xA403: ("WhiteBalance", "White balance"),
+        0xA404: ("DigitalZoomRatio", "Digital zoom ratio"),
+        0xA405: ("FocalLengthIn35mmFilm", "Focal length in 35 mm film"),
+        0xA406: ("SceneCaptureType", "Scene capture type"),
+        0xA407: ("GainControl", "Gain control"),
+        0xA408: ("Contrast", "Contrast"),
+        0xA409: ("Saturation", "Saturation"),
+        0xA40A: ("Sharpness", "Sharpness"),
+        0xA40B: ("DeviceSettingDescription", "Device settings description"),
+        0xA40C: ("SubjectDistanceRange", "Subject distance range"),
+        # other tags
+        0xA420: ("ImageUniqueID", "Unique image ID"),
+    }
 
-def sortExifEntry(a,b):
-    return int( a["offset"].value - b["offset"].value )
+class GPSIFDEntry(BasicIFDEntry):
+    TAG_INFO = {
+        0x0000: ("GPSVersionID", "GPS tag version"),
+        0x0001: ("GPSLatitudeRef", "North or South Latitude"),
+        0x0002: ("GPSLatitude", "Latitude"),
+        0x0003: ("GPSLongitudeRef", "East or West Longitude"),
+        0x0004: ("GPSLongitude", "Longitude"),
+        0x0005: ("GPSAltitudeRef", "Altitude reference"),
+        0x0006: ("GPSAltitude", "Altitude"),
+        0x0007: ("GPSTimeStamp", "GPS time (atomic clock)"),
+        0x0008: ("GPSSatellites", "GPS satellites used for measurement"),
+        0x0009: ("GPSStatus", "GPS receiver status"),
+        0x000A: ("GPSMeasureMode", "GPS measurement mode"),
+        0x000B: ("GPSDOP", "Measurement precision"),
+        0x000C: ("GPSSpeedRef", "Speed unit"),
+        0x000D: ("GPSSpeed", "Speed of GPS receiver"),
+        0x000E: ("GPSTrackRef", "Reference for direction of movement"),
+        0x000F: ("GPSTrack", "Direction of movement"),
+        0x0010: ("GPSImgDirectionRef", "Reference for direction of image"),
+        0x0011: ("GPSImgDirection", "Direction of image"),
+        0x0012: ("GPSMapDatum", "Geodetic survey data used"),
+        0x0013: ("GPSDestLatitudeRef", "Reference for latitude of destination"),
+        0x0014: ("GPSDestLatitude", "Latitude of destination"),
+        0x0015: ("GPSDestLongitudeRef", "Reference for longitude of destination"),
+        0x0016: ("GPSDestLongitude", "Longitude of destination"),
+        0x0017: ("GPSDestBearingRef", "Reference for bearing of destination"),
+        0x0018: ("GPSDestBearing", "Bearing of destination"),
+        0x0019: ("GPSDestDistanceRef", "Reference for distance to destination"),
+        0x001A: ("GPSDestDistance", "Distance to destination"),
+        0x001B: ("GPSProcessingMethod", "Name of GPS processing method"),
+        0x001C: ("GPSAreaInformation", "Name of GPS area"),
+        0x001D: ("GPSDateStamp", "GPS date"),
+        0x001E: ("GPSDifferential", "GPS differential correction"),
+    }
 
-class ExifIFD(FieldSet):
-    def seek(self, offset):
-        """
-        Seek to byte address relative to parent address.
-        """
-        padding = offset - (self.address + self.current_size)/8
-        if 0 < padding:
-            return createPaddingField(self, padding*8)
-        else:
-            return None
+class InteropIFDEntry(BasicIFDEntry):
+    TAG_INFO = {
+        0x0001: ("InteroperabilityIndex", "Interoperability Identification"),
+    }
+
+class IFD(SeekableFieldSet):
+    EntryClass = IFDEntry
+    def __init__(self, parent, name, base_addr):
+        self.base_addr = base_addr
+        SeekableFieldSet.__init__(self, parent, name)
 
     def createFields(self):
-        offset_diff = 6
         yield UInt16(self, "count", "Number of entries")
-        entries = []
-        next_chunk_offset = None
         count = self["count"].value
-        if not count:
-            return
-        while count:
-            addr = self.absolute_address + self.current_size
-            next = self.stream.readBits(addr, 32, NETWORK_ENDIAN)
-            if next in (0, 0xF0000000):
-                break
-            entry = ExifEntry(self, "entry[]")
-            yield entry
-            if entry["tag"].value in (ExifEntry.EXIF_IFD_POINTER, ExifEntry.OFFSET_JPEG_SOI):
-                next_chunk_offset = entry["value"].value + offset_diff
-            if 32 < entry.getSizes()[0]:
-                entries.append(entry)
-            count -= 1
-        yield UInt32(self, "next", "Next IFD offset")
-        try:
-            entries.sort( sortExifEntry )
-        except TypeError:
-            raise ParserError("Unable to sort entries!")
-        value_index = 0
-        for entry in entries:
-            padding = self.seek(entry["offset"].value + offset_diff)
-            if padding is not None:
-                yield padding
+        if count == 0:
+            raise ParserError("IFDs cannot be empty.")
+        for i in xrange(count):
+            yield self.EntryClass(self, "entry[]")
+        yield UInt32(self, "next", "Offset to next IFD")
+        for i, entry in enumerate(self.array('entry')):
+            if 'offset' not in entry:
+                continue
+            self.seekByte(entry['offset'].value+self.base_addr//8, relative=False)
+            count = entry['count'].value
+            name = "value[%s]"%i
+            if issubclass(entry.value_cls, Bytes):
+                yield entry.value_cls(self, name, count)
+            else:
+                if count > 1:
+                    name += "[]"
+                for i in xrange(count):
+                    yield entry.value_cls(self, name)
 
-            value_size, array_size = entry.getSizes()
-            if not array_size:
-                continue
-            cls = entry.value_cls
-            if 1 < array_size:
-                name = "value_%s[]" % entry.name
-            else:
-                name = "value_%s" % entry.name
-            desc = "Value of \"%s\"" % entry["tag"].display
-            if cls is String:
-                for index in xrange(array_size):
-                    yield cls(self, name, value_size/8, desc, strip=" \0", charset="ISO-8859-1")
-            elif cls is Bytes:
-                for index in xrange(array_size):
-                    yield cls(self, name, value_size/8, desc)
-            else:
-                for index in xrange(array_size):
-                    yield cls(self, name, desc)
-            value_index += 1
-        if next_chunk_offset is not None:
-            padding = self.seek(next_chunk_offset)
-            if padding is not None:
-                yield padding
+    def getEntryValues(self, entry):
+        n = int(entry.name.rsplit('[',1)[1].strip(']'))
+        if 'offset' in entry:
+            field = 'value[%d]'%n
+            base = self
+        else:
+            field = 'value'
+            base = entry
+        if field in base:
+            return [base[field]]
+        else:
+            return base.array(field)
 
-    def createDescription(self):
-        return "Exif IFD (id %s)" % self["id"].value
+class ExifIFD(IFD):
+    EntryClass = ExifIFDEntry
 
-class Exif(FieldSet):
+class GPSIFD(IFD):
+    EntryClass = GPSIFDEntry
+
+class InteropIFD(IFD):
+    EntryClass = InteropIFDEntry
+
+IFD_TAGS = {
+    IFDEntry.EXIF_IFD_POINTER: ('exif', ExifIFD),
+    IFDEntry.GPS_IFD_POINTER: ('exif_gps', GPSIFD),
+    IFDEntry.INTEROP_IFD_POINTER: ('exif_interop', InteropIFD),
+}
+
+def TIFF(self):
+    iff_start = self.absolute_address + self.current_size
+    yield String(self, "endian", 2, "Endian ('II' or 'MM')", charset="ASCII")
+    if self["endian"].value not in ("II", "MM"):
+        raise ParserError("Invalid endian!")
+    if self["endian"].value == "II":
+       self.endian = LITTLE_ENDIAN
+    else:
+       self.endian = BIG_ENDIAN
+
+    yield UInt16(self, "version", "TIFF version number")
+    yield UInt32(self, "img_dir_ofs", "Next image directory offset")
+    offsets = [(self['img_dir_ofs'].value, 'ifd[]', IFD)]
+    while offsets:
+        offset, name, klass = offsets.pop(0)
+        self.seekByte(offset+iff_start//8, relative=False)
+        ifd = klass(self, name, iff_start)
+        yield ifd
+        for entry in ifd.array('entry'):
+            tag = entry['tag'].value
+            if tag in IFD_TAGS:
+                name, klass = IFD_TAGS[tag]
+                offsets.append((ifd.getEntryValues(entry)[0].value, name+'[]', klass))
+        if ifd['next'].value != 0:
+            offsets.append((ifd['next'].value, 'ifd[]', IFD))
+
+class Exif(SeekableFieldSet):
     def createFields(self):
         # Headers
         yield String(self, "header", 6, "Header (Exif\\0\\0)", charset="ASCII")
         if self["header"].value != "Exif\0\0":
             raise ParserError("Invalid EXIF signature!")
-        yield String(self, "byte_order", 2, "Byte order", charset="ASCII")
-        if self["byte_order"].value not in ("II", "MM"):
-            raise ParserError("Invalid endian!")
-        if self["byte_order"].value == "II":
-           self.endian = LITTLE_ENDIAN
-        else:
-           self.endian = BIG_ENDIAN
-        yield UInt16(self, "version", "TIFF version number")
-        yield UInt32(self, "img_dir_ofs", "Next image directory offset")
-        while not self.eof:
-            addr = self.absolute_address + self.current_size
-            tag = self.stream.readBits(addr, 16, NETWORK_ENDIAN)
-            if tag == 0xFFD8:
-                size = (self._size - self.current_size) // 8
-                yield SubFile(self, "thumbnail", size, "Thumbnail (JPEG file)", mime_type="image/jpeg")
-                break
-            elif tag == 0xFFFF:
-                break
-            yield ExifIFD(self, "ifd[]", "IFD")
-        padding = self.seekBit(self._size)
-        if padding is not None:
-            yield padding
+        iff_start = self.absolute_address + self.current_size
+        for field in TIFF(self):
+            yield field
 
-
+        for ifd in self.array('ifd'):
+            data = {}
+            for i, entry in enumerate(ifd.array('entry')):
+                data[entry['tag'].display] = entry
+            if 'JPEGInterchangeFormat' in data and 'JPEGInterchangeFormatLength' in data:
+                offs = ifd.getEntryValues(data['JPEGInterchangeFormat'])[0].value
+                size = ifd.getEntryValues(data['JPEGInterchangeFormatLength'])[0].value
+                if size == 0: continue
+                self.seekByte(offs + iff_start//8, relative=False)
+                yield SubFile(self, "thumbnail[]", size, "Thumbnail (JPEG file)", mime_type="image/jpeg")

hachoir-parser/hachoir_parser/image/tiff.py

 """
 TIFF image parser.
 
-Authors: Victor Stinner and Sebastien Ponce
+Authors: Victor Stinner, Sebastien Ponce, Robert Xiao
 Creation date: 30 september 2006
 """
 
 from hachoir_parser import Parser
-from hachoir_core.field import (FieldSet, SeekableFieldSet, ParserError, RootSeekableFieldSet,
-    UInt16, UInt32, Bytes, String)
+from hachoir_core.field import FieldSet, SeekableFieldSet, RootSeekableFieldSet, Bytes
 from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN
-from hachoir_parser.image.exif import BasicIFDEntry
-from hachoir_core.tools import createDict
+from hachoir_parser.image.exif import TIFF
 
-MAX_COUNT = 250
-
-class IFDEntry(BasicIFDEntry):
-    static_size = 12*8
-
-    TAG_INFO = {
-        254: ("new_subfile_type", "New subfile type"),
-        255: ("subfile_type", "Subfile type"),
-        256: ("img_width", "Image width in pixels"),
-        257: ("img_height", "Image height in pixels"),
-        258: ("bits_per_sample", "Bits per sample"),
-        259: ("compression", "Compression method"),
-        262: ("photo_interpret", "Photometric interpretation"),
-        263: ("thres", "Thresholding"),
-        264: ("cell_width", "Cellule width"),
-        265: ("cell_height", "Cellule height"),
-        266: ("fill_order", "Fill order"),
-        269: ("doc_name", "Document name"),
-        270: ("description", "Image description"),
-        271: ("make", "Make"),
-        272: ("model", "Model"),
-        273: ("strip_ofs", "Strip offsets"),
-        274: ("orientation", "Orientation"),
-        277: ("sample_pixel", "Samples per pixel"),
-        278: ("row_per_strip", "Rows per strip"),
-        279: ("strip_byte", "Strip byte counts"),
-        280: ("min_sample_value", "Min sample value"),
-        281: ("max_sample_value", "Max sample value"),
-        282: ("xres", "X resolution"),
-        283: ("yres", "Y resolution"),
-        284: ("planar_conf", "Planar configuration"),
-        285: ("page_name", "Page name"),
-        286: ("xpos", "X position"),
-        287: ("ypos", "Y position"),
-        288: ("free_ofs", "Free offsets"),
-        289: ("free_byte", "Free byte counts"),
-        290: ("gray_resp_unit", "Gray response unit"),
-        291: ("gray_resp_curve", "Gray response curve"),
-        292: ("group3_opt", "Group 3 options"),
-        293: ("group4_opt", "Group 4 options"),
-        296: ("res_unit", "Resolution unit"),
-        297: ("page_nb", "Page number"),
-        301: ("color_respt_curve", "Color response curves"),
-        305: ("software", "Software"),
-        306: ("date_time", "Date time"),
-        315: ("artist", "Artist"),
-        316: ("host_computer", "Host computer"),
-        317: ("predicator", "Predicator"),
-        318: ("white_pt", "White point"),
-        319: ("prim_chomat", "Primary chromaticities"),
-        320: ("color_map", "Color map"),
-        321: ("half_tone_hints", "Halftone Hints"),
-        322: ("tile_width", "TileWidth"),
-        323: ("tile_length", "TileLength"),
-        324: ("tile_offsets", "TileOffsets"),
-        325: ("tile_byte_counts", "TileByteCounts"),
-        332: ("ink_set", "InkSet"),
-        333: ("ink_names", "InkNames"),
-        334: ("number_of_inks", "NumberOfInks"),
-        336: ("dot_range", "DotRange"),
-        337: ("target_printer", "TargetPrinter"),
-        338: ("extra_samples", "ExtraSamples"),
-        339: ("sample_format", "SampleFormat"),
-        340: ("smin_sample_value", "SMinSampleValue"),
-        341: ("smax_sample_value", "SMaxSampleValue"),
-        342: ("transfer_range", "TransferRange"),
-        512: ("jpeg_proc", "JPEGProc"),
-        513: ("jpeg_interchange_format", "JPEGInterchangeFormat"),
-        514: ("jpeg_interchange_format_length", "JPEGInterchangeFormatLength"),
-        515: ("jpeg_restart_interval", "JPEGRestartInterval"),
-        517: ("jpeg_lossless_predictors", "JPEGLosslessPredictors"),
-        518: ("jpeg_point_transforms", "JPEGPointTransforms"),
-        519: ("jpeg_qtables", "JPEGQTables"),
-        520: ("jpeg_dctables", "JPEGDCTables"),
-        521: ("jpeg_actables", "JPEGACTables"),
-        529: ("ycbcr_coefficients", "YCbCrCoefficients"),
-        530: ("ycbcr_subsampling", "YCbCrSubSampling"),
-        531: ("ycbcr_positioning", "YCbCrPositioning"),
-        532: ("reference_blackwhite", "ReferenceBlackWhite"),
-        33432: ("copyright", "Copyright"),
-        0x8769: ("ifd_pointer", "Pointer to next IFD entry"),
-    }
-    TAG_NAME = createDict(TAG_INFO, 0)
-
-    def __init__(self, *args):
-        FieldSet.__init__(self, *args)
-        tag = self["tag"].value
-        if tag in self.TAG_INFO:
-            self._name, self._description = self.TAG_INFO[tag]
-        else:
-            self._parser = None
-
-class IFD(FieldSet):
-    def __init__(self, *args):
-        FieldSet.__init__(self, *args)
-        self._size = 16 + self["count"].value * IFDEntry.static_size
-        self._has_offset = False
-
-    def createFields(self):
-        yield UInt16(self, "count")
-        if MAX_COUNT < self["count"].value:
-            raise ParserError("TIFF IFD: Invalid count (%s)"
-                % self["count"].value)
-        for index in xrange(self["count"].value):
-            yield IFDEntry(self, "entry[]")
+def getStrips(ifd):
+    """
+    Generate an (offset, byte count) pair for every image strip of an IFD.
+
+    The entries of `ifd` are indexed by the display name of their tag.  When
+    both the "StripOffsets" and "StripByteCounts" entries are present, their
+    values (read through ifd.getEntryValues()) are paired up in order and
+    the .value of each pair is yielded.  If either entry is missing, nothing
+    is yielded.
+    """
+    entries = {}
+    for entry in ifd.array('entry'):
+        entries[entry['tag'].display] = entry
+    # image data
+    if "StripOffsets" in entries and "StripByteCounts" in entries:
+        offsets = ifd.getEntryValues(entries["StripOffsets"])
+        sizes = ifd.getEntryValues(entries["StripByteCounts"])
+        for offset, size in zip(offsets, sizes):
+            yield offset.value, size.value
 
 class ImageFile(SeekableFieldSet):
+    """
+    Field set exposing the strip data referenced by one IFD.
+
+    The IFD passed to the constructor is kept in self._ifd; createFields()
+    yields one "strip[]" Bytes field per (offset, byte count) pair reported
+    by getStrips() for that IFD.
+    """
     def __init__(self, parent, name, description, ifd):
         SeekableFieldSet.__init__(self, parent, name, description, None)
-        self._has_offset = False
         self._ifd = ifd
 
     def createFields(self):
-        datas = {}
-        for entry in self._ifd:
-            if type(entry) != IFDEntry:
-                continue
-            for c in entry:
-                if c.name != "offset":
-                    continue
-                self.seekByte(c.value, False)
-                desc = "data of ifd entry " + entry.name,
-                entryType = BasicIFDEntry.ENTRY_FORMAT[entry["type"].value]
-                count = entry["count"].value
-                if entryType == String:
-                    yield String(self, entry.name, count, desc, "\0", "ISO-8859-1")
-                else:    
-                    d = Data(self, entry.name, desc, entryType, count)
-                    datas[d.name] = d
-                    yield d
-                break
-        # image data
-        if "strip_ofs" in datas and "strip_byte" in datas:
-            for i in xrange(datas["strip_byte"]._count):
-                self.seekByte(datas["strip_ofs"]["value["+str(i)+"]"].value, False)
-                yield Bytes(self, "strip[]", datas["strip_byte"]["value["+str(i)+"]"].value)
-
-class Data(FieldSet):
-
-    def __init__(self, parent, name, desc, type, count):
-        size = type.static_size * count
-        FieldSet.__init__(self, parent, name, desc, size)
-        self._count = count
-        self._type = type
-
-    def createFields(self):
-        for i in xrange(self._count):
-            yield self._type(self, "value[]")
+        # Seek to each strip offset (relative=False, i.e. not relative to the
+        # current position) and expose the strip as a raw Bytes field whose
+        # length is the matching byte count.
+        for off, byte in getStrips(self._ifd):
+            self.seekByte(off, relative=False)
+            yield Bytes(self, "strip[]", byte)
 
 class TiffFile(RootSeekableFieldSet, Parser):
     PARSER_TAGS = {
         "file_ext": ("tif", "tiff"),
         "mime": (u"image/tiff",),
         "min_size": 8*8,
-# TODO: Re-enable magic
         "magic": (("II\x2A\0", 0), ("MM\0\x2A", 0)),
         "description": "TIFF picture"
     }
         return True
 
     def createFields(self):
-        yield String(self, "endian", 2, 'Endian ("II" or "MM")', charset="ASCII")
-        yield UInt16(self, "version", "TIFF version number")
-        offset = UInt32(self, "img_dir_ofs[]", "Next image directory offset (in bytes from the beginning)")
-        yield offset
-        ifds = []
-        while True:
-            if offset.value == 0:
-                break
+        # Delegate the structural fields to TIFF() from the exif module; the
+        # loop further down relies on it having produced the 'ifd' array.
+        for field in TIFF(self):
+            yield field
 
-            self.seekByte(offset.value, relative=False)
-            ifd = IFD(self, "ifd[]", "Image File Directory", None)
-            ifds.append(ifd)
-            yield ifd
-            offset = UInt32(self, "img_dir_ofs[]", "Next image directory offset (in bytes from the beginning)")
-            yield offset
-        for ifd in ifds:
+        # Emit one ImageFile per IFD, positioned at its lowest strip offset.
+        for ifd in self.array('ifd'):
+            # getStrips() yields nothing when the IFD lacks the StripOffsets /
+            # StripByteCounts pair, and min() of an empty sequence raises
+            # ValueError, so collect the offsets first and skip such IFDs.
+            offs = [off for off, byte in getStrips(ifd)]
+            if not offs:
+                continue
+            self.seekByte(min(offs), relative=False)
             image = ImageFile(self, "image[]", "Image File", ifd)
             yield image