# Ben Whittaker - Embed KPhotoAlbum Tags as Image Metadata
#
# Created by Ben Whittaker
#!/usr/bin/env python

# This script embeds KPhotoAlbum keywords, people, and places as metadata in individual image files.
# To use this script:
# - Install the exiv2 command line tool (http://www.exiv2.org/)
# - Place kphoto2meta.py in the same directory as KPhotoAlbum's index.xml file
# - Open a console
# - Run "python kphoto2meta.py"
# Known limitations:
# - exiv2 currently cannot read or write metadata for video files
# - If a file already has an embedded tag containing a comma, it may be inadvertently split into two separate tags

import xml.etree.ElementTree as ElementTree
import subprocess
import os.path

# Progress prefix placed in front of every log line. The main loop rebinds
# this module-level name for each image (e.g. "(3 / 120)").
status = ""
def log(message):
    """Print *message* to stdout, prefixed by the global ``status`` and a space.

    message -- text to print (str or unicode).

    On Python 2 the line is encoded to UTF-8 before printing, so non-ASCII
    file names and tags do not raise UnicodeEncodeError when stdout is piped.
    On Python 3 the text is handed to print() unchanged.
    """
    line = u"%s %s" % (status, message)
    # "str is bytes" is only true on Python 2, where print would otherwise
    # fall back to the (possibly ASCII) stdout encoding for unicode objects.
    if str is bytes:
        line = line.encode('utf-8')
    # A single parenthesised argument is valid as both the Python 2 print
    # statement and the Python 3 print function.
    print(line)

# "separator" implies that the key supports hierarchical tags (eg People/name).
# "multiple" indicates that each value should be added using exiv2's "add" command, instead of the "set" command
keysToUpdate = dict([
    # Hierarchical keys: tag levels are joined with the given separator
    ("Xmp.digiKam.TagsList", dict(separator='/')),
    ("Xmp.MicrosoftPhoto.LastKeywordXMP", dict(separator='/')),
    ("Xmp.lr.hierarchicalSubject", dict(separator='|')),
    ("Xmp.mediapro.CatalogSets", dict(separator='|')),
    # Flat key: no separator, so only the last path component is stored
    ("Xmp.dc.subject", dict()),
    # Flat key whose values are written one by one with exiv2's "add" command
    ("Iptc.Application2.Keywords", dict(multiple=True)),
])

# Load the KPhotoAlbum index; index.xml is looked up in the current working directory
tree = ElementTree.parse('index.xml')
root = tree.getroot()

# Element.find() returns None when there is no <images> child; treat that as
# an empty catalogue so the script finishes cleanly instead of failing in len().
imageTags = root.find("images")
if imageTags is None:
    imageTags = []

numImages = len(imageTags)
numFilesModified = 0

# Iterate through all images, counting from 1 for the progress display
for current, imageTag in enumerate(imageTags, 1):
    # log() prefixes every message with this module-level progress string
    status = "(" + str(current) + " / " + str(numImages) + ")"
    filename = imageTag.get("file")

    # Skip entries that have no "file" attribute at all
    if filename is None:
        log("Missing file attribute, skipping entry")
        continue

    # Skip missing files
    if not os.path.isfile(filename):
        log("Missing file: " + filename)
        continue

    # Get keywords, people and places for the current image from the KPhotoAlbum index.
    # If an option name occurs more than once, the last occurrence wins.
    keywords = []
    people = []
    places = []
    for option in imageTag.iter("option"):
        optionName = option.get("name")
        # Children without a non-empty "value" attribute carry no tag text
        values = [child.get("value") for child in option if child.get("value")]
        if optionName == "Keywords":
            keywords = values
        elif optionName == "People":
            people = values
        elif optionName == "Places":
            places = values

    # Merge everything into one list of tags
    newtags = keywords + ["People/" + person for person in people] + ["Places/" + place for place in places]

    # Remove commas, since they could lead to individual tags getting split
    # Also, convert newtags to a set instead of a list
    newtags = {tag.replace(',', "") for tag in newtags}

    log("File: " + filename)
    log("    KPhotoAlbum tags: " + ", ".join(sorted(newtags)))

    # Only do the rest if there are new tags to add
    if not newtags:
        continue

    oldtags = set()
    flattags = set() # flattags is used to keep track of non-hierarchical tags

    # Read existing metadata using exiv2
    try:
        output = subprocess.check_output(["exiv2", "-PEIXkv", "pr", filename], stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        if e.returncode != 253:
            log("    Error reading image metadata: exiv2 returned with code " + str(e.returncode))
            continue
        # A return code of 253 seems to indicate missing metadata. Use whatever
        # exiv2 printed before exiting; without this assignment "output" would
        # be undefined for the first file or still hold the previous file's data.
        output = e.output or b""

    # Parse exiv2 output to find existing tags (each line is "<key> <values>")
    for line in output.decode('utf-8', errors="ignore").splitlines():
        pair = line.split(None, 1)
        if len(pair) != 2: continue
        key, vals = pair

        # Identify which key this is, and whether it has a separator (and is thus hierarchical)
        info = keysToUpdate.get(key)
        if info is None: continue
        separator = info.get("separator")

        # Add tags to sets, normalising hierarchical tags to '/' as the separator
        for val in vals.split(','):
            val = val.strip()
            if not val: continue # ignore empty entries such as "a,,b"
            if separator is not None: oldtags.add(val.replace(separator, '/'))
            else: flattags.add(val)

    # Only keep flat tags that don't have matching hierarchical tags
    for tag in oldtags: flattags.discard(tag.rpartition('/')[2])
    oldtags |= flattags

    log("    Existing tags: " + ", ".join(sorted(oldtags)))

    # Merge old and new tags
    tags = oldtags | newtags

    log("    Combined tags: " + ", ".join(sorted(tags)))

    # Skip the rest if existing tags and combined tags are equal
    if oldtags == tags:
        log("    Existing tags and combined tags are already equal, skipping file")
        continue

    # Convert tags to sorted list so they are written in a stable order
    tags = sorted(tags)

    # Compose list of edits to be made by exiv2
    edits = []
    for key, info in keysToUpdate.items():
        separator = info.get("separator")
        # Keys flagged "multiple" are written with exiv2's "add" command, all others with "set"
        verb = "add" if info.get("multiple") else "set"

        # Remove whatever the key currently holds before writing the merged tags
        edits.append("del " + key)

        # Hierarchical keys get the full path with their own separator;
        # flat keys only get the last path component
        if separator: formattedTags = [tag.replace('/', separator) for tag in tags]
        else: formattedTags = [tag.rpartition('/')[2] for tag in tags]

        for tag in formattedTags:
            edits.append(verb + " " + key + " " + tag)

    log("    Writing combined tags")

    # Call exiv2 to embed the tags in the file
    command = ["exiv2"]
    for edit in edits: command += ["-M", edit]
    command += ["mo", filename]

    # communicate() drains the stderr pipe, so a chatty exiv2 cannot block on a full pipe
    process = subprocess.Popen(command, stderr=subprocess.PIPE)
    errorOutput = process.communicate()[1]
    if process.returncode != 0:
        # Report the failure and do not count the file as modified
        log("    Error writing image metadata: exiv2 returned with code " + str(process.returncode))
        errorText = errorOutput.decode('utf-8', errors="ignore").strip()
        if errorText:
            log("    " + errorText)
        continue

    numFilesModified += 1

# A single parenthesised argument prints identically on Python 2 and Python 3
print("Done! " + str(numFilesModified) + " images modified")

# Comments (0)