hgbook-de / de / autoid.py

#!/usr/bin/env python
#
# Add unique ID attributes to para tags.  This script should only be
# run by one person, since otherwise it introduces the possibility of
# chaotic conflicts among tags.

import glob, os, re, sys

# Opening <para ...> tag that already carries an id of the form "x_<hex>";
# group 1 captures the lowercase hex digits of the ID.
tagged = re.compile(
    '<para[^>]* id="x_([0-9a-f]+)"[^>]*>',
    flags=re.MULTILINE,
)

# Bare <para> tag with no attributes at all.
untagged = re.compile('<para>')

# Files to process, looked up in the current working directory: everything
# matching ch*.xml, followed by everything matching app*.xml.
names = glob.glob('ch*.xml')
names.extend(glob.glob('app*.xml'))

# First pass: find the highest-numbered paragraph ID.
#
# Every existing x_<hex> ID in every file is collected so that
#   * biggest_id ends up as the largest ID already in use (0 if none), and
#   * errs counts how many times an ID was encountered that had already
#     been seen earlier (in the same file or in a previous one).

biggest_id = 0
seen = set()
errs = 0

for name in names:
    # Read the whole file up front and close it right away instead of
    # leaving the handle for the garbage collector.
    with open(name) as fp:
        text = fp.read()
    for m in tagged.finditer(text):
        i = int(m.group(1), 16)
        if i in seen:
            # Report the ID as it is spelled in the file (x_<hex>) so the
            # message can be searched for directly.  sys.stderr.write is
            # used because it behaves the same under Python 2 and 3.
            sys.stderr.write('%s: duplication of ID x_%s\n'
                             % (name, m.group(1)))
            errs += 1
        seen.add(i)
        biggest_id = max(biggest_id, i)

def retag(s):
    """Return a <para> opening tag carrying the next unused ID.

    Used as the replacement callback for ``untagged.sub``.  The argument
    ``s`` (the match object handed over by ``re``) is not inspected.

    Side effect: increments the module-level ``biggest_id`` counter, so
    every call yields a new ID, formatted as lowercase hex after ``x_``.
    """
    global biggest_id
    next_id = biggest_id + 1
    biggest_id = next_id
    return '<para id="x_%x">' % next_id

# Second pass: add IDs to paragraphs that currently lack them.
#
# A file is only rewritten when the substitution actually changed it.  The
# new content goes to a sibling '<name>.tmp' file first and is renamed over
# the original afterwards, so the original is replaced only once the new
# content has been written out completely.

for name in names:
    with open(name) as fp:
        f = fp.read()
    f1 = untagged.sub(retag, f)
    if f1 != f:
        tmpname = name + '.tmp'
        # The with-block guarantees the temp file is flushed and closed
        # before the rename, even if write() raises.
        with open(tmpname, 'w') as fp:
            fp.write(f1)
        # NOTE: os.rename raises on Windows when the destination already
        # exists; this step relies on POSIX rename semantics.
        os.rename(tmpname, name)

# Exit status is the number of duplicate IDs found (0 means success).  It is
# capped at 255 because POSIX keeps only the low 8 bits of the status, so an
# uncapped count that is a multiple of 256 would be reported as 0.
sys.exit(min(errs, 255))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.