Source

lukeplant_python / lukeplant_me_uk / bibleverses / sword.py

Full commit
import Sword
import re

# Module-wide Sword manager; _get_module() below asks it for modules by name.
sword_mgr = Sword.SWMgr()
# Disabled alternative that constructs the manager with an explicit UTF-8
# encoding filter manager:
#sword_mgr = Sword.SWMgr(None, None, True, Sword.EncodingFilterMgr(Sword.ENC_UTF8))
# Switch the "Footnotes" and "Cross-references" global options off
# (presumably so rendered verse text carries neither -- confirm against the
# Sword option documentation).
sword_mgr.setGlobalOption("Footnotes", "Off")
sword_mgr.setGlobalOption("Cross-references", "Off")


# Cache of module objects already fetched from sword_mgr, keyed by module name.
_bible_modules = {}


def _get_module(name):
    """Return the Sword module called ``name``, fetching it at most once.

    The first request for a name asks ``sword_mgr`` for the module and
    stores whatever it returns; later requests are answered from the cache.
    """
    if name in _bible_modules:
        return _bible_modules[name]
    module = sword_mgr.getModule(name)
    _bible_modules[name] = module
    return module

def _get_plain_text(mod):
    """Return the entry ``mod`` is currently positioned at as plain unicode.

    ``StripText()`` is deliberately not used: it removes line breaks, with
    especially bad results in the case of poetry, so markup is stripped by
    hand here instead.

    Supported modules are those whose "SourceType" config entry is "OSIS",
    plus the ThML module named "WHNU" (which doesn't seem to have any
    markup and is returned as rendered).

    Args:
        mod: a Sword module object providing ``RenderText()``,
            ``getConfigEntry()`` and ``Name()``.

    Returns:
        The rendered text as unicode, with OSIS line-break markers turned
        into newlines, divine names upper-cased and all other tags removed.

    Raises:
        Exception: if the module's source type is not one handled here.
    """
    text = mod.RenderText()
    sourcetype = mod.getConfigEntry("SourceType")
    if sourcetype == "ThML":
        if mod.Name() != "WHNU":
            raise Exception("Don't know how to render ThML modules (except WHNU).")
    elif sourcetype != "OSIS":
        raise Exception("Don't know how to render '%s' modules" % sourcetype)

    # Decode *before* any text processing: upper() on UTF-8 encoded bytes
    # leaves non-ASCII letters untouched. Text that is already decoded is
    # kept as it is rather than decoded a second time.
    if isinstance(text, bytes):
        text = text.decode("UTF-8")

    if sourcetype == "OSIS":
        text = re.sub(r'<l[^>]*?type="x-br"[^>]*?/>', '\n', text)  # line breaks
        text = re.sub(r'<lb[^>]*?type="x-end-paragraph"[^>]*?/>', '\n', text)
        text = re.sub(r'<divineName>(.*?)</divineName>',
                      lambda m: m.group(1).upper(), text)
        text = re.sub(r'<[^>]*>', '', text)  # all other tags
    return text

def get_bible_text(verseref, modulename, raw=False):
    """Return the text of ``verseref`` from the module ``modulename``.

    Args:
        verseref: reference string handed to Sword's ``ParseVerseList``.
        modulename: name of the Sword module to read from.
        raw: when true, each entry is the module's ``RenderText()`` output
            decoded as UTF-8 with no further processing; otherwise each
            entry goes through ``_get_plain_text``.

    Returns:
        A unicode string with the text of every referenced entry joined
        together. Within a range, an entry that does not already end in a
        newline or a space is followed by a single space.
    """
    def render_raw(module):
        return module.RenderText().decode("UTF-8")

    render = render_raw if raw else _get_plain_text

    mod = _get_module(modulename)
    parser = Sword.VerseKey.castTo(mod.getKey())
    # NOTE(review): presumably switches off automatic normalisation of
    # references -- confirm against the Sword API.
    parser.AutoNormalize(0)
    # NOTE(review): "Genesis 1:1" and True look like the default context
    # and an expand-ranges flag -- confirm against the Sword API.
    references = parser.ParseVerseList(verseref, "Genesis 1:1", True)

    pieces = []
    for index in range(references.Count()):
        element = references.getElement(index)
        span = Sword.VerseKey.castTo(element)
        if span is None:
            # Not a verse key: position the module on the element itself
            # and take that single entry.
            mod.Key(element)
            pieces.append(render(mod))
            continue

        # Walk the module from the lower to the upper bound, inclusive.
        # NOTE(review): if increment() cannot move past the upper bound
        # (e.g. at the very end of the text) this loop may not terminate
        # -- confirm how Sword reports that condition.
        mod.Key(span.LowerBound())
        while mod.Key().compare(span.UpperBound()) <= 0:
            verse = render(mod)
            pieces.append(verse)
            if not verse.endswith(("\n", " ")):
                pieces.append(u" ")
            mod.increment(1)
    return u"".join(pieces)

# Book names as reported by Sword, mapped to the form returned to callers.
_book_repl = {
    "Revelation of John": "Revelation",
    "Psalms": "Psalm",
}


def get_bookname(vk):
    """Return the book name of ``vk``, applying the ``_book_repl`` overrides.

    ``vk`` must provide ``getBookName()``. Names without an override are
    returned exactly as ``getBookName()`` gives them.
    """
    sword_name = vk.getBookName()
    if sword_name in _book_repl:
        return _book_repl[sword_name]
    return sword_name

# Book name (anything without ':' or ','), a space, then the captured
# chapter/verse part: digits, ':', ',', '-', spaces and a/b/c suffixes.
# Raw string so the backslashes reach the regex engine without relying on
# Python passing unknown string escapes through.
_parse_verse_re = re.compile(r"[^\:,]* ([\d:,\- abc]*)")


def _verse_part(verseref):
    """Return the chapter/verse part of ``verseref``, or None if it has none."""
    match = _parse_verse_re.match(verseref)
    if match is None:
        return None
    return match.group(1)


def correct_verse_ref(verseref):
    """Return ``verseref`` with its book name replaced by the canonical one.

    The book name comes from Sword (via ``get_bookname``). Sword cannot
    easily return the rest of the reference nicely, so the chapter/verse
    part is taken from the input string itself.

    Args:
        verseref: a reference string such as "John 3:16".

    Returns:
        The book name followed by a space and the chapter/verse part, or
        just the book name when the input has no chapter/verse part
        (for example a bare "Jude").
    """
    book = get_bookname(Sword.VerseKey(verseref))
    rest = _verse_part(verseref)
    if rest is None:
        # Previously this case crashed with AttributeError on the failed match.
        return book
    return book + " " + rest