Commits

Luke Plant committed a6ff00a

Unicodised, and added support for retrieving Bible verses where they are missing

  • Participants
  • Parent commits 5cbfd27

Comments (0)

Files changed (3)

File bibleverses/read.py

 import yaml
 import sqlite3
-import Sword
-import re
+
+# The YAML library correctly interprets UTF-8 sequences in the input
+# data, but it returns Python 'unicode' objects only if non-ASCII
+# characters are found, and 'str' objects otherwise.  This is annoying,
+# but we simply build custom objects for everything, and initialise
+# using 'foo = unicode(yaml_foo_data)', which is safe.
 
 VERSEYAMLPAGENAME = 'BibleMemorisationYaml'
 TOPICYAMLPAGENAME = 'BibleMemorisationTopicYaml'
 SCHEMEYAMLPAGENAME = 'BibleMemorisationSchemeYaml'
 DATABASE = "/home/luke/misc/notes/db/trac.db"
 
-_parse_verse_re = re.compile("^[\dI]* *[^ ]* (.*)") 
-def correct_verse_ref(verseref):
-    vk = Sword.VerseKey(verseref)
-    book = vk.getBookName()
-    # Sword cannot easily return the rest of it nicely,
-    # so we hack around with parsing bits ourselves.
-    
-    return book + " " + _parse_verse_re.match(verseref).groups()[0]
+from sword import get_bible_text, correct_verse_ref
 
 def get_page(name):
     c = sqlite3.connect(DATABASE)
 class Verse(object):
     def __init__(self, verse, texts=None, categories=None):
         self.verse = correct_verse_ref(verse)
-        self.texts = texts or {}
-        self.categories = categories or {}
+        if not texts:
+            texts = {}
+        if not texts.has_key('ESV'):
+            texts['ESV'] = get_bible_text(self.verse, 'ESV')
+        for k, v in texts.items():
+            texts[k] = unicode(v)
+        self.texts = texts
+        self.categories = categories or {}        
 
     def __str__(self):
         return self.verse
 
     def __repr__(self):
-        return "<Verse: %s>" % self
+        return u"<Verse: %s>" % self
 
 def get_verses():
     """Returns the verse data, in a format directly based on the source YAML (lists, strings, dicts)"""
         return None
 
     def __repr__(self):
-        return "<TopicNode: %s>" % self.name
+        return u"<TopicNode: %s>" % self.name
 
 def get_topictree():
     rawtree = list(yaml.load(get_topictree_raw_data()))
 
 class TopicEntry(object):
     def __init__(self, topic, verse, comment, level):
-        self.topic = topic
-        self.verse = verse
-        self.comment = comment
+        self.topic = unicode(topic)
+        self.verse = unicode(verse)
+        self.comment = unicode(comment)
         self.level = level
     def __repr__(self):
-        return '<Entry: %s || %s || level %d || %s>' % (self.verse, self.topic, self.level, self.comment)
+        return u'<Entry: %s || %s || level %d || %s>' % (self.verse, self.topic, self.level, self.comment)
 
 def get_topic_entries(verses):
     """Builds a dictionary of topics with their associated verses"""
     ORDER_LINEAR = 'linear'
     ORDER_CYCLE_TOPICS = 'cycle-topics'
     ORDERS = set([ORDER_LINEAR, ORDER_CYCLE_TOPICS])
-    def __init__(self, name='', description='', topics=None, 
+    def __init__(self, name=u'', description=u'', topics=None, 
                  max=None, previous=None, order=None, levels=None,
                  items=None):
         if name == "" or name is None:
-            raise ValueError("Scheme does not have name")
-        self.name = name
-        self.description = description or ''
+            raise ValueError(u"Scheme does not have name")
+        self.name = unicode(name)
+        self.description = unicode(description or '')
         self.topics = topics
         self.max = max
         self.previous = previous or []
         if order not in Scheme.ORDERS:
-            raise ValueError("Order '%s' for scheme '%s' is unknown" % (order, name) )
+            raise ValueError(u"Order '%s' for scheme '%s' is unknown" % (order, name) )
         self.order = order
         self.levels = levels or []
         self.items = items or []
+
     def __repr__(self):
-        return "<Scheme: %s>" % self.name
+        return u"<Scheme: %s>" % self.name
 
 def get_schemes():
     data = list(yaml.load_all(get_schemes_raw_data()))
         for name in s.previous:
             s2 = scheme_dict.get(name)
             if s2 is None:
-                raise ValueError("Scheme '%s' referred to by scheme '%s' does not exist." \
+                raise ValueError(u"Scheme '%s' referred to by scheme '%s' does not exist." \
                                      % (name, s.name))
             prevlist.append(s2)
         s.previous = prevlist

File bibleverses/sword.py

+import Sword
+import re
+
+sword_mgr = Sword.SWMgr()
+#sword_mgr = Sword.SWMgr(None, None, True, Sword.EncodingFilterMgr(Sword.ENC_UTF8))
+sword_mgr.setGlobalOption("Footnotes", "Off")
+sword_mgr.setGlobalOption("Cross-references", "Off")
+
+
+_bible_modules = {}
+def _get_module(name):
+    try:
+        return _bible_modules[name]
+    except KeyError:
+        mod = sword_mgr.getModule(name)
+        _bible_modules[name] = mod
+        return mod
+
+def _get_plain_text(mod):
+    # .StripText() removes line breaks, with especially bad results
+    # in the case of poetry, so we have a custom method.
+    text = mod.RenderText()
+    sourcetype = mod.getConfigEntry("SourceType")
+    if sourcetype == "OSIS":
+        text = re.sub(r'<l[^>]*?type="x-br"[^>]*?/>', '\n', text) # line breaks
+        text = re.sub(r'<lb[^>]*?type="x-end-paragraph"[^>]*?/>', '\n', text)
+        text = re.sub(r'<[^>]*>', '', text) # all other tags
+    else:
+        raise Exception("Don't know how to render '%s' modules" % sourcetype)
+    return text.decode("UTF-8")
+
+def get_bible_text(verseref, modulename, raw=False):
+    mod = _get_module(modulename)
+    vk = Sword.VerseKey.castTo(mod.getKey())
+    vk.AutoNormalize(0)
+    lk = vk.ParseVerseList(verseref, "Genesis 1:1", True)
+    out = []
+    if raw:
+        conv = lambda mod: mod.RenderText()
+    else:
+        conv = _get_plain_text
+    for i in range(0, lk.Count()):
+        e = Sword.VerseKey.castTo(lk.getElement(i))
+        if e is not None:
+            mod.Key(e.LowerBound())
+            while mod.Key().compare(e.UpperBound()) <= 0:
+                out.append(conv(mod))
+                out.append(u" ")
+                mod.increment(1)
+        else:
+            mod.Key(lk.getElement(i))
+            out.append(conv(mod))
+    return u''.join(out)
+
+_parse_verse_re = re.compile("^[\dI]* *[^ ]* (.*)") 
+def correct_verse_ref(verseref):
+    vk = Sword.VerseKey(verseref)
+    book = vk.getBookName()
+    # Sword cannot easily return the rest of it nicely,
+    # so we hack around with parsing bits ourselves.    
+    return book + " " + _parse_verse_re.match(verseref).groups()[0]

File bibleverses/update.py

     write_page(SCHEMESPAGENAME, make_schemes_page(verses, topictree, schemes))
 
 def make_hebrew_verse(text):
-    return """
+    return u"""
 {{{
 #!html
 <div style="font-size: 2em; font-family: Baekmuk Batang,serif;"><blockquote>""" + \
-text.replace("\n", "<br/>") + """
+text.replace(u"\n", u"<br/>") + u"""
 <blockquote/></div>
 }}}"""
 
 def write_verse_text(verse, buf):
     for version, text in verse.texts.items():
-        buf.write("\n\n")
+        buf.write(u"\n\n")
         if version == "BHS":
             # Hebrew
             buf.write(make_hebrew_verse(text))
         else:
-            for l in text.strip().split("\n"):
-                buf.write(' ')
+            for l in text.strip().split(u"\n"):
+                buf.write(u' ')
                 buf.write(l)
-                buf.write(" [[BR]]\n")
+                buf.write(u" [[BR]]\n")
 
 def make_done_page(verses):
     buf = StringIO()
     buf.write(
-"""
+u"""
 = Bible Memorisation Verses =
 
 This page is autogenerated from BibleMemorisationYaml - do not edit.
 
 """)
     for v in verses:
-        buf.write('bible:"%s"\n' % v)
+        buf.write(u'bible:"%s"\n' % v)
         write_verse_text(v, buf)
-        buf.write("\n")
+        buf.write(u"\n")
     return buf.getvalue()
 
 
 def bible_entry(entry):
-    return "bible:\"%s\" -- %s\n" % (entry.verse, entry.comment)
+    return u"bible:\"%s\" -- %s\n" % (entry.verse, entry.comment)
 
 def make_topics_page(verses, topictree, extras):
     buf = StringIO()
-    buf.write("""
+    buf.write(u"""
 = Bible Memorisation Topics =
 
 This page is autogenerated from BibleMemorisationYaml and BibleMemorisationTopicYaml - do not edit
 """)
     
     def print_topic_node(node, buf, indentation_level=0):
-        padding = " " * indentation_level
-        buf.write("%s * '''%s'''\n" % (padding, node.name))
+        padding = u" " * indentation_level
+        buf.write(u"%s * '''%s'''\n" % (padding, node.name))
         if len(node.entries) > 0:
             for e in node.entries:
-                buf.write("%s   * %s" % (padding, bible_entry(e)))
+                buf.write(u"%s   * %s" % (padding, bible_entry(e)))
         for n in node.nodes:
             print_topic_node(n, buf, indentation_level+2)
     
     for n in topictree.nodes:
         print_topic_node(n, buf)
 
-    buf.write("\n\n== Uncategorised ==\n")
+    buf.write(u"\n\n== Uncategorised ==\n")
     for k, val in extras.iteritems():
-        buf.write(" * %s\n" % k)
+        buf.write(u" * %s\n" % k)
         for e in val:
-            buf.write("   * %s" % bible_entry(e))
+            buf.write(u"   * %s" % bible_entry(e))
         
     return buf.getvalue()
 
 def make_schemes_page(verses, topictree, schemes):
     buf = StringIO()
-    buf.write("""
+    buf.write(u"""
 = Bible Memorisation Schemes =
 
 [[TOC]]
 """)
     build_schemes(verses, topictree, schemes)
     for s in schemes:
-        buf.write("\n== %s ==\n" % s.name)
-        buf.write(s.description + "\n\n")
+        buf.write(u"\n== %s ==\n" % s.name)
+        buf.write(s.description + u"\n\n")
         for item in s.items:
-            buf.write("\nbible:\"%s\"\n" % item.verse.verse)
+            buf.write(u"\nbible:\"%s\"\n" % item.verse.verse)
             for e in item.entries:
-                buf.write(" * '''%s'''" % e.topic)
+                buf.write(u" * '''%s'''" % e.topic)
                 if e.comment:
-                    buf.write(" :: %s" % e.comment)
-                buf.write("\n")
+                    buf.write(u" :: %s" % e.comment)
+                buf.write(u"\n")
             write_verse_text(item.verse, buf)
     return buf.getvalue()