Source

pyorg / pyorg / parser.py

Full commit
"""Parser related stuff
"""
import re
from datetime import date
def maxhead(text):
    """Tell which is the topmost headline level present in ``text``.

    A headline is a line that starts with one or more ``*`` followed by a
    whitespace character; its level is the number of stars.

    Returns the smallest level found (the topmost one), or None when no
    line of ``text`` is a headline.
    """
    # Raw string: '\*' and '\s' are regex escapes, not Python string escapes.
    headline = re.compile(r'^(\*+)\s(.*?)\s*$')
    matches = (headline.match(line) for line in text.splitlines())
    levels = [len(found.group(1)) for found in matches if found]

    if not levels:
        return None
    return min(levels)

def splitheads(text):
    """Split ``text`` at its topmost headlines.

    The topmost level is the one reported by ``maxhead``. Each returned
    string starts at a headline of that level and runs up to (not
    including) the next headline of the same level, or to the end of
    ``text`` for the last one. Anything before the first such headline is
    not part of any returned chunk.

    Returns a list of strings; the list is empty when ``text`` has no
    headline.
    """
    top_level = maxhead(text)
    if top_level is None:
        return []

    # Exactly top_level stars at the start of a line, followed by whitespace
    # on that same line. Newline is excluded so that a stars-only line is not
    # taken for a headline, matching the line-by-line check done in maxhead.
    headline = re.compile(r'^\*{%i}[^\S\n]' % top_level, flags=re.MULTILINE)
    starts = [found.start() for found in headline.finditer(text)]

    # Every chunk ends where the next one starts; the last ends with the text.
    ends = starts[1:] + [len(text)]
    return [text[start:end] for start, end in zip(starts, ends)]
            
def parse_tags(head):
    """Parse the tag entries on an org headline and return them as a list.

    Tags are the trailing ``:tag1:tag2:`` group of the headline, optionally
    followed by whitespace. Besides word characters, ``@``, ``#`` and ``%``
    are accepted inside a tag, so ``:@home:work:`` yields both tags.

    Returns the list of tag names in headline order, without empty entries;
    the list is empty when the headline carries no tag group.
    """
    match = re.search(r':([\w@#%:]*):\s*$', head)
    if match is None:
        return []
    # Drop empty pieces so that '::' or ':a::b:' never produce '' as a tag.
    return [tag for tag in match.group(1).split(':') if tag]

def parse_properties(props):
    """Parse the contents of the properties section into a dictionary.

    Every line of the form ``:KEY: value`` contributes one entry; leading
    and trailing whitespace around the value is stripped and a missing
    value becomes the empty string. Lines of any other shape are ignored.
    When a key occurs more than once, the last occurrence wins.

    Returns a dict mapping property names to their string values.
    """
    # [^\S\n] is "whitespace except newline": under re.MULTILINE a plain \s*
    # would run across the line break, making a property with an empty value
    # absorb the whole following line as its value.
    regex = re.compile(r"^[^\S\n]*:(\w+):[^\S\n]*(.*?)[^\S\n]*$", re.MULTILINE)
    return dict(regex.findall(props))

def parse_date(datestr):
    """Parse a typical date formatted in the org mode style.

    Looks for the first active timestamp such as ``<2011-05-03 Tue>`` or
    ``<2011-05-03 Tue 10:00>`` anywhere in ``datestr``. The part after the
    date (day name, time, ...) is optional, so ``<2011-05-03>`` is accepted
    as well; whatever follows the date is ignored.

    Returns a ``datetime.date``, or None when no timestamp is found.
    Raises ValueError when the digits do not form a valid calendar date.
    """
    regex = re.compile(r"<(\d{4})-(\d{2})-(\d{2})(?: [^>\n]*)?>")
    match = regex.search(datestr)
    if match is None:
        return None
    year, month, day = [int(part) for part in match.groups()]
    return date(year, month, day)
        
class Node(object):
    """Represent one headline plus the text below it in an org file.

    Attributes set by the constructor:
      text     -- the raw text the node was built from
      headline -- the first line of ``text``
      level    -- value of ``maxhead(text)``: the smallest star count among
                  the headline lines of ``text`` (None if there is none)
      tags     -- tags parsed from ``headline`` by ``parse_tags``
      childs   -- list of Node objects built from the text below the
                  headline
    """
    def __init__(self, text):
        """Build the node from ``text`` and, recursively, its children.

        ``text`` must contain at least one line: for an empty string
        ``text.splitlines()[0]`` raises IndexError.
        """
        self.text = text
        self.headline = text.splitlines()[0]
        
        # maxhead looks at every line of text, not only at the headline.
        # NOTE(review): presumably the first line is always the shallowest
        # headline, so this equals the node's own level -- confirm at callers.
        self.level = maxhead(text)
        self.tags = parse_tags(self.headline)

        # Remove the headline, then let splitheads cut the remainder at its
        # own topmost headline level; each chunk becomes a child node.
        nohead = '\n'.join(text.splitlines()[1:])
        self.childs = [Node(head) for head in splitheads(nohead)]