# Source
#
# pysecsgem / secsgem / sml.py
#
# Full commit
"""Support for SML string. (SEMI Markup Language)

This module provides SML string manipulation functions.
"""


__author__ = 'Gabriel AHTUNE'
__version__ = '2.0'
__date__ = "4 November 2012"


import re


def get_function(sml):
    """ Get the function number from the header of a SML string.

    The ``S<stream>F<function>`` header has to be the first thing in the
    string (leading whitespace is ignored).  The pattern is anchored at the
    start so that text which merely looks like a header, for instance inside
    an ASCII item, is not reported as the function number.

    Return the function number as an int, or -1 if there is no header.

    >>> get_function("S113F23 W <L <U1 123><U2 1234><L <U1 1>>>")
    23
    >>> get_function('<L <A "S1F2">>')
    -1

    """
    header = re.match(r"S\d+F(\d+)", sml.lstrip())
    if header is None:
        return -1
    return int(header.group(1))


def get_stream(sml):
    """ Get the stream number from the header of a SML string.

    The ``S<stream>F<function>`` header has to be the first thing in the
    string (leading whitespace is ignored).  The pattern is anchored at the
    start so that text which merely looks like a header, for instance inside
    an ASCII item, is not reported as the stream number.

    Return the stream number as an int, or -1 if there is no header.

    >>> get_stream("S3F23 W <L <U1 123><U2 1234><L <U1 1>>>")
    3
    >>> get_stream('<L <A "S1F2">>')
    -1

    """
    header = re.match(r"S(\d+)F\d+", sml.lstrip())
    if header is None:
        return -1
    return int(header.group(1))


def get_w_flag(sml):
    """ Get the W flag, return True or False.

    The flag is the token following the ``S<stream>F<function>`` header.
    It is recognised when written as ``W``, ``w`` or ``1``.  Leading
    whitespace before the header is ignored.

    >>> get_w_flag("S113F23 W <L <U1 123><U2 1234><L <U1 1>>>")
    True
    >>> get_w_flag("S113F23 <L <U1 123><U2 1234><L <U1 1>>>")
    False

    """
    # One character class covers the spellings the flag may take.
    header = re.match(r"S\d+F\d+\s+[Ww1]", sml.lstrip())
    return header is not None

def get_msg(sml):
    """ Get the content of the SML message (everything after the header).

    The header is ``S<stream>F<function>`` followed by whitespace and an
    optional W flag (``W``, ``w`` or ``1``, the spellings get_w_flag()
    recognises).  Whitespace around the message is ignored.

    Return the stripped body, or '' when the string has no header or no
    body.

    >>> get_msg("S113F23 W <L <U1 123><U2 1234><L <U1 1>>>")
    '<L <U1 123><U2 1234><L <U1 1>>>'
    >>> get_msg("S113F23 <L <U1 123><U2 1234><L <U1 1>>>")
    '<L <U1 123><U2 1234><L <U1 1>>>'
    >>> get_msg("S113F23 ")
    ''
    >>> get_msg("   S1F1 W <L <U1 1>>")
    '<L <U1 1>>'

    """
    # Match and slice the SAME string: measuring the header on the stripped
    # text but cutting the unstripped one would leave header characters in
    # the body whenever the input starts with whitespace.
    text = sml.lstrip()
    head = re.match(r"S\d+F\d+\s+[Ww1]?", text)
    if head is None:
        return ''
    return text[head.end():].strip()


def sml2list(sml):
    """ Build a list structure according to a SML string.

    Every ``<L ...>`` becomes a Python list; every other item is kept as
    its SML text.  If the string holds a single non-list item, that item's
    text is returned instead of a list.  A string that does not end with
    ``>`` (ignoring trailing whitespace) yields ''.

    >>> sml2list("<L <U1 123><U2 1234><L <U1 1>>>")
    ['<U1 123>', '<U2 1234>', ['<U1 1>']]
    >>> sml2list("<L <U1 123><U2 1234><L <U1 1><L <U1 2>>>>")
    ['<U1 123>', '<U2 1234>', ['<U1 1>', ['<U1 2>']]]
    >>> sml2list("<U1 123>")
    '<U1 123>'
    >>> sml2list('<L <A "<<<>>123">>')
    ['<A "<<<>>123">']
    >>> sml2list('<L <U1 1>> ')
    ['<U1 1>']
    >>> sml2list('')
    ''

    """
    if re.search(r">\s*$", sml) is None:
        return ""

    # Drop trailing whitespace up front.  Otherwise, once the outermost list
    # is closed, the whitespace left over is neither a list start, a list
    # end nor an item, and the loop below would never consume it.
    sml = sml.rstrip()

    stack = []                               # lists currently being filled
    while sml:
        if re.match(r"\s*<L", sml) is not None:
            # A list begins: skip past "<L" and open a new level.
            sml = sml[sml.find("<L") + 2:]
            stack.append([])
        elif re.match(r"\s*>", sml):
            # The current list ends: attach it to its parent, or keep it
            # as the result when it is the outermost one.
            finished = stack.pop()
            sml = sml[sml.find('>') + 1:]
            if stack:
                stack[-1].append(finished)
            else:
                stack.append(finished)
        else:
            # A plain item: cut it out as text.
            end = find_element_end(sml)
            element = sml[:end].strip()
            sml = sml[end:]
            if not stack:
                return element               # single item outside any list
            stack[-1].append(element)
    return stack[0]


def find_element_end(sml, start=0):
    """ Return the index just past the first complete element of sml.

    Scanning begins at the first ``<`` found at or after ``start`` and
    stops at the ``>`` that balances it.  Angle brackets between double
    quotes are ignored, so they may appear inside ASCII values.

    ``start`` is expected to be a non-negative index.

    Raise ValueError (a subclass of Exception) if there is no ``<`` to
    start from or if the element is never closed.

    >>> find_element_end('<L <A "<<<>>123">>')
    18
    """
    begin = sml.find('<', start)
    if begin == -1:
        # Without this check the -1 would be used as a position and the
        # scan would start from the end of the string.
        raise ValueError("SML message not well formed")

    in_quote = False
    depth = 0
    for i in range(begin, len(sml)):
        char = sml[i]
        if char == '"':
            in_quote = not in_quote
        elif not in_quote:
            if char == '<':
                depth += 1
            elif char == '>':
                depth -= 1
                if depth == 0:
                    return i + 1
    raise ValueError("SML message not well formed")


def tosml(element):
    """Return a SML string for element.

    A string (or an instance of a str subclass) is taken to be SML text
    already and is returned unchanged.  Anything else must provide ``kind``
    and ``value`` attributes:

    - kind "A": the value is placed between double quotes;
    - value is a list: its items, which must be strings, are concatenated;
    - otherwise the value is formatted as is.
    """
    if isinstance(element, str):
        return element

    if element.kind == "A":
        return '<A "{}">'.format(element.value)
    if isinstance(element.value, list):
        return '<{} {}>'.format(element.kind, ''.join(element.value))
    return '<{} {}>'.format(element.kind, element.value)


def kind(sml_element):
    """Return the type of the element.

    The type is the word that directly follows the opening ``<``; leading
    whitespace is ignored.

    Raise AttributeError if the text does not start with ``<`` followed by
    a word (the pattern does not match, so there is no group to read).

    >>> kind('<U1 123>')
    'U1'
    >>> kind('  <L <U1 1>>')
    'L'
    """
    return re.match(r"\s*<(\w+)", sml_element).group(1)


def _item_tokens(sml_element):
    """Return the whitespace-separated tokens that follow the type code."""
    head = sml_element.partition(">")[0]
    # split() copes with repeated blanks, tabs and line breaks; the first
    # token is the "<TYPE" marker itself.
    return head.split()[1:]


def _scalar_or_list(items):
    """Return the only entry of a one-element list, else the list itself."""
    return items[0] if len(items) == 1 else items


def _int_or_float(token):
    """Convert token to int when possible, to float otherwise."""
    try:
        return int(token)
    except ValueError:
        return float(token)


def value(sml_element):
    r""" Return the value of the element.

    Surrounding whitespace is ignored.  The result depends on the type:

    - L: the structure built by sml2list
    - Ix / Ux: an int, or a list of ints when several values are given
    - Fx: like Ix / Ux, except that values which are not plain integers
      are returned as floats
    - A: the text between the quotes, with ``\"`` turned into ``"``
    - BO...: True if the element contains the character "1", else False
    - B: the values read as hexadecimal; an int, or a list of ints
    - any other type: None

    An item without values (``<U1>``, ``<B>``) gives an empty list.

    >>> value('<A "hello">')
    'hello'
    >>> value('<U1 123>')
    123
    >>> value('<U1 1 2 3>')
    [1, 2, 3]
    >>> value('<L>')
    []
    >>> value('<U2 1234>')
    1234
    >>> value('<F4 1.5>')
    1.5
    >>> value('<B 0x3C 3F 0x78 0x6D 0x6C 0x20>')
    [60, 63, 120, 109, 108, 32]
    >>> value('<A "12<>">')
    '12<>'
    >>> value(r'<A "\"">')
    '"'
    """
    sml_element = sml_element.strip()
    type_char = sml_element[1]

    if type_char == 'L':
        return sml2list(sml_element)

    if type_char in ('F', 'I', 'U'):
        # Only F items may legitimately hold non-integer numbers.
        convert = _int_or_float if type_char == 'F' else int
        return _scalar_or_list(
            [convert(token) for token in _item_tokens(sml_element)])

    if type_char == 'A':
        # DOTALL: the quoted text may span several lines.
        text = re.search(r'<A\s*"(.*)"\s*>$', sml_element,
                         re.DOTALL).group(1)
        return text.replace(r'\"', '"')

    if sml_element[1:3] == "BO":
        return "1" in sml_element

    if re.match(r"<B[\s>]", sml_element):
        return _scalar_or_list(
            [int(token, 16) for token in _item_tokens(sml_element)])

    return None


def _compress_markup(text):
    """Compress a stretch of SML that contains no quoted string."""
    # Line breaks and tabs separate tokens just like blanks do, so they are
    # collapsed into a single space rather than deleted (deleting them
    # would glue "1\n2" into "12").
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r">\s+<", "><", text)
    text = re.sub(r">\s+>", ">>", text)
    text = re.sub(r"\s+>", ">", text)
    # Drop the optional length information: "<L [3]", "<U1 [1/2]", ...
    text = re.sub(r"<L[\s\[\]/\d]+", "<L", text)
    text = re.sub(r"<([\w\d]+)\s*\[[/\d]+\]", r"<\1", text)
    text = text.replace("<L<", "<L <")
    text = text.replace("<L>", "<L >")
    return text


def compress(sml):
    """ compress a secs message

    This function removes all extra characters, including new lines, of
    the SML string.  Text between double quotes is copied unchanged, so
    the content of ASCII values (padding blanks, brackets, ...) survives.

    >>> compress('<L [3]\\n    <U1 123>\\n    <U2 1234>\\n    <L [1]\\n        <U1 1>\\n    >\\n>')
    '<L <U1 123><U2 1234><L <U1 1>>>'
    >>> compress('<A "LOT  01 ">')
    '<A "LOT  01 ">'

    """
    # The capturing group makes split() return the quoted strings as well:
    # even positions hold markup, odd positions hold quoted text.
    parts = re.split(r'("[^"]*")', sml)
    return "".join(
        part if index % 2 else _compress_markup(part)
        for index, part in enumerate(parts))


def print_list(l, indent=0):
    """Return the items of l as indented SML text, one item per line.

    Nested lists are written as ``<L`` ... ``>`` with their content one
    level deeper (four blanks per level); every other item is written with
    str().  ``indent`` is the level of the items of l.  An empty list is
    written as ``<L>``.  An empty string gives ''.

    >>> print_list(['<U4 3>', []])
    '<U4 3>\\n<L>'
    """
    if l == '' or l == b'':
        return ""
    space_indent = "    "
    pad = space_indent * indent
    res = []
    for element in l:
        if isinstance(element, list):
            res.append(pad + "<L")
            res.append(print_list(element, indent + 1))
            res.append(pad + ">")
        else:
            res.append(pad + str(element))
    text = "\n".join(res)
    # An empty list was rendered as "<L", a blank line and ">": fold it.
    return re.sub(r"<L\s*>", "<L>", text)


def prettify(msg):
    """Prettify a SML message

    This function prettifies a message (indents the lists).  msg is either
    a SML string, with or without a ``S<stream>F<function>`` header, or a
    structure such as the one sml2list returns.  An empty string gives ''.

    >>> prettify(['<U4 3>', '<U4 1066>', []])
    '<L\\n    <U4 3>\\n    <U4 1066>\\n    <L>\\n>'
    """
    if msg == "":
        return ""
    if not isinstance(msg, str):
        return print_list([msg], 0)

    # Only a header at the very beginning counts.  Text that looks like a
    # header further inside (e.g. in an ASCII value) must not turn the
    # message into "header plus empty body", which would drop its content.
    if re.match(r"\s*S\d+F\d+", msg) is None:
        return print_list([sml2list(msg)], 0)

    stream = get_stream(msg)
    function = get_function(msg)
    w_flag = 'W ' if get_w_flag(msg) else ''
    body = print_list([sml2list(get_msg(msg))], 0)
    return "S{}F{} {}\n{}".format(stream, function, w_flag, body)