Source

thesis / tools / zdecode.py

Full commit
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
A simple script for z-decoding a file based on the rules in:
http://hackage.haskell.org/trac/ghc/wiki/Commentary/Compiler/SymbolNames

Example usage: python zdecode.py example.hcr
"""
from __future__ import print_function
import sys
import string


# Fixed two-character escapes of GHC's z-encoding and the character each one
# stands for.  The variable-length forms -- tuples (Z<n>T / Z<n>H) and
# numeric escapes (z<hex>U) -- are handled in z_decode itself.
MAPPING = {'ZL': '(', 'ZR': ')', 'ZM': '[',
           'ZN': ']', 'ZC': ':', 'ZZ': 'Z',
           'zz': 'z', 'za': '&', 'zb': '|', 'zc': '^',
           'zd': '$', 'ze': '=', 'zg': '>', 'zh': '#',
           'zi': '.', 'zl': '<', 'zm': '-', 'zn': '!',
           'zp': '+', 'zq': "'", 'zr': '\\', 'zs': '/',
           'zt': '*', 'zu': '_', 'zv': '%'}


def _decode_tuple(arity, kind):
    """Return the tuple constructor encoded as ``Z<arity>T`` / ``Z<arity>H``.

    *kind* is ``'T'`` for a boxed tuple and ``'H'`` for an unboxed one.
    """
    commas = ',' * max(arity - 1, 0)
    if kind == 'T':
        return '(%s)' % commas
    if arity == 1:
        # The unboxed 1-tuple is spelled with a space between the hashes.
        return '(# #)'
    return '(#%s#)' % commas


def _decode_code_point(hex_digits, encoded):
    """Return the character for a ``z<hex>U`` escape, or *encoded* unchanged.

    *hex_digits* is the text between ``z`` and ``U``; *encoded* is the whole
    escape as it appeared in the input.  GHC always starts the digits with a
    decimal digit, so anything else is not treated as a numeric escape.
    """
    if not hex_digits or hex_digits[0] not in string.digits:
        return encoded
    try:
        code = int(hex_digits, 16)
        try:
            char = unichr(code)  # Python 2: chr() only covers 0-255
        except NameError:
            char = chr(code)     # Python 3
    except (ValueError, OverflowError):
        # Outside the Unicode range: leave the text as it was.
        return encoded
    if isinstance(encoded, bytes) and not isinstance(char, bytes):
        # Python 2 byte-string input: keep the output a byte string too.
        char = char.encode('utf-8')
    return char


def z_decode(text):
    """Return *text* with every z-encoded sequence replaced by its meaning.

    Recognised sequences are the two-character escapes in MAPPING, tuple
    constructors (``Z<n>T`` boxed, ``Z<n>H`` unboxed) and numeric character
    escapes (``z<hex>U``).  Anything that does not form a complete, valid
    escape is copied to the output unchanged, so arbitrary text can be fed
    through safely.
    """
    pieces = []   # decoded fragments, joined once at the end
    lexeme = ''   # escape prefix read so far but not yet resolved

    for char in text:
        key = lexeme + char

        if key in MAPPING:
            key = MAPPING[key]
        elif char in ('Z', 'z'):
            # A new escape may start here; flush whatever was pending.
            pieces.append(lexeme)
            lexeme = char
            continue

        # Numeric character escape: z<hex digits>U
        elif lexeme.startswith('z'):
            if char in string.hexdigits:
                lexeme += char
                continue
            elif char == 'U':
                key = _decode_code_point(lexeme[1:], key)

        # Tuples, boxed or unboxed: Z<digits>T / Z<digits>H
        elif lexeme.startswith('Z'):
            if char in string.digits:
                lexeme += char
                continue
            elif char in ('T', 'H') and len(lexeme) > 1:
                # Without at least one digit this is not a tuple escape
                # (and int('') would raise), so it is emitted verbatim.
                key = _decode_tuple(int(lexeme[1:]), char)

        pieces.append(key)
        lexeme = ''

    # An escape left unfinished at end of input is emitted verbatim.
    pieces.append(lexeme)
    return ''.join(pieces)


def main(infile):
    """Print the z-decoded contents of the file at path *infile*.

    The whole file is read into memory, decoded in one pass and written to
    standard output.
    """
    with open(infile, 'r') as source:
        encoded = source.read()
    decoded = z_decode(encoded)
    print(decoded)


if __name__ == '__main__':
    # The input path is the only (mandatory) command-line argument; without
    # it, show the error plus the module usage text and exit with status 2.
    arguments = sys.argv[1:]
    if not arguments:
        print("Error, missing path of input file.")
        print(__doc__)
        sys.exit(2)
    main(arguments[0])
    sys.exit()