piotrlegnica / stuff

Various stuff and utilities.

Clone this repository (size: 50.9 KB): HTTPS / SSH
$ hg clone http://bitbucket.org/piotrlegnica/stuff/

Changed (Δ5.3 KB):

raw changeset »

.hgignore (1 lines added, 0 lines removed)

README.rst (7 lines added, 0 lines removed)

brainfuck.py (164 lines added, 0 lines removed)

Up to file-list .hgignore:

1
1
glob:*.exe
2
2
glob:*.obj
3
3
glob:*.py[co]
4
glob:brainfuck_tests

Up to file-list README.rst:

@@ -20,6 +20,11 @@ bigints-add.cpp
20
20
    as integer type. Uses base-10 internally, so it requires some memory, but
21
21
    it works. I'm pretty sure there's much room for improvement.
22
22
23
brainfuck.py
24
    My awesome Brainfuck compiler. Generates `flat assembler`_ source code for either
25
    Linux (using ELF formatter and syscalls for I/O) or Windows
26
    (using PE console formatter and msvcrt's getchar/putchar for I/O).
27
23
28
cpp-comments-filter.cpp
24
29
    Like above; removes C++ comments (single- and multiline) from stdin, and outputs
25
30
    the result to stdout, using a simple state machine. Probably can be improved, too.
@@ -103,3 +108,5 @@ writeColor.py
103
108
    ``writeColor(text, foreground, background, terminator)``, where background and terminator
104
109
    are optional (terminator defaults to ``\n``). Probably could be improved, feel free to send
105
110
    patches. See code for color table (append "-bold" to use brighter versions).
111
112
.. _`flat assembler`: http://flatassembler.net

Up to file-list brainfuck.py:

1
# Simple brainfuck-to-asm "compiler"
2
# by PiotrLegnica
3
# WTFPL licensed
4
import hashlib, os
5
6
def optimisable(fn):
    """Flag *fn* as accepting a repeat count and return it unchanged.

    The flag is a plain ``optimisable = True`` attribute on the function
    object; ``compile`` checks for it when collapsing a run of identical
    instructions into a single call.
    """
    setattr(fn, 'optimisable', True)
    return fn
9
10
class CodeGen(object):
    """Interface that every code generator back end has to fill in.

    ``loops`` is a plain list created empty for each instance; the x86 back
    end uses it as a stack of the loop label IDs that are currently open.
    Every other method is a stub that raises ``NotImplementedError``.
    """

    def __init__(self):
        self.loops = []

    def template(self, code):
        """Wrap the generated instruction list *code* into a full program."""
        raise NotImplementedError

    def incr(self):
        """Hook for the ``+`` instruction."""
        raise NotImplementedError

    def decr(self):
        """Hook for the ``-`` instruction."""
        raise NotImplementedError

    def forward(self):
        """Hook for the ``>`` instruction."""
        raise NotImplementedError

    def backward(self):
        """Hook for the ``<`` instruction."""
        raise NotImplementedError

    def outp(self):
        """Hook for the ``.`` instruction."""
        raise NotImplementedError

    def inp(self):
        """Hook for the ``,`` instruction."""
        raise NotImplementedError

    def loopb(self):
        """Hook for the ``[`` instruction."""
        raise NotImplementedError

    def loope(self):
        """Hook for the ``]`` instruction."""
        raise NotImplementedError
23
24
class X86CodeGen(CodeGen):
    """Platform-independent x86 instruction selection.

    ``ebx`` is used as the cell pointer: ``+``/``-`` operate on the byte at
    ``[ebx]`` and ``>``/``<`` adjust ``ebx`` itself.  Every method returns a
    list of assembly source lines.  Methods decorated with ``optimisable``
    accept an optional repeat *count* so that a run of identical
    instructions can be emitted as one ``add``/``sub``.
    """

    @optimisable
    def incr(self, count=None):
        """Add 1 (or *count*) to the current cell."""
        if count is None:
            return ['inc byte [ebx]']
        # Cells are single bytes, so adding N is the same as adding N mod 256.
        # Reducing here keeps the immediate within the range of a byte
        # operand even for runs of 256 or more '+'.
        return ['add byte [ebx], {0:d}'.format(count % 256)]

    @optimisable
    def decr(self, count=None):
        """Subtract 1 (or *count*) from the current cell."""
        if count is None:
            return ['dec byte [ebx]']
        # Same wrap-around reasoning as in incr().
        return ['sub byte [ebx], {0:d}'.format(count % 256)]

    @optimisable
    def forward(self, count=None):
        """Move the cell pointer 1 (or *count*) cells to the right."""
        if count is None:
            return ['inc ebx']
        return ['add ebx, {0:d}'.format(count)]

    @optimisable
    def backward(self, count=None):
        """Move the cell pointer 1 (or *count*) cells to the left."""
        if count is None:
            return ['dec ebx']
        return ['sub ebx, {0:d}'.format(count)]

    def loopb(self):
        """Open a loop: emit its start label and the skip-if-zero test.

        A fresh random ID is generated for the label pair and pushed onto
        ``self.loops`` so the matching ``loope`` call can close it.
        """
        loopID = hashlib.sha1(os.urandom(16)).hexdigest()
        self.loops.append(loopID)
        return [
            'loop_{0}:'.format(loopID),
            'cmp byte [ebx], 0',
            'je end_loop_{0}'.format(loopID),
        ]

    def loope(self):
        """Close the innermost open loop.

        Raises:
            AssertionError: if there is no open loop (a ``]`` without a
                matching ``[``).
        """
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with -O; type and message are unchanged.
        if not self.loops:
            raise AssertionError('Unbalanced loops')
        loopID = self.loops.pop()
        return [
            'cmp byte [ebx], 0',
            'jne loop_{0}'.format(loopID),
            'end_loop_{0}:'.format(loopID),
        ]
61
62
class Win32X86CodeGen(X86CodeGen):
    """Generates Windows console program, using msvcrt routines for input and output"""

    def template(self, code):
        """Return the complete program as a list of assembly lines.

        The generated instructions in *code* are placed between a PE console
        prologue (which points ``ebx`` at the ``cell`` buffer) and an epilogue
        that calls ``ExitProcess``, reserves 65536 bytes of cells and
        declares the msvcrt/kernel32 imports.
        """
        tpl = [
            'format PE console', 'entry start',
            "include 'win32a.inc'",
            "section '.text' code readable executable",
            'start:', 'mov ebx, cell', 'brainfuck:'
        ]
        tpl.extend(code)
        tpl.extend([
            'endcat:', 'invoke ExitProcess, 0',
            "section '.data' data readable writable",
            'cell rb 65536',
            "section '.idata' import data readable writeable",
            "library msvcrt, 'msvcrt.dll', kernel32, 'kernel32.dll'",
            "import msvcrt, putch, 'putchar', getch, 'getchar'",
            "import kernel32, ExitProcess, 'ExitProcess'"
        ])
        return tpl

    def inp(self):
        """Read one character with ``getchar`` into the current cell."""
        # getchar returns its result in eax, but a cell is a single byte.
        # Storing the whole register would overwrite the three cells that
        # follow the current one, so only the low byte (al) is written.
        # NOTE(review): on end of input getchar returns EOF, so the cell
        # presumably ends up as 0xFF - confirm this is the wanted convention.
        return ['ccall [getch]', 'mov byte [ebx], al']

    def outp(self):
        """Write the current cell with ``putchar``."""
        return ['ccall [putch], [ebx]']
87
88
class LinuxX86CodeGen(X86CodeGen):
    """Back end for Linux programs; input and output go through ``int $80``."""

    def template(self, code):
        """Return the complete program as a list of assembly lines.

        *code* is sandwiched between an ELF prologue (constants, entry point,
        ``ebx`` pointed at ``cell``) and an epilogue that issues ``sys_exit``
        and reserves the 65536-byte cell buffer.
        """
        prologue = [
            'format elf executable', 'entry start',
            'stdin equ 0', 'stdout equ 1',
            'sys_exit equ 1', 'sys_read equ 3', 'sys_write equ 4',
            'segment readable executable',
            'start:', 'mov ebx, cell', 'brainfuck:',
        ]
        epilogue = [
            'endcat:', 'mov eax, sys_exit', 'xor ebx, ebx', 'int $80',
            'segment readable writable', 'cell rb 65536',
        ]
        return prologue + list(code) + epilogue

    def _transfer_byte(self, syscall, stream):
        """Emit a one-byte *syscall* on *stream* using the current cell."""
        return [
            # ebx is about to be reused for the stream number, so the cell
            # pointer is saved on the stack and copied into ecx first.
            'push ebx', 'mov ecx, ebx',
            'mov eax, ' + syscall, 'mov ebx, ' + stream,
            'mov edx, 1', 'int $80', 'pop ebx',
        ]

    def inp(self):
        """Read one byte from stdin into the current cell."""
        return self._transfer_byte('sys_read', 'stdin')

    def outp(self):
        """Write the current cell to stdout."""
        return self._transfer_byte('sys_write', 'stdout')
117
118
def compile(code, codegen):
    """Translate Brainfuck source *code* into assembly using *codegen*.

    Characters that are not one of the eight Brainfuck instructions are
    ignored.  Consecutive identical instructions whose generator method
    carries a truthy ``optimisable`` attribute are emitted with a single
    call that receives the run length; all other instructions are emitted
    one call at a time.

    Args:
        code: the program text (``str``; ``bytes`` is accepted as well).
        codegen: object providing ``incr``, ``decr``, ``forward``,
            ``backward``, ``inp``, ``outp``, ``loopb``, ``loope`` and
            ``template``.

    Returns:
        Whatever ``codegen.template`` returns for the generated lines.

    Raises:
        AssertionError: if loops are still open at the end of the program
            (a ``[`` without a matching ``]``).
    """
    import itertools

    # Iterating over Python 3 bytes yields ints, which would never match the
    # one-character keys below.  On Python 2 ``bytes`` is ``str`` and this
    # branch is never taken.
    if isinstance(code, bytes) and not isinstance(code, str):
        code = code.decode('latin-1')

    instrs = {
        '+': codegen.incr,
        '-': codegen.decr,
        '>': codegen.forward,
        '<': codegen.backward,
        ',': codegen.inp,
        '.': codegen.outp,
        '[': codegen.loopb,
        ']': codegen.loope,
    }

    opcodes = [instrs[char] for char in code if char in instrs]

    # lame code size optimiser
    generated = []
    for opcode, group in itertools.groupby(opcodes):
        repeat = sum(1 for _ in group)
        if getattr(opcode, 'optimisable', False):
            generated.extend(opcode(repeat))
        else:
            for _ in range(repeat):
                generated.extend(opcode())

    # Anything left on the generator's loop stack is a '[' that was never
    # closed; the output would jump to a label that is never defined.
    if getattr(codegen, 'loops', None):
        raise AssertionError('Unbalanced loops')

    return codegen.template(generated)
148
149
if __name__ == '__main__':
    # Command-line entry point: brainfuck.py <linux|win32> <input> <output>
    import sys

    usage = 'brainfuck.py linux/win32 input.bf output.asm'
    targets = {
        'linux': LinuxX86CodeGen, 'win32': Win32X86CodeGen
    }

    # An unknown target gets the usage message instead of a KeyError traceback.
    if len(sys.argv) < 4 or sys.argv[1] not in targets:
        print(usage)
        sys.exit(1)

    codegen = targets[sys.argv[1]]()

    # The input is read and closed before the output is opened, so a missing
    # input file or a compile error no longer truncates an existing output
    # file, and no file handle leaks if the second open() fails (which
    # contextlib.nested could not guarantee).
    with open(sys.argv[2], 'rb') as source:
        code = source.read()
    if not isinstance(code, str):
        # Python 3 returns bytes here; latin-1 maps every byte to one character.
        code = code.decode('latin-1')

    asm = compile(code, codegen)

    with open(sys.argv[3], 'wb') as output:
        # The generated assembly is plain ASCII; encoding keeps the binary-mode
        # write valid on both Python 2 and Python 3.
        output.write('\n'.join(asm).encode('ascii'))