Commits

cheery committed efb55d3

some more benchmarks

Comments (0)

Files changed (7)

-FLAGS="--std=c99 -shared -fPIC"
+FLAGS="--std=c99 -shared -fPIC -lrt -O2"
 
 gcc src/obj.c src/vm.c -o vm.so $FLAGS
+gcc src/obj.c src/vm.c -o vm32.so $FLAGS -m32

native_code_experiment.py

+# -*- coding: utf-8 -*-
+"""
+    :copyright: 2010 by Henri Tuhola <henri.tuhola@gmail.com>
+"""
+from parse import parse_file, Symbol
+from g386.elf import Dynamic, executable
+from g386.x86 import *
+from g386.core import Bytes, Assembly
+import sys
+
+Object_size = 8
+
+def encode_dword(value):
+    return ''.join(map(chr, [
+        (value >> 0) & 0xFF,
+        (value >> 8) & 0xFF,
+        (value >> 16) & 0xFF,
+        (value >> 24) & 0xFF,
+    ]))
+    
+
+def encode_constants(constants):
+    header = encode_dword(len(constants))
+    data = [header]
+    for k in constants:
+        if isinstance(k, int):
+            data.extend(['\x00', encode_dword(k)])
+        else:
+            data.extend(['\x01', k.encode('utf-8'), '\x00'])
+    return Bytes(''.join(data))
+
+dynamic = Dynamic('x86')
+dynamic.require('libc.so.6')
+dynamic.require('./vm32.so')
+
+def ccall(symbol):
+    return Assembly([
+        mov(r0, dynamic.extern(symbol)),
+        call(Indirect(r0, 0)),
+    ])
+
+def scall(symbol, argc, *args):
+    return Assembly([
+        subi(r4, argc*4),
+        Assembly(args),
+        ccall(symbol),
+        addi(r4, argc*4),
+    ])
+
+def vmcall(callee, argc):
+    """
+    It's bit weird there. C stack grows downward so I had to make
+    a very weird calling convention. The args and callee come
+    in reverse into the vm stack.
+    """
+    cdecl = Assembly([])
+    native = Assembly([])
+    success = Assembly([])
+    return Assembly([
+        getlocal(callee),
+        ldw(r1, I(r0,0)), subi(r1, 0x401), # t_CApi
+        cjump(4, cdecl), # jump when equal
+        ldw(r1, I(r0,0)), subi(r1, 0x2), #t_Function
+        cjump(4, native),
+        scall('vmcall_error', 0),
+
+        cdecl, # calls it
+        subi(r4, 4*3),
+        mov(r1, r0),
+        subi(r0, Object_size*argc), # step to the object list.
+        stw(I(r4,8), r0),
+        stwi(I(r4,4), argc),
+        stw(I(r4,0), r1), # callee gets the retval.
+        call(I(r1,4)), # call the second argument in callee
+        jump(success),
+        
+        native,
+        subi(r0, Object_size*argc), # apply the stack top.
+        mov(r4, r0),
+        mov(r0, argc),
+        call(I(r4,Object_size*argc+4)), # call the procedure
+        # quite hacky!
+        getlocal(callee), # closure_exit doesn't need to know argc
+        stw(I(r0,0), r2),
+        stw(I(r0,4), r3),
+
+        success,
+        mov(r0, r5), # recover the stack state.
+        subi(r0, 256*Object_size),
+        mov(r4, r0),
+    ])
+
+def closure_entry(argc):
+    body = Assembly([])
+    loadargs = [loada(i,i) for i in range(argc)]
+    return Assembly([
+        subi(r0, argc),
+        cjump(4, body),
+        scall('vmentry_error', 0),
+        body,
+        subi(r4, 4), # produces record like: {last_frame, pc}
+        stw(I(r4,0), r5), # last frame
+        mov(r5, r4), # this frame
+        subi(r4, 256*Object_size),
+        # args can be located from r5+Object_size
+        Assembly(loadargs),
+    ])
+
+def closure_exit(index):
+    return Assembly([
+        getlocal(index), ldw(r2, I(r0,0)), ldw(r3, I(r0,4)),
+        mov(r4, r5), addi(r4, 4), # retrieves the program counter.
+        ldw(r5, I(r5,0)),
+        ret(),
+    ])
+
+def getlocal(index, to=r0):
+    return Assembly([
+        mov(to, r5),
+        subi(to, Object_size*index + Object_size),
+    ])
+
+def getk(index):
+    return Assembly([
+        mov(r0, r6),
+        addi(r0, Object_size*index),
+    ])
+
+def loadk(locindex, kindex):
+    return Assembly([
+        getk(kindex),
+        ldw(r2, I(r0,0)),
+        ldw(r3, I(r0,4)),
+        getlocal(locindex),
+        stw(I(r0,0), r2),
+        stw(I(r0,4), r3),
+    ])
+
+def copylocal(locindex, source):
+    return Assembly([
+        getlocal(source),
+        ldw(r2, I(r0,0)),
+        ldw(r3, I(r0,4)),
+        getlocal(locindex),
+        stw(I(r0,0), r2),
+        stw(I(r0,4), r3),
+    ])
+
+def loada(locindex, aindex):
+    return Assembly([
+        mov(r0, r5),
+        addi(r0, Object_size*(aindex+1)),
+        ldw(r2, I(r0,0)), ldw(r3, I(r0,4)),
+        getlocal(locindex),
+        stw(I(r0,0), r2), stw(I(r0,4), r3),
+    ])
+
+def loadnil(locindex):
+    return Assembly([
+        getlocal(locindex),
+        stwi(I(r0,0), 0), stwi(I(r0,4), 0),
+    ])
+
+def getglobal(locindex, kindex):
+    return scall('get_global', 2,
+        getlocal(locindex),
+        stw(I(r4,4), r0),
+        getk(kindex),
+        stw(I(r4,0), r0),
+    )
+
+I = Indirect
+
+def closure(locindex, proc):
+    return Assembly([
+        getlocal(locindex),
+        stwi(I(r0,0), 0x2), # t_Function
+        stwi(I(r0,4), proc),
+    ])
+
+def ifclause(cond, then):
+    skippy = Assembly([])
+    return Assembly([
+        Assembly(cond),
+        ldw(r1, I(r0, 4)), # just HOPE it's boolean.
+        subi(r1, 0x0),
+        cjump(4, skippy),
+        Assembly(then),
+        skippy,
+    ])
+
+def whileclause(cond, step):
+    retry = Assembly([])
+    skippy = Assembly([])
+    return Assembly([
+        retry,
+        Assembly(cond),
+        ldw(r1, I(r0, 4)), # just HOPE it's boolean.
+        subi(r1, 0x0),
+        cjump(4, skippy),
+        Assembly(step),
+        jump(retry),
+        skippy,
+    ])
+
+# only one constant pool per file can decrease data locality,
+# but makes it easier to handle.
+constants = [
+    'start_benchmark',
+    'stop_benchmark',
+    '<',
+    '-',
+    '*',
+    'print',
+    0,
+    1,
+    5,
+    1000000,
+]
+
+constant_pool = encode_constants(constants)
+
+factorial_proc = Assembly([
+    closure_entry(argc=2),
+    ifclause(
+        cond=[
+            getglobal(2, constants.index('<')),
+            copylocal(3, 1), # load k
+            loadk(4, constants.index(1)),
+            vmcall(2, argc=2),
+            getlocal(2),
+        ],
+        then=[
+            getglobal(2, constants.index('*')),
+            copylocal(3, 0), # load self
+            getglobal(4, constants.index('-')),
+            loadk(5, constants.index(1)),
+            copylocal(6, 1),
+            vmcall(4, argc=2),
+            copylocal(5, 0), # load self
+            vmcall(3, argc=2),
+            copylocal(4, 1), # load k
+            vmcall(2, argc=2),
+            closure_exit(2),
+        ]
+    ),
+#    getglobal(0, 0), loadk(1, 2), vmcall(0, argc=1),
+#    loadnil(0),
+    closure_exit(1),
+])
+
+entry = Assembly([
+    ccall('vm_init'),
+
+    mov(r5, r4),
+    subi(r4, 32), # count of locals function is using.
+
+    scall('obj_init_constant_pool', 1,
+        stwi(I(r4, 0), constant_pool),
+    ),
+    mov(r6, r0),
+
+    getglobal(0, constants.index('start_benchmark')),
+    vmcall(0, argc=0),
+
+    # factorial
+    closure(0, factorial_proc),
+    # i
+    loadk(1, constants.index(1000000)),
+    whileclause(
+        cond=[
+            getglobal(2, constants.index('<')),
+            copylocal(3, 1), # copy i.
+            loadk(4, constants.index(0)),
+            vmcall(2, argc=2),
+            getlocal(2),
+        ],
+        step=[
+            copylocal(2, 0), # copy factorial
+            loadk(3, constants.index(5)),
+            copylocal(4, 0), # factorial doesn't have upvalues.
+            vmcall(2, argc=2),
+
+            getglobal(2, constants.index('-')),
+            loadk(3, constants.index(1)),
+            copylocal(4, 1), # get i
+            vmcall(2, argc=2),
+            copylocal(1, 2), # store i
+        ],
+    ),
+
+    getglobal(1, constants.index('stop_benchmark')),
+    vmcall(1, argc=0),
+
+# to create factorial.. I yet need:
+# .. to write it.
+
+#    scall('obj_print', 3,
+#        mov(r0, r6),
+#        addi(r0, 0*8),
+#        stw(I(r4,8), r0),
+#        stwi(I(r4,4), 1),
+#        stw(I(r4,0), r4),
+#    ),
+
+#    getglobal(0, 0),
+#    loadk(1, 1),
+#    loadk(2, 0),
+#    closure(3, hello_proc),
+#    vmcall(3, argc=0),
+#    vmcall(0, argc=3),
+
+#    scall('obj_print', 3,
+#        stw(I(r4,0), r4),
+#        stw(I(r4,4), 1),
+#        getlocal(0),
+#        stw(I(r4,8), r0),
+#    ),
+
+#    scall('obj_print', 3,
+#        stw(I(r4,0), r4),
+#        stw(I(r4,4), 1),
+#        getk(2),
+#        stw(I(r4,8), r0),
+#    ),
+
+])
+program = [entry, factorial_proc, constant_pool]
+
+#for stmt in parse_file(sys.argv[1]):
+#    pass
+
+entry.program.extend([
+    scall('exit', 1,
+        stwi(Indirect(r4,0), 0),
+    )
+])
+
+executable(
+    open('out.app', 'w'),
+    program,
+    dynamic,
+    entry,
+)
 /// error handling
 ///
 
+void vmcall_error() {
+    fprintf(stderr, "vmcall failed\n");
+    exit(1);
+}
+
+void vmentry_error() {
+    fprintf(stderr, "vmentry failed\n");
+    exit(1);
+}
+
 void fatal_error(char* name, int line) {
     fprintf(stderr, "fatal error in %s, line %d\n", name, line);
     exit(1);
 /// the first standard library call
 ///
 Object obj_print(int argc, Object* args) {
-    if (argc != 1) FATAL_ERROR;
+    if (argc < 1) FATAL_ERROR;
     Object obj = args[0];
     switch (obj.type) {
         case t_Nil:
             FATAL_ERROR;
     }
     printf("\n");
+    if (argc > 1) {
+        return obj_print(argc-1, args+1);
+    }
     return Nil;
 }
 
 void make_integer(int value, Object* obj) {
     *obj = (Object){t_Integer, {i:value}};
 }
+
+static int decode_dword(unsigned char* data) {
+    return data[0] | data[1] << 8 | data[2] << 16 | data[3] << 24;
+}
+
+Object* obj_init_constant_pool(char* data) {
+    int count = decode_dword(data);
+    Object* constants = malloc(count*sizeof(Object));
+    data += 4;
+    for (int i = 0; i < count; ++i) {
+        switch(*data++) {
+            case 0:
+                constants[i] = Integer(decode_dword(data));
+                data += 4;
+                break;
+            case 1:
+                cstr_intern(data, &constants[i]);
+                while (*data++ != 0);
+                break;
+        }
+    }
+    return constants;
+}
 #define Nil (Object){t_Nil, NULL}
 #define True (Object){t_Boolean, {i:1}}
 #define False (Object){t_Boolean, {i:0}}
-
+#define Integer(k) (Object){t_Integer, {i:(k)}}
 
 Object obj_print(int argc, Object *args);
 
+// required by the system timer.
+#define _GNU_SOURCE
 #include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
 #include "obj.h"
 // typedef struct FunctionDefinition {
 //     int *instructions;
     dict_set(&globals, key, obj);
 }
 
+extern void get_global(Object *key, Object *obj) {
+    dict_get(&globals, *key, obj);
+}
+
 Object obj_is_less_than(int argc, Object* args) {
     if (argc != 2) FATAL_ERROR;
     if (args[0].type != t_Integer) FATAL_ERROR;
     return accum;
 }
 
+struct timespec benchmark_begun;
+Object start_benchmark(int argc, Object* args) {
+    if (argc != 0) FATAL_ERROR;
+    int res = clock_gettime(CLOCK_MONOTONIC, &benchmark_begun);
+    if (res != 0) FATAL_ERROR;
+    return Nil;
+}
+
+Object stop_benchmark(int argc, Object* args) {
+    if (argc != 0) FATAL_ERROR;
+    struct timespec stop;
+    int res = clock_gettime(CLOCK_MONOTONIC, &stop);
+    if (res != 0) FATAL_ERROR;
+    double seconds = difftime(stop.tv_sec, benchmark_begun.tv_sec);
+    long nseconds = stop.tv_nsec - benchmark_begun.tv_nsec;
+    seconds += (double)nseconds * 1e-09;
+    printf("benchmark result(vm): %e seconds\n", seconds, nseconds);
+    return Nil;
+}
+
 extern int vm_init() {
     dict_init(&globals);
     set_global("hello", CSTRING("lol"));
     set_global("<", CAPI(obj_is_less_than));
     set_global("*", CAPI(obj_mul));
     set_global("-", CAPI(obj_sub));
+    set_global("start_benchmark", CAPI(start_benchmark));
+    set_global("stop_benchmark", CAPI(stop_benchmark));
 //    dict_set(&globals, CSTRING("hello"), CSTRING("lol"));
 //    dict_set(&globals, CSTRING("print"), CAPI(obj_print));
 }
+#define _GNU_SOURCE
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+struct timespec benchmark_begun;
+void start_benchmark() {
+    int res = clock_gettime(CLOCK_MONOTONIC, &benchmark_begun);
+    if (res != 0) printf("fail\n");
+}
+
+void stop_benchmark() {
+    struct timespec stop;
+    int res = clock_gettime(CLOCK_MONOTONIC, &stop);
+    if (res != 0) printf("fail\n");
+    double seconds = difftime(stop.tv_sec, benchmark_begun.tv_sec);
+    long nseconds = stop.tv_nsec - benchmark_begun.tv_nsec;
+    seconds += (double)nseconds * 1e-09;
+    printf("benchmark result(vm): %e seconds\n", seconds, nseconds);
+}
+
+int factorial(int k) {
+    if (1 < k)
+        return k * factorial(k-1);
+}
+int main(int argc) {
+    start_benchmark();
+    int i = 1000000;
+    while (0 < i) {
+        factorial(5);
+        i = i - 1;
+    }
+    stop_benchmark();
+    return 0;
+}

test/factorial.lisp

         (return (* k (factorial (- k 1)))))
     (return k)
 )
+(start_benchmark)
 (= i 1000000)
 (while (< 0 i )
     (factorial 5)
     (= i (- i 1))
 )
+(stop_benchmark)