1. Mark Shannon
  2. hotpy_2

Commits

Mark Shannon  committed 0a47172 Draft

Do not hold constants in registers on exits, as this was causing excessive customisation. Add FAST_LOAD_GLOBAL and COPY_MOVE register instructions.

  • Participants
  • Parent commits 00a7ef1
  • Branches default

Comments (0)

Files changed (17)

File HotPy/gen_format_code.py

View file
-#! ./python
+#! ./python -S
 import collections
 
 def get_formats_from_file(file):
     'o', 'oo', 'ro', 'rro', 'rrro', 'oK', 'rK', 'rrk',
     'rKE', 'rKKE', 'roK', 'rroK', 'rok', 'rrkkk',
     'rE', 'rrE', 'rrrE', 'roE', 'rroE', 'rrroE',
-    'rrrroE', 'rrrrE', 'rokkkE', 'rrok', 'p',
-    'rpE', 'rpoE', 'rpooE', 'rpoooE', 'rroo'
+    'rrrroE', 'rrrrE', 'rokkkE', 'rrok', 'p', 'oKKK',
+    'rpE', 'rpoE', 'rpooE', 'rpoooE', 'rroo', 'roo'
 ]
 
 def defines(formats, outfile):

File HotPy/trace_makeopcodetargets.py

View file
                  'const_to_register', 'exit_if_false', 'exit_if_true',
                  'ensure_type', 'ensure_value', 'ensure_slot',
                  'value_from_object_dict_or_exit_const', 'setup_block',
-                 'poly_type_exit', 'two_move' ]
+                 'poly_type_exit', 'two_move', 'copy_move', 'fast_load_global' ]
 
 ops = [ '' ] * 256
 op_used = [ False ] * 256

File Include/dictobject.h

View file
 PyAPI_FUNC(int) _PyDict_SetItemId(PyObject *dp, struct _Py_Identifier *key, PyObject *item);
 PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key);
 
+PyObject * _PyDict_FastLoadGlobal(PyObject *, PyObject *, short *);
+
 void _PyDict_MakeGuarded(PyObject *dp);
 int _PyDict_AddGuard(PyObject *dp, PyObject *key, PyObject *value, PyObject *trace);
 

File Include/optimiser.h

View file
 #include "trace_opcode.h"
 #include "structmember.h"
 
-#define HOTPY_CONTEXT_DEPTH 10
+#define HOTPY_CONTEXT_DEPTH 6
 
 typedef struct _hotpy_instruction_pointer {
     PyCodeObject *code;
     void (*delete_fast)(HotPyOptimiser*, int);
     void (*get_globals)(HotPyOptimiser*);
     void (*load_from_globals)(HotPyOptimiser*, PyObject*);
+    void (*fast_load_from_globals)(HotPyOptimiser*, PyObject*, PyObject*);
     void (*store_to_globals)(HotPyOptimiser*, PyObject*);
     void (*has_special)(HotPyOptimiser*, PyObject*);
     void (*load_register)(HotPyOptimiser*, int);

File Include/register_macros.h

View file
 #define FORMAT_ID_kkK 2
 #define FORMAT_ID_o 3
 #define FORMAT_ID_oK 4
-#define FORMAT_ID_oo 5
-#define FORMAT_ID_p 6
-#define FORMAT_ID_r 7
-#define FORMAT_ID_rE 8
-#define FORMAT_ID_rK 9
-#define FORMAT_ID_rKE 10
-#define FORMAT_ID_rKKE 11
-#define FORMAT_ID_ro 12
-#define FORMAT_ID_roE 13
-#define FORMAT_ID_roK 14
-#define FORMAT_ID_rok 15
-#define FORMAT_ID_rokkkE 16
-#define FORMAT_ID_rpE 17
-#define FORMAT_ID_rpoE 18
-#define FORMAT_ID_rpooE 19
-#define FORMAT_ID_rpoooE 20
-#define FORMAT_ID_rr 21
-#define FORMAT_ID_rrE 22
-#define FORMAT_ID_rrK 23
-#define FORMAT_ID_rrk 24
-#define FORMAT_ID_rrkkk 25
-#define FORMAT_ID_rro 26
-#define FORMAT_ID_rroE 27
-#define FORMAT_ID_rroK 28
-#define FORMAT_ID_rrok 29
-#define FORMAT_ID_rroo 30
-#define FORMAT_ID_rrr 31
-#define FORMAT_ID_rrrE 32
-#define FORMAT_ID_rrro 33
-#define FORMAT_ID_rrroE 34
-#define FORMAT_ID_rrrrE 35
-#define FORMAT_ID_rrrroE 36
+#define FORMAT_ID_oKKK 5
+#define FORMAT_ID_oo 6
+#define FORMAT_ID_p 7
+#define FORMAT_ID_r 8
+#define FORMAT_ID_rE 9
+#define FORMAT_ID_rK 10
+#define FORMAT_ID_rKE 11
+#define FORMAT_ID_rKKE 12
+#define FORMAT_ID_ro 13
+#define FORMAT_ID_roE 14
+#define FORMAT_ID_roK 15
+#define FORMAT_ID_rok 16
+#define FORMAT_ID_rokkkE 17
+#define FORMAT_ID_roo 18
+#define FORMAT_ID_rpE 19
+#define FORMAT_ID_rpoE 20
+#define FORMAT_ID_rpooE 21
+#define FORMAT_ID_rpoooE 22
+#define FORMAT_ID_rr 23
+#define FORMAT_ID_rrE 24
+#define FORMAT_ID_rrK 25
+#define FORMAT_ID_rrk 26
+#define FORMAT_ID_rrkkk 27
+#define FORMAT_ID_rro 28
+#define FORMAT_ID_rroE 29
+#define FORMAT_ID_rroK 30
+#define FORMAT_ID_rrok 31
+#define FORMAT_ID_rroo 32
+#define FORMAT_ID_rrr 33
+#define FORMAT_ID_rrrE 34
+#define FORMAT_ID_rrro 35
+#define FORMAT_ID_rrroE 36
+#define FORMAT_ID_rrrrE 37
+#define FORMAT_ID_rrrroE 38
 
 #define FORMAT_E \
     int e0; \
     k0 = instruction_word >> 16; \
     assert(consistent_format(next_instr[-1] & 255, "oK"))
 
+#define FORMAT_oKKK \
+    int r0, k0, k1, k2; \
+    r0 = (instruction_word >> 8) & 255; \
+    k0 = instruction_word >> 16; \
+    instruction_word = *next_instr++; \
+    k1 = instruction_word & ((1 << 16)-1); \
+    k2 = instruction_word >> 16; \
+    assert(consistent_format(next_instr[-2] & 255, "oKKK"))
+
 #define FORMAT_oo \
     int r0, r1; \
     r0 = (instruction_word >> 8) & 255; \
     e0 = instruction_word >> 16; \
     assert(consistent_format(next_instr[-2] & 255, "rokkkE"))
 
+#define FORMAT_roo \
+    int r0, r1, r2; \
+    r0 = (instruction_word >> 8) & 255; \
+    r1 = (instruction_word >> 16) & 255; \
+    r2 = (instruction_word >> 24); \
+    assert(consistent_format(next_instr[-1] & 255, "roo"))
+
 #define FORMAT_rpE \
     int r0, r1, e0; \
     r0 = (instruction_word >> 8) & 255; \

File Include/register_write_functions.h

View file
 }
 
 static void
+write_oKKK(HotPyOptimiser *x, int op, int k0, int k1, int k2)
+{
+    int out0;
+    assert(k0 >= 0);
+    assert(k1 >= 0);
+    assert(k2 >= 0);
+    out0 = choose_register(x);
+    assert(consistent_format(op, "oKKK"));
+    write_word_112(x, op, out0, k0);
+    write_word_22(x, k1, k2);
+    push_register(x, out0);
+}
+
+static void
 write_oo(HotPyOptimiser *x, int op)
 {
     int out0;
 }
 
 static void
+write_roo(HotPyOptimiser *x, int op)
+{
+    int r0 = pop_as_register(x, 0);
+    int out0;
+    int out1;
+    out0 = choose_register(x);
+    out1 = choose_register(x);
+    assert(consistent_format(op, "roo"));
+    write_word_1111(x, op, r0, out0, out1);
+    push_register(x, out0);
+    push_register(x, out1);
+}
+
+static void
 write_rpE(HotPyOptimiser *x, int op, int exit)
 {
     int r1 = pop_as_register(x, 1);
 }
 
 void
+print_oKKK(FILE* out, uint32_t **instr_ptr)
+{
+    uint32_t *next_instr = *instr_ptr;
+    uint32_t instruction_word = *next_instr++;
+    int op = instruction_word & 255;
+    FORMAT_oKKK;
+    *instr_ptr = next_instr;
+    fprintf(out, "%s o%d K%d K%d K%d\n", _HotPy_Instruction_Names[op], r0, k0, k1, k2);
+}
+
+void
 print_oo(FILE* out, uint32_t **instr_ptr)
 {
     uint32_t *next_instr = *instr_ptr;
 }
 
 void
+print_roo(FILE* out, uint32_t **instr_ptr)
+{
+    uint32_t *next_instr = *instr_ptr;
+    uint32_t instruction_word = *next_instr++;
+    int op = instruction_word & 255;
+    FORMAT_roo;
+    *instr_ptr = next_instr;
+    fprintf(out, "%s r%d o%d o%d\n", _HotPy_Instruction_Names[op], r0, r1, r2);
+}
+
+void
 print_rpE(FILE* out, uint32_t **instr_ptr)
 {
     uint32_t *next_instr = *instr_ptr;
 }
 
 static uint32_t *
+defuses_for_oKKK(uint32_t *next_instr, int *defs, int *uses)
+{
+    uint32_t instruction_word = *next_instr++;
+    FORMAT_oKKK;
+    *defs++ = r0;
+    (void)k0;
+    (void)k1;
+    (void)k2;
+    *defs++ = -1;
+    *uses++ = -1;
+    return next_instr;
+}
+
+static uint32_t *
 defuses_for_oo(uint32_t *next_instr, int *defs, int *uses)
 {
     uint32_t instruction_word = *next_instr++;
 }
 
 static uint32_t *
+defuses_for_roo(uint32_t *next_instr, int *defs, int *uses)
+{
+    uint32_t instruction_word = *next_instr++;
+    FORMAT_roo;
+    *uses++ = r0;
+    *defs++ = r1;
+    *defs++ = r2;
+    *defs++ = -1;
+    *uses++ = -1;
+    return next_instr;
+}
+
+static uint32_t *
 defuses_for_rpE(uint32_t *next_instr, int *defs, int *uses)
 {
     uint32_t instruction_word = *next_instr++;
 }
 
 static uint32_t *
+relabel_uses_oKKK(uint32_t *next_instr, unsigned char *relabel_table)
+{
+    uint32_t instruction_word = *next_instr++;
+    (void)instruction_word; /* Stop compiler complaining */
+    next_instr[-1] = instruction_word;
+    instruction_word = *next_instr++;
+    next_instr[-1] = instruction_word;
+    return next_instr;
+}
+
+static uint32_t *
 relabel_uses_oo(uint32_t *next_instr, unsigned char *relabel_table)
 {
     uint32_t instruction_word = *next_instr++;
 }
 
 static uint32_t *
+relabel_uses_roo(uint32_t *next_instr, unsigned char *relabel_table)
+{
+    int reg;
+    uint32_t instruction_word = *next_instr++;
+    (void)instruction_word; /* Stop compiler complaining */
+    reg = (instruction_word >> 8) & 255; \
+    instruction_word = (instruction_word & 0xffff00ff) | (relabel_table[reg] << 8);
+    next_instr[-1] = instruction_word;
+    return next_instr;
+}
+
+static uint32_t *
 relabel_uses_rpE(uint32_t *next_instr, unsigned char *relabel_table)
 {
     int reg;
 }
 
 static uint32_t *
+relabel_defs_oKKK(uint32_t *next_instr, unsigned char *relabel_table)
+{
+    int reg;
+    uint32_t instruction_word = *next_instr++;
+    (void)instruction_word; /* Stop compiler complaining */
+    reg = (instruction_word >> 8) & 255; \
+    instruction_word = (instruction_word & 0xffff00ff) | (relabel_table[reg] << 8);
+    next_instr[-1] = instruction_word;
+    instruction_word = *next_instr++;
+    next_instr[-1] = instruction_word;
+    return next_instr;
+}
+
+static uint32_t *
 relabel_defs_oo(uint32_t *next_instr, unsigned char *relabel_table)
 {
     int reg;
 }
 
 static uint32_t *
+relabel_defs_roo(uint32_t *next_instr, unsigned char *relabel_table)
+{
+    int reg;
+    uint32_t instruction_word = *next_instr++;
+    (void)instruction_word; /* Stop compiler complaining */
+    reg = (instruction_word >> 16) & 255; \
+    instruction_word = (instruction_word & 0xff00ffff) | (relabel_table[reg] << 16);
+    reg = (instruction_word >> 24); \
+    instruction_word = (instruction_word & 0x00ffffff) | (relabel_table[reg] << 24);
+    next_instr[-1] = instruction_word;
+    return next_instr;
+}
+
+static uint32_t *
 relabel_defs_rpE(uint32_t *next_instr, unsigned char *relabel_table)
 {
     uint32_t instruction_word = *next_instr++;
 }
 
 static HotPyContext *
+get_exit_context_oKKK(uint32_t *instr, PyObject *exits)
+{
+   return NULL;
+}
+
+static HotPyContext *
 get_exit_context_oo(uint32_t *instr, PyObject *exits)
 {
    return NULL;
 }
 
 static HotPyContext *
+get_exit_context_roo(uint32_t *instr, PyObject *exits)
+{
+   return NULL;
+}
+
+static HotPyContext *
 get_exit_context_rpE(uint32_t *instr, PyObject *exits)
 {
     uint32_t instruction_word = *instr++;

File Include/trace_opcode.h

View file
 #define SETUP_BLOCK 210
 #define POLY_TYPE_EXIT 211
 #define TWO_MOVE 212
-/* Opcode 213 unused */
-/* Opcode 214 unused */
+#define COPY_MOVE 213
+#define FAST_LOAD_GLOBAL 214
 /* Opcode 215 unused */
 /* Opcode 216 unused */
 /* Opcode 217 unused */
 "SETUP_BLOCK",
 "POLY_TYPE_EXIT",
 "TWO_MOVE",
-NULL,
-NULL,
+"COPY_MOVE",
+"FAST_LOAD_GLOBAL",
 NULL,
 NULL,
 NULL,

File Objects/dictobject.c

View file
     DK_DECREF(keys);
 }
 
+PyObject *
+_PyDict_FastLoadGlobal(PyObject *op, PyObject *key, short *cached_index)
+{
+    PyObject *builtins;
+    Py_hash_t hash;
+    PyDictObject *mp = (PyDictObject *)op;
+    PyDictKeyEntry *ep;
+    short index = *cached_index;
+    PyObject **value_addr;
+    assert(PyDict_CheckExact(mp));
+    assert(PyUnicode_CheckExact(key));
+    if (*cached_index >= DK_SIZE(mp->ma_keys))
+        goto slow_path;
+    ep = &mp->ma_keys->dk_entries[index];
+    if (ep->me_key != key)
+        goto slow_path;
+    assert(!_PyDict_HasSplitTable(mp));
+    return ep->me_value;
+slow_path:
+    hash = ((PyASCIIObject *) key)->hash;
+    if (hash == -1) {
+        hash = PyObject_Hash(key);
+        if (hash == -1) {
+            PyErr_Clear();
+            return NULL;
+        }
+    }
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
+    if (ep == NULL) {
+        PyErr_Clear();
+    } else if (*value_addr) {
+        index = ep - & mp->ma_keys->dk_entries[0];  /* Overflow doesn't matter */
+        *cached_index = index;
+        return *value_addr;
+    }
+    builtins = PyDict_GetItemString(op, "__builtins__");
+    if (builtins == NULL)
+        return NULL;
+    if (PyModule_Check(builtins))
+        builtins = PyObject_GetAttrString(builtins, "__dict__");
+    if (builtins == NULL)
+        return NULL;
+    return PyDict_GetItem(builtins, key);
+
+}
+
 extern int HotPyGuard_AddTraceToSet(PyObject *, PyObject *);
 extern void HotPyGuard_InvalidateSet(PyObject *);
 

File Python/recording_interpreter.c

View file
     assert(ctx->cf_instrs[0].code);
 }
 
+static void
+record_get_globals(HotPyOptimiser* x, PyFrameObject *f, FrameStack *frame_stack)
+{
+    HotPyContext * _ctx;
+    x->instruction_count += 6;
+    x->get_globals(x);
+    x->store_register(x, HOTPY_RETURN_VALUE_REGISTER);
+    _ctx = x->require_value(x, f->f_globals);
+    if (_ctx)
+        FrameStack_SaveToContext(frame_stack, f->f_lasti, block_code(f), _ctx);
+    x->load_register(x, HOTPY_RETURN_VALUE_REGISTER);
+    _ctx = x->require_value(x, f->f_builtins);
+    if (_ctx)
+        FrameStack_SaveToContext(frame_stack, f->f_lasti, block_code(f), _ctx);
+    x->push_constant(x, f->f_globals);
+    x->push_constant(x, f->f_builtins);
+}
+
 #undef CALL_SURROGATE
 #define CALL_SURROGATE(func, n) \
 do { \
         TARGET(STORE_GLOBAL)
             RECORD_SET_LASTI(f->f_lasti);
             w = GETITEM(names, oparg);
-            RECORD_INST(get_globals);
+            /* XXX -- Probably shouldn't specialise in eval() like context */
+            record_get_globals(optimiser, f, frame_stack);
             RECORD_OPCODE(POP_TOP);
             RECORD_INST_WITH_NAME(store_to_globals, w);
             v = POP();
         TARGET(LOAD_GLOBAL)
             RECORD_SET_LASTI(f->f_lasti);
             w = GETITEM(names, oparg);
-            RECORD_INST(get_globals);
+            record_get_globals(optimiser, f, frame_stack);
             RECORD_INST_WITH_NAME(load_from_globals, w);
             if (PyDict_CheckExact(f->f_globals)
                 && PyDict_CheckExact(f->f_builtins)) {

File Python/register_interpreter.c

View file
             SET_REGISTER(r2, x);
             FAST_DISPATCH();
 
+        TARGET(FAST_LOAD_GLOBAL)
+            FORMAT_oKKK;
+            PyObject *globals = CONSTANT(k0);
+            PyObject *key = GETITEM(trace->trace_names, k1);
+#ifdef WORDS_BIGENDIAN
+            short *k2_addr = (short *)&next_instr[-1];
+#else
+            short *k2_addr = ((short *)&next_instr[-1]) + 1;
+#endif
+            assert (*k2_addr == k2);
+            PyObject *x = _PyDict_FastLoadGlobal(globals, key, k2_addr);
+            if (x == NULL) {
+                if (!PyErr_Occurred())
+                    format_exc_check_arg(PyExc_NameError,
+                                         GLOBAL_NAME_ERROR_MSG, key);
+                goto on_error;
+            }
+            Py_INCREF(x);
+            SET_REGISTER(r0, x);
+            FAST_DISPATCH();
+
         TARGET(VALUE_FROM_OBJECT_DICT_OR_EXIT_CONST)
             FORMAT_rokkkE;
             PyObject **dict_ptr;
             SET_REGISTER(r3, val1);
             FAST_DISPATCH();
 
+        TARGET(COPY_MOVE)
+            FORMAT_roo;
+            PyObject *val;
+            val = registers[r0];
+            Py_INCREF(val);
+            Py_INCREF(val);
+            SET_REGISTER(r1, val);
+            SET_REGISTER(r2, val);
+            FAST_DISPATCH();
+
         TARGET(RETURN_VALUE)
             FORMAT_r;
             PyObject *retval = registers[r0];
         } else {
             _HotPy_Stats.invalid_exits++;
             Py_DECREF(trace);
-            return HotPyTrace_Execute(tstate, exit_trace);
+            return _HotPy_TraceExcecuteCold(tstate, exit_trace, "trace transfer");
         }
     }
     LOG_FMT("Cold simple or binary exit. Count=%d\n", exit->execution_count);
             TRANSFER_TO(exit_trace, exit->exit_context);
         }
         else {
-            _HotPy_Stats.invalid_exits++;
-            LOG_FMT("Invalid Trace Exit. Count=%d\n", exit_trace->execution_count);
+#ifdef Py_DEBUG
+            char buf[100];
+            sprintf(buf, "Poly object = %lx", (long)value);
             Py_DECREF(trace);
-            return HotPyTrace_Execute(tstate, exit_trace);
+            return _HotPy_TraceExcecuteCold(tstate, exit_trace, buf);
+#else
+            Py_DECREF(trace);
+            return _HotPy_TraceExcecuteCold(tstate, exit_trace, "poly");
+#endif
         }
     }
 rare_exit:

File Python/register_opcode_targets.h

View file
 &&TARGET_SETUP_BLOCK,
 &&TARGET_POLY_TYPE_EXIT,
 &&TARGET_TWO_MOVE,
-&&_unknown_opcode,
-&&_unknown_opcode,
+&&TARGET_COPY_MOVE,
+&&TARGET_FAST_LOAD_GLOBAL,
 &&_unknown_opcode,
 &&_unknown_opcode,
 &&_unknown_opcode,

File Python/trace_D_O_C.c

View file
 }
 
 static void
-write_constants(ByteBuffer *buffer, PyObject *dict)
-{
-    PyObject *key, *value;
-    Py_ssize_t i = 0;
-    Py_ssize_t size = 0;
-    while (PyIdDict_Next(dict, &i, &key, &value)) {
-        if (INDEX(value))
-            size++;
-    }
-    write_int(buffer, size);
-    i = 0;
-    while (PyIdDict_Next(dict, &i, &key, &value)) {
-        assert(key == VALUE(value));
-        if (INDEX(value)) {
-            size--;
-            write_int(buffer, INDEX(value));
-            write_value(buffer, VALUE(value));
-        }
-    }
-    assert(size == 0);
-}
-
-static void
 save_state_to_context(HotPyOptimiser *x, HotPyContext *context)
 {
     int i;
         write_byte(&buffer, DK_FRAME);
         write_frame(&buffer, opt->frame_stack[i]);
     }
-    write_constants(&buffer, opt->constants);
     write_byte(&buffer, 0);
     Py_CLEAR(context->cf_deferred_state);
     context->cf_deferred_state =
 }
 
 static void
-read_constants(unsigned char **code, PyObject *consts, HotPy_DOC* opt)
-{
-    Py_ssize_t i, size = read_int(code);
-    for (i = 0 ; i < size; i++) {
-        int reg = read_int(code);
-        PyObject *obj = get_constant(opt, read_value(code, consts));
-        set_constant_register(opt, obj, reg);
-    }
-}
-
-static void
-constants_registers(unsigned char **code, char *registers)
-{
-    Py_ssize_t i, size = read_int(code);
-    for (i = 0 ; i < size; i++) {
-        int reg = read_int(code);
-        read_int(code); /* value */
-        registers[reg] = 1;
-    }
-}
-
-static void
 object_registers(unsigned char **code, char *registers);
 
 static void
     }
 }
 
-#ifdef Py_DEBUG
-static int
-skip_constants(unsigned char **code, PyObject *consts)
-{
-    Py_ssize_t i, size = read_int(code);
-    for (i = 0 ; i < size; i++) {
-        read_int(code);
-        read_value(code, consts);
-    }
-    return 1;
-}
-#endif
-
 static void
 materialise_stack_frame(HotPyOptimiser *x, DeferredFrame *f, int real_depth)
 {
     }
     opt->top_frame = opt->frame_stack[opt->frame_depth-1];
     assert(opt->frame_depth <= HOTPY_CONTEXT_DEPTH);
-    read_constants(&code, context->cf_constants, opt);
     if (*code != 0) {
         fprintf(stderr, "Error in DOC state encoding");
         /* Raise System Error? */
         code++;
         frame_registers(&code, registers);
     }
-    constants_registers(&code, registers);
     if (*code != 0) {
         fprintf(stderr, "Error in DOC state encoding");
         /* Raise System Error? */
     return buf + 1;
 }
 
-
-static void
-dump_constants(PyObject *dict, char *buf)
-{
-    PyObject *key, *value;
-    Py_ssize_t i = 0;
-    buf += sprintf(buf, "Constants:\n");
-    while (PyIdDict_Next(dict, &i, &key, &value)) {
-        assert(key == VALUE(value));
-        if (INDEX(value))
-            buf += sprintf(buf, "%s in r%d\n",
-                           Py_TYPE(VALUE(value))->tp_name, INDEX(value));
-    }
-}
-
-
 static void
 clear_opt(HotPy_DOC *opt)
 {
     for (i = opt.frame_depth-1; i>= 0; i--) {
         buf = dump_frame(opt.frame_stack[i], buf);
     }
-    dump_constants(opt.constants, buf);
     clear_opt(&opt);
     Py_DECREF(opt.constants);
 }
     stack[0]->f_back = next_frame;
     Py_CLEAR(tstate->real_frame);
     tstate->real_frame = stack[frames-1];
-    assert(skip_constants(&code, context->cf_constants));
     assert(*code == 0);
 }
 
         v = real_value();
         REGISTER(index) = v;
     }
+    if (!is_copyable(v)) {
+        REGISTER(index) = real_value();
+        materialise(x, v);
+        v = REGISTER(index);
+        x->next->store_register(x->next, index);
+    }
     if (v == REAL_VALUE)
         x->next->load_register(x->next, index);
     Py_INCREF(v);
     PyObject *v;
     Py_CLEAR(REGISTER(index));
     v = POP();
-    if (!is_copyable(v)) {
-        materialise(x, v);
-        v = real_value();
-    }
     v = MAKE_DEFERRED(x, v);
     REGISTER(index) = v;
     SHOW_STACK();
 }
 
 static void
+fast_load_from_globals(HotPyOptimiser *x, PyObject *globals, PyObject *name)
+{
+    HotPy_DOC *opt = (HotPy_DOC *)x;
+    x->next->fast_load_from_globals(x->next, globals, name);
+    PUSH(real_value());
+}
+
+static void
 store_to_globals(HotPyOptimiser *x, PyObject *name)
 {
     materialise_stack_top(x, 2);
 static void
 store_member(HotPyOptimiser *x, PyMemberDef *def)
 {
-    assert(0);
     materialise_stack_top(x, 2);
     x->next->store_member(x->next, def);
 }
     x->opt_base.delete_fast = delete_fast;
     x->opt_base.get_globals = get_globals;
     x->opt_base.load_from_globals = load_from_globals;
+    x->opt_base.fast_load_from_globals = fast_load_from_globals;
     x->opt_base.store_to_globals = store_to_globals;
     x->opt_base.has_special = has_special;
     x->opt_base.load_register = load_register;

File Python/trace_logger.c

View file
 static int
 start(HotPyOptimiser *x, HotPyTraceObject *trace, PyFrameObject *f, HotPyContext *c, char *reason)
 {
+    char buf[2000];
     int i;
     fprintf(OUT, "Starting Trace %lx at line %d", (long)trace,
             PyFrame_GetLineNumber(f));
     fprintf(OUT, " of ");
     print_str(OUT, f->f_code->co_name);
     fprintf(OUT, "\nReason: %s\n--------------\n", reason);
+    HotPyContext_Dump(&buf[0], c);
+    fprintf(OUT, "%s", buf);
     INDENT = indent_str + strlen(indent_str);
     assert(strlen(indent_str) > HOTPY_CONTEXT_DEPTH);
     for (i = 0; i < HOTPY_CONTEXT_DEPTH; i++) {
 OPCODE_WITH_ARG(store_register)
 
 static void
+fast_load_from_globals(HotPyOptimiser *x, PyObject *globals, PyObject *name)
+{
+    fprintf(OUT, "%sfast_load_from_globals ", INDENT);
+    print_str(OUT, name);
+    fprintf(OUT, "\n");
+    x->next->fast_load_from_globals(x->next, globals, name);
+}
+
+static void
 fast_frame(HotPyOptimiser *x, int n, PyCodeObject *code)
 {
     fprintf(OUT, "%sfast_frame %d\n", INDENT, n);
     x->opt_base.delete_fast = delete_fast;
     x->opt_base.get_globals = get_globals;
     x->opt_base.load_from_globals = load_from_globals;
+    x->opt_base.fast_load_from_globals = fast_load_from_globals;
     x->opt_base.store_to_globals = store_to_globals;
     x->opt_base.has_special = has_special;
     x->opt_base.load_register = load_register;

File Python/trace_opcode_targets.h

View file
 &&_unknown_opcode, /* SETUP_BLOCK */
 &&_unknown_opcode, /* POLY_TYPE_EXIT */
 &&_unknown_opcode, /* TWO_MOVE */
-&&_unknown_opcode,
-&&_unknown_opcode,
+&&_unknown_opcode, /* COPY_MOVE */
+&&_unknown_opcode, /* FAST_LOAD_GLOBAL */
 &&_unknown_opcode,
 &&_unknown_opcode,
 &&_unknown_opcode,

File Python/trace_recorder_register.c

View file
         case TWO_MOVE:
             print_rroo(out, &code);
             break;
+        case COPY_MOVE:
+            print_roo(out, &code);
+            break;
+        case FAST_LOAD_GLOBAL:
+            print_oKKK(out, &code);
+            break;
         default:
             fprintf(stderr, "Unexpected opcode: %s\n", _HotPy_Instruction_Names[op]);
             exit(-1);
         case HAS_SPECIAL: case BUILD_SLICE: case OVERRIDES:
         case VALUE_FROM_OBJECT_DICT_OR_EXIT_CONST:
         case STORE_TO_GLOBALS: case SET_IN_OBJECT_DICT_CONST:
+        case FAST_LOAD_GLOBAL:
             assert(length_via_format_matches(op, 2));
             out[0] = instr[0];
             out[1] = instr[1];
                 uint32_t from1 = (instr[1] >> 8) & 255;
                 uint32_t to1 = (instr[1] >> 16) & 255;
                 assert (from1 != to1);
-                if (from1 != to) {
+                if (to == from1 || to == to1) {
+                    instr[0] = COPY_MOVE | (from << 8) |
+                    (to << 16) | (to1 << 24);
+                    instr++;
+                    break;
+                }
+                else {
                     instr[0] = TWO_MOVE | (from << 8) |
                                (from1 << 16) | (to << 24);
                     instr[1] = to1;
         case HAS_SPECIAL: case BUILD_SLICE: case OVERRIDES:
         case VALUE_FROM_OBJECT_DICT_OR_EXIT_CONST:
         case STORE_TO_GLOBALS: case SET_IN_OBJECT_DICT_CONST:
+        case FAST_LOAD_GLOBAL:
             assert(length_via_format_matches(op, 2));
             instr += 2;
             break;
     assert(slot < (1 << 8));
     assert(def->offset % sizeof(PyObject*) == 0);
     assert((def->flags & READONLY) == 0);
-    if (def->type == T_OBJECT) {
+    if (def->type == T_OBJECT || def->type == T_OBJECT_EX) {
         write_rrk(x, STORE_SLOT, slot);
     }
     else {
 }
 
 static void
+fast_load_from_globals(HotPyOptimiser *x, PyObject *globals, PyObject *name)
+{
+    HotPyTraceRecorder *opt = (HotPyTraceRecorder *)x;
+    int k0 = index_for_constant(opt, globals);
+    int k1 = index_for_name(x, name);
+    write_oKKK(x, FAST_LOAD_GLOBAL, k0, k1, 0);
+}
+
+static void
 store_to_globals(HotPyOptimiser *x, PyObject *name)
 {
     int k = index_for_name(x, name);
     x->opt_base.delete_fast = delete_fast;
     x->opt_base.get_globals = get_globals;
     x->opt_base.load_from_globals = load_from_globals;
+    x->opt_base.fast_load_from_globals = fast_load_from_globals;
     x->opt_base.store_to_globals = store_to_globals;
     x->opt_base.has_special = has_special;
     x->opt_base.load_register = load_register;
         return defuses_for_r(instr, defs, uses);
     case SET_IN_OBJECT_DICT_CONST:
         return defuses_for_rrkkk(instr, defs, uses);
+    case FAST_LOAD_GLOBAL:
+        return defuses_for_oKKK(instr, defs, uses);
     default:
         fprintf(stderr, "Unexpected opcode: %s\n", _HotPy_Instruction_Names[op]);
         abort();
     }
     rcount = forward_pass(moves, mcount, events, ecount, relabels);
     rcount += backward_pass(moves, mcount, events, ecount, relabels + rcount);
+    //rcount += forward_pass(moves, mcount, events, ecount, relabels + rcount);
+    //rcount += backward_pass(moves, mcount, events, ecount, relabels + rcount);
     qsort(relabels, rcount, sizeof(RelabelEvent), relabel_compare);
     return rcount;
 }
         return relabel_uses_r(instr, relabel_uses_table);
     case SET_IN_OBJECT_DICT_CONST:
         return relabel_uses_rrkkk(instr, relabel_uses_table);
+    case FAST_LOAD_GLOBAL:
+        return relabel_uses_oKKK(instr, relabel_uses_table);
     default:
         fprintf(stderr, "Unexpected opcode: %s\n", _HotPy_Instruction_Names[op]);
         abort();
         return relabel_defs_r(instr, relabel_defs_table);
     case SET_IN_OBJECT_DICT_CONST:
         return relabel_defs_rrkkk(instr, relabel_defs_table);
+    case FAST_LOAD_GLOBAL:
+        return relabel_defs_oKKK(instr, relabel_defs_table);
     default:
         fprintf(stderr, "Unexpected opcode: %s\n", _HotPy_Instruction_Names[op]);
         abort();
         return get_exit_context_r(instr, exits);
     case SET_IN_OBJECT_DICT_CONST:
         return get_exit_context_rrkkk(instr, exits);
+    case FAST_LOAD_GLOBAL:
+        return get_exit_context_oKKK(instr, exits);
     default:
         fprintf(stderr, "Unexpected opcode: %s\n", _HotPy_Instruction_Names[op]);
         abort();

File Python/trace_recorder_stack.c

View file
 }
 
 static void
+fast_load_from_globals(HotPyOptimiser *x, PyObject *globals, PyObject *name)
+{
+    HotPyTraceRecorder *opt = (HotPyTraceRecorder *)x;
+    PyObject *builtins = PyDict_GetItemString(globals, "__builtins__");
+    push_constant(x, globals);
+    if (builtins)
+        push_constant(x, builtins);
+    else
+        Py_CLEAR(opt->trace);
+    opcode_with_name(x, LOAD_FROM_GLOBAL_AND_BUILTINS, name);
+}
+
+static void
 store_to_globals(HotPyOptimiser *x, PyObject *name)
 {
     opcode_with_name(x, STORE_TO_GLOBALS, name);
     x->opt_base.delete_fast = delete_fast;
     x->opt_base.get_globals = get_globals;
     x->opt_base.load_from_globals = load_from_globals;
+    x->opt_base.fast_load_from_globals = fast_load_from_globals;
     x->opt_base.store_to_globals = store_to_globals;
     x->opt_base.has_special = has_special;
     x->opt_base.load_register = load_register;

File Python/trace_specialiser.c

View file
     }
 }
 
+static int
+all_capitals(PyObject *name)
+{
+    Py_ssize_t i, len = PyUnicode_GET_LENGTH(name);
+    void *data = PyUnicode_DATA(name);
+    int kind = PyUnicode_KIND(name);
+    int cased = 0;
+    if (len > 80)
+        return 0;
+    for (i = 0; i < len; i++) {
+        const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+        if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch))
+            return 0;
+        else if (!cased && Py_UNICODE_ISUPPER(ch))
+            cased = 1;
+    }
+    return cased;
+}
+
 /* Use out-of-line guards to convert any global function or class to a constant
  */
 static void
         PyObject *value = PyDict_GetItem(globals, name);
         if (value && (PyFunction_Check(value) ||
                       PyType_Check(value) ||
-                      PyCFunction_Check(value))) {
+                      PyCFunction_Check(value) || all_capitals(name))) {
             _PyDict_MakeGuarded(globals);
             if (_PyDict_AddGuard(globals, name, value, (PyObject *)opt->trace))
                 const_val = value;
             if (builtins) {
                 PyObject *value = PyDict_GetItem(builtins, name);
                 if (value) {
+                    _PyDict_MakeGuarded(globals);
                     _PyDict_MakeGuarded(builtins);
                     if (_PyDict_AddGuard(globals, name, NULL, (PyObject *)opt->trace) &&
                         _PyDict_AddGuard(builtins, name, value, (PyObject *)opt->trace))
         PUSH(new_value(const_val));
         x->next->push_constant(x->next, const_val);
     }
+    else if (globals) {
+        x->next->opcode(x->next, POP_TOP);
+        x->next->opcode(x->next, POP_TOP);
+        x->next->fast_load_from_globals(x->next, globals, name);
+        PUSH(new_unknown());
+    }
     else {
+        x->next->load_from_globals(x->next, name);
         PUSH(new_unknown());
-        x->next->load_from_globals(x->next, name);
     }
 }
 
 static void
+fast_load_from_globals(HotPyOptimiser *x, PyObject *globals, PyObject *name)
+{
+    HotPySpecialiser *opt = (HotPySpecialiser *)x;
+    x->next->fast_load_from_globals(x->next, globals, name);
+    PUSH(new_unknown());
+}
+
+static void
 store_to_globals(HotPyOptimiser *x, PyObject *name)
 {
     HotPySpecialiser *opt = (HotPySpecialiser *)x;
             Py_DECREF(w);
             if (def && def->flags != READONLY && def->offset < (1 << 16) &&
                 (def->type == T_OBJECT || def->type == T_OBJECT_EX)) {
-                x->next->opcode_with_arg(x->next, STORE_SLOT, def->offset);
-                x->next->opcode(x->next, POP_TOP);
+                x->next->store_member(x->next, def);
                 return;
             }
             break;
     x->opt_base.delete_fast = delete_fast;
     x->opt_base.get_globals = get_globals;
     x->opt_base.load_from_globals = load_from_globals;
+    x->opt_base.fast_load_from_globals = fast_load_from_globals;
     x->opt_base.store_to_globals = store_to_globals;
     x->opt_base.has_special = has_special;
     x->opt_base.load_register = load_register;