Commits

ebo committed ce81126

Added Support for BasicBlocks and Functions in JIT IL

  • Participants
  • Parent commits 6afd688

Comments (0)

Files changed (1)

 # HG changeset patch
-# Parent 74394a6ac5c71289efc938f67a8e6dc69919bae8
+# Parent a1ffb6c6871186e9992bb9a567d395443a17748b
 
 diff --git a/.hgignore b/.hgignore
 --- a/.hgignore
 +  this->engine_ = llvm::EngineBuilder(this->module_)
 +    .setErrorStr(&error)
 +    .setEngineKind(llvm::EngineKind::JIT)
-+    .setUseMCJIT(true)
++    .setUseMCJIT(false)
 +    .create();
 +
 +  if (engine_ == NULL) {
 new file mode 100644
 --- /dev/null
 +++ b/JIT/global_jit_data.h
-@@ -0,0 +1,203 @@
+@@ -0,0 +1,223 @@
 +// -*- C++ -*-
 +//
 +
 +  PyTypeObject *arg1;
 +};
 +
++// Writes a one-line debug rendering of a trace entry to `stream`, in the
++// form "<TE: code pc opcode oparg flag>", and returns the same stream so
++// that further insertions can be chained.
++inline std::ostream& operator<<(std::ostream& stream, const TraceEntry &e)
++{
++  stream << "<TE: ";
++  stream << e.code << " ";
++  stream << e.pc << " ";
++  stream << e.opcode << " ";
++  stream << e.oparg << " ";
++  stream << e.flag;
++  return stream << ">";
++}
++
 +struct PyJITTracer {
 +public:
 +  static PyJITTracer *Get();
 +  {
 +    if (!active_ || suspended_)
 +      return;
++    // std::cout << "MP: " << trace_[trace_.size() - 1] << "\n";
 +    merge_point_ = true;
 +  }
 +
 +    }
 +  }
 +
++  // Prints a one-line summary of the tracer to stdout: whether it is
++  // active, whether it is suspended, and how many entries trace_ holds.
++  void dump() {
++    const char *activity = active_ ? "active" : "inactive";
++    const char *mode = suspended_ ? "suspended" : "recording";
++    std::cout << "Trace:"
++              << " " << activity
++              << " " << mode
++              << " size: " << trace_.size()
++              << "\n";
++  }
++
 +private:
 +  // This is called when a merge point is encountered and compiles the trace
 +  void do_trace(PyCodeObject *code, size_t start);
 new file mode 100644
 --- /dev/null
 +++ b/JIT/jit_opcode.cc
-@@ -0,0 +1,679 @@
+@@ -0,0 +1,675 @@
 +#include "Python.h"
 +#include "opcode.h"
 +
 +
 +  JITCallFasterFunction::JITCallFasterFunction(JITValue *func,
 +                                               PyCodeObject *code,
-+                                               std::vector<JITOpcode *> &opcodes,
++                                               JITBasicFunction *function,
 +                                               std::vector<JITValue *> &args,
 +                                               std::vector<JITValue *> &stack,
 +                                               std::vector<JITValue *> &locals)
 +    : func_(func), code_(code),
-+      opcodes_(opcodes), args_(args),
++      function_(function), args_(args),
 +      stack_(stack), locals_(locals)
 +  {
 +    func->addUser(this);
 +    JITFasterFBuilder new_builder(fbuilder, inline_bail, bail_result,
 +                                  as_code,
 +                                  globals, fbuilder.getBuiltins());
-+    typedef std::vector<JITOpcode *>::iterator iter_t;
-+    for (iter_t iter = opcodes_.begin(), end = opcodes_.end();
-+         iter != end; ++iter) {
-+      (*iter)->emit(new_builder);
-+    }
++    function_->emit(new_builder);
 +  }
 +
 +  void
 new file mode 100644
 --- /dev/null
 +++ b/JIT/jit_opcode.h
-@@ -0,0 +1,1148 @@
+@@ -0,0 +1,1237 @@
 +// -*- C++ -*-
 +#ifndef PYTHON_JIT_OPCODE_H_
 +#define PYTHON_JIT_OPCODE_H_
 +    return NULL;
 +  }
 +
++  // An ordered list of JIT opcodes that is emitted and dumped front to back.
++  //
++  // The block takes ownership of every opcode appended through push_back():
++  // the destructor deletes each stored pointer.
++  // NOTE(review): no copy constructor or assignment operator is declared, so
++  // copying a block would leave two blocks deleting the same opcodes.
++  class JITBasicBlock {
++    typedef std::vector<JITOpcode *> opcode_list_t;
++
++  public:
++    JITBasicBlock()
++    {}
++
++    // Releases all opcodes held by this block.
++    virtual ~JITBasicBlock()
++    {
++      opcode_list_t::iterator cur = opcodes_.begin();
++      opcode_list_t::iterator last = opcodes_.end();
++      while (cur != last) {
++        delete *cur;
++        ++cur;
++      }
++    }
++
++    // Appends `opcode` to the end of the block.
++    void push_back(JITOpcode *opcode)
++    {
++      opcodes_.push_back(opcode);
++    }
++
++    // Calls emit(builder) on every opcode, in insertion order.
++    void emit(JITFunctionBuilder &builder)
++    {
++      opcode_list_t::iterator cur = opcodes_.begin();
++      opcode_list_t::iterator last = opcodes_.end();
++      while (cur != last) {
++        (*cur)->emit(builder);
++        ++cur;
++      }
++    }
++
++    // Calls dump() on every opcode, in insertion order.
++    void dump()
++    {
++      opcode_list_t::iterator cur = opcodes_.begin();
++      opcode_list_t::iterator last = opcodes_.end();
++      while (cur != last) {
++        (*cur)->dump();
++        ++cur;
++      }
++    }
++
++  private:
++    opcode_list_t opcodes_;
++  };
++
++  // A function in the JIT IL: a preamble block, an entry block, and a list
++  // of basic blocks whose first element is the entry block.
++  //
++  // The function owns every block it holds (the preamble, the entry block
++  // and anything added through push_back()) and deletes them on destruction.
++  // NOTE(review): no copy constructor or assignment operator is declared, so
++  // copying a function would leave two objects deleting the same blocks.
++  class JITBasicFunction {
++  public:
++    // Allocates the preamble and entry blocks; only the entry block is
++    // registered in the block list.
++    JITBasicFunction()
++    {
++      preamble_ = new JITBasicBlock();
++      entry_ = new JITBasicBlock();
++      blocks_.push_back(entry_);
++    }
++
++    // Deletes all blocks in the block list and the preamble block.
++    virtual ~JITBasicFunction()
++    {
++      typedef std::vector<JITBasicBlock *>::iterator iter_t;
++      for(iter_t i = blocks_.begin(), e = blocks_.end(); i != e; ++i) {
++        delete *i;
++      }
++      // preamble_ is never stored in blocks_, so the loop above does not
++      // reach it; release it here to avoid leaking it.
++      delete preamble_;
++    }
++
++    // Returns the entry block created by the constructor (still owned by
++    // this function).
++    JITBasicBlock *getEntryBlock()
++    {
++      return entry_;
++    }
++
++    // Appends `block` to the block list; the destructor will delete it.
++    void push_back(JITBasicBlock *block)
++    {
++      blocks_.push_back(block);
++    }
++
++    // Emits every block in the block list, in order. The preamble block is
++    // not emitted here.
++    void emit(JITFunctionBuilder &builder)
++    {
++      // Only single-block functions are accepted for now.
++      assert(blocks_.size() == 1);
++      typedef std::vector<JITBasicBlock *>::iterator iter_t;
++      for(iter_t i = blocks_.begin(), e = blocks_.end(); i != e; ++i) {
++        (*i)->emit(builder);
++      }
++    }
++
++    // Dumps every block in the block list, in order.
++    void dump()
++    {
++      typedef std::vector<JITBasicBlock *>::iterator iter_t;
++      for(iter_t i = blocks_.begin(), e = blocks_.end(); i != e; ++i) {
++        (*i)->dump();
++      }
++    }
++
++  private:
++    JITBasicBlock *preamble_;
++    JITBasicBlock *entry_;
++    std::vector<JITBasicBlock *> blocks_;
++  };
++
 +  class JITValue : public JITOpcode {
 +  public:
 +    virtual llvm::Value *getValue(JITFunctionBuilder &fbuilder) const
 +  public:
 +    JITCallFasterFunction(JITValue *func,
 +                          PyCodeObject *code,
-+                          std::vector<JITOpcode *> &opcodes,
++                          JITBasicFunction *function,
 +                          std::vector<JITValue *> &args,
 +                          std::vector<JITValue *> &stack,
 +                          std::vector<JITValue *> &locals);
 +
 +    ~JITCallFasterFunction()
-+    {}
++    {
++      delete function_;
++    }
 +
 +    virtual void dump()
 +    {
++      // Prints the inlined callee between BEGIN/END marker lines.
 +      std::cout << "CALL_FUNCTION (Faster) BEGIN\n";
-+      typedef std::vector<JITOpcode *>::iterator iter_t;
-+      for (iter_t iter = opcodes_.begin(), end = opcodes_.end();
-+           iter != end; ++iter) {
-+        (*iter)->dump();
-+      }
++      // The opcode vector was replaced by function_, which dumps its own
++      // blocks; without this call nothing is printed between the markers.
++      function_->dump();
 +      std::cout << "CALL_FUNCTION (Faster) END\n";
 +    }
 +
 +  private:
 +    JITValue *func_;
 +    PyCodeObject *code_;
-+    std::vector<JITOpcode *> opcodes_;
++    JITBasicFunction *function_;
 +    std::vector<JITValue *> args_;
 +    std::vector<JITValue *> stack_;
 +    std::vector<JITValue *> locals_;
 new file mode 100644
 --- /dev/null
 +++ b/JIT/jit_tracer.cc
-@@ -0,0 +1,801 @@
+@@ -0,0 +1,814 @@
 +#include "Python.h"
 +#include "opcode.h"
 +
 +  }
 +
 +  // Generate a set of immediate instructions
-+  jit::JITTraceAnalysis analysis;
++  jit::JITBasicFunction bfunc;
++  jit::JITTraceAnalysis analysis(&bfunc);
 +  tr_begin = cut_trace.begin();
 +  tr_end = cut_trace.end();
 +  analysis.analyseTrace(tr_begin, tr_end);
 +
 +  if (merge_point_) {
 +    merge_point_ = false;
++    // if (trace_.size() > 0 && trace_[0].opcode == JIT_CALL) {
++    //   std::cout << trace_[0] << "\n";
++    //   std::cout << entry << "\n";
++    //   std::cout << "yyy\n";
++    // }
 +    for (size_t i = 0; i < trace_.size(); ++i) {
 +      TraceEntry &old = trace_[i];
 +      if (entry == old) {
 +        suspend();
++        // dump();
 +        do_trace(code, i);
 +        unsuspend();
 +        return;
 +  trace_.push_back(entry);
 +
 +  if (trace_.size() > 500) {
-+    suspend();
-+    unsuspend();
++    clear();
 +    return;
 +  }
 +}
 +
 +  // *******************************
 +
-+  JITTraceAnalysis::JITTraceAnalysis()
-+    : stack_counter_(0)
++  JITTraceAnalysis::JITTraceAnalysis(JITBasicFunction *func)
++    : stack_counter_(0),
++      function_(func),
++      block_(NULL)
++  {
++    block_ = function_->getEntryBlock();
++  }
++
++  JITTraceAnalysis::~JITTraceAnalysis()
 +  {
 +  }
 +
 +    for (size_t i = 0; i < loaded_locals_.size(); ++i) {
 +      loaded_locals_[i]->emit(builder);
 +    }
-+    for (size_t i = 0; i < trace_.size(); ++i) {
-+      trace_[i]->emit(builder);
-+    }
++
++    function_->emit(builder);
 +  }
 +
 +  void
 +      func = mfunc;
 +    }
 +
-+    JITTraceAnalysis analysis;
++    JITBasicFunction *bfunc = new JITBasicFunction();
++    JITTraceAnalysis analysis(bfunc);
 +    iter = next + 1;
 +    JITValue *res = analysis.analyseTrace(py_code, func, args, iter, end);
-+    this->createCallFasterFunction(func, py_code, analysis.trace(),
++    this->createCallFasterFunction(func, py_code, bfunc,
 +                                   args, stack, locals);
 +    assert(iter->opcode == RETURN_VALUE);
 +    assert((iter + 1)->opcode == JIT_EXIT_FUNC);
 +    JITValue *func = stack.Pop();
 +
 +    if (createCallFunctionOp(func, args, stack, locals, iter, end)) {
-+      std::cout << "TEST OK!\n";
++      // std::cout << "TEST OK!\n";
 +      return;
 +    }
-+    else {
-+      std::cout << "TEST Failed!\n";
-+    }
++    // else {
++    //   std::cout << "TEST Failed!\n";
++    // }
 +
 +    std::vector<TraceEntry>::iterator next = iter + 1;
 +
 new file mode 100644
 --- /dev/null
 +++ b/JIT/jit_tracer.h
-@@ -0,0 +1,260 @@
+@@ -0,0 +1,266 @@
 +// -*- C++ -*-
 +#ifndef PYTHON_JIT_TRACER_H_
 +#define PYTHON_JIT_TRACER_H_
 +  struct JITTraceAnalysis {
 +    typedef std::vector<JITValue *> stack_t;
 +
-+    JITTraceAnalysis();
++    JITTraceAnalysis(JITBasicFunction *func);
++    ~JITTraceAnalysis();
 +
 +    JITValue *analyseTrace(PyCodeObject *co,
 +                           JITValue *func,
 +      for (size_t i = 0; i < loaded_stack_.size(); ++i) {
 +        loaded_stack_[i]->dump();
 +      }
-+      for (size_t i = 0; i < trace_.size(); ++i) {
-+        trace_[i]->dump();
-+      }
++      function_->dump();
 +    }
 +
 +    void emit(JITFunctionState &state);
 +                                        JITStack &stack, JITLocals &locals)
 +    {
 +      JITCallFunction *r = new JITCallFunction(func, args, stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITCallFasterFunction *
 +    createCallFasterFunction(JITValue *func, PyCodeObject *code,
-+                             std::vector<JITOpcode *> &opcodes,
++                             JITBasicFunction *bfunc,
 +                             std::vector<JITValue *> &args,
 +                             JITStack &stack, JITLocals &locals)
 +    {
 +      JITCallFasterFunction *r =
 +        new JITCallFasterFunction(func, code,
-+                                  opcodes, args,
++                                  bfunc, args,
 +                                  stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    createMethodGetSelf(JITValue *arg)
 +    {
 +      JITMethodGetSelf *r = new JITMethodGetSelf(arg);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    createMethodGetFunc(JITValue *arg)
 +    {
 +      JITMethodGetFunc *r = new JITMethodGetFunc(arg);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    createExtractValue(JITValue *arg, PyTypeObject *pytype=NULL)
 +    {
 +      JITExtractValue *r = new JITExtractValue(arg, pytype);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITOpcodeInfo *createOpcodeInfo(int lasti, JITStack &stack, JITLocals &locals)
 +    {
 +      JITOpcodeInfo *r = new JITOpcodeInfo(lasti, stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITForIter *createForIter(JITValue *v)
 +    {
 +      JITForIter *r = new JITForIter(v);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITGenericBinOp *createGenericBinOp(BinaryOpImpl *impl, bool inplace, JITValue *v, JITValue *w)
 +    {
 +      JITGenericBinOp *r = new JITGenericBinOp(impl, inplace, v, w);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITCompareOp *createCompareOp(JITValue *v, JITValue *w, int oparg)
 +    {
 +      JITCompareOp *r = new JITCompareOp(v, w, oparg);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITGuardNull *createGuardNull(JITValue *v, BLACKHOLE reason, int lasti, JITStack &stack, JITLocals &locals)
 +    {
 +      JITGuardNull *r = new JITGuardNull(v, reason, lasti, stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITGuardTrue *createGuardTrue(JITValue *v, BLACKHOLE reason, int lasti, JITStack &stack, JITLocals &locals)
 +    {
 +      JITGuardTrue *r = new JITGuardTrue(v, reason, lasti, stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITGuardFalse *createGuardFalse(JITValue *v, BLACKHOLE reason, int lasti, JITStack &stack, JITLocals &locals)
 +    {
 +      JITGuardFalse *r = new JITGuardFalse(v, reason, lasti, stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITGuardType *createGuardType(JITValue *v, PyTypeObject *type, BLACKHOLE reason, int lasti, JITStack &stack, JITLocals &locals)
 +    {
 +      JITGuardType *r = new JITGuardType(v, type, reason, lasti, stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITLoadConst *createLoadConst(int oparg)
 +    {
 +      JITLoadConst *r = new JITLoadConst(oparg);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITLoadGlobal *createLoadGlobal(int oparg)
 +    {
 +      JITLoadGlobal *r = new JITLoadGlobal(oparg);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITNullValue *createNullValue()
 +    {
 +      JITNullValue *r = new JITNullValue();
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITLoadAttr *createLoadAttr(JITValue *v, int oparg)
 +    {
 +      JITLoadAttr *r = new JITLoadAttr(v, oparg);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +                                  JITStack &stack, JITLocals &locals)
 +    {
 +      JITStoreAttr *r = new JITStoreAttr(target, arg, oparg, stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +                                      JITStack &stack, JITLocals &locals)
 +    {
 +      JITStoreSubscr *r = new JITStoreSubscr(v0, v1, v2, stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITIncRef *createIncRef(JITValue *v)
 +    {
 +      JITIncRef *r = new JITIncRef(v);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITDecRef *createDecRef(JITValue *v)
 +    {
 +      JITDecRef *r = new JITDecRef(v);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITXDecRef *createXDecRef(JITValue *v)
 +    {
 +      JITXDecRef *r = new JITXDecRef(v);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
 +    JITTraceEnd *createTraceEnd(JITStack &stack, JITLocals &locals)
 +    {
 +      JITTraceEnd *r = new JITTraceEnd(stack.stack, locals.locals);
-+      trace_.push_back(r);
++      block_->push_back(r);
 +      return r;
 +    }
 +
-+    std::vector<JITOpcode *> &trace()
-+    {
-+      return trace_;
++    JITBasicBlock *current_block() const
++    {
++      return block_;
++    }
++
++    JITBasicFunction *function() const
++    {
++      return function_;
 +    }
 +
 +  private:
 +    std::vector<JITStackValue *> loaded_stack_;
 +    int stack_counter_;
 +
-+    std::vector<JITOpcode *> trace_;
++
++    JITBasicFunction *function_;
++    JITBasicBlock *block_;
 +  };
 +
 +
  
  platform: $(BUILDPYTHON) pybuilddir.txt
  	$(RUNSHARED) $(PYTHON_FOR_BUILD) -c 'import sys ; from sysconfig import get_platform ; print(get_platform()+"-"+sys.version[0:3])' >platform
-@@ -490,33 +517,33 @@
+@@ -491,33 +518,33 @@
  
  # Build static library
  # avoid long command lines, same as LIBRARY_OBJS
  
  # Copy up the gdb python hooks into a position where they can be automatically
  # loaded by gdb during Lib/test/test_gdb.py
-@@ -555,7 +582,7 @@
+@@ -556,7 +583,7 @@
  # for a shared core library; otherwise, this rule is a noop.
  $(DLLLIBRARY) libpython$(VERSION).dll.a: $(LIBRARY_OBJS)
  	if test -n "$(DLLLIBRARY)"; then \
  			$(LIBS) $(MODLIBS) $(SYSLIBS) $(LDLAST); \
  	else true; \
  	fi
-@@ -590,7 +617,7 @@
+@@ -591,7 +618,7 @@
  	fi
  
  Modules/_testembed: Modules/_testembed.o $(LIBRARY) $(LDLIBRARY) $(PY3LIBRARY)
  
  ############################################################################
  # Importlib
-@@ -629,10 +656,10 @@
+@@ -630,10 +657,10 @@
  		-o $@ $(srcdir)/Modules/getpath.c
  
  Modules/python.o: $(srcdir)/Modules/python.c
  
  Python/dynload_shlib.o: $(srcdir)/Python/dynload_shlib.c Makefile
  	$(CC) -c $(PY_CORE_CFLAGS) \
-@@ -724,7 +751,7 @@
+@@ -725,7 +752,7 @@
  $(OPCODETARGETS_H): $(OPCODETARGETGEN_FILES)
  	$(OPCODETARGETGEN) $(OPCODETARGETS_H)
  
  
  Python/formatter_unicode.o: $(srcdir)/Python/formatter_unicode.c \
  				$(BYTESTR_DEPS)
-@@ -822,11 +849,35 @@
+@@ -823,11 +850,35 @@
  		$(srcdir)/Include/unicodeobject.h \
  		$(srcdir)/Include/warnings.h \
  		$(srcdir)/Include/weakrefobject.h \
  
  ######################################################################
  
-@@ -1171,6 +1222,7 @@
+@@ -1172,6 +1223,7 @@
  			echo Skip install of $(LIBRARY) - use make frameworkinstall; \
  		fi; \
  	fi
  	$(INSTALL_DATA) Modules/config.c $(DESTDIR)$(LIBPL)/config.c
  	$(INSTALL_DATA) Modules/python.o $(DESTDIR)$(LIBPL)/python.o
  	$(INSTALL_DATA) $(srcdir)/Modules/config.c.in $(DESTDIR)$(LIBPL)/config.c.in
-@@ -1299,6 +1351,16 @@
+@@ -1300,6 +1352,16 @@
  .c.o:
  	$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
  
  # Run reindent on the library
  reindent:
  	./$(BUILDPYTHON) $(srcdir)/Tools/scripts/reindent.py -r $(srcdir)/Lib
-@@ -1356,6 +1418,8 @@
+@@ -1357,6 +1419,8 @@
  	find build -name 'fficonfig.h' -exec rm -f {} ';' || true
  	find build -name '*.py' -exec rm -f {} ';' || true
  	find build -name '*.py[co]' -exec rm -f {} ';' || true
  #include <locale.h>
  
  #ifdef __FreeBSD__
-@@ -72,3 +73,15 @@
+@@ -64,3 +65,15 @@
      return res;
  }
  #endif
      if (throwflag) { /* support for generator.throw() */
          why = WHY_EXCEPTION;
          goto on_error;
-@@ -1315,11 +1345,53 @@
+@@ -1313,6 +1343,48 @@
+             }
+         }
  
-         /* Extract opcode and argument */
- 
++        /* Check for native function */
++
 +        if (co->co_jit && co->co_jit->mask[f->f_lasti / 8] > 0
 +            && (co->co_jit->mask[f->f_lasti / 8] & (1 << (f->f_lasti % 8))) > 0) {
-+          trace->trace(co, f->f_lasti, JIT_CALL, 0);
 +          f->f_stacktop = stack_pointer;
 +
 +          typedef int(*pyfunc_t)(PyFrameObject*);
 +          _PyTime_gettimeofday(&t);
 +          double t0 = (double)t.tv_sec + t.tv_usec * 1e-6;
 +
++          trace->trace(co, f->f_lasti, JIT_CALL, 0);
++          trace->clear();
++          trace->trace(co, f->f_lasti, JIT_CALL, 0);
++
 +          int reason = native_func(f);
 +          int val = jit::PyJIT_BlackHoleEval(f, reason);
 +
 +          f->f_stacktop = NULL;
 +          next_instr = first_instr + f->f_lasti;
 +
-+          // std::cout << "Finished Trace " << f->f_lasti << " " << val << std::endl;
-+
 +          if (val == jit::BH_EXC) {
 +            x = 0;
 +            goto on_error;
 +          x = Py_None;
 +        }
 +
+         /* Extract opcode and argument */
+ 
          opcode = NEXTOP();
-         oparg = 0;   /* allows oparg to be stored in a register because
+@@ -1320,6 +1392,9 @@
              it doesn't have to be remembered across a full loop */
          if (HAS_ARG(opcode))
              oparg = NEXTARG();
      dispatch_opcode:
  #ifdef DYNAMIC_EXECUTION_PROFILE
  #ifdef DXPAIRS
-@@ -1364,6 +1436,7 @@
+@@ -1364,6 +1439,7 @@
                  PUSH(x);
                  FAST_DISPATCH();
              }
              format_exc_check_arg(PyExc_UnboundLocalError,
                  UNBOUNDLOCAL_ERROR_MSG,
                  PyTuple_GetItem(co->co_varnames, oparg));
-@@ -1378,6 +1451,7 @@
+@@ -1378,6 +1454,7 @@
          PREDICTED_WITH_ARG(STORE_FAST);
          TARGET(STORE_FAST)
              v = POP();
              SETLOCAL(oparg, v);
              FAST_DISPATCH();
  
-@@ -1468,41 +1542,53 @@
+@@ -1468,41 +1545,53 @@
              Py_DECREF(w);
              SET_TOP(x);
              if (x != NULL) DISPATCH();
              if (PyUnicode_CheckExact(v))
                  x = PyUnicode_Format(v, w);
              else
-@@ -1511,11 +1597,14 @@
+@@ -1511,11 +1600,14 @@
              Py_DECREF(w);
              SET_TOP(x);
              if (x != NULL) DISPATCH();
              if (PyUnicode_CheckExact(v) &&
                       PyUnicode_CheckExact(w)) {
                  x = unicode_concatenate(v, w, f, next_instr);
-@@ -1530,26 +1619,33 @@
+@@ -1530,26 +1622,33 @@
              Py_DECREF(w);
              SET_TOP(x);
              if (x != NULL) DISPATCH();
              break;
  
          TARGET(BINARY_LSHIFT)
-@@ -1627,56 +1723,73 @@
+@@ -1627,56 +1726,73 @@
          TARGET(INPLACE_POWER)
              w = POP();
              v = TOP();
              if (PyUnicode_CheckExact(v) &&
                       PyUnicode_CheckExact(w)) {
                  x = unicode_concatenate(v, w, f, next_instr);
-@@ -1691,16 +1804,20 @@
+@@ -1691,16 +1807,20 @@
              Py_DECREF(w);
              SET_TOP(x);
              if (x != NULL) DISPATCH();
              break;
  
          TARGET(INPLACE_LSHIFT)
-@@ -1711,6 +1828,7 @@
+@@ -1711,6 +1831,7 @@
              Py_DECREF(w);
              SET_TOP(x);
              if (x != NULL) DISPATCH();
              break;
  
          TARGET(INPLACE_RSHIFT)
-@@ -1764,6 +1882,7 @@
+@@ -1764,6 +1885,7 @@
              Py_DECREF(v);
              Py_DECREF(w);
              if (err == 0) DISPATCH();
              break;
  
          TARGET(DELETE_SUBSCR)
-@@ -2123,6 +2242,8 @@
+@@ -2123,6 +2245,8 @@
              w = GETITEM(names, oparg);
              if (PyDict_CheckExact(f->f_globals)
                  && PyDict_CheckExact(f->f_builtins)) {
                  x = _PyDict_LoadGlobal((PyDictObject *)f->f_globals,
                                         (PyDictObject *)f->f_builtins,
                                         w);
-@@ -2130,6 +2251,7 @@
+@@ -2130,6 +2254,7 @@
                      if (!PyErr_Occurred())
                          format_exc_check_arg(PyExc_NameError,
                                               GLOBAL_NAME_ERROR_MSG, w);
                      break;
                  }
                  Py_INCREF(x);
-@@ -2144,6 +2266,7 @@
+@@ -2144,6 +2269,7 @@
                              format_exc_check_arg(
                                          PyExc_NameError,
                                          GLOBAL_NAME_ERROR_MSG, w);
                          break;
                      }
                  }
-@@ -2281,6 +2404,7 @@
+@@ -2281,6 +2407,7 @@
              Py_DECREF(v);
              SET_TOP(x);
              if (x != NULL) DISPATCH();
              break;
  
          TARGET(COMPARE_OP)
-@@ -2374,24 +2498,37 @@
+@@ -2374,24 +2501,37 @@
  
          PREDICTED_WITH_ARG(POP_JUMP_IF_FALSE);
          TARGET(POP_JUMP_IF_FALSE)
              DISPATCH();
  
          PREDICTED_WITH_ARG(POP_JUMP_IF_TRUE);
-@@ -2468,6 +2605,9 @@
+@@ -2468,6 +2608,9 @@
          PREDICTED_WITH_ARG(JUMP_ABSOLUTE);
          TARGET(JUMP_ABSOLUTE)
              JUMPTO(oparg);
  #if FAST_LOOPS
              /* Enabling this path speeds-up all while and for-loops by bypassing
                 the per-loop checks for signals.  By default, this should be turned-off
-@@ -2507,11 +2647,14 @@
+@@ -2507,11 +2650,14 @@
              }
              if (PyErr_Occurred()) {
                  if (!PyErr_ExceptionMatches(
              x = v = POP();
              Py_DECREF(v);
              JUMPBY(oparg);
-@@ -2682,6 +2825,7 @@
+@@ -2682,6 +2828,7 @@
              PUSH(x);
              if (x != NULL)
                  DISPATCH();
              break;
          }
  
-@@ -3077,6 +3221,7 @@
+@@ -3077,6 +3224,7 @@
  
      /* pop frame */
  exit_eval_frame:
      Py_LeaveRecursiveCall();
      tstate->frame = f->f_back;
  
-@@ -4030,6 +4175,8 @@
+@@ -4030,6 +4178,8 @@
      PyObject **pfunc = (*pp_stack) - n - 1;
      PyObject *func = *pfunc;
      PyObject *x, *w;
 diff --git a/configure b/configure
 --- a/configure
 +++ b/configure
-@@ -637,6 +637,7 @@
+@@ -657,6 +657,7 @@
  OTHER_LIBTOOL_OPT
  UNIVERSAL_ARCH_FLAGS
  BASECFLAGS
  OPT
  ABIFLAGS
  LN
-@@ -653,6 +654,7 @@
+@@ -673,6 +674,7 @@
  AR
  RANLIB
  USE_INLINE
  GNULD
  LINKCC
  LDVERSION
-@@ -679,6 +681,13 @@
+@@ -699,6 +701,13 @@
  LDFLAGS
  CFLAGS
  CC
  EXPORT_MACOSX_DEPLOYMENT_TARGET
  CONFIGURE_MACOSX_DEPLOYMENT_TARGET
  SGI_ABI
-@@ -761,6 +770,7 @@
+@@ -781,6 +790,7 @@
  with_framework_name
  enable_framework
  with_gcc
  with_cxx_main
  with_suffix
  enable_shared
-@@ -789,6 +799,7 @@
+@@ -809,6 +819,7 @@
        ac_precious_vars='build_alias
  host_alias
  target_alias
  CC
  CFLAGS
  LDFLAGS
-@@ -1433,6 +1444,11 @@
+@@ -1451,6 +1462,11 @@
                            specify an alternate name of the framework built
                            with --enable-framework
    --without-gcc           never use gcc
    --with-cxx-main=<compiler>
                            compile main() and link python executable with C++
                            compiler
-@@ -1465,6 +1481,7 @@
+@@ -1483,6 +1499,7 @@
                            default on supported compilers)
  
  Some influential environment variables:
    CC          C compiler command
    CFLAGS      C compiler flags
    LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
-@@ -3459,6 +3476,239 @@
+@@ -3487,6 +3504,239 @@
  (it is also a good idea to do 'make clean' before compiling)" "$LINENO" 5
  fi
  
  # Don't let AC_PROG_CC set the default CFLAGS. It normally sets -g -O2
  # when the compiler supports them, but we don't always want -O2, and
  # we set -g later.
-@@ -4339,14 +4589,26 @@
+@@ -4366,14 +4616,26 @@
  	esac
  else
  
  preset_cxx="$CXX"
  if test -z "$CXX"
  then
-@@ -5417,6 +5679,12 @@
+@@ -5444,6 +5706,12 @@
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GNULD" >&5
  $as_echo "$GNULD" >&6; }
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5
  $as_echo_n "checking for inline... " >&6; }
  if ${ac_cv_c_inline+:} false; then :
-@@ -6171,6 +6439,7 @@
+@@ -6198,6 +6466,7 @@
  # tweak OPT based on compiler and platform, only if the user didn't set
  # it on the command line
  
  if test "${OPT-unset}" = "unset"
  then
      case $GCC in
-@@ -6195,22 +6464,28 @@
+@@ -6222,22 +6491,28 @@
  		# Optimization messes up debuggers, so turn it off for
  		# debug builds.
  		OPT="-g -O0 -Wall $STRICT_PROTO"
  	;;
      esac
  fi
-@@ -14919,7 +15194,7 @@
+@@ -14998,7 +15273,7 @@
  done
  
  
 diff --git a/configure.ac b/configure.ac
 --- a/configure.ac
 +++ b/configure.ac
-@@ -574,6 +574,75 @@
+@@ -576,6 +576,75 @@
  (it is also a good idea to do 'make clean' before compiling)])
  fi
  
  # Don't let AC_PROG_CC set the default CFLAGS. It normally sets -g -O2
  # when the compiler supports them, but we don't always want -O2, and
  # we set -g later.
-@@ -659,11 +728,23 @@
+@@ -661,11 +730,23 @@
  			CXX=$withval
  		fi;;
  	esac], [
  preset_cxx="$CXX"
  if test -z "$CXX"
  then
-@@ -849,6 +930,12 @@
+@@ -851,6 +932,12 @@
  esac
  AC_MSG_RESULT($GNULD)
  
  AC_C_INLINE
  if test "$ac_cv_c_inline" != no ; then
          AC_DEFINE(USE_INLINE, 1, [Define to use the C99 inline keyword.])
-@@ -1065,6 +1152,7 @@
+@@ -1067,6 +1154,7 @@
  # tweak OPT based on compiler and platform, only if the user didn't set
  # it on the command line
  AC_SUBST(OPT)
  if test "${OPT-unset}" = "unset"
  then
      case $GCC in
-@@ -1089,22 +1177,28 @@
+@@ -1091,22 +1179,28 @@
  		# Optimization messes up debuggers, so turn it off for
  		# debug builds.
  		OPT="-g -O0 -Wall $STRICT_PROTO"
  	;;
      esac
  fi
-@@ -4520,7 +4614,7 @@
+@@ -4552,7 +4646,7 @@
  done
  
  AC_SUBST(SRCDIRS)
 diff --git a/pyconfig.h.in b/pyconfig.h.in
 --- a/pyconfig.h.in
 +++ b/pyconfig.h.in
-@@ -1299,6 +1299,9 @@
+@@ -1311,6 +1311,9 @@
  /* Define to 1 if libintl is needed for locale functions. */
  #undef WITH_LIBINTL
  
 diff --git a/setup.py b/setup.py
 --- a/setup.py
 +++ b/setup.py
-@@ -736,6 +736,8 @@
+@@ -739,6 +739,8 @@
          # CSV files
          exts.append( Extension('_csv', ['_csv.c']) )