Commits

Maciej Fijalkowski committed a4d78ab

copy pyconza talk

  • Participants
  • Parent commits 99911d2

Comments (0)

Files changed (20)

talk/rupy2012/pyconza2012/Makefile

+
+# Build the slides: rst2beamer.py turns talk.rst into talk.latex, the two sed
+# passes replace the empty \date{} and \maketitle with \input of author.latex
+# and title.latex, and pdflatex then processes talk.latex.
+talk.pdf: talk.rst author.latex title.latex stylesheet.latex
+	rst2beamer.py --stylesheet=stylesheet.latex --documentoptions=14pt --output-encoding=utf-8 talk.rst talk.latex || exit
+	sed 's/\\date{}/\\input{author.latex}/' -i talk.latex || exit
+	sed 's/\\maketitle/\\input{title.latex}/' -i talk.latex || exit
+	pdflatex talk.latex  || exit
+
+# Open talk.pdf in evince, started in the background.
+view: talk.pdf
+	evince talk.pdf &
+
+# Open talk.pdf in xpdf, started in the background.
+xpdf: talk.pdf
+	xpdf talk.pdf &

talk/rupy2012/pyconza2012/author.latex

+\definecolor{rrblitbackground}{rgb}{0.0, 0.0, 0.0}
+
+\title[Python performance characteristics]{Python performance characteristics}
+\author[Maciej Fijałkowski]
+{Maciej Fijałkowski}
+
+\institute{PyCon 2012}
+\date{October 5, 2012}

talk/rupy2012/pyconza2012/beamerdefs.txt

+.. colors
+.. ===========================
+
+.. role:: green
+.. role:: red
+
+
+.. general useful commands
+.. ===========================
+
+.. |pause| raw:: latex
+
+   \pause
+
+.. |small| raw:: latex
+
+   {\small
+
+.. |end_small| raw:: latex
+
+   }
+
+.. |scriptsize| raw:: latex
+
+   {\scriptsize
+
+.. |end_scriptsize| raw:: latex
+
+   }
+
+.. |strike<| raw:: latex
+
+   \sout{
+
+.. closed bracket
+.. ===========================
+
+.. |>| raw:: latex
+
+   }
+
+
+.. example block
+.. ===========================
+
+.. |example<| raw:: latex
+
+   \begin{exampleblock}{
+
+
+.. |end_example| raw:: latex
+
+   \end{exampleblock}
+
+
+
+.. alert block
+.. ===========================
+
+.. |alert<| raw:: latex
+
+   \begin{alertblock}{
+
+
+.. |end_alert| raw:: latex
+
+   \end{alertblock}
+
+
+
+.. columns
+.. ===========================
+
+.. |column1| raw:: latex
+
+   \begin{columns}
+      \begin{column}{0.45\textwidth}
+
+.. |column2| raw:: latex
+
+      \end{column}
+      \begin{column}{0.45\textwidth}
+
+
+.. |end_columns| raw:: latex
+
+      \end{column}
+   \end{columns}
+
+
+
+.. |snake| image:: ../img/py-web-new.png
+           :scale: 15%
+           
+
+
+.. nested blocks
+.. ===========================
+
+.. |nested| raw:: latex
+
+   \begin{columns}
+      \begin{column}{0.85\textwidth}
+
+.. |end_nested| raw:: latex
+
+      \end{column}
+   \end{columns}

talk/rupy2012/pyconza2012/examples/alloc.py

+
+import sys, time
+
+def f():
+    # Builds int(sys.argv[1]) one-element tuples, storing each one in the
+    # same list slot, so every new tuple replaces the previous one.
+    l = [None]
+    for i in xrange(int(sys.argv[1])):
+        l[0] = (i,)
+
+def g():
+    # Builds int(sys.argv[1]) one-element tuples and keeps every one of them
+    # referenced from a list preallocated to that length.
+    m = int(sys.argv[1])
+    l = [None] * m
+    for i in xrange(m):
+        l[i] = (i,)
+
+# Time f() and then g() with wall-clock time.time() and print both durations:
+# "long living" is the time spent in g(), "short living" the time spent in f().
+t0 = time.time()
+f()
+t1 = time.time()
+g()
+t2 = time.time()
+print "long living", t2 - t1, "short living", t1 - t0

talk/rupy2012/pyconza2012/examples/calls.py

+
+import sys, time
+
+def inner(a, b, c):
+    # Empty three-argument callee; the body is intentionally a no-op.
+    pass
+
+def simple_call(a, b, c):
+    # Plain positional call of inner().
+    inner(a, b, c)
+
+def simple_call2(a, b, c):
+    # Calls inner() with b and c passed by keyword, in swapped order.
+    inner(a, c=c, b=b)
+
+def simple_method(a, b, c):
+    # Method call: c must provide m(a, b); run() passes an A instance as c.
+    c.m(a, b)
+
+def star_call(a, b, c):
+    # Calls inner() by *-unpacking a tuple built at the call site.
+    inner(*(a, b, c))
+
+def star_call_complex(a, b, c):
+    # Calls inner() combining *-unpacking of a tuple with **-unpacking of a
+    # dict literal that supplies c.
+    inner(*(a, b), **{'c': c})
+
+def abomination(a, b, c):
+    # Passes this function's own locals as keyword arguments; that works
+    # because the only locals at this point are a, b and c.
+    inner(**locals())
+
+class A(object):
+    # Minimal class whose method m() does nothing; simple_method() calls it.
+    def m(self, a, b):
+        pass
+
+def run(func):
+    # Calls func(i, i, o) int(sys.argv[1]) times with one shared A instance
+    # and prints the function name plus the mean wall-clock time per call.
+    count = int(sys.argv[1])
+    t0 = time.time()
+    # The A() construction happens after t0, so it is included in the timing.
+    o = A()
+    for i in xrange(count):
+        func(i, i, o)
+    tk = time.time()
+    t = (tk - t0) / count
+    # NOTE(review): 1.7e9 looks like an assumed 1.7 GHz clock rate used to
+    # convert seconds into cycles -- confirm for the machine being measured.
+    print "%s %.2e per call, %d cycles" % (func.func_name, t, int(t * 1.7e9))
+
+# Run the timing harness once for each call style, in this order.
+for f in [simple_call, simple_call2, simple_method, star_call, star_call_complex, abomination]:
+    run(f)
+

talk/rupy2012/pyconza2012/examples/datastructure.py

+
+class View(object):
+    def __init__(self, arr, start, stop):
+        self.arr = arr
+        self.start = start
+        self.stop = stop
+
+    def __getitem__(self, item):
+        if not isinstance(item, int):
+            return NotImplemented
+        if self.start + item <= self.stop:
+            raise IndexError
+        return self.arr[self.start + item]
+
+class Wrapper(object):
+    def __init__(self, arr):
+        self.arr = arr
+
+    def __getitem__(self, item):
+        if isinstance(item, int):
+            return self.arr[item]
+        elif isinstance(item, slice):
+            if item.step != 1 or item.start < 0 or item.stop < 0:
+                raise TypeError("step not implemented")
+            return View(self.arr, item.start, item.stop)
+        return NotImplemented

talk/rupy2012/pyconza2012/examples/interpreter.py

+
+# Opcode numbers 0..6 for the toy interpreter below.
+(LOAD_FAST, LOAD_CONST, COMPARE_OP, POP_JUMP_IF_FALSE,
+ ADD, STORE_FAST, JUMP_ABSOLUTE) = range(7)
+
+# has_arg[opcode] is True when interpret() reads the following byte as the
+# opcode's argument.
+has_arg = [True, True, False, True, False, True, True]
+
+class BaseObject(object):
+    # Base class for the interpreter's objects: add() defers to the right
+    # operand's radd(), and the default radd() rejects the operation.
+    def add(left, right):
+        # ``left`` is the instance itself (it takes the place of ``self``).
+        # try right
+        return right.radd(left)
+
+    def radd(self, left):
+        raise TypeError
+
+class Long(BaseObject):
+    # Placeholder subclass: adds nothing to BaseObject, not even a
+    # constructor that accepts a value.
+    pass
+
+class Integer(BaseObject):
+    # Boxed integer; the wrapped value is stored in ``intval``.
+    def __init__(self, v):
+        self.intval = v
+    
+    def add(self, right):
+        # Integer + Integer is computed directly; any other right operand
+        # is given the chance to handle it through its radd().
+        if isinstance(right, Integer):
+            try:
+                return Integer(self.intval + right.intval)
+            except OverflowError:
+                # NOTE(review): Long defines no __init__ here, so Long(value)
+                # would fail as written -- this overflow path looks
+                # illustrative only; confirm before relying on it.
+                return Long(self.intval).add(Long(right.intval))
+        else:
+            return right.radd(self)
+
+def interpret(bytecode, variables, constants):
+    # Toy stack-based bytecode loop.  ``bytecode`` is read one character at
+    # a time and ord() of each character is the opcode or its argument;
+    # ``variables`` and ``constants`` are indexed by the opcode argument.
+    # NOTE(review): there is no return or halt opcode, so the loop only
+    # stops when an exception is raised (e.g. indexing past the bytecode).
+    stack = []
+    pos = 0
+    arg0 = None
+    while True:
+        b = ord(bytecode[pos])
+        # Opcodes flagged in has_arg consume the next byte as arg0.
+        if has_arg[b]:
+            pos += 1
+            arg0 = ord(bytecode[pos])
+        if b == LOAD_FAST:
+            stack.append(variables[arg0])
+        elif b == LOAD_CONST:
+            stack.append(constants[arg0])
+        elif b == COMPARE_OP:
+            right = stack.pop()
+            left = stack.pop()
+            # NOTE(review): none of the classes shown here define compare();
+            # the operand type must supply it.
+            stack.append(left.compare(right))
+        elif b == ADD:
+            right = stack.pop()
+            left = stack.pop()
+            stack.append(left.add(right))
+        elif b == POP_JUMP_IF_FALSE:
+            val = stack.pop()
+            # NOTE(review): is_true() is likewise not defined by the classes
+            # shown here.
+            if not val.is_true():
+                # arg0 is an absolute position, so skip the pos += 1 below.
+                pos = arg0
+                continue
+        elif b == STORE_FAST:
+            variables[arg0] = stack.pop()
+        elif b == JUMP_ABSOLUTE:
+            # arg0 is an absolute position, so skip the pos += 1 below.
+            pos = arg0
+            continue
+        pos += 1
+
+
+def f(a, b):
+    # Example function whose body is a single addition.
+    return a + b
+
+# The statements below repeat the LOAD_FAST, LOAD_FAST and ADD bodies from
+# interpret().  NOTE(review): stack, variables and arg0 are locals of
+# interpret(), so at module level these lines would raise NameError -- they
+# appear to be an illustration rather than code meant to run; confirm.
+stack.append(variables[arg0])
+stack.append(variables[arg0])
+right = stack.pop()
+left = stack.pop()
+stack.append(left.add(right))

talk/rupy2012/pyconza2012/examples/jit01.py

+
+def f():
+    # Counts from 0 up to 1000000 in a plain while loop and returns the
+    # final counter value.
+    i = 0
+    while i < 1000000:
+        i = i + 1
+    return i
+
+# Run the loop once when executed as a script; the result is discarded.
+if __name__ == '__main__':
+    f()
+

talk/rupy2012/pyconza2012/stm-talk/Makefile

+# you can find rst2beamer.py here:
+# http://codespeak.net/svn/user/antocuni/bin/rst2beamer.py
+
+# WARNING: to work, it needs this patch for docutils
+# https://sourceforge.net/tracker/?func=detail&atid=422032&aid=1459707&group_id=38414
+
+# Build the slides: rst2beamer.py turns talk.rst into talk.latex, the two sed
+# passes replace the empty \date{} and \maketitle with \input of author.latex
+# and title.latex, and pdflatex then processes talk.latex.
+talk.pdf: talk.rst author.latex title.latex stylesheet.latex
+	rst2beamer.py --stylesheet=stylesheet.latex --documentoptions=14pt talk.rst talk.latex || exit
+	sed 's/\\date{}/\\input{author.latex}/' -i talk.latex || exit
+	sed 's/\\maketitle/\\input{title.latex}/' -i talk.latex || exit
+	pdflatex talk.latex  || exit
+
+# Open talk.pdf in evince, started in the background.
+view: talk.pdf
+	evince talk.pdf &
+
+# Open talk.pdf in xpdf, started in the background.
+xpdf: talk.pdf
+	xpdf talk.pdf &

talk/rupy2012/pyconza2012/stm-talk/author.latex

+\definecolor{rrblitbackground}{rgb}{0.0, 0.0, 0.0}
+
+\title[PyPy in Production]{PyPy}
+\author[Armin Rigo]
+{Armin Rigo}
+
+\institute{PyCon ZA 2012}
+\date{October 4, 2012}

talk/rupy2012/pyconza2012/stm-talk/beamerdefs.txt

+.. colors
+.. ===========================
+
+.. role:: green
+.. role:: red
+
+
+.. general useful commands
+.. ===========================
+
+.. |pause| raw:: latex
+
+   \pause
+
+.. |small| raw:: latex
+
+   {\small
+
+.. |end_small| raw:: latex
+
+   }
+
+.. |scriptsize| raw:: latex
+
+   {\scriptsize
+
+.. |end_scriptsize| raw:: latex
+
+   }
+
+.. |strike<| raw:: latex
+
+   \sout{
+
+.. closed bracket
+.. ===========================
+
+.. |>| raw:: latex
+
+   }
+
+
+.. example block
+.. ===========================
+
+.. |example<| raw:: latex
+
+   \begin{exampleblock}{
+
+
+.. |end_example| raw:: latex
+
+   \end{exampleblock}
+
+
+
+.. alert block
+.. ===========================
+
+.. |alert<| raw:: latex
+
+   \begin{alertblock}{
+
+
+.. |end_alert| raw:: latex
+
+   \end{alertblock}
+
+
+
+.. columns
+.. ===========================
+
+.. |column1| raw:: latex
+
+   \begin{columns}
+      \begin{column}{0.45\textwidth}
+
+.. |column2| raw:: latex
+
+      \end{column}
+      \begin{column}{0.45\textwidth}
+
+
+.. |end_columns| raw:: latex
+
+      \end{column}
+   \end{columns}
+
+
+
+.. |snake| image:: ../../img/py-web-new.png
+           :scale: 15%
+           
+
+
+.. nested blocks
+.. ===========================
+
+.. |nested| raw:: latex
+
+   \begin{columns}
+      \begin{column}{0.85\textwidth}
+
+.. |end_nested| raw:: latex
+
+      \end{column}
+   \end{columns}

talk/rupy2012/pyconza2012/stm-talk/demo1.py

+
+class Number(object):
+   # Boxed number: wraps ``num`` and returns a new Number from each operator.
+
+   def __init__(self, num):
+       self.num = num
+
+   def __add__(self, other):
+       # ``other`` must expose a ``num`` attribute; the operands are not modified.
+       return Number(self.num + other.num)
+
+   def __invert__(self):
+       # Applies ``~`` to the wrapped value.
+       return Number(~self.num)
+
+def foo(n):
+    # For each i in range(n), adds Number(i) and then ~Number(i) to a running
+    # total, and returns the total's wrapped value.
+    total = Number(0)
+    for i in range(n):
+        # Number defines no __iadd__, so += rebinds total to a new Number
+        # produced by __add__.
+        total += Number(i)
+        total += ~ Number(i)
+    return total.num
+

talk/rupy2012/pyconza2012/stm-talk/standards.png

Added
New image

talk/rupy2012/pyconza2012/stm-talk/stylesheet.latex

+\usepackage{ulem}
+\usetheme{Boadilla}
+\usecolortheme{whale}
+\setbeamercovered{transparent}
+\setbeamertemplate{navigation symbols}{}
+
+\definecolor{darkgreen}{rgb}{0, 0.5, 0.0}
+\newcommand{\docutilsrolegreen}[1]{\color{darkgreen}#1\normalcolor}
+\newcommand{\docutilsrolered}[1]{\color{red}#1\normalcolor}
+
+\newcommand{\green}[1]{\color{darkgreen}#1\normalcolor}
+\newcommand{\red}[1]{\color{red}#1\normalcolor}

talk/rupy2012/pyconza2012/stm-talk/talk.rst

+.. include:: beamerdefs.txt
+
+============================================================
+PyPy
+============================================================
+
+
+PyPy is...
+--------------------------
+
+* Another Python interpreter
+
+* with a JIT compiler
+
+
+PyPy was...
+-------------------
+
+* Around since 2003
+
+* (advertised as) production ready since December 2010
+
+  - release 1.4
+
+* Funding
+
+  - EU FP6 programme
+
+  - Eurostars programme
+
+  - donations
+
+  - ...
+
+
+PyPy 1.9: current status
+------------------------
+
+* Faster
+
+  - **1.7x** than 1.5 (Summer 2011)
+
+  - **2.2x** than 1.4 (December 2010)
+
+  - **5.5x** than CPython
+
+* Implements Python 2.7.3
+
+* Many more "PyPy-friendly" programs than before
+
+* Packaging
+
+  - |scriptsize| Debian, Ubuntu, Fedora, Homebrew, Gentoo, ArchLinux, ... |end_scriptsize|
+
+  - |scriptsize| Windows (32bit only), OS X |end_scriptsize|
+
+* C extension compatibility
+
+  - runs (big part of) **PyOpenSSL** and **lxml**
+
+
+PyPy organization
+-----------------
+
+* Part of SFC -- Software Freedom Conservancy
+
+  - Bradley successfully fighting U.S. bureaucracy
+
+  - we are happy about it
+
+
+* Funding model
+
+  - py3k, numpy, STM
+
+  - more than 100'000$ in donations
+
+  - from individuals, large companies and the PSF
+
+
+PyPy's JIT compiler
+-------------------
+
+* Removes abstraction
+
+* Almost never gives up
+
+* x86-32, x86-64, ARMv7, (POWER64)
+
+* (Works with other languages)
+
+
+Real world applications
+-----------------------
+
+* Positive feedback
+
+* http://speed.pypy.org/
+
+
+
+py3k
+------------------------
+
+* ``py3k`` branch in mercurial
+
+  - developed in parallel
+
+  - Python 3 written in Python 2
+
+* Focus on correctness
+
+* Dropped some interpreter optimizations for now
+
+* First 90% done, remaining 90% not done
+
+* Majority of the funds by Google
+
+
+NumPy
+-----
+
+* progress going slowly
+
+* multi dimensional arrays, broadcasting, fancy indexing
+
+* all dtypes, except complex, strings and objects
+
+* good results for performance
+
+
+STM
+---------------------------
+
+* Software Transactional Memory
+
+* "Remove the GIL"
+
+* But also, new models (better than threads)
+
+
+
+Calling C
+---------
+
+.. image:: standards.png
+   :scale: 60%
+   :align: center
+
+Calling C landscape
+-------------------
+
+* CPython C extensions
+
+* SWIG, SIP, wrapper generators
+
+* ctypes
+
+* Cython
+
+* CFFI (our new thing)
+
+CFFI
+----------
+
+|scriptsize|
+|example<| Example |>|
+
+  .. sourcecode:: pycon
+
+   >>> from cffi import FFI
+   >>> ffi = FFI()
+   >>> ffi.cdef("""
+   ...     int printf(const char *format, ...);
+   ... """)
+   >>> C = ffi.dlopen(None)
+   >>> arg = ffi.new("char[]", "world")
+   >>> C.printf("hi there, %s!\n", arg)
+   hi there, world!
+
+|end_example|
+|end_scriptsize|
+
+CFFI
+----
+
+* Many more examples
+
+* Including macro calls and most subtleties of C
+
+* http://cffi.readthedocs.org
+
+
+STM
+---
+
+
+Conclusion
+----------
+
+* Try out PyPy on real code
+
+* http://pypy.org/
+
+* Thank you!

talk/rupy2012/pyconza2012/stm-talk/title.latex

+\begin{titlepage}
+\begin{figure}[h]
+\includegraphics[width=60px]{../../img/py-web-new.png}
+\end{figure}
+\end{titlepage}

talk/rupy2012/pyconza2012/stylesheet.latex

+\usepackage{ulem}
+\usetheme{Boadilla}
+\usecolortheme{whale}
+\setbeamercovered{transparent}
+\setbeamertemplate{navigation symbols}{}
+
+\definecolor{darkgreen}{rgb}{0, 0.5, 0.0}
+\newcommand{\docutilsrolegreen}[1]{\color{darkgreen}#1\normalcolor}
+\newcommand{\docutilsrolered}[1]{\color{red}#1\normalcolor}
+
+\newcommand{\green}[1]{\color{darkgreen}#1\normalcolor}
+\newcommand{\red}[1]{\color{red}#1\normalcolor}

talk/rupy2012/pyconza2012/talk.pdf

Binary file added.

talk/rupy2012/pyconza2012/talk.rst

+.. include:: beamerdefs.txt
+
+==================================
+Python performance characteristics
+==================================
+
+Who am I?
+---------
+
+* Maciej Fijałkowski (yes this is unicode)
+
+* PyPy core developer for I don't remember
+
+* performance freak
+
+What this talk is about?
+------------------------
+
+* python performance (or lack of it)
+
+* why does it matter
+
+* what can we do about it
+
+|pause|
+
+* how Python implementations work
+
+How does CPython work?
+----------------------
+
+* simple bytecode interpreter
+
+* each bytecode looks up in a list, executes
+
+* most operations have dynamic dispatch on types
+
+CPython performance landscape
+-----------------------------
+
+* each operation has a cost
+
+* C is cheaper than Python
+
+* use ``map`` vs iterations etc.
+
+Python performance message
+---------------------------
+
+* according to Guido
+
+* "Avoid overengineering datastructures. Tuples are better than objects (try namedtuple too though). Prefer simple fields over getter/setter functions."
+
+* "Built-in datatypes are your friends. Use more numbers, strings, tuples, lists, sets, dicts. Also check out the collections library, esp. deque."
+
+* "Be suspicious of function/method calls; creating a stack frame is expensive."
+
+* "The universal speed-up is rewriting small bits of code in C. Do this only when all else fails."
+
+What does it mean?
+------------------
+
+* don't use abstractions
+
+|pause|
+
+* don't use Python
+
+But also
+--------
+
+* measure!
+
+* there are so many variables, you cannot care without benchmarks
+
+|pause|
+
+* if you have no benchmarks, you don't care
+
+This is not how I want to write software
+----------------------------------------
+
+* I like my abstractions
+
+* I like Python
+
+* I don't want to rewrite stuff for performance
+
+|pause|
+
+* in C/C++
+
+Second best
+-----------
+
+* keep my abstractions
+
+* do arcane voodoo to keep my programs fast
+
+* but you have to understand the voodo in the first place
+
+But Python performance!
+-----------------------
+
+* there is no such thing as language performance
+
+* there is implementation performance
+
+* the language might be easier or harder to optimize
+
+* CPython performance characteristics is relatively straightforward
+
+What is PyPy?
+-------------
+
+* PyPy is a Python interpreter (that's what we care about)
+
+* PyPy is a toolchain for creating dynamic language implementations
+
+* also, an Open Source project that has been around for a while
+
+Compilers vs interpreters
+-------------------------
+
+* compilers compile language X (C, Python) to a lower level language
+  (C, assembler) ahead of time
+
+* interpreters compile language X to bytecode and have a big interpreter
+  loop
+
+|pause|
+
+* PyPy has a hybrid approach. It's an interpreter, but hot paths are
+  compiled directly to assembler during runtime
+
+What is just in time (JIT) compilation?
+---------------------------------------
+
+* few different flavors
+
+* observe runtime values
+
+* compile code with agressive optimizations
+
+* have checks if assumptions still stand
+
+So what PyPy does?
+------------------
+
+* interprets a Python program
+
+* the JIT observes python **interpreter**
+
+* producing code through the path followed by the interpreter
+
+* compiles loops and functions
+
+Some properties
+---------------
+
+* the code speed **changes** over time
+
+* hopefully from slow to fast
+
+* you need to warm up things before they get fast
+
+Some example
+------------
+
+* integer addition!
+
+Abstractions
+------------
+
+* inlining, malloc removal
+
+* abstractions are cheap
+
+|pause|
+
+* if they don't introduce too much complexity
+
+Questions?
+----------
+
+* Thank you!
+
+* http://pypy.org
+
+* http://baroquesoftware.com
+
+Few words about garbage collection
+----------------------------------
+
+* ``CPython``: refcounting + cyclic collector
+
+* ``PyPy``: generational mark & sweep
+
+|pause|
+
+* errr....
+
+The rest
+--------
+
+* I'll explain various PyPy strategies
+
+* ideally all this knowledge will be unnecessary
+
+* this is the second best, how to please the JIT compiler
+
+Allocations (PyPy)
+------------------
+
+* allocation is expensive
+
+* for a good GC, short living objects don't matter
+
+* it's better to have a small persistent structure and abstraction
+  on allocation
+
+|pause|
+
+* copying however is expensive
+
+* we have hacks for strings, but they're not complete
+
+Calls
+-----
+
+* Python calls are an incredible mess
+
+* simple is better than complex
+
+* simple call comes with no cost, the cost grows with growing complexity
+
+Attribute access
+----------------
+
+* if optimized, almost as fast as local var access
+
+* ``dict`` lookup optimized away
+
+* class attributes considered constant
+
+* meta programming is better than dynamism
+
+* objects for small number of constant keys, dicts for large
+  numbers of changing keys
+
+Other sorts of loops
+--------------------
+
+* there is more!
+
+* ``tuple(iterable)``, ``map(iterable)``, ``re.search``
+
+* they're all jitted
+
+* not all nicely
+
+Summary
+-------
+
+* we hope this knowledge will not be needed
+
+* the more you care, the better you need to know

talk/rupy2012/pyconza2012/title.latex

+\begin{titlepage}
+\begin{figure}[h]
+\includegraphics[width=60px]{../img/py-web-new.png}
+\end{figure}
+\end{titlepage}