1. Pypy
  2. Untitled project
  3. extradoc

Source

extradoc / talk / rupy2009 / talk.tex

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
\documentclass[utf8x]{beamer}

\mode<presentation>
{
  \usetheme{Warsaw}

  %\setbeamercovered{transparent}
}

\usepackage[english]{babel}

\usepackage[utf8x]{inputenc}

\usepackage{times}
\usepackage[T1]{fontenc}
\usepackage{color}

\title{The speed of PyPy}

\author{Maciej Fijałkowski}

\institute[merlinux GmbH]
{ merlinux GmbH }

\date{RuPy, November 7th 2009, Poznań}

\begin{document}

\begin{frame}
  \titlepage
\end{frame}

\begin{frame}

  \frametitle{Story about Python's speed}
  \begin{figure}
    \includegraphics[width=.8\textwidth]{img1.jpg}
  \end{figure}

\end{frame}

\begin{frame}
  \frametitle{Speed of Python}
  \begin{itemize}
     \item Python is slow
  \end{itemize}
  \pause
  \begin{figure}
    \includegraphics[width=.6\textwidth]{peasant_and_birdnester-400.jpg}
  \end{figure}
  
\end{frame}

\begin{frame}
  \frametitle{Speed of Python}
  \begin{itemize}
    \item Is Python really slow?
      \pause
    \item Sometimes, for some use cases
      \pause
    \item Let's have a look at some examples
  \end{itemize}

\end{frame}

\begin{frame}
  \frametitle{Nomenclature}
  \begin{itemize}
    \item Python - a programming language
    \item CPython - main implementation of Python
    \item JVM - Java Virtual Machine - VM used to run Java, among others
    \item JIT - Just-in-time compiler
    \item Psyco - JIT for Python
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Example run}
  \begin{itemize}
    \item Float example, stolen from factor blog
  \end{itemize}
  \vspace{.5cm}
  \begin{tabular}{| l | c | r |}
    \hline
    & CPython & Java (hotspot client mode) \\
    Average of 10 runs: & 7.6s & 0.77s \\
    \hline
  \end{tabular}
  \vspace{.5cm}
  \pause
  \begin{itemize}
    \item Python is 10x slower than Java
      \pause
    \item Python is 10x slower than Java on this particular benchmark
      \pause
    \item CPython is 10x slower than Java on this particular benchmark
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{More about this example}
  \begin{tabular}{| l | c | c | c | c |}
    \hline
    & CPython & JVM & Psyco & PyPy \\
    Average of 10 runs & 7.6s & 0.77s & 4.4s & 1.3s \\
    \hline
  \end{tabular}
  \vspace{.5cm}
  \pause
  \begin{itemize}
    \item So, it's CPython that is slow on this particular benchmark
      \pause
    \item Same example, using numpy and vectorization: about 3x faster than JVM
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Python's speed}
  \begin{itemize}
    \item Instead of: ``Why is Python slow?''
      \pause
    \item Better: ``Why is Python hard to optimize?''
      \pause
    \item Even better: ``How are we going to fix it?''
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Some evidence}
  \begin{figure}
    \includegraphics[width=1.0\textwidth]{time.png}
  \end{figure}
\end{frame}

\begin{frame}
  \frametitle{Why is Python hard to optimize?}
  \begin{itemize}
    \item Duck typing (dynamic dispatch)
    \item Frames
    \item Object encapsulation
    \item Dictionaries of instances
    \item Changing globals
    \item Ability to dynamically change builtins
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Duck typing}
  \begin{itemize}
    \item Dispatching over item type
    \item {\tt z = x + y}
    \item Needs to check what the type of {\tt x} and {\tt y} is
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Frames}
  \begin{itemize}
    \item Python interpreters use frames on heap (instead of stack)
    \item Locals are stored on those frames
    \item Intermediate results are stored on valuestack of frames
    \item In fact, you can access frames via {\tt sys.\_getframe()} or
      {\tt traceback}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Object encapsulation}
  \begin{itemize}
    \item Also called boxing
    \item Each object, even {\tt int}, has to be boxed
    \item Requires allocations and indirection
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Example - addition}
  \begin{itemize}
    \item {\tt z = x + y}
    \item read value for {\tt x} from frame, store on valuestack
    \item read value for {\tt y} from frame, store on valuestack
    \item allocate new integer
    \item read two values from valuestack, add and store the result in
      freshly allocated integer
    \item move the result from valuestack to locals
      \pause
    \item {\color{red} in fact, should be one assembler instruction}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Dictionaries of instances}
  \begin{itemize}
    \item Need to perform a dictionary lookup for {\tt x.y}
    \item There are {\bf three} lookups per method call
      (descriptor, object, type)
    \item {\bf Two} for attribute access
      (descriptor, object)
    \item Looks like list lookup should be enough
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Changing globals}
  \begin{itemize}
    \item Global symbols can change at any moment in time
    \item Makes inlining hard
    \item Requires global dict lookups, even for constants
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Ability to dynamically change builtins}
  \begin{itemize}
    \item You can say {\tt int = my\_function}
    \item But you can't {\tt int.\_\_add\_\_ = my\_method}
    \item Still messes up optimizations
    \item Global lookup is also a dictionary lookup, even if globals
      don't change
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Interpreting vs compiling}
  \begin{itemize}
    \item Apparently, processors are good at branch prediction
    \item We didn't measure much of a difference, less than 2x overall
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{CPython specific problems}
  \begin{itemize}
    \item In general, CPython is fairly well optimized
    \item refcounting is an inefficient garbage collection scheme
    \item GIL
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Dynamic compilation to the rescue}
  \begin{itemize}
    \item You don't pay for a feature until you actually use it
    \item In static compilation, compiler has to prove that bad
      things can't happen
    \item Impossible in Python
    \item With dynamic compilation, you just throw away compiled
      code in case things go wrong or start over
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Dealing with frames}
  \begin{itemize}
    \item Allocate frame
      \pause
    \item Use C stack, but remember where frame fields are
      living on the stack
      \pause
    \item Be able to reconstruct frame on demand (for example
      {\tt sys.\_getframe()} was called)
      \pause
    \item The effect is that you don't pay for frames, unless
      you really use them
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Dealing with dynamic dispatch}
  \begin{itemize}
    \item The answer is to simply specialize over types
    \item Provides possibly multiple versions of compiled code for
      single Python code
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Dealing with object encapsulation}
  \begin{itemize}
    \item ``Virtual objects''
    \item Also known as escape analysis
    \item If object does not ``escape'', don't allocate it at all
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{How does it work in practice?}
  \begin{itemize}
    \pause
    \item Pretty well
      \pause
    \item A reasonable speedup over CPython
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Dealing with attribute access}
  \begin{itemize}
    \item Fairly complex task
    \item Sharing dict, more or less the same effect as V8's hidden
      classes
      \pause
    \item Python is a very complex language
    \item Shadowing methods with attributes
    \item Descriptors before attributes
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Other important optimizations}
  \begin{itemize}
    \item Caching globals
    \item Caching builtins
    \item A lot of smaller ones
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Bird's-eye view of JIT}
  \begin{itemize}
    \item Mixed mode - interpreter \& JIT for hot paths
    \item Tracing JIT (like TraceMonkey), not up-front
    % \item  % TODO: empty bullet left unfinished; commented out so no stray bullet is rendered
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{CPython vs PyPy}
  \begin{itemize}
    \item I use CPython for everyday usage
      \pause
    \item But personally, I hope to change it in the next months
      \pause
    \item ... in places where performance matters, but that don't
      depend on third party C modules (like numpy)
      \pause
    \item ... like building and developing PyPy
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Status of PyPy}
  \begin{itemize}
    \item Very compliant Python interpreter
    \item Most of the important stdlib modules
    \item Differences are agreed to be implementation details
      \pause
    \item {\color{red} or bugs}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Examples of working programs}
  \begin{itemize}
    \item Django (sqlite only)
    \item Twisted
    \item PyPy's translation toolchain
    \item ctypes
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Status of JIT}
  \begin{itemize}
    \item Because of the way it's constructed, handles all Python language
      features (unlike for example {\bf Psyco})
    \item Changes very quickly these days
    \item Ready for cautious tests
    \item Not ready as a drop-in replacement of CPython
      \pause
    \item {\color{green} yet!}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Adapt today!}
  \begin{itemize}
    \item A fact: People rely on deep obscure features of language
    \item Examples:
      \pause
      \begin{itemize}
        \item {\tt except ImportError, e: \\ \quad if str(e) != ...:
          raise}
          \pause
        \item Exact naming of list comprehension variable
          \pause
        \item Reliance on reference counting
      \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Profit tomorrow!}
  \begin{itemize}
    \item We plan to release a JIT-ready version sometime in early 2010
      \pause
    \item It should be able to speed up real-world programs
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{How you can help}
  \begin{itemize}
    \item It's all open source after all ...
    \item Try running existing programs
    \item Profile, report bugs
      \pause
    \item Talk to your boss
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Thank you!}
  \begin{itemize}
    \item This talk is already online (with all examples):
      \url{http://codespeak.net/svn/pypy/dist/extradoc/talk/rupy2009/talk.pdf}
    \item \url{http://morepypy.blogspot.com}
    \item \#pypy on freenode
    \item If you want to know more about PyPy, feel free to bug me
      around (like, how does the JIT work?)
    \item Any questions?
  \end{itemize}
\end{frame}

\end{document}