extradoc / talk / vmil2012 / zotero.bib

	address = {Salt Lake City, Utah},
	title = {Efficient implementation of the {Smalltalk-80} system},
	isbn = {0-89791-125-3},
	url = {},
	doi = {10.1145/800017.800542},
	abstract = {The Smalltalk-80* programming language includes dynamic storage allocation, full upward funargs, and universally polymorphic procedures; the Smalltalk-80 programming system features interactive execution with incremental compilation, and implementation portability. These features of modern programming systems are among the most difficult to implement efficiently, even individually. A new implementation of the Smalltalk-80 system, hosted on a small microprocessor-based computer, achieves high performance while retaining complete (object code) compatibility with existing implementations. This paper discusses the most significant optimization techniques developed over the course of the project, many of which are applicable to other languages. The key idea is to represent certain runtime state (both code and data) in more than one form, and to convert between forms when needed.},
	booktitle = {{POPL}},
	publisher = {{ACM}},
	author = {Deutsch, L. Peter and Schiffman, Allan M.},
	year = {1984}

	address = {Pittsburgh, Pennsylvania, {USA}},
	title = {Improving compiler-runtime separation with {XIR}},
	isbn = {978-1-60558-910-7},
	url = {},
	doi = {10.1145/1735997.1736005},
	abstract = {Intense research on virtual machines has highlighted the need for flexible software architectures that allow quick evaluation of new design and implementation techniques. The interface between the compiler and runtime system is a principal factor in the flexibility of both components and is critical to enabling rapid pursuit of new optimizations and features. Although many virtual machines have demonstrated modularity for many components, significant dependencies often remain between the compiler and the runtime system components such as the object model and memory management system. This paper addresses this challenge with a carefully designed strict compiler-runtime interface and the {XIR} language. Instead of the compiler backend lowering object operations to machine operations using hard-wired runtime-specific logic, {XIR} allows the runtime system to implement this logic, simultaneously simplifying and separating the backend from runtime-system details. In this paper we describe the design and implementation of this compiler-runtime interface and the {XIR} language in the {C1X} dynamic compiler, a port of the {HotSpotTM} Client compiler. Our results show a significant reduction in backend complexity with {XIR} and an overall reduction in the compiler-runtime interface complexity while still generating comparable quality code with only minor impact on compilation time.},
	booktitle = {Proceedings of the 6th {ACM} {SIGPLAN/SIGOPS} international conference on Virtual execution environments},
	publisher = {{ACM}},
	author = {Titzer, Ben L. and Würthinger, Thomas and Simon, Doug and Cintra, Marcelo},
	year = {2010},
	keywords = {compilers, intermediate representations, Java, jit, lowering, object model, register allocation, runtime interface, software architecture, virtual machines},
	pages = {39--50}

	address = {Wellington, New Zealand},
	title = {Evaluating the dynamic behaviour of {Python} applications},
	isbn = {978-1-920682-72-9},
	url = {},
	abstract = {The Python programming language is typical among dynamic languages in that programs written in it are not susceptible to static analysis. This makes efficient static program compilation difficult, as well as limiting the amount of early error detection that can be performed. Prior research in this area tends to make assumptions about the nature of programs written in Python, restricting the expressiveness of the language. One may question why programmers are drawn to these languages at all, if only to use them in a static-friendly style. In this paper we present our results after measuring the dynamic behaviour of 24 production-stage open source Python programs. The programs tested included arcade games, {GUI} applications and non-interactive batch programs. We found that while most dynamic activity occurs during program startup, dynamic activity after startup cannot be discounted entirely.},
	booktitle = {Proceedings of the Thirty-Second Australasian Conference on Computer Science - Volume 91},
	publisher = {Australian Computer Society, Inc.},
	author = {Holkner, Alex and Harland, James},
	year = {2009},
	keywords = {dynamic languages, python and compilers},
	pages = {19--28}

	address = {Hagenberg, Austria},
	title = {Towards a Jitting {VM} for Prolog execution},
	isbn = {978-1-4503-0132-9},
	url = {},
	doi = {10.1145/1836089.1836102},
	abstract = {Most Prolog implementations are implemented in low-level languages such as C and are based on a variation of the {WAM} instruction set, which enhances their performance but makes them hard to write. In addition, many of the more dynamic features of Prolog (like assert), despite their popularity, are not well supported. We present a high-level continuation-based Prolog interpreter based on the {PyPy} project. The {PyPy} project makes it possible to easily and efficiently implement dynamic languages. It provides tools that automatically generate a just-in-time compiler for a given interpreter of the target language, by using partial evaluation techniques. The resulting Prolog implementation is surprisingly efficient: it clearly outperforms existing interpreters of Prolog in high-level languages such as Java. Moreover, on some benchmarks, our system outperforms state-of-the-art {WAM-based} Prolog implementations. Our paper aims to show that declarative languages such as Prolog can indeed benefit from having a just-in-time compiler and that {PyPy} can form the basis for implementing programming languages other than Python.},
	booktitle = {{PPDP}},
	publisher = {{ACM}},
	author = {Bolz, Carl Friedrich and Leuschel, Michael and Schneider, David},
	year = {2010},
	keywords = {interpreters, jit, logic programming, partial evaluation}

	address = {{Reno/Tahoe}, Nevada, {USA}},
	title = {{SPUR:} a trace-based {JIT} compiler for {CIL}},
	isbn = {978-1-4503-0203-6},
	shorttitle = {{SPUR}},
	url = {},
	doi = {10.1145/1869459.1869517},
	abstract = {Tracing just-in-time compilers {(TJITs)} determine frequently executed traces (hot paths and loops) in running programs and focus their optimization effort by emitting optimized machine code specialized to these traces. Prior work has established this strategy to be especially beneficial for dynamic languages such as {JavaScript}, where the {TJIT} interfaces with the interpreter and produces machine code from the {JavaScript} trace.},
	booktitle = {{OOPSLA}},
	publisher = {{ACM}},
	author = {Bebenita, Michael and Brandner, Florian and Fahndrich, Manuel and Logozzo, Francesco and Schulte, Wolfram and Tillmann, Nikolai and Venter, Herman},
	year = {2010},
	keywords = {cil, dynamic compilation, javascript, just-in-time, tracing}

	address = {Toronto, Ontario, Canada},
	title = {An analysis of the dynamic behavior of {JavaScript} programs},
	isbn = {978-1-4503-0019-3},
	url = {},
	doi = {10.1145/1806596.1806598},
	abstract = {The {JavaScript} programming language is widely used for web programming and, increasingly, for general purpose computing. As such, improving the correctness, security and performance of {JavaScript} applications has been the driving force for research in type systems, static analysis and compiler techniques for this language. Many of these techniques aim to reign in some of the most dynamic features of the language, yet little seems to be known about how programmers actually utilize the language or these features. In this paper we perform an empirical study of the dynamic behavior of a corpus of widely-used {JavaScript} programs, and analyze how and why the dynamic features are used. We report on the degree of dynamism that is exhibited by these {JavaScript} programs and compare that with assumptions commonly made in the literature and accepted industry benchmark suites.},
	booktitle = {Proceedings of the 2010 {ACM} {SIGPLAN} conference on Programming language design and implementation},
	publisher = {{ACM}},
	author = {Richards, Gregor and Lebresne, Sylvain and Burg, Brian and Vitek, Jan},
	year = {2010},
	keywords = {dynamic behavior, dynamic metrics, execution tracing, javascript, program analysis},
	pages = {1--12}

	address = {New York, {NY}, {USA}},
	series = {{VEE} '05},
	title = {Escape analysis in the context of dynamic compilation and deoptimization},
	isbn = {1-59593-047-7},
	location = {Chicago, {IL}, {USA}},
	doi = {10.1145/1064979.1064996},
	abstract = {In object-oriented programming languages, an object is said to escape the method or thread in which it was created if it can also be accessed by other methods or threads. Knowing which objects do not escape allows a compiler to perform aggressive {optimizations.This} paper presents a new intraprocedural and interprocedural algorithm for escape analysis in the context of dynamic compilation where the compiler has to cope with dynamic class loading and deoptimization. It was implemented for Sun Microsystems' Java {HotSpot™} client compiler and operates on an intermediate representation in {SSA} form. We introduce equi-escape sets for the efficient propagation of escape information between related objects. The analysis is used for scalar replacement of fields and synchronization removal, as well as for stack allocation of objects and fixed-sized arrays. The results of the interprocedural analysis support the compiler in inlining decisions and allow actual parameters to be allocated on the caller {stack.Under} certain circumstances, the Java {HotSpot™} {VM} is forced to stop executing a method's machine code and transfer control to the interpreter. This is called deoptimization. Since the interpreter does not know about the scalar replacement and synchronization removal performed by the compiler, the deoptimization framework was extended to reallocate and relock objects on demand.},
	booktitle = {Proceedings of the 1st {ACM/USENIX} international conference on Virtual execution environments},
	publisher = {{ACM}},
	author = {Kotzmann, Thomas and Mössenböck, Hanspeter},
	year = {2005},
	note = {{ACM} {ID:} 1064996},
	keywords = {algorithms, allocation/deallocation strategies, deoptimization},
	pages = {111–120}

	address = {Austin, Texas, {USA}},
	title = {Allocation removal by partial evaluation in a tracing {JIT}},
	abstract = {The performance of many dynamic language implementations suffers from high allocation rates and runtime type checks. This makes dynamic languages less applicable to purely algorithmic problems, despite their growing popularity. In this paper we present a simple compiler optimization based on online partial evaluation to remove object allocations and runtime type checks in the context of a tracing {JIT.} We evaluate the optimization using a Python {VM} and find that it gives good results for all our (real-life) benchmarks.},
	booktitle = {{PEPM}},
	author = {Bolz, Carl Friedrich and Cuni, Antonio and Fijałkowski, Maciej and Leuschel, Michael and Pedroni, Samuele and Rigo, Armin},
	year = {2011},
	keywords = {code generation, experimentation, interpreters, languages, optimization, partial evaluation, performance, run-time environments, tracing jit}

	address = {New York, {NY}, {USA}},
	series = {{ICOOOLPS} '11},
	title = {Runtime feedback in a meta-tracing {JIT} for efficient dynamic languages},
	isbn = {978-1-4503-0894-6},
	url = {},
	doi = {10.1145/2069172.2069181},
	abstract = {Meta-tracing {JIT} compilers can be applied to a variety of different languages without explicitly encoding language semantics into the compiler. So far, they lacked a way to give the language implementor control over runtime feedback. This restricted their performance. In this paper we describe the mechanisms in {PyPy’s} meta-tracing {JIT} that can be used to control runtime feedback in language-specific ways. These mechanisms are flexible enough to express classical {VM} techniques such as maps and runtime type feedback.},
	booktitle = {Proceedings of the 6th Workshop on Implementation, Compilation, Optimization of Object-Oriented Languages, Programs and Systems},
	publisher = {{ACM}},
	author = {Bolz, Carl Friedrich and Cuni, Antonio and Fijałkowski, Maciej and Leuschel, Michael and Pedroni, Samuele and Rigo, Armin},
	year = {2011},
	keywords = {code generation, interpreter, meta-programming, runtime feedback, tracing jit},
	pages = {9:1–9:8}

	address = {New York, {NY}, {USA}},
	series = {{MSR} '11},
	title = {How developers use the dynamic features of programming languages: the case of {Smalltalk}},
	isbn = {978-1-4503-0574-7},
	shorttitle = {How developers use the dynamic features of programming languages},
	url = {},
	doi = {10.1145/1985441.1985448},
	abstract = {The dynamic and reflective features of programming languages are powerful constructs that programmers often mention as extremely useful. However, the ability to modify a program at runtime can be both a boon-in terms of flexibility-, and a curse-in terms of tool support. For instance, usage of these features hampers the design of type systems, the accuracy of static analysis techniques, or the introduction of optimizations by compilers. In this paper, we perform an empirical study of a large Smalltalk codebase- often regarded as the poster-child in terms of availability of these features-, in order to assess how much these features are actually used in practice, whether some are used more than others, and in which kinds of projects. These results are useful to make informed decisions about which features to consider when designing language extensions or tool support.},
	booktitle = {Proceedings of the 8th Working Conference on Mining Software Repositories},
	publisher = {{ACM}},
	author = {Callaú, Oscar and Robbes, Romain and Tanter, {\'{E}} and Röthlisberger, David},
	year = {2011},
	keywords = {dynamic languages, smalltalk, static analysis},
	pages = {23–32}

	title = {Array bounds check elimination in the context of deoptimization},
	volume = {74},
	issn = {0167-6423},
	url = {},
	doi = {10.1016/j.scico.2009.01.002},
	abstract = {Whenever an array element is accessed, Java virtual machines execute a compare instruction to ensure that the index value is within the valid bounds. This reduces the execution speed of Java programs. Array bounds check elimination identifies situations in which such checks are redundant and can be removed. We present an array bounds check elimination algorithm for the Java {HotSpot(TM)} {VM} based on static analysis in the just-in-time compiler. The algorithm works on an intermediate representation in static single assignment form and maintains conditions for index expressions. It fully removes bounds checks if it can be proven that they never fail. Whenever possible, it moves bounds checks out of loops. The static number of checks remains the same, but a check inside a loop is likely to be executed more often. If such a check fails, the executing program falls back to interpreted mode, avoiding the problem that an exception is thrown at the wrong place. The evaluation shows a speedup near to the theoretical maximum for the scientific {SciMark} benchmark suite and also significant improvements for some Java Grande benchmarks. The algorithm slightly increases the execution speed for the {SPECjvm98} benchmark suite. The evaluation of the {DaCapo} benchmarks shows that array bounds checks do not have a significant impact on the performance of object-oriented applications.},
	number = {5-6},
	journal = {Sci. Comput. Program.},
	author = {Würthinger, Thomas and Wimmer, Christian and Mössenböck, Hanspeter},
	month = mar,
	year = {2009},
	keywords = {Array bounds check elimination, Java, just-in-time compilation, optimization, performance},
	pages = {279–295}

	address = {New York, {NY}, {USA}},
	series = {{PLDI} '92},
	title = {Debugging optimized code with dynamic deoptimization},
	isbn = {0-89791-475-9},
	url = {},
	doi = {10.1145/143095.143114},
	abstract = {{SELF's} debugging system provides complete source-level debugging (expected behavior) with globally optimized code. It shields the debugger from optimizations performed by the compiler by dynamically deoptimizing code on demand. Deoptimization only affects the procedure activations that are actively being debugged; all other code runs at full speed. Deoptimization requires the compiler to supply debugging information at discrete interrupt points; the compiler can still perform extensive optimizations between interrupt points without affecting debuggability. At the same time, the inability to interrupt between interrupt points is invisible to the user. Our debugging system also handles programming changes during debugging. Again, the system provides expected behavior: it is possible to change a running program and immediately observe the effects of the change. Dynamic deoptimization transforms old compiled code (which may contain inlined copies of the old version of the changed procedure) into new versions reflecting the current source-level state. To the best of our knowledge, {SELF} is the first practical system providing full expected behavior with globally optimized code.},
	booktitle = {Proceedings of the {ACM} {SIGPLAN} 1992 conference on Programming language design and implementation},
	publisher = {{ACM}},
	author = {Hölzle, Urs and Chambers, Craig and Ungar, David},
	year = {1992},
	pages = {32–43}

	address = {Washington, {DC}, {USA}},
	series = {{CGO} '07},
	title = {Run-Time Support for Optimizations Based on Escape Analysis},
	isbn = {0-7695-2764-7},
	url = {},
	doi = {10.1109/CGO.2007.34},
	abstract = {The {JavaTM} programming language does not allow the programmer to influence memory management. An object is usually allocated on the heap and deallocated by the garbage collector when it is not referenced any longer. Under certain conditions, the virtual machine can allocate objects on the stack or eliminate their allocation via scalar replacement. However, even if the dynamic compiler guarantees that the conditions are fulfilled, the optimizations require support by the run-time environment. We implemented a new escape analysis algorithm for Sun Microsystems' Java {HotSpotTM} {VM.} The results are used to replace objects with scalar variables, to allocate objects on the stack, and to remove synchronization. This paper deals with the representation of optimized objects in the debugging information and with reallocation and garbage collection support for a safe execution of optimized methods. Assignments to fields of parameters that can refer to both stack and heap objects are associated with an extended write barrier which skips card marking for stack objects. The traversal of objects during garbage collection uses a wrapper that abstracts from stack objects and presents their pointer fields as root pointers to the garbage collector. When a previously compiled and currently executing method must be continued in the interpreter because dynamic class loading invalidates the machine code, execution is suspended and compiler optimizations are undone. Scalar-replaced objects are reallocated on the heap lazily when control returns to the invalidated method, whereas stack-allocated objects must be reallocated immediately before program execution resumes. After reallocation, objects for which synchronization was removed are relocked.},
	urldate = {2012-09-06},
	booktitle = {Proceedings of the International Symposium on Code Generation and Optimization},
	publisher = {{IEEE} Computer Society},
	author = {Kotzmann, Thomas and Mossenbock, Hanspeter},
	year = {2007},
	pages = {49–60}

	address = {Montreal, Quebec, Canada},
	title = {{RPython:} a step towards reconciling dynamically and statically typed {OO} languages},
	isbn = {978-1-59593-868-8},
	shorttitle = {{RPython}},
	url = {},
	doi = {10.1145/1297081.1297091},
	abstract = {Although the C-based interpreter of Python is reasonably fast, implementations on the {CLI} or the {JVM} platforms offers some advantages in terms of robustness and interoperability. Unfortunately, because the {CLI} and {JVM} are primarily designed to execute statically typed, object-oriented languages, most dynamic language implementations cannot use the native bytecodes for common operations like method calls and exception handling; as a result, they are not able to take full advantage of the power offered by the {CLI} and {JVM.}},
	booktitle = {{DLS}},
	publisher = {{ACM}},
	author = {Ancona, Davide and Ancona, Massimo and Cuni, Antonio and Matsakis, Nicholas D.},
	year = {2007},
	keywords = {{JVM}, .net, Python}

	address = {Portland, Oregon, {USA}},
	title = {{PyPy's} approach to virtual machine construction},
	isbn = {1-59593-491-X},
	url = {},
	doi = {10.1145/1176617.1176753},
	abstract = {The {PyPy} project seeks to prove both on a research and a practical level the feasibility of constructing a virtual machine {(VM)} for a dynamic language in a dynamic language - in this case, Python. The aim is to translate (i.e. compile) the {VM} to arbitrary target environments, ranging in level from {C/Posix} to {Smalltalk/Squeak} via Java and {CLI/.NET}, while still being of reasonable efficiency within these {environments.A} key tool to achieve this goal is the systematic reuse of the Python language as a system programming language at various levels of our architecture and translation process. For each level, we design a corresponding type system and apply a generic type inference engine - for example, the garbage collector is written in a style that manipulates simulated pointer and address objects, and when translated to C these operations become C-level pointer and address instructions.},
	booktitle = {{DLS}},
	publisher = {{ACM}},
	author = {Rigo, Armin and Pedroni, Samuele},
	year = {2006},
	keywords = {metacircularity, Python, retargettable code generation, type inference, {VM}}

	title = {Efficiently Computing Static Single Assignment Form and the Control Dependence Graph},
	volume = {13},
	number = {4},
	journal = {{ACM} Transactions on Programming Languages and Systems},
	author = {Cytron, Ron and Ferrante, Jeanne and Rosen, Barry K. and Wegman, Mark N. and Zadeck, F. Kenneth},
	month = oct,
	year = {1991},
	pages = {451–490}

	address = {Genova, Italy},
	title = {Tracing the meta-level: {PyPy's} tracing {JIT} compiler},
	isbn = {978-1-60558-541-3},
	shorttitle = {Tracing the meta-level},
	url = {},
	doi = {10.1145/1565824.1565827},
	abstract = {We attempt to apply the technique of Tracing {JIT} Compilers in the context of the {PyPy} project, i.e., to programs that are interpreters for some dynamic languages, including Python. Tracing {JIT} compilers can greatly speed up programs that spend most of their time in loops in which they take similar code paths. However, applying an unmodified tracing {JIT} to a program that is itself a bytecode interpreter results in very limited or no speedup. In this paper we show how to guide tracing {JIT} compilers to greatly improve the speed of bytecode interpreters. One crucial point is to unroll the bytecode dispatch loop, based on two kinds of hints provided by the implementer of the bytecode interpreter. We evaluate our technique by applying it to two {PyPy} interpreters: one is a small example, and the other one is the full Python interpreter.},
	booktitle = {{ICOOOLPS}},
	publisher = {{ACM}},
	author = {Bolz, Carl Friedrich and Cuni, Antonio and Fijałkowski, Maciej and Rigo, Armin},
	year = {2009},
	pages = {18--25}

	address = {Monterey, California},
	title = {The {Java} {HotSpot} server compiler},
	url = {},
	abstract = {The Java {HotSpotTM} Server Compiler achieves improved asymptotic performance through a combination of object-oriented and classical-compiler optimizations. Aggressive inlining using class-hierarchy analysis reduces function call overhead and provides opportunities for many compiler optimizations.},
	booktitle = {Proceedings of the Java Virtual Machine Research and Technology Symposium on Java Virtual Machine Research and Technology Symposium - Volume 1},
	publisher = {{USENIX} Association},
	author = {Paleczny, Michael and Vick, Christopher and Click, Cliff},
	year = {2001},
	keywords = {toread}

	title = {Back to the Future in One Week — Implementing a {Smalltalk} {VM} in {PyPy}},
	url = {},
	abstract = {We report on our experiences with the Spy project, including implementation details and benchmark results. Spy is a re-implementation of the Squeak (i.e. Smalltalk-80) {VM} using the {PyPy} toolchain. The {PyPy} project allows code written in {RPython}, a subset of Python, to be translated
to a multitude of different backends and architectures. During the translation, many aspects of the implementation can be
independently tuned, such as the garbage collection algorithm or threading implementation. In this way, a whole host of interpreters
can be derived from one abstract interpreter definition. Spy aims to bring these benefits to Squeak, allowing for greater portability and, eventually, improved performance. The current
Spy codebase is able to run a small set of benchmarks that demonstrate performance superior to many similar Smalltalk {VMs}, but
which still run slower than in Squeak itself. Spy was built from scratch over the course of a week during a joint Squeak-{PyPy} Sprint in Bern last autumn.},
	booktitle = {Self-Sustaining Systems},
	author = {Bolz, Carl Friedrich and Kuhn, Adrian and Lienhard, Adrian and Matsakis, Nicholas and Nierstrasz, Oscar and Renggli, Lukas and Rigo, Armin and Verwaest, Toon},
	year = {2008},
	pages = {123--139}

	title = {A third-generation {SELF} implementation: reconciling responsiveness with performance},
	volume = {29},
	shorttitle = {A third-generation {SELF} implementation},
	url = {},
	doi = {10.1145/191081.191116},
	abstract = {Programming systems should be both responsive (to support rapid development) and efficient (to complete computations quickly). Pure object-oriented languages are harder to implement efficiently since they need optimization to achieve good performance. Unfortunately, optimization conflicts with interactive responsiveness because it tends to produce long compilation pauses, leading to unresponsive programming environments. Therefore, to achieve good responsiveness, existing exploratory programming environments such as the Smalltalk-80 environment rely on interpretation or non-optimizing dynamic compilation. But such systems pay a price for their interactiveness, since they may execute programs several times slower than an optimizing {system.SELF-93} reconciles high performance with responsiveness by combining a fast, non-optimizing compiler with a slower, optimizing compiler. The resulting system achieves both excellent performance (two or three times faster than existing Smalltalk systems) and good responsiveness. Except for situations requiring large applications to be (re)compiled from scratch, the system allows for pleasant interactive use with few perceptible compilation pauses. To our knowledge, {SELF-93} is the first implementation of a pure object-oriented language achieving both good performance and good {responsiveness.When} measuring interactive pauses, it is imperative to treat multiple short pauses as one longer pause if the pauses occur in short succession, since they are perceived as one pause by the user. We propose a definition of pause clustering and show that clustering can make an order-of-magnitude difference in the pause time distribution.},
	number = {10},
	journal = {{SIGPLAN} Not.},
	author = {Hölzle, Urs and Ungar, David},
	year = {1994},
	keywords = {interactivity, recompilation, self},
	pages = {229--243}