Anonymous committed 2400da8

r6385@telaviv1: shlomi | 2009-02-12 19:36:30 +0200
Added the Optimizing Code for Speed article from Wikibooks, along with
a script to clean up its XHTML.


Files changed (4)

bin/cleanup-mediawiki-html.pl

+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use XML::LibXML;
+use List::MoreUtils qw(firstidx);
+
+my $parser = XML::LibXML->new();
+
+sub remove
+{
+    my $node = shift;
+
+    my $parent = $node->parentNode;
+
+    $parent->removeChild($node);
+
+    return;
+}
+
+
+sub empty
+{
+    my $node = shift;
+
+    $node->removeChildNodes();
+
+    return $node;
+}
+
+sub stringify
+{
+    my $list = shift;
+
+    return 
+    [
+        map { (ref($_) eq "") ? XML::LibXML::Text->new($_) : $_ } @$list
+    ];
+}
+
+sub replace
+{
+    my $node = shift;
+    my $childs = shift;
+
+    empty($node);
+
+    foreach my $c (@{
+            stringify((ref($childs) eq "ARRAY") ? $childs : [$childs])
+        }
+    )
+    {
+        $node->appendChild($c);
+    }
+
+    return $node;
+}
+
+sub xhtml_con
+{
+    my $node = shift;
+
+    my $xc = XML::LibXML::XPathContext->new($node);
+    $xc->registerNs('xhtml', 'http://www.w3.org/1999/xhtml');
+
+    return $xc;
+}
+
+my $doc = $parser->parse_file(
+    "./lib/htmls/from-mediawiki/orig/Optimizing_Code_for_Speed-rev1.html"
+);
+
+sub is_empty_or_comment
+{
+    my $node = shift;
+
+    if ($node->nodeType() == XML_COMMENT_NODE)
+    {
+        return 1;
+    }
+    
+    if ($node->nodeType() == XML_TEXT_NODE)
+    {
+        if ($node->data() !~ /\S/)
+        {
+            return 1;
+        }
+    }
+
+    return;
+}
+
+my $xc = XML::LibXML::XPathContext->new($doc);
+$xc->registerNs('xhtml', 'http://www.w3.org/1999/xhtml');
+
+# Remove the table-of-contents because we generate our own.
+my ($toc) =  $xc->findnodes("//xhtml:table[\@id='toc']");
+remove($toc);
+
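+# Remove the "printfooter" div and everything that follows it in the
+# document: first its later siblings, then the later siblings of each of
+# its ancestors, all the way up the tree.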
+{
+    my ($el) = $xc->findnodes("//xhtml:div[\@class='printfooter']");
+
+    my $parent = $el->parentNode;
+
+    my $innermost = 1;
+
+    while ($parent)
+    {
+        my $start = $el;
+        # In subsequent iterations - we want to remove the node only
+        # after the parent
+        if ($innermost)
+        {
+            my $next_el = $start->previousSibling();
+            while ($next_el && is_empty_or_comment($next_el))
+            {
+                $start = $next_el;
+                $next_el = $start->previousSibling();
+            }
+            # $start is now OK.
+        }
+        else
+        {
+            $start = $start->nextSibling();
+        }
+        
+        my $to_del = $start;
+        my $next_to_del;
+        while ($to_del)
+        {
+            # Once removed - $to_del won't have a nextSibling().
+            # Elementary, my dear Watson!
+            $next_to_del = $to_del->nextSibling();
+            $parent->removeChild($to_del);
+        }
+        continue
+        {
+            $to_del = $next_to_del;
+        }
+    }
+    continue
+    {
+        $el = $parent;
+        $parent = $el->parentNode();
+        $innermost = 0;
+    }
+}
+
+foreach my $el ($xc->findnodes("//xhtml:script"))
+{
+    remove($el);
+}
+
+foreach my $attr (qw(rel class title))
+{
+    foreach my $el ($xc->findnodes("//*[\@$attr]"))
+    {
+        $el->removeAttribute($attr);
+    }
+}
+
+foreach my $a_el ($xc->findnodes("//xhtml:p/xhtml:a[\@id]"))
+{
+    my $id = $a_el->getAttribute("id");
+
+    my $p = $a_el->parentNode();
+
+    my $h_tag = $p->nextSibling();
+    
+    while(   $h_tag->nodeType() != XML_ELEMENT_NODE
+          || $h_tag->nodeName() !~ m{\Ah}
+      )
+    {
+        $h_tag = $h_tag->nextSibling();
+    }
+
+    remove($p);
+
+    $h_tag->setAttribute("id", $id);
+
+    my ($span) = xhtml_con($h_tag)->findnodes("xhtml:span/xhtml:a");
+
+    $span = $span->parentNode;
+
+    remove($span);
+
+    ($span) = xhtml_con($h_tag)->findnodes("xhtml:span");
+
+    replace($h_tag, $span->textContent());
+}
+
+foreach my $a_el ($xc->findnodes("//xhtml:a[\@href]"))
+{
+    my $href = $a_el->getAttribute("href");
+    if ($href =~ m{\A/})
+    {
+        $a_el->setAttribute("href", "http://en.wikibooks.org$href");
+    }
+}
+
+print $doc->toString();
+

bin/preproc-optimizing-docbook.pl

     return $node;
 }
 
+=begin Code
+
 {
     my ($title) = $doc->findnodes("/book/bookinfo/title");
     replace($title, "Optimizing Code for Speed");
     replace($legal, "Permission to use, copy, modify and distribute this document under version 2.5 (or later) of the Creative Commons' Attribution License or version 1.2 (or later) of the GNU Free Documentation License ");
 }
 
+=end Code
+
+=cut
+
+# Strip the <bookinfo> element and hoist the contents of the single
+# <article> into the root, renaming the root from <book> to <article>.
+my ($book) = $doc->findnodes("/book");
+my ($bookinfo) = $book->findnodes("bookinfo");
+
+$book->removeChild($bookinfo);
+
+my ($article) = $book->findnodes("article");
+replace($book, [$article->childNodes()]);
+$article = $book;
+$article->setNodeName("article");
+
 print $doc->toString();

lib/htmls/from-mediawiki/orig/Optimizing_Code_for_Speed-rev1.html

+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
+	<head>
+		<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+		<meta http-equiv="Content-Style-Type" content="text/css" />
+		<meta name="generator" content="MediaWiki 1.15alpha" />
+		<meta name="keywords" content="Optimizing Code for Speed,Optimizing Code for Speed/Outline,Jguk,Shlomif,Dallas1278" />
+		<link rel="alternate" type="application/x-wiki" title="Edit this page" href="http://en.wikibooks.org/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit" />
+		<link rel="edit" title="Edit this page" href="http://en.wikibooks.org/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit" />
+		<link rel="shortcut icon" href="/favicon.ico" />
+		<link rel="search" type="application/opensearchdescription+xml" href="/w/opensearch_desc.php" title="Wikibooks (en)" />
+		<link rel="copyright" href="http://www.gnu.org/copyleft/fdl.html" />
+		<link rel="alternate" type="application/rss+xml" title="Wikibooks RSS Feed" href="http://en.wikibooks.org/w/index.php?title=Special:RecentChanges&amp;feed=rss" />
+		<link rel="alternate" type="application/atom+xml" title="Wikibooks Atom Feed" href="http://en.wikibooks.org/w/index.php?title=Special:RecentChanges&amp;feed=atom" />
+		<title>Optimizing Code for Speed - Wikibooks, collection of open-content textbooks</title>
+		<link rel="stylesheet" href="/skins-1.5/common/shared.css?203xx" type="text/css" media="screen" />
+		<link rel="stylesheet" href="/skins-1.5/common/commonPrint.css?203xx" type="text/css" media="print" />
+		<link rel="stylesheet" href="/skins-1.5/monobook/main.css?203xx" type="text/css" media="screen" />
+		<link rel="stylesheet" href="/skins-1.5/chick/main.css?203xx" type="text/css" media="handheld" />
+		<!--[if lt IE 5.5000]><link rel="stylesheet" href="/skins-1.5/monobook/IE50Fixes.css?203xx" type="text/css" media="screen" /><![endif]-->
+		<!--[if IE 5.5000]><link rel="stylesheet" href="/skins-1.5/monobook/IE55Fixes.css?203xx" type="text/css" media="screen" /><![endif]-->
+		<!--[if IE 6]><link rel="stylesheet" href="/skins-1.5/monobook/IE60Fixes.css?203xx" type="text/css" media="screen" /><![endif]-->
+		<!--[if IE 7]><link rel="stylesheet" href="/skins-1.5/monobook/IE70Fixes.css?203xx" type="text/css" media="screen" /><![endif]-->
+		<link rel="stylesheet" href="/w/extensions/FlaggedRevs/flaggedrevs.css?51" type="text/css" />
+		<link rel="stylesheet" href="/w/index.php?title=MediaWiki:Common.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" />
+		<link rel="stylesheet" href="/w/index.php?title=MediaWiki:Print.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" media="print" />
+		<link rel="stylesheet" href="/w/index.php?title=MediaWiki:Handheld.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" media="handheld" />
+		<link rel="stylesheet" href="/w/index.php?title=MediaWiki:Monobook.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" />
+		<link rel="stylesheet" href="/w/index.php?title=-&amp;action=raw&amp;maxage=2678400&amp;gen=css" type="text/css" />
+		<!--[if lt IE 7]><script type="text/javascript" src="/skins-1.5/common/IEFixes.js?203xx"></script>
+		<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
+
+		<script type= "text/javascript">/*<![CDATA[*/
+		var skin = "monobook";
+		var stylepath = "/skins-1.5";
+		var wgArticlePath = "/wiki/$1";
+		var wgScriptPath = "/w";
+		var wgScript = "/w/index.php";
+		var wgVariantArticlePath = false;
+		var wgActionPaths = {};
+		var wgServer = "http://en.wikibooks.org";
+		var wgCanonicalNamespace = "";
+		var wgCanonicalSpecialPageName = false;
+		var wgNamespaceNumber = 0;
+		var wgPageName = "Optimizing_Code_for_Speed";
+		var wgTitle = "Optimizing Code for Speed";
+		var wgAction = "view";
+		var wgArticleId = "83059";
+		var wgIsArticle = true;
+		var wgUserName = null;
+		var wgUserGroups = null;
+		var wgUserLanguage = "en";
+		var wgContentLanguage = "en";
+		var wgBreakFrames = false;
+		var wgCurRevisionId = "1407451";
+		var wgVersion = "1.15alpha";
+		var wgEnableAPI = true;
+		var wgEnableWriteAPI = true;
+		var wgSeparatorTransformTable = ["", ""];
+		var wgDigitTransformTable = ["", ""];
+		var wgMWSuggestTemplate = "http://en.wikibooks.org/w/api.php?action=opensearch\x26search={searchTerms}\x26namespace={namespaces}";
+		var wgDBname = "enwikibooks";
+		var wgSearchNamespaces = [0, 4, 112];
+		var wgMWSuggestMessages = ["with suggestions", "no suggestions"];
+		var wgRestrictionEdit = [];
+		var wgRestrictionMove = [];
+		/*]]>*/</script>
+
+		<script type="text/javascript" src="/skins-1.5/common/wikibits.js?203xx"><!-- wikibits js --></script>
+		<!-- Head Scripts -->
+		<script type="text/javascript" src="/skins-1.5/common/ajax.js?203xx"></script>
+		<script type="text/javascript" src="/skins-1.5/common/mwsuggest.js?203xx"></script>
+<script type="text/javascript">/*<![CDATA[*/
+var wgNotice='';var wgNoticeLocal='';
+/*]]>*/</script>		<script type="text/javascript" src="http://upload.wikimedia.org/centralnotice/wikibooks/en/centralnotice.js?203xx"></script>
+<script type="text/javascript">
+var wgFlaggedRevsParams = {"tags": {"composition": 3, "accuracy": 2, "coverage": 2}};
+var wgFlaggedRevsParams2 = {"tags": {"reliability": 3, "completeness": 2, "npov": 2, "presentation": 1}};
+var wgStableRevisionId = 0;
+var wgAjaxFeedback = {"sendingMsg": "Submitting...", "sentMsg": "Thank you!"}
+var wgAjaxReview = {"sendingMsg": "Submitting...", "sentMsg": "Review complete!", "actioncomplete": "Action complete"}
+</script>
+<script type="text/javascript" src="/w/extensions/FlaggedRevs/flaggedrevs.js?51"></script>
+		<script type="text/javascript" src="/w/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=monobook"><!-- site js --></script>
+	</head>
+<body class="mediawiki ltr ns-0 ns-subject page-Optimizing_Code_for_Speed skin-monobook">
+	<div id="globalWrapper">
+		<div id="column-content">
+	<div id="content">
+		<a name="top" id="top"></a>
+		<div id="siteNotice"><script type='text/javascript'>if (wgNotice != '') document.writeln(wgNotice);</script><script type="text/javascript" language="JavaScript">
+/* <![CDATA[ */
+document.writeln("\x3cdiv style=\"text-align:center;\"\x3e\x3cb\x3eEnglish Wikibooks has installed \x3ca href=\"http://www.mediawiki.org/wiki/FlaggedRevs\" class=\"extiw\" title=\"mw:FlaggedRevs\"\x3eFlagged Revisions\x3c/a\x3e\x3c/b\x3e, which shows a stable version to anonymous users where possible.\x3cbr/\x3eYou can choose to see newer versions if there are any; \x3ca href=\"/wiki/Special:UserLogin\" title=\"Special:UserLogin\"\x3elogin\x3c/a\x3e to see the newest version all the time. Give feedback at \x3ca href=\"/wiki/Help_talk:Article_validation\" title=\"Help talk:Article validation\" class=\"mw-redirect\"\x3eHelp talk:Article validation\x3c/a\x3e\x3c/div\x3e\n");
+/* ]]> */
+</script></div>		<h1 id="firstHeading" class="firstHeading">Optimizing Code for Speed</h1>
+		<div id="bodyContent">
+			<h3 id="siteSub">From Wikibooks, the open-content textbooks collection</h3>
+			<div id="contentSub"><div id='mw-revisiontag' class='flaggedrevs_notice plainlinks noprint'><span class='fr-icon-current plainlinks'></span>There are no reviewed revisions of this page, so it may <b>not</b> have been <a href="/wiki/Help:Page_validation" title="Help:Page validation" class="mw-redirect">checked</a> for quality.</div></div>
+									<div id="jump-to-nav">Jump to: <a href="#column-one">navigation</a>, <a href="#searchInput">search</a></div>			<!-- start content -->
+			<div style="background-color:#FDD">
+<p>This work is licensed under the:</p>
+<ol>
+<li><a href="http://creativecommons.org/licenses/by/2.5/" class="external text" title="http://creativecommons.org/licenses/by/2.5/" rel="nofollow">The Creative Commons' Attribution License version 2.5</a> - or at your option any higher version.</li>
+<li><a href="http://www.gnu.org/copyleft/fdl.html" class="external text" title="http://www.gnu.org/copyleft/fdl.html" rel="nofollow">The GNU Free Documentation License</a> version 1.2 - or at your option any higher version.</li>
+</ol>
+<p>Please keep it this way. If you don't like either or both licenses - feel free to fork it and have a different derived license. (While properly accepting the terms of either license as starting points).</p>
+<p>Authors: <a href="/wiki/User:Shlomif" title="User:Shlomif">User:Shlomif</a>.</p>
+</div>
+<table id="toc" class="toc" summary="Contents">
+<tr>
+<td>
+<div id="toctitle">
+<h2>Contents</h2>
+</div>
+<ul>
+<li class="toclevel-1"><a href="#Other_Resources"><span class="tocnumber">1</span> <span class="toctext">Other Resources</span></a></li>
+<li class="toclevel-1"><a href="#Introduction"><span class="tocnumber">2</span> <span class="toctext">Introduction</span></a>
+<ul>
+<li class="toclevel-2"><a href="#In_which_programs_is_optimization_required.3F"><span class="tocnumber">2.1</span> <span class="toctext">In which programs is optimization required?</span></a></li>
+<li class="toclevel-2"><a href="#When_to_optimize.3F"><span class="tocnumber">2.2</span> <span class="toctext">When to optimize?</span></a></li>
+<li class="toclevel-2"><a href="#When_not_to_optimize.3F"><span class="tocnumber">2.3</span> <span class="toctext">When not to optimize?</span></a>
+<ul>
+<li class="toclevel-3"><a href="#Optimization_vs._Modularity_and_Readability"><span class="tocnumber">2.3.1</span> <span class="toctext">Optimization vs. Modularity and Readability</span></a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toclevel-1"><a href="#Strategy_for_Optimization"><span class="tocnumber">3</span> <span class="toctext">Strategy for Optimization</span></a></li>
+<li class="toclevel-1"><a href="#Order_of_Complexity_Optimizations"><span class="tocnumber">4</span> <span class="toctext">Order of Complexity Optimizations</span></a>
+<ul>
+<li class="toclevel-2"><a href="#What_is_order_of_complexity.3F"><span class="tocnumber">4.1</span> <span class="toctext">What is order of complexity?</span></a></li>
+<li class="toclevel-2"><a href="#Some_examples_for_reducing_Order_of_Complexity"><span class="tocnumber">4.2</span> <span class="toctext">Some examples for reducing Order of Complexity</span></a>
+<ul>
+<li class="toclevel-3"><a href="#Lookup"><span class="tocnumber">4.2.1</span> <span class="toctext">Lookup</span></a>
+<ul>
+<li class="toclevel-4"><a href="#Case_study_for_Lookup_-_the_Freecell_Solver_States_Collection"><span class="tocnumber">4.2.1.1</span> <span class="toctext">Case study for Lookup - the Freecell Solver States Collection</span></a></li>
+</ul>
+</li>
+<li class="toclevel-3"><a href="#Counting_Sort_instead_of_Comparison-Based_Sort"><span class="tocnumber">4.2.2</span> <span class="toctext">Counting Sort instead of Comparison-Based Sort</span></a></li>
+<li class="toclevel-3"><a href="#Joel_Spolsky.27s_Schlemiel_the_Painter.27s_Syndrome"><span class="tocnumber">4.2.3</span> <span class="toctext">Joel Spolsky's Schlemiel the Painter's Syndrome</span></a></li>
+</ul>
+</li>
+<li class="toclevel-2"><a href="#Note_about_Order-of-Complexity_Reduction"><span class="tocnumber">4.3</span> <span class="toctext">Note about Order-of-Complexity Reduction</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1"><a href="#Factor_Optimizations"><span class="tocnumber">5</span> <span class="toctext">Factor Optimizations</span></a>
+<ul>
+<li class="toclevel-2"><a href="#What_are_Factor_Optimizations.3F"><span class="tocnumber">5.1</span> <span class="toctext">What are Factor Optimizations?</span></a></li>
+<li class="toclevel-2"><a href="#The_Motivation_for_Factor_Optimizations"><span class="tocnumber">5.2</span> <span class="toctext">The Motivation for Factor Optimizations</span></a></li>
+<li class="toclevel-2"><a href="#Examples_for_Factor_Optimizations"><span class="tocnumber">5.3</span> <span class="toctext">Examples for Factor Optimizations</span></a>
+<ul>
+<li class="toclevel-3"><a href="#Managing_Pointers_to_Structs_Instead_of_the_Structs_Themselves"><span class="tocnumber">5.3.1</span> <span class="toctext">Managing Pointers to Structs Instead of the Structs Themselves</span></a></li>
+<li class="toclevel-3"><a href="#Reducing_Memory_Consumption"><span class="tocnumber">5.3.2</span> <span class="toctext">Reducing Memory Consumption</span></a>
+<ul>
+<li class="toclevel-4"><a href="#Note_about_the_Memory-Speed_Tradeoff"><span class="tocnumber">5.3.2.1</span> <span class="toctext">Note about the Memory-Speed Tradeoff</span></a></li>
+</ul>
+</li>
+<li class="toclevel-3"><a href="#Parallelization"><span class="tocnumber">5.3.3</span> <span class="toctext">Parallelization</span></a></li>
+<li class="toclevel-3"><a href="#Putting_the_Most_Important_struct_Members_First"><span class="tocnumber">5.3.4</span> <span class="toctext">Putting the Most Important struct Members First</span></a></li>
+<li class="toclevel-3"><a href="#Copy-on-write"><span class="tocnumber">5.3.5</span> <span class="toctext">Copy-on-write</span></a></li>
+<li class="toclevel-3"><a href="#Caching"><span class="tocnumber">5.3.6</span> <span class="toctext">Caching</span></a></li>
+<li class="toclevel-3"><a href="#Avoiding_Copying_Altogether"><span class="tocnumber">5.3.7</span> <span class="toctext">Avoiding Copying Altogether</span></a></li>
+<li class="toclevel-3"><a href="#Inline_Functions"><span class="tocnumber">5.3.8</span> <span class="toctext">Inline Functions</span></a></li>
+<li class="toclevel-3"><a href="#The_Schwartzian_Transform"><span class="tocnumber">5.3.9</span> <span class="toctext">The Schwartzian Transform</span></a></li>
+</ul>
+</li>
+<li class="toclevel-2"><a href="#Call_for_a_Catalog_of_Optimizations"><span class="tocnumber">5.4</span> <span class="toctext">Call for a Catalog of Optimizations</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1"><a href="#Optimization_by_Changing_the_Dependencies"><span class="tocnumber">6</span> <span class="toctext">Optimization by Changing the Dependencies</span></a></li>
+<li class="toclevel-1"><a href="#Optimizing_by_Reducing_Feature-Set"><span class="tocnumber">7</span> <span class="toctext">Optimizing by Reducing Feature-Set</span></a>
+<ul>
+<li class="toclevel-2"><a href="#Optimizing_by_Compromising_on_Security"><span class="tocnumber">7.1</span> <span class="toctext">Optimizing by Compromising on Security</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1"><a href="#Conclusion"><span class="tocnumber">8</span> <span class="toctext">Conclusion</span></a></li>
+<li class="toclevel-1"><a href="#Thanks"><span class="tocnumber">9</span> <span class="toctext">Thanks</span></a></li>
+</ul>
+</td>
+</tr>
+</table>
+<script type="text/javascript">
+//<![CDATA[
+ if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } 
+//]]>
+</script>
+<p><a name="Other_Resources" id="Other_Resources"></a></p>
+<h2><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=1" title="Edit section: Other Resources">edit</a>]</span> <span class="mw-headline">Other Resources</span></h2>
+<ul>
+<li><a href="/wiki/Optimizing_Code_for_Speed/Outline" title="Optimizing Code for Speed/Outline">Optimizing Code for Speed/Outline</a></li>
+</ul>
+<p><a name="Introduction" id="Introduction"></a></p>
+<h2><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=2" title="Edit section: Introduction">edit</a>]</span> <span class="mw-headline">Introduction</span></h2>
+<p>Optimization of code is the term applied to a process in which code is tuned to perform better in some respect: speed, memory consumption, Input/Output (disk reads and writes, or network reads and writes), etc. In Mathematics, optimization means a process in which one finds the values with the best performance. In computing, where programs are very complex, optimizing for speed in the mathematical sense is usually impossible. Instead, the term has come to mean simply advancing in the direction of better performance in one or more respects.</p>
+<p>This document will focus on optimizing code to run faster. However, as you will see later, doing this may involve having to optimize the code in a different aspect. Furthermore, often when programmers are trying to optimize one aspect of a program, they are doing so in order to increase speed.</p>
+<p><a name="In_which_programs_is_optimization_required.3F" id="In_which_programs_is_optimization_required.3F"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=3" title="Edit section: In which programs is optimization required?">edit</a>]</span> <span class="mw-headline">In which programs is optimization required?</span></h3>
+<p>There are several types of programs in which optimization is required. One of them is <a href="http://en.wikipedia.org/wiki/Real-time_computing" class="extiw" title="wikipedia:Real-time computing">real time programs</a>. Despite a common misconception, these are not necessarily programs that need to be very fast. Rather, real time programs are programs that must respond to certain events within a certain time limit dedicated to the event. So, for example, if the user presses a keyboard key while inside a word processor window, it should display the character relatively promptly. A 5-second delay would be unacceptable, and as such a word processor is a kind of application that has some real-time constraints.</p>
+<p>Some parts of real-time systems may need optimization. However, once the time falls below the required delay, no other optimization is necessary. (At least not until testing proves it is again exceeding the limit.)</p>
+<p>Other programs that require optimization are programs that cannot be fast enough. Examples of such programs are Artificial Intelligence programs, games, multimedia programs, virtual machines and interpreters for programming languages, or any type of publicly available library or module whose use "in the wild" may require it to be very fast.</p>
+<p>Naturally, another type is programs that are too slow, or that are perceived to be too slow. While programs in some problem domains are very slow due to the complexity of the calculations involved, that is usually not the case, and nothing prevents a program from being fast. Programs that are considered too slow should be optimized to make them run faster.</p>
+<p><a name="When_to_optimize.3F" id="When_to_optimize.3F"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=4" title="Edit section: When to optimize?">edit</a>]</span> <span class="mw-headline">When to optimize?</span></h3>
+<p>The rule of thumb is that one should optimize if the program is not fast enough compared to the desired speed. A common misconception is that if a program is not fast enough now, it will run faster as hardware gets faster. However, that is not always the case.</p>
+<p>If your program is slow, it is usually because the code is not optimized (such as due to the fact it uses sub-optimal algorithms). While better hardware will usually improve performance, it does not guarantee a noticeable speed increase. Furthermore the progress in hardware speed has allowed programmers to write more wasteful code (see <a href="http://www.paulgraham.com/hundred.html" class="external text" title="http://www.paulgraham.com/hundred.html" rel="nofollow">Paul Graham's "The Hundred Years Language" essay</a>), but some programs are still excessively slow.</p>
+<p>When writing a program on modern hardware, one should make it fast enough to begin with. Some people have claimed that we have already reached the end of <a href="http://en.wikipedia.org/wiki/Moore%27s_law" class="extiw" title="wikipedia:Moore's law">Moore's First Law</a> in regards to uni-core processors, and that we cannot expect a single-process, single-threaded program to run faster. In any case, even if the program runs well on a high-end platform, it may still need to run on older hardware.</p>
+<p>We have all seen that while computers got faster, software has often become slower to run unless the hardware is upgraded. <a href="http://en.wikipedia.org/wiki/Gates%27_Law" class="extiw" title="wikipedia:Gates' Law">The so-called "Gates' Law"</a> claims that commercial programs decrease in speed by half every 18 months, due to various reasons. It is well known that the various versions of the <a href="http://en.wikipedia.org/wiki/DOS" class="extiw" title="wikipedia:DOS">DOS</a> operating system ran adequately on PC XTs and 286s, and that an Intel 386 was a "lean and mean DOS machine", as a certain journalist claimed back then. On the other hand, Microsoft Windows 3.0 and Microsoft Windows 3.1 already required a fast 486 computer to be run comfortably, while Windows 95 was barely usable there and needed a Pentium computer. Windows XP already ran slowly on a Pentium machine and required a high-end Pentium III or Pentium 4 computer. <a href="http://en.wikipedia.org/wiki/Windows_Vista" class="extiw" title="wikipedia:Windows Vista">Windows Vista</a> requires even more hardware resources than Windows XP, to the point that many computers in use today cannot run it comfortably.</p>
+<p>Now, while software simulations that run directly against the CPU and memory (and possibly hard-disk) are still running much faster than before, the responsiveness of the system itself does not seem to improve much.</p>
+<p><a name="When_not_to_optimize.3F" id="When_not_to_optimize.3F"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=5" title="Edit section: When not to optimize?">edit</a>]</span> <span class="mw-headline">When not to optimize?</span></h3>
+<p>One should not optimize when the program in question runs fast enough. However, if it also has to run adequately on slower hardware, handle a greater load, or work well with fewer resources at its disposal, then the program might need to be optimized further based on such requirements.</p>
+<p><a name="Optimization_vs._Modularity_and_Readability" id="Optimization_vs._Modularity_and_Readability"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=6" title="Edit section: Optimization vs. Modularity and Readability">edit</a>]</span> <span class="mw-headline">Optimization vs. Modularity and Readability</span></h4>
+<p>Optimization may have a bad influence on such code-quality factors as modularity and readability. Consider, for example, what the developers of <a href="http://gmplib.org/" class="external text" title="http://gmplib.org/" rel="nofollow">GMP (GNU Multi-precision)</a> wrote in <a href="http://gmplib.org/manual/Introduction-to-GMP.html#Introduction-to-GMP" class="external text" title="http://gmplib.org/manual/Introduction-to-GMP.html#Introduction-to-GMP" rel="nofollow">their manual</a>:</p>
+<blockquote>
+<p>The speed of GMP is achieved by using fullwords as the basic arithmetic type, by using sophisticated algorithms, by including carefully optimized assembly code for the most common inner loops for many different CPUs, and <b>by a general emphasis on speed (as opposed to simplicity or elegance</b>).</p>
+</blockquote>
+<p>Also consider that short <a href="http://en.wikipedia.org/wiki/Subroutine" class="extiw" title="wikipedia:Subroutine">functions</a> and <a href="http://en.wikipedia.org/wiki/Method_(computer_science)" class="extiw" title="wikipedia:Method (computer science)">methods</a> are considered preferable to longer ones, to allow for greater code re-use, testing, and self-documentation. (See the <a href="http://c2.com/cgi/wiki?ExtractMethod" class="external text" title="http://c2.com/cgi/wiki?ExtractMethod" rel="nofollow">"Extract Method" refactoring pattern</a>.) However, function or method calls are considered a relatively costly operation, and so this extra code quality will probably make the program slower.</p>
+<p>Therefore, it's not hard to imagine that a developer who wishes to save some extra cycles will merge a function that gets used only once or a few times into its caller code (or incorporate its code using a <a href="http://en.wikipedia.org/wiki/Macro_(computerscience)" class="extiw" title="wikipedia:Macro (computerscience)">macro</a>, or a preprocessing stage if possible), thereby making the code less modular.</p>
+<p>Similarly, highly optimized code may also become much more unreadable. For example, the C Preprocessor's macros are notorious for making code that uses them unreadable. I have heard <a href="http://www.mail-archive.com/linux-il@cs.huji.ac.il/msg15826.html" class="external text" title="http://www.mail-archive.com/linux-il@cs.huji.ac.il/msg15826.html" rel="nofollow">some complaints</a> that <a href="http://en.wikipedia.org/wiki/OpenSSL" class="extiw" title="wikipedia:OpenSSL">OpenSSL</a>'s code, which is based on preprocessor macros, identically named static functions and a lot of code generation, is very hard to understand and follow for these reasons.</p>
+<p>Similar complaints are constantly voiced against <a href="http://en.wikipedia.org/wiki/XS_(Perl)" class="extiw" title="wikipedia:XS (Perl)">perl5's XS</a>, its interface for creating subroutines in ANSI C and other lower-level languages. The API was designed to be as efficient and fast as possible and, as a result, is hard to learn, understand and follow.</p>
+<p><a name="Strategy_for_Optimization" id="Strategy_for_Optimization"></a></p>
+<h2><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=7" title="Edit section: Strategy for Optimization">edit</a>]</span> <span class="mw-headline">Strategy for Optimization</span></h2>
+<p>The first thing you need to do before optimizing is to make sure that your code has many automated tests with good test coverage. Optimizing the code (or making any other kind of modification) may break something, and the automated tests will catch that.</p>
+<p>The next thing you need to do is to make sure your code is modular. Duplicate code and other properties that make code non-modular can prevent a clear analysis of the code's bottlenecks when profiling it.</p>
+<p>Afterwards, it is important to <a href="http://en.wikipedia.org/wiki/Performance_analysis" class="extiw" title="wikipedia:Performance analysis">profile your code using a good software profiler</a>. Profiling will help show which code does not take a lot of time to run and therefore it would be best not to invest a lot of optimization effort in it. One should start by optimizing the most time-consuming tasks first.</p>
+<p>It takes some expertise to know how to properly analyze the results given by the profiler, and to see what needs to be optimized. For example, some IO-intensive routines may appear to consume a lot of time, while in fact that time cannot be effectively eliminated because the amount of IO is already minimal (while still being relatively time-consuming). I feel that discussing this aspect of profiling further is orthogonal to the mission of this article, so I'm not going to cover it here.</p>
+<p>Finally, it is a good idea to have someone review the code and give general remarks on speed bottlenecks found there. To quote Eric Raymond from <a href="http://www.catb.org/~esr/writings/cathedral-bazaar/" class="external text" title="http://www.catb.org/~esr/writings/cathedral-bazaar/" rel="nofollow">"The Cathedral and the Bazaar"</a>: "Given enough eyeballs, all bugs are shallow".</p>
+<p><a name="Order_of_Complexity_Optimizations" id="Order_of_Complexity_Optimizations"></a></p>
+<h2><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=8" title="Edit section: Order of Complexity Optimizations">edit</a>]</span> <span class="mw-headline">Order of Complexity Optimizations</span></h2>
+<p><a name="What_is_order_of_complexity.3F" id="What_is_order_of_complexity.3F"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=9" title="Edit section: What is order of complexity?">edit</a>]</span> <span class="mw-headline">What is order of complexity?</span></h3>
+<p>Generally, an algorithm has an <a href="http://en.wikipedia.org/wiki/Computational_complexitytheory" class="extiw" title="wikipedia:Computational complexitytheory">asymptotic computational complexity</a>. Assuming the input is of size N, we can say that the algorithm will finish in O(N), O(N^2), O(N^3), O(N*log(N)) etc. time. This means that there is a certain mathematical expression of the size of the input, and the algorithm's running time is bounded between two constant factors of it.</p>
+<p>Generally, the smaller the order of complexity of the program's underlying algorithm, the faster it will run and the better it will scale as the input gets larger. Thus, we should often seek more efficient algorithms in order to reduce the order of complexity.</p>
+<p><a name="Some_examples_for_reducing_Order_of_Complexity" id="Some_examples_for_reducing_Order_of_Complexity"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=10" title="Edit section: Some examples for reducing Order of Complexity">edit</a>]</span> <span class="mw-headline">Some examples for reducing Order of Complexity</span></h3>
+<p><a name="Lookup" id="Lookup"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=11" title="Edit section: Lookup">edit</a>]</span> <span class="mw-headline">Lookup</span></h4>
+<p>Let's suppose we're looking for a certain item in a collection of <b>N</b> items. A naïve algorithm for looking for such an item would be to go over all the items one after the other, and see if they match this item. We can stop when we find the item, or declare that it wasn't found once we have gone over the entire collection.</p>
+<p>This is called a <a href="http://en.wikipedia.org/wiki/Linear_search" class="extiw" title="wikipedia:Linear search">linear search</a>, and has an average (and worst-case) complexity of O(N). Now, if we're going to do such a naïve lookup many times, then it will cost us O(N) every time. And this is usually unacceptable.</p>
+<p>A better idea would be to use a more efficient lookup. For example, a <a href="http://en.wikipedia.org/wiki/Binary_search" class="extiw" title="wikipedia:Binary search">Binary search</a> is O(log(N)). It assumes we keep the existing items in a sorted array, or in a balanced tree. A <a href="http://en.wikipedia.org/wiki/Hash_table" class="extiw" title="wikipedia:Hash table">Hash table</a> is a heuristic that with a good design of its underlying parameters provides an <b>average</b> O(1) lookup, but often O(log(N)) is good enough.</p>
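+<p>To make this concrete, here is a minimal sketch in C of a binary search over a sorted array of integers (the function and variable names are invented for this example; a real collection would typically hold more complex records and use a comparison callback):</p>
+<pre>
+/* Binary search over a sorted array; returns the index of "key",
+ * or -1 if it is not present. Performs O(log(N)) comparisons. */
+static int lookup(const int *items, int len, int key)
+{
+    int low = 0;
+    int high = len - 1;
+
+    while (low &lt;= high)
+    {
+        int mid = low + (high - low) / 2;
+
+        if (items[mid] == key)
+        {
+            return mid;
+        }
+        else if (items[mid] &lt; key)
+        {
+            low = mid + 1;
+        }
+        else
+        {
+            high = mid - 1;
+        }
+    }
+    return -1;
+}
+</pre>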
+<p><a name="Case_study_for_Lookup_-_the_Freecell_Solver_States_Collection" id="Case_study_for_Lookup_-_the_Freecell_Solver_States_Collection"></a></p>
+<h5><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=12" title="Edit section: Case study for Lookup - the Freecell Solver States Collection">edit</a>]</span> <span class="mw-headline">Case study for Lookup - the Freecell Solver States Collection</span></h5>
+<p><a href="http://fc-solve.berlios.de/" class="external text" title="http://fc-solve.berlios.de/" rel="nofollow">Freecell Solver</a> is a library written in <a href="http://en.wikipedia.org/wiki/ANSI_C" class="extiw" title="wikipedia:ANSI C">ANSI C</a> that solves deals of <a href="http://en.wikipedia.org/wiki/FreeCell" class="extiw" title="wikipedia:FreeCell">FreeCell</a> and similar "full-knowledge" Solitaire games. Freecell Solver used traditional <a href="http://en.wikipedia.org/wiki/Game_artificial_intelligence" class="extiw" title="wikipedia:Game artificial intelligence">Game artificial intelligence</a> heuristics from the beginning such as <a href="http://en.wikipedia.org/wiki/Depth_First_Search" class="extiw" title="wikipedia:Depth First Search">Depth First Search</a> and <a href="http://en.wikipedia.org/wiki/Best_First_Search" class="extiw" title="wikipedia:Best First Search">Best First Search</a>. As a result, there was a need to maintain a collection of the previously encountered board positions (or "states") so they won't be checked twice.</p>
+<p>The very first version of the solver (written in <a href="http://en.wikipedia.org/wiki/Perl" class="extiw" title="wikipedia:Perl">Perl</a>) used a linear search over an array. That proved to be too slow to solve even the most elementary deals effectively. Afterwards, the program was re-implemented in C, and used a sorted array with a "sort margin" that was sorted using ANSI C's <a href="http://www.cplusplus.com/reference/clibrary/cstdlib/qsort.html" class="external text" title="http://www.cplusplus.com/reference/clibrary/cstdlib/qsort.html" rel="nofollow">qsort function</a>, which performs the <a href="http://en.wikipedia.org/wiki/Quick_Sort" class="extiw" title="wikipedia:Quick Sort">Quick Sort</a> algorithm with an average complexity of O(N*log(N)), giving the program an average lookup of O(log(N)) and an accumulated insertion cost of between O(N*log(N)) and O(N^2).</p>
+<p>Later versions made optional use of two <a href="http://en.wikipedia.org/wiki/Balanced_Binary_Tree" class="extiw" title="wikipedia:Balanced Binary Tree">Balanced Binary Trees</a> libraries: <a href="http://www.stanford.edu/~blp/avl/" class="external text" title="http://www.stanford.edu/~blp/avl/" rel="nofollow">libavl</a> and <a href="http://libredblack.sourceforge.net/" class="external text" title="http://libredblack.sourceforge.net/" rel="nofollow">libredblack</a>, which have a maximal O(log(N)) lookup and insertion and an accumulated O(N*log(N)) run-time. Some time later, a custom hash table was coded, whose run-time was even somewhat faster than the balanced binary trees, and had an <b>average</b> run-time of O(N). This hash was later further optimized using micro-optimizations.</p>
+<p><a name="Counting_Sort_instead_of_Comparison-Based_Sort" id="Counting_Sort_instead_of_Comparison-Based_Sort"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=13" title="Edit section: Counting Sort instead of Comparison-Based Sort">edit</a>]</span> <span class="mw-headline">Counting Sort instead of Comparison-Based Sort</span></h4>
+<p>Normal comparison-based sort algorithms such as <a href="http://en.wikipedia.org/wiki/Quick_Sort" class="extiw" title="wikipedia:Quick Sort">Quick Sort</a>, <a href="http://en.wikipedia.org/wiki/Merge_Sort" class="extiw" title="wikipedia:Merge Sort">Merge Sort</a> or <a href="http://en.wikipedia.org/wiki/Heap_Sort" class="extiw" title="wikipedia:Heap Sort">Heap Sort</a> run at O(N*log(N)). This is much better than naïve comparison-based algorithms such as "Insertion Sort", or "Bubble Sort" which run at O(N^2). However, we can improve upon O(N*log(N)) too, in some cases.</p>
+<p>One such case is when the keys in question are, or can be mapped to, integers in a certain range. In this case, we can use an algorithm such as <a href="http://en.wikipedia.org/wiki/Counting_sort" class="extiw" title="wikipedia:Counting sort">"Counting Sort"</a> to sort in roughly O(N) time. We can also try using <a href="http://en.wikipedia.org/wiki/Radix_sort" class="extiw" title="wikipedia:Radix sort">"Radix sort"</a> along with counting sort, if we have more than one such "digit" in the radix (as long as their number remains constant).</p>
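+<p>As an illustration, here is a minimal counting sort sketch in C for keys that are known to be bytes in the range 0-255 (the range and the names are arbitrary, chosen only for the example):</p>
+<pre>
+#include &lt;string.h&gt;
+
+/* Sorts an array of bytes in O(N + 256) time: count how many times
+ * each possible key occurs, then rewrite the array in key order. */
+static void counting_sort(unsigned char *items, size_t len)
+{
+    size_t counts[256] = {0};
+    size_t i, pos = 0;
+    int key;
+
+    for (i = 0; i &lt; len; i++)
+    {
+        counts[items[i]]++;
+    }
+    for (key = 0; key &lt; 256; key++)
+    {
+        memset(items + pos, key, counts[key]);
+        pos += counts[key];
+    }
+}
+</pre>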
+<p>On the other hand, sometimes using Insertion Sort is preferable over Merge Sort or Quick Sort, if the number of elements being sorted is very small, as the latter algorithms have a lot of overhead.</p>
+<p><a name="Joel_Spolsky.27s_Schlemiel_the_Painter.27s_Syndrome" id="Joel_Spolsky.27s_Schlemiel_the_Painter.27s_Syndrome"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=14" title="Edit section: Joel Spolsky's Schlemiel the Painter's Syndrome">edit</a>]</span> <span class="mw-headline">Joel Spolsky's Schlemiel the Painter's Syndrome</span></h4>
+<p>In his <a href="http://www.joelonsoftware.com/articles/fog0000000319.html" class="external text" title="http://www.joelonsoftware.com/articles/fog0000000319.html" rel="nofollow">article "Back to Basics"</a>, Joel Spolsky illustrates a common (and unnecessary) pattern that increases the complexity of programs. Essentially, when one does in C:</p>
+<pre>
+  char string[1000];
+  strcpy (string, "One ");
+  strcat (string, "Two ");
+  strcat (string, "Three ");
+  strcat (string, "Four ");
+  .
+  .
+  .
+</pre>
+<p>And so forth, then each strcat call will start from the beginning of the string and seek the (ever more distant) end, time and again. As a result, the complexity of appending N strings, each of bounded length, becomes O(N^2) instead of O(N).</p>
+<p>Eliminating such problematic mis-implementations in the code can yield a substantial speed increase.</p>
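+<p>One common way to eliminate it is to keep track of the end of the string ourselves, so that each append starts where the previous one left off. Here is a sketch using sprintf (from &lt;stdio.h&gt;), which returns the number of characters it wrote:</p>
+<pre>
+  char string[1000];
+  char *end = string;
+
+  /* "end" always points at the current end of the string, so no append
+   * has to re-scan the text that is already in the buffer. */
+  end += sprintf(end, "%s", "One ");
+  end += sprintf(end, "%s", "Two ");
+  end += sprintf(end, "%s", "Three ");
+  end += sprintf(end, "%s", "Four ");
+</pre>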
+<p><a name="Note_about_Order-of-Complexity_Reduction" id="Note_about_Order-of-Complexity_Reduction"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=15" title="Edit section: Note about Order-of-Complexity Reduction">edit</a>]</span> <span class="mw-headline">Note about Order-of-Complexity Reduction</span></h3>
+<p>It should be noted that some algorithms with a proven lower Big-O complexity than equivalent ones are either too complicated to be effectively implemented, or have a huge run-time factor that makes them impractical for most reasonable data-sets, or both. For example, there's an algorithm for finding the Median (= middle element) of an array in linear (O(N)) time, but it's very complex, and its linear run-time carries such a huge constant factor that an efficient O(N*log(N)) sort would normally be faster in practice. Moreover, we are very often interested in the Median precisely in order to optimize sorting, so it would make sense not to use this algorithm in the first place.</p>
+<p><a name="Factor_Optimizations" id="Factor_Optimizations"></a></p>
+<h2><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=16" title="Edit section: Factor Optimizations">edit</a>]</span> <span class="mw-headline">Factor Optimizations</span></h2>
+<p><a name="What_are_Factor_Optimizations.3F" id="What_are_Factor_Optimizations.3F"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=17" title="Edit section: What are Factor Optimizations?">edit</a>]</span> <span class="mw-headline">What are Factor Optimizations?</span></h3>
+<p>Factor Optimizations are the opposite of order-of-complexity optimizations: they don't change the overall complexity of the algorithm, but rather make it run faster by a certain factor. As an extreme (but not so unrealistic) example, I may reduce the time my program takes to run from 5 seconds times N^2 (where N is the length of the input) to 1 second times N^2. One can see that we improve the run-time by a certain factor, but still don't address potential scalability problems.</p>
+<p><a name="The_Motivation_for_Factor_Optimizations" id="The_Motivation_for_Factor_Optimizations"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=18" title="Edit section: The Motivation for Factor Optimizations">edit</a>]</span> <span class="mw-headline">The Motivation for Factor Optimizations</span></h3>
+<p>Are factor optimizations worth your time? Some people don't seem to think so. For example, Eric Raymond has <a href="http://catb.org/esr/writings/taoup/html/ch12s01.html" class="external text" title="http://catb.org/esr/writings/taoup/html/ch12s01.html" rel="nofollow">this to say in "The Art of Unix Programming"</a>:</p>
+<blockquote>
+<p>One is the exponential effect of Moore's Law — the smartest, cheapest, and often fastest way to collect performance gains is to wait a few months for your target hardware to become more capable. Given the cost ratio between hardware and programmer time, there are almost always better things to do with your time than to optimize a working system.</p>
+<p>We can get mathematically specific about this. It is almost never worth doing optimizations that reduce resource use by merely a constant factor; it's smarter to concentrate effort on cases in which you can reduce average-case running time or space use from O(n^2) to O(n) or O(n log n),[112] or similarly reduce from a higher order. Linear performance gains tend to be rapidly swamped by Moore's Law.[113]</p>
+</blockquote>
+<p>Randall Hyde presents an opposing view in <a href="http://www.onlamp.com/pub/a/onlamp/2004/05/06/writegreatcode.html" class="external text" title="http://www.onlamp.com/pub/a/onlamp/2004/05/06/writegreatcode.html" rel="nofollow">his OnLAMP.com feature, "Why Learning Assembly Language is Still a Good Idea"</a>:</p>
+<blockquote>
+<p>Most of the time you can achieve very good performance boosts by simply improving the implementation of an existing algorithm. A computer scientist may argue that a constant improvement in performance isn't as good as, say, going from an algorithm with O(n^2) performance to one with O(n lg n) performance, but the truth is that most of the time a constant factor of two or three times improvement, applied throughout a piece of software, can make the difference between a practical application and one that is simply too slow to comfortably use. And it is exactly this type of optimization with which most modern programmers have little experience.</p>
+</blockquote>
+<p>So which viewpoint is correct? I tend to think that Raymond is wrong, while Hyde is correct. The constant factors in your program's run-time are still important, and can make a world of difference. Some factor optimizations may yield huge benefits and will make you and your users happier.</p>
+<p>Raymond is also mistaken because one cannot expect one's end-users to upgrade their machines. Furthermore, it seems that we've <a href="http://www.gotw.ca/publications/concurrency-ddj.htm" class="external text" title="http://www.gotw.ca/publications/concurrency-ddj.htm" rel="nofollow">hit the end of the linear CPU speed increase</a> for semiconductor-based CPUs, and that we cannot expect serial code to become faster with new hardware. Highly parallelized code may become faster, but parallelization is not always possible, or desirable.</p>
+<p><a href="http://www.mail-archive.com/linux-il@cs.huji.ac.il/msg27751.html" class="external text" title="http://www.mail-archive.com/linux-il@cs.huji.ac.il/msg27751.html" rel="nofollow">Another illustrative story</a> about why it's not a good idea to depend on Moore's law to speed up your code was posted by Gilboa Davara to the Linux-IL mailing list. Quoting from there while editing for coherency:</p>
+<blockquote>
+<p>A couple of years ago I worked for a medical software development company. I was working on the database development side. (We had our own proprietary object oriented database)</p>
+<p>Our database was pretty cool; it could handle an hospital level load on a dual Pentium Pro machine. (Which was a far cry from most big iron machines that were used back then.)</p>
+<p>Our medical software side used PowerBuilder (and later VB) to develop the medical applications. To put it mildly, the medical application itself was, by far, slower and heavier than the medical database that it was built upon. While 50 clients could run easily on a Pentium I 90 MHz with 32MB of RAM, the medical application ran like shit on a Pentium I 166 MHz machine with 64MB of RAM!</p>
+<p>And every time we pointed this anomaly out to the med team, they claimed that "new machines are bound to come out, with new CPUs; by the time we are out, CPU power won't be an issue."</p>
+<p>You know what, that med software now runs slower than a dead dog on a top-level Pentium 3 / Pentium 4 / Athlon machine… nothing has changed.</p>
+</blockquote>
+<p><a name="Examples_for_Factor_Optimizations" id="Examples_for_Factor_Optimizations"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=19" title="Edit section: Examples for Factor Optimizations">edit</a>]</span> <span class="mw-headline">Examples for Factor Optimizations</span></h3>
+<p><a name="Managing_Pointers_to_Structs_Instead_of_the_Structs_Themselves" id="Managing_Pointers_to_Structs_Instead_of_the_Structs_Themselves"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=20" title="Edit section: Managing Pointers to Structs Instead of the Structs Themselves">edit</a>]</span> <span class="mw-headline">Managing Pointers to Structs Instead of the Structs Themselves</span></h4>
+<p>If we have a collection of many C/C++ structs (structures that contain a number of adjacent elements of possibly different data types), then swapping two such structs (say, for re-ordering or sorting) will require a lot of memory access. On the other hand, if we manage pointers to such structures, with permanent addresses, then swapping two 32-bit or 64-bit pointers will be relatively cheap.</p>
+<p>The first ANSI C release (0.2.0) of Freecell Solver allocated a direct array of large C-structs, and then sorted them and binary searched them. In the 0.4.0 release, an array of pointers to individually-malloc()-ed structs was implemented instead, which resulted in a huge boost of speed. This taught me a valuable lesson on how to make smart use of pointers as an optimization tool.</p>
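+<p>A small sketch of the idea (the struct and its fields are invented for the example): swapping two pointers moves one machine word each, no matter how big the underlying structs are:</p>
+<pre>
+struct state
+{
+    unsigned char data[128];      /* a large record */
+};
+
+static void swap_structs(struct state *a, struct state *b)
+{
+    struct state tmp = *a;        /* copies the whole record - expensive */
+    *a = *b;
+    *b = tmp;
+}
+
+static void swap_pointers(struct state **a, struct state **b)
+{
+    struct state *tmp = *a;       /* copies one machine word - cheap */
+    *a = *b;
+    *b = tmp;
+}
+</pre>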
+<p><a name="Reducing_Memory_Consumption" id="Reducing_Memory_Consumption"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=21" title="Edit section: Reducing Memory Consumption">edit</a>]</span> <span class="mw-headline">Reducing Memory Consumption</span></h4>
+<p>The more memory an application, or its individual elements, consumes, the more cache misses it has, the more page swapping it requires, and the more probable it becomes that a piece of data will span more than one cache line. As a result, reducing the size of a program can often lead to speed benefits.</p>
+<p>As <a href="http://www.mail-archive.com/linux-il@cs.huji.ac.il/msg07076.html" class="external text" title="http://www.mail-archive.com/linux-il@cs.huji.ac.il/msg07076.html" rel="nofollow">documented in a post to Linux-IL</a>, when Freecell Solver was converted from representing cards as 32-bit values to representing them as 8-bit octets, it became much faster. This is likely due to less swapping, fewer cache misses, and the fact that more cards could fit in the Pentium's 32-byte cache line.</p>
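+<p>For the sake of illustration, a playing card can be packed into a single octet roughly like this (this is only a sketch of the idea, not the actual Freecell Solver representation):</p>
+<pre>
+/* One card per byte: the low 4 bits hold the rank (1-13) and the next
+ * 2 bits hold the suit (0-3). */
+typedef unsigned char card_t;
+
+static card_t make_card(int rank, int suit)
+{
+    return (card_t)((suit &lt;&lt; 4) | rank);
+}
+
+static int card_rank(card_t card)
+{
+    return card &amp; 0x0F;
+}
+
+static int card_suit(card_t card)
+{
+    return (card >> 4) &amp; 0x03;
+}
+</pre>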
+<p><a href="http://lwn.net/Articles/82495/" class="external text" title="http://lwn.net/Articles/82495/" rel="nofollow">The "Cost of Inline Functions" LWN.net story</a> is also illustrative. When one function in the Linux kernel was un-inlined, it made the kernel somewhat faster. The reason was that all of the inlined instances of it occupied a (relatively) large amount of memory per-instance, and as a result, the kernel was larger, and there were more cache misses.</p>
+<p><a name="Note_about_the_Memory-Speed_Tradeoff" id="Note_about_the_Memory-Speed_Tradeoff"></a></p>
+<h5><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=22" title="Edit section: Note about the Memory-Speed Tradeoff">edit</a>]</span> <span class="mw-headline">Note about the Memory-Speed Tradeoff</span></h5>
+<p>After reading what was written here, you may think this contradicts the common Memory-Speed Trade-off "truism". The Memory/Speed Trade-off has its origins in theoretical computer science, where it is shown that for certain tasks, one can reduce the run-time's asymptotic complexity by increasing the asymptotic amount of memory used (and vice versa). For example, we can sometimes reduce the asymptotic run-time from O(N^2) to O(N) by increasing the memory from O(1) to O(N).</p>
+<p>This is all nice and true, but it doesn't contradict the fact that given the architecture of contemporary computer hardware and operating systems, the less memory a program uses (while the logic of the algorithm remains the same) - the faster it will probably run. It's not an asymptotic trade-off, but rather a gain-gain situation.</p>
+<p><a name="Parallelization" id="Parallelization"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=23" title="Edit section: Parallelization">edit</a>]</span> <span class="mw-headline">Parallelization</span></h4>
+<p>By <a href="http://en.wikipedia.org/wiki/Parallel_computing" class="extiw" title="wikipedia:Parallel computing">parallelizing</a> a task, one can split it into several lines of execution (processes, threads, tasks on different computers in the cluster, etc.) that will each run in parallel. As a result, the complete task itself will hopefully finish faster. A good example of parallelization is performing the same time-consuming operation on a lot of inputs. If we assign different processes subsets of the inputs, then they will likely finish the work faster.</p>
+<p>Lately, parallelization has become especially attractive for making code run faster due to the advancement of multi-core CPUs. However, it should be noted that parallelization is still limited by some factors such as locking, serialization and de-serialization of the inter-process data, context-switching, CPU and operating system-constraints, and the fact that often the number of tasks will exceed the number of available processors. Most importantly, <a href="http://en.wikipedia.org/wiki/Amdahl%27s_law" class="extiw" title="wikipedia:Amdahl's law">Amdahl's law</a> rules that the serial part of the task (by definition) cannot be parallelized, and so limits the amount of gain from the parallelization.</p>
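+<p>To put some illustrative numbers on Amdahl's law, the following small C helper (written only for this example) computes the bound on the speedup for a task whose parallelizable fraction is <b>p</b>, run on <b>s</b> processors:</p>
+<pre>
+/* Upper bound on the speedup predicted by Amdahl's law. */
+static double amdahl_speedup(double p, int s)
+{
+    return 1.0 / ((1.0 - p) + p / s);
+}
+
+/* amdahl_speedup(0.9, 4) is about 3.1; even with infinitely many
+ * processors the speedup cannot exceed 1 / (1 - 0.9) = 10. */
+</pre>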
+<p><a name="Putting_the_Most_Important_struct_Members_First" id="Putting_the_Most_Important_struct_Members_First"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=24" title="Edit section: Putting the Most Important struct Members First">edit</a>]</span> <span class="mw-headline">Putting the Most Important struct Members First</span></h4>
+<p>In the Linux kernel, struct members are ordered so that the most important ones fit within the Pentium architecture's 32-byte cache line. This way, access to the struct in general is sped up, because its most important members remain in the cache line most of the time and can be accessed more quickly.</p>
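+<p>Schematically (the struct and its field names are invented for this example), the idea is to group the frequently accessed members at the top of the struct so that they share a cache line:</p>
+<pre>
+struct connection
+{
+    /* "Hot" members, touched on every packet - kept together so that
+     * they fit into a single 32-byte cache line. */
+    unsigned int state;
+    unsigned int flags;
+    void *rx_queue;
+    void *tx_queue;
+
+    /* "Cold" members, accessed rarely (setup, teardown, statistics). */
+    char name[64];
+    unsigned long packets_total;
+    unsigned long errors_total;
+};
+</pre>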
+<p><a name="Copy-on-write" id="Copy-on-write"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=25" title="Edit section: Copy-on-write">edit</a>]</span> <span class="mw-headline">Copy-on-write</span></h4>
+<p>Another useful optimization is known as <a href="http://en.wikipedia.org/wiki/Copy-on-write" class="extiw" title="wikipedia:Copy-on-write">copy-on-write</a>. A good example of this is implementing a virtual machine for a programming language, where one variable is assigned to another. While we could duplicate the contents of the variable on every such assignment, it is cheaper to just increase a reference count, and to wait until one of the variables is modified before actually separating them.</p>
+<p>If the copied contents are large, then copy-on-write can yield significant savings in time.</p>
+<p><a name="Caching" id="Caching"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=26" title="Edit section: Caching">edit</a>]</span> <span class="mw-headline">Caching</span></h4>
+<p>If we perform many costly queries, then caching commonly-issued queries along with the results we got in memory is likely to increase the overall performance of the program. Caching is implemented in all kinds of software - from operating system kernels that hold recently-accessed file-system entries in cache, to database servers that keep caches of the various stages of the queries given to them.</p>
+<p>There's a variation on caching called <a href="http://en.wikipedia.org/wiki/Memoization" class="extiw" title="wikipedia:Memoization">Memoization</a>, in which we never discard our results. It can be demonstrated that by memoizing the naïve (tree-recursive) Fibonacci-number calculation, one can actually reduce its complexity from an exponential one to a linear one.</p>
+<p>One should note that caching or memoization cannot be applied in many cases - for example, if the queries have side-effects.</p>
+<p><a name="Avoiding_Copying_Altogether" id="Avoiding_Copying_Altogether"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=27" title="Edit section: Avoiding Copying Altogether">edit</a>]</span> <span class="mw-headline">Avoiding Copying Altogether</span></h4>
+<p><a href="http://tech.groups.yahoo.com/group/hackers-il/message/2517" class="external text" title="http://tech.groups.yahoo.com/group/hackers-il/message/2517" rel="nofollow">This Hackers-IL Post</a> gave the case for avoiding unnecessary copying of objects in order to increase performance. Calling copy constructors excessively can have a negative impact on performance, and reducing the number of calls to a minimum can improve performance. Just copying a large contiguous area in memory, several times, may have a negative effect on performance and eliminating it, may also turn out to be beneficial.</p>
+<p><a name="Inline_Functions" id="Inline_Functions"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=28" title="Edit section: Inline Functions">edit</a>]</span> <span class="mw-headline">Inline Functions</span></h4>
+<p>Despite what was said earlier in the context of reducing memory consumption, inlining functions in languages such as C or C++ can often have a positive effect on performance. Function calls are relatively costly operations with a lot of overhead, and avoiding a call by inserting the function's code in-place wherever it is used can increase performance. Inlining becomes more beneficial the shorter the function is, and especially when the memory occupied by the in-place copies is smaller than the memory that would be used without inlining.</p>
+<p>If you are unsure whether inlining a certain function has a positive or negative effect, then benchmark and see.</p>
+<p><a name="The_Schwartzian_Transform" id="The_Schwartzian_Transform"></a></p>
+<h4><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=29" title="Edit section: The Schwartzian Transform">edit</a>]</span> <span class="mw-headline">The Schwartzian Transform</span></h4>
+<p>The <a href="http://en.wikipedia.org/wiki/Schwartzian_transform" class="extiw" title="wikipedia:Schwartzian transform">Schwartzian transform</a> is a way to optimize certain kinds of comparison-based sort operations. If the keys by which we want to compare the elements take a lot of time to calculate (for example, if they require access to the hard-disk), then we can first prepare an equivalent array of pairs of the inputs and their keys, then sort the pairs based on the keys, and finally extract only the original inputs from the sorted pairs.</p>
+<p>It should be noted that the Schwartzian transform reduces the order of complexity of the number of times the keys are calculated from O(N*log(N)) to O(N). However, the overall order of complexity of the sort remains O(N*log(N)).</p>
+<p><a name="Call_for_a_Catalog_of_Optimizations" id="Call_for_a_Catalog_of_Optimizations"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=30" title="Edit section: Call for a Catalog of Optimizations">edit</a>]</span> <span class="mw-headline">Call for a Catalog of Optimizations</span></h3>
+<p>I believe it would be a good idea to concentrate all the known optimizations in one place, in a similar fashion to the <a href="http://www.refactoring.com/catalog/" class="external text" title="http://www.refactoring.com/catalog/" rel="nofollow">Catalog of Refactorings</a> and the <a href="http://en.wikipedia.org/wiki/Portland_Pattern_Repository" class="extiw" title="wikipedia:Portland Pattern Repository">Portland Pattern Repository</a>. This would be a list that people could read for completeness, to realize where their code can be improved, for general inspiration, and to facilitate communication.</p>
+<p>I'm not aware of a similar effort as of the time of this writing, and I believe it would be useful. This section covered only a very small subset of the possible optimization strategies, just to give a taste of them, and did not aim to be comprehensive.</p>
+<p><a name="Optimization_by_Changing_the_Dependencies" id="Optimization_by_Changing_the_Dependencies"></a></p>
+<h2><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=31" title="Edit section: Optimization by Changing the Dependencies">edit</a>]</span> <span class="mw-headline">Optimization by Changing the Dependencies</span></h2>
+<p>Often the original choice of programming language or libraries can affect the execution speed, because they are not very optimized, are too bloated, or are otherwise too slow. Moving to a different programming language or library, possibly including replacing third-party code with your own, may have a positive effect on the program's speed.</p>
+<p>For example, if you're writing a lot of number-crunching code in dynamic, P-code languages such as Perl, Python or Ruby (at least without utilizing such APIs as <a href="http://pdl.perl.org/" class="external text" title="http://pdl.perl.org/" rel="nofollow">PDL</a> or <a href="http://www.scipy.org/" class="external text" title="http://www.scipy.org/" rel="nofollow">SciPy</a>), or your code otherwise involves many loops and conditionals, then you can expect it to run slower than if it were written in C or Assembler.</p>
+<p>Similarly, I found that <a href="http://en.wikipedia.org/wiki/GLib" class="extiw" title="wikipedia:GLib">GLib</a>'s balanced binary trees performed more poorly than those of libavl and libredblack, and that its hash performed more poorly than my own custom hash (which has since been optimized further). As a result, eliminating a dependency on GLib's data structures may improve the performance of the program.</p>
+<p><a name="Optimizing_by_Reducing_Feature-Set" id="Optimizing_by_Reducing_Feature-Set"></a></p>
+<h2><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=32" title="Edit section: Optimizing by Reducing Feature-Set">edit</a>]</span> <span class="mw-headline">Optimizing by Reducing Feature-Set</span></h2>
+<p>"There ain't no such thing as a free lunch". The more features your program has, the more code is required to implement them, which in turn consumes more memory, and slows things down. Furthermore, it requires more conditionals and often even loops, which also require extra processing. Thus, by removing features from one's application, one can have faster code.</p>
+<p>For example, in <a href="http://www.joelonsoftware.com/articles/APIWar.html" class="external text" title="http://www.joelonsoftware.com/articles/APIWar.html" rel="nofollow">"How Microsoft Lost the API War"</a>, Joel Spolsky tells that Microsoft has gone to great lengths to maintain bug-to-bug (binary) backward compatibility for their Windows-line of operating system. As a result, buggy software that relied on API quirks that got changed, often could still work in later versions of Windows, because Microsoft made sure that Windows supplied them with a similar environment. As a result of maintaining all of this backward compatibility, there was more code in Microsoft's products, which may have contributed to the common and notorious perception that they became slower with each release.</p>
+<p><a name="Optimizing_by_Compromising_on_Security" id="Optimizing_by_Compromising_on_Security"></a></p>
+<h3><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=33" title="Edit section: Optimizing by Compromising on Security">edit</a>]</span> <span class="mw-headline">Optimizing by Compromising on Security</span></h3>
+<p>I can go further than that, and claim that one can often optimize one's code by compromising on its security. The fewer sanity checks, input checks, etc. a program has - the faster it will run. For example, code that does not check for (and gracefully handle) the failure of dynamic memory allocation calls such as <tt>malloc()</tt> (in languages such as C, without managed memory) will run faster - until, and if, it runs out of memory and crashes.</p>
+<p>I read somewhere that <a href="http://tech.groups.yahoo.com/group/hackers-il/message/1688" class="external text" title="http://tech.groups.yahoo.com/group/hackers-il/message/1688" rel="nofollow">programmers complained that checking for and handling errors in their programs forced them to write about 4 times as much code</a>, which makes sense. All of this extra code slows the program down when everything runs as expected.</p>
+<p>Note that "Here be dragons". It's probably never a good idea to compromise on security in order to increase speed. But I've mentioned it here for completeness sake.</p>
+<p><a name="Conclusion" id="Conclusion"></a></p>
+<h2><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=34" title="Edit section: Conclusion">edit</a>]</span> <span class="mw-headline">Conclusion</span></h2>
+<p>When I mentioned this article to some people on IRC (while it was still incomplete), someone said that "The first rule of optimization is 'Don't!'". I think such an attitude is harmful. While I agree that "premature optimization is the root of all evil", sometimes one's code is just too slow and needs to be optimized. And if your code is what <a href="http://www.joelonsoftware.com/articles/FiveWorlds.html" class="external text" title="http://www.joelonsoftware.com/articles/FiveWorlds.html" rel="nofollow">Joel Spolsky calls "Shrinkwrap"</a> (i.e., code that is sold or otherwise distributed and made available for public consumption), then you often cannot make assumptions about how fast it needs to be, and the faster it is - the better.</p>
+<p>Speed, and the optimization of other resources, is one important factor <a href="http://www.shlomifish.org/philosophy/computers/high-quality-software/" class="external text" title="http://www.shlomifish.org/philosophy/computers/high-quality-software/" rel="nofollow">in the general, abstract quality of programs</a>. If your program is slow, it is likely to make your users frustrated and unhappy, which is a failure in your mission as a software developer. So it is important that your program be fast enough, if not very fast.</p>
+<p>My aim in this document was to explain the "why"'s, "what"'s and to a lesser extent "how"'s of optimization. I hope I was successful, and that people who read it will be more willing to optimize their software and more clueful in how to do so.</p>
+<p><a name="Thanks" id="Thanks"></a></p>
+<h2><span class="editsection">[<a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit&amp;section=35" title="Edit section: Thanks">edit</a>]</span> <span class="mw-headline">Thanks</span></h2>
+<p>I'd like to thank Daan for expressing a lot of interest in this article, and for his constant encouragement and input. Limbic_Region has given me some useful input for this article by email. Several people have made some contributions to this article: <a href="/wiki/User:Jguk" title="User:Jguk">Jguk</a>, <a href="/w/index.php?title=User:Dallas1278&amp;action=edit&amp;redlink=1" class="new" title="User:Dallas1278 (does not exist)">Dallas1278</a> and the IPs 84.27.42.81, 85.146.243.13, 203.196.46.108, 70.176.53.73, and 213.129.10.120 . I'd also like to thank all the originators of the sources that I quoted or referenced in the article.</p>
+
+
+<!-- 
+NewPP limit report
+Preprocessor node count: 128/1000000
+Post-expand include size: 361/2048000 bytes
+Template argument size: 2/2048000 bytes
+Expensive parser function count: 0/500
+-->
+
+<!-- Saved in parser cache with key enwikibooks:pcache:idhash:83059-0!1!0!default!!en!2 and timestamp 20090212165913 -->
+<div class="printfooter">
+Retrieved from "<a href="http://en.wikibooks.org/wiki/Optimizing_Code_for_Speed">http://en.wikibooks.org/wiki/Optimizing_Code_for_Speed</a>"</div>
+			<div id='catlinks' class='catlinks'><div id="mw-normal-catlinks"><a href="/wiki/Special:Categories" title="Special:Categories">Subject</a>:&#32;<span dir='ltr'><a href="/wiki/Category:Computing" title="Category:Computing">Computing</a></span></div><div id="mw-hidden-catlinks" class="mw-hidden-cats-hidden">Hidden category:&#32;<span dir='ltr'><a href="/wiki/Category:Alphabetical/O" title="Category:Alphabetical/O">Alphabetical/O</a></span></div></div>			<!-- end content -->
+						<div class="visualClear"></div>
+		</div>
+	</div>
+		</div>
+		<div id="column-one">
+	<div id="p-cactions" class="portlet">
+		<h5>Views</h5>
+		<div class="pBody">
+			<ul>
+	
+				 <li id="ca-nstab-main" class="selected"><a href="/wiki/Optimizing_Code_for_Speed" title="View the content page [c]" accesskey="c">Book</a></li>
+				 <li id="ca-talk" class="new"><a href="/w/index.php?title=Talk:Optimizing_Code_for_Speed&amp;action=edit&amp;redlink=1" title="Discussion about the content page [t]" accesskey="t">Discussion</a></li>
+				 <li id="ca-edit"><a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit" title="You can edit this page.&#10;Please use the preview button before saving. [e]" accesskey="e">Edit this page</a></li>
+				 <li id="ca-history"><a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;action=history" title="Past versions of this page. [h]" accesskey="h">History</a></li>			</ul>
+		</div>
+	</div>
+	<div class="portlet" id="p-personal">
+		<h5>Personal tools</h5>
+		<div class="pBody">
+			<ul>
+				<li id="pt-login"><a href="/w/index.php?title=Special:UserLogin&amp;returnto=Optimizing_Code_for_Speed" title="You are encouraged to log in, it is not mandatory however. [o]" accesskey="o">Log in / create account</a></li>
+			</ul>
+		</div>
+	</div>
+	<div class="portlet" id="p-logo">
+		<a style="background-image: url(http://upload.wikimedia.org/wikibooks/en/b/bc/Wiki.png);" href="/wiki/Main_Page" title="Visit the Main Page [z]" accesskey="z"></a>
+	</div>
+	<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
+	<div class='generated-sidebar portlet' id='p-navigation'>
+		<h5>Navigation</h5>
+		<div class='pBody'>
+			<ul>
+				<li id="n-mainpage"><a href="/wiki/Main_Page" title="Visit the Main Page">Main Page</a></li>
+				<li id="n-help"><a href="/wiki/Help:Contents" title="The place to find out.">Help</a></li>
+				<li id="n-Cookbook"><a href="/wiki/Cookbook:Table_of_Contents">Cookbook</a></li>
+				<li id="n-Wikijunior"><a href="/wiki/Wikijunior">Wikijunior</a></li>
+				<li id="n-Browse"><a href="/wiki/Wikibooks:Card_Catalog_Office">Browse</a></li>
+				<li id="n-Featured-Books"><a href="/wiki/Wikibooks:Featured_books">Featured Books</a></li>
+				<li id="n-recentchanges"><a href="/wiki/Special:RecentChanges" title="The list of recent changes in the wiki. [r]" accesskey="r">Recent changes</a></li>
+				<li id="n-sitesupport"><a href="http://wikimediafoundation.org/wiki/Donate" title="Support us">Donations</a></li>
+			</ul>
+		</div>
+	</div>
+	<div id="p-search" class="portlet">
+		<h5><label for="searchInput">Search</label></h5>
+		<div id="searchBody" class="pBody">
+			<form action="/wiki/Special:Search" id="searchform"><div>
+				<input id="searchInput" name="search" type="text" title="Search Wikibooks [f]" accesskey="f" value="" />
+				<input type='submit' name="go" class="searchButton" id="searchGoButton"	value="Go" title="Go to a page with this exact name if exists" />&nbsp;
+				<input type='submit' name="fulltext" class="searchButton" id="mw-searchButton" value="Search" title="Search the pages for this text" />
+			</div></form>
+		</div>
+	</div>
+	<div class='generated-sidebar portlet' id='p-community'>
+		<h5>community</h5>
+		<div class='pBody'>
+			<ul>
+				<li id="n-currentevents"><a href="/wiki/Wikibooks:Bulletin_board" title="Find background information on current events">Bulletin Board</a></li>
+				<li id="n-portal"><a href="/wiki/Wikibooks:Community_Portal" title="About the project, what you can do, where to find things">Community portal</a></li>
+				<li id="n-Reading-room"><a href="/wiki/Wikibooks:Reading_room">Reading room</a></li>
+				<li id="n-Help-cleanup"><a href="/wiki/Wikibooks:Maintenance">Help cleanup</a></li>
+				<li id="n-Policy-discussions-and-votes"><a href="/wiki/Wikibooks:Policies_and_guidelines/Vote">Policy discussions and votes</a></li>
+				<li id="n-contact"><a href="/wiki/Wikibooks:Contact_us">Contact us</a></li>
+			</ul>
+		</div>
+	</div>
+	<div class='generated-sidebar portlet' id='p-Create_a_collection'>
+		<h5>Create a collection</h5>
+		<div class='pBody'>
+<ul id="collectionPortletList"><li>
+	<a href="/w/index.php?title=Special:Book/add_article/&amp;arttitle=Optimizing_Code_for_Speed&amp;oldid=0" onclick="collectionCall('AddArticle', [wgNamespaceNumber, wgTitle, 0]); return false;" rel="nofollow">Add wiki page</a>
+</li>							<li><a href="http://en.wikibooks.org/wiki/Help:Collections">Collections help</a></li></ul><script type="text/javascript">
+/* <![CDATA[ */
+	function collectionCall(func, args) {
+		sajax_request_type = 'POST';
+		sajax_do_call('wfAjaxCollection' + func, args, function(xhr) {
+			sajax_request_type = 'GET';
+			sajax_do_call('wfAjaxCollectionGetPortlet', [func], function(xhr) {
+				document.getElementById('collectionPortletList').parentNode.innerHTML = xhr.responseText;
+			});
+		});
+	}
+/* ]]> */
+</script>		</div>
+	</div>
+	<div class="portlet" id="p-tb">
+		<h5>Toolbox</h5>
+		<div class="pBody">
+			<ul>
+				<li id="t-whatlinkshere"><a href="/wiki/Special:WhatLinksHere/Optimizing_Code_for_Speed" title="List of all wiki pages that link here [j]" accesskey="j">What links here</a></li>
+				<li id="t-recentchangeslinked"><a href="/wiki/Special:RecentChangesLinked/Optimizing_Code_for_Speed" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li>
+<li id="t-specialpages"><a href="/wiki/Special:SpecialPages" title="List of all special pages [q]" accesskey="q">Special pages</a></li>
+				<li id="t-print"><a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;printable=yes" title="Printable version of this page [p]" accesskey="p">Printable version</a></li>				<li id="t-permalink"><a href="/w/index.php?title=Optimizing_Code_for_Speed&amp;oldid=1407451" title="Permanent link to this version of the page">Permanent link</a></li>
+				<li id="t-download-as-pdf"><a href="/w/index.php?title=Special:Book/render_article/&amp;arttitle=Optimizing_Code_for_Speed&amp;oldid=1407451&amp;writer=rl" rel="nofollow">PDF version</a></li>
+			</ul>
+		</div>
+	</div>
+		</div><!-- end of the left (by default at least) column -->
+			<div class="visualClear"></div>
+			<div id="footer">
+				<div id="f-poweredbyico"><a href="http://www.mediawiki.org/"><img src="/skins-1.5/common/images/poweredby_mediawiki_88x31.png" alt="Powered by MediaWiki" /></a></div>
+				<div id="f-copyrightico"><a href="http://wikimediafoundation.org/"><img src="/images/wikimedia-button.png" border="0" alt="Wikimedia Foundation"/></a></div>
+			<ul id="f-list">
+					<li id="lastmod"> This page was last modified on 12 February 2009, at 16:59.</li>
+					<li id="copyright">All text is available under the terms of the <a class='internal' href="/wiki/GNU_Free_Documentation_License">GNU Free Documentation License</a> (see <b><a class='internal' href="/wiki/Wikibooks:Copyrights">Copyrights</a></b> for details).<br />
+Wikibooks® is a registered trademark of the Wikimedia Foundation, Inc.<br /></li>
+					<li id="privacy"><a href="http://wikimediafoundation.org/wiki/Privacy_policy" title="wikimedia:Privacy policy">Privacy policy</a></li>
+					<li id="about"><a href="/wiki/Wikibooks:About" title="Wikibooks:About">About Wikibooks</a></li>
+					<li id="disclaimer"><a href="/wiki/Wikibooks:General_disclaimer" title="Wikibooks:General disclaimer">Disclaimers</a></li>
+			</ul>
+		</div>
+</div>
+
+		<script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
+<!-- Served by srv187 in 0.111 secs. --></body></html>

lib/htmls/from-mediawiki/processed/Optimizing_Code_For_Speed-rev1.html

+<?xml version="1.0"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
+	<head>
+		<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+		<meta http-equiv="Content-Style-Type" content="text/css" />
+		<meta name="generator" content="MediaWiki 1.15alpha" />
+		<meta name="keywords" content="Optimizing Code for Speed,Optimizing Code for Speed/Outline,Jguk,Shlomif,Dallas1278" />
+		<link type="application/x-wiki" href="http://en.wikibooks.org/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit" />
+		<link href="http://en.wikibooks.org/w/index.php?title=Optimizing_Code_for_Speed&amp;action=edit" />
+		<link href="/favicon.ico" />
+		<link type="application/opensearchdescription+xml" href="/w/opensearch_desc.php" />
+		<link href="http://www.gnu.org/copyleft/fdl.html" />
+		<link type="application/rss+xml" href="http://en.wikibooks.org/w/index.php?title=Special:RecentChanges&amp;feed=rss" />
+		<link type="application/atom+xml" href="http://en.wikibooks.org/w/index.php?title=Special:RecentChanges&amp;feed=atom" />
+		<title>Optimizing Code for Speed - Wikibooks, collection of open-content textbooks</title>
+		<link href="/skins-1.5/common/shared.css?203xx" type="text/css" media="screen" />
+		<link href="/skins-1.5/common/commonPrint.css?203xx" type="text/css" media="print" />
+		<link href="/skins-1.5/monobook/main.css?203xx" type="text/css" media="screen" />
+		<link href="/skins-1.5/chick/main.css?203xx" type="text/css" media="handheld" />
+		<!--[if lt IE 5.5000]><link rel="stylesheet" href="/skins-1.5/monobook/IE50Fixes.css?203xx" type="text/css" media="screen" /><![endif]-->
+		<!--[if IE 5.5000]><link rel="stylesheet" href="/skins-1.5/monobook/IE55Fixes.css?203xx" type="text/css" media="screen" /><![endif]-->
+		<!--[if IE 6]><link rel="stylesheet" href="/skins-1.5/monobook/IE60Fixes.css?203xx" type="text/css" media="screen" /><![endif]-->
+		<!--[if IE 7]><link rel="stylesheet" href="/skins-1.5/monobook/IE70Fixes.css?203xx" type="text/css" media="screen" /><![endif]-->
+		<link href="/w/extensions/FlaggedRevs/flaggedrevs.css?51" type="text/css" />
+		<link href="/w/index.php?title=MediaWiki:Common.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" />
+		<link href="/w/index.php?title=MediaWiki:Print.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" media="print" />
+		<link href="/w/index.php?title=MediaWiki:Handheld.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" media="handheld" />
+		<link href="/w/index.php?title=MediaWiki:Monobook.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" />
+		<link href="/w/index.php?title=-&amp;action=raw&amp;maxage=2678400&amp;gen=css" type="text/css" />
+		<!--[if lt IE 7]><script type="text/javascript" src="/skins-1.5/common/IEFixes.js?203xx"></script>
+		<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
+
+		
+
+		
+		<!-- Head Scripts -->
+		
+		
+		
+
+
+		
+	</head>
+<body>
+	<div id="globalWrapper">
+		<div id="column-content">
+	<div id="content">
+		<a name="top" id="top"></a>
+		<div id="siteNotice"></div>		<h1 id="firstHeading">Optimizing Code for Speed</h1>
+		<div id="bodyContent">
+			<h3 id="siteSub">From Wikibooks, the open-content textbooks collection</h3>
+			<div id="contentSub"><div id="mw-revisiontag"><span></span>There are no reviewed revisions of this page, so it may <b>not</b> have been <a href="http://en.wikibooks.org/wiki/Help:Page_validation">checked</a> for quality.</div></div>
+									<div id="jump-to-nav">Jump to: <a href="#column-one">navigation</a>, <a href="#searchInput">search</a></div>			<!-- start content -->
+			<div style="background-color:#FDD">
+<p>This work is licensed under the:</p>
+<ol>
+<li><a href="http://creativecommons.org/licenses/by/2.5/">The Creative Commons' Attribution License version 2.5</a> - or at your option any higher version.</li>
+<li><a href="http://www.gnu.org/copyleft/fdl.html">The GNU Free Documentation License</a> version 1.2 - or at your option any higher version.</li>
+</ol>
+<p>Please keep it this way. If you don't like either or both licenses - feel free to fork this work and place your derivative under a different license (while properly accepting the terms of either license as a starting point).</p>
+<p>Authors: <a href="http://en.wikibooks.org/wiki/User:Shlomif">User:Shlomif</a>.</p>
+</div>
+
+
+
+<h2 id="Other_Resources">Other Resources</h2>
+<ul>
+<li><a href="http://en.wikibooks.org/wiki/Optimizing_Code_for_Speed/Outline">Optimizing Code for Speed/Outline</a></li>
+</ul>
+
+<h2 id="Introduction">Introduction</h2>
+<p>Optimization of code is the term applied to a process in which code is tuned to perform better in some respect: speed, memory consumption, Input/Output (disk reads and writes, or network reads and writes), etc. In mathematics, optimization means a process in which one finds the values that give the best performance. In computing, where programs are very complex, optimizing for speed in the strict mathematical sense is usually impossible. Instead, the term has come to mean simply advancing in the direction of better performance in one or more respects.</p>
+<p>This document will focus on optimizing code to run faster. However, as you will see later, doing this may involve having to optimize the code in a different aspect. Furthermore, often when programmers are trying to optimize one aspect of a program, they are doing so in order to increase speed.</p>
+
+<h3 id="In_which_programs_is_optimization_required.3F">In which programs is optimization required?</h3>
+<p>There are several types of programs in which optimization is required. One of them is <a href="http://en.wikipedia.org/wiki/Real-time_computing">real-time programs</a>. Despite a common misconception, these are not necessarily programs that need to be very fast. Rather, real-time programs are programs that must respond to certain events within a certain time limit allotted to the event. So for example, if the user presses a keyboard key while inside a word processor window, it should display the character relatively promptly. A 5-second delay would be unacceptable, and as such a word processor is a kind of application that has some real-time constraints.</p>
+<p>Some parts of real-time systems may need optimization. However, once the time falls below the required delay, no other optimization is necessary. (At least not until testing proves it is again exceeding the limit.)</p>
+<p>Other programs that require optimization are programs that cannot be fast enough. Examples of such programs are Artificial Intelligence programs, games, multimedia programs, virtual machines and interpreters for programming languages, or any type of publicly available library or module whose use "in the wild" may require it to be very fast.</p>
+<p>And naturally, another type is programs that are too slow, or are perceived to be too slow. While programs in some problem domains are inherently slow due to the complexity of the calculations involved, that is usually not the case, and nothing prevents a program from being fast. Programs that are considered too slow should be optimized to make them run faster.</p>
+
+<h3 id="When_to_optimize.3F">When to optimize?</h3>
+<p>The rule of thumb is that one should optimize if the program is not fast enough compared to the desired speed. A common misconception is that if a program is not fast enough now, it will run faster as hardware gets faster. However, that is not always the case.</p>
+<p>If your program is slow, it is usually because the code is not optimized (for example, because it uses sub-optimal algorithms). While better hardware will usually improve performance, it does not guarantee a noticeable speed increase. Furthermore, the progress in hardware speed has allowed programmers to write more wasteful code (see <a href="http://www.paulgraham.com/hundred.html">Paul Graham's "The Hundred Years Language" essay</a>), but some programs are still excessively slow.</p>
+<p>When writing a program on modern hardware, one should make it fast enough to begin with. Some people have claimed that we have already reached the end of <a href="http://en.wikipedia.org/wiki/Moore%27s_law">Moore's First Law</a> in regard to uni-core processors, and that we cannot expect a single-process, single-threaded program to run faster on newer hardware. In any case, even if the program runs well on a high-end platform, it may still need to run on older hardware.</p>
+<p>We've all seen that while computers have become faster, software has often become slower to run unless the hardware is upgraded. <a href="http://en.wikipedia.org/wiki/Gates%27_Law">The so-called "Gates' Law"</a> claims that commercial programs decrease in speed by half every 18 months, for various reasons. It is well known that the various versions of the <a href="http://en.wikipedia.org/wiki/DOS">DOS</a> operating system ran adequately on PC XTs and 286s, and that an Intel 386 was a "lean and mean DOS machine", as a certain journalist put it back then. On the other hand, Microsoft Windows 3.0 and Windows 3.1 already required a fast 486 computer to run comfortably, while Windows 95 was barely usable there and needed a Pentium computer. Windows XP ran slowly on a Pentium machine and required a high-end Pentium III or Pentium 4 computer. <a href="http://en.wikipedia.org/wiki/Windows_Vista">Windows Vista</a> requires even more hardware resources than Windows XP, to the point that many computers in use today cannot run it comfortably.</p>
+<p>Now, while software simulations that run directly against the CPU and memory (and possibly hard-disk) are still running much faster than before, the responsiveness of the system itself does not seem to improve much.</p>
+
+<h3 id="When_not_to_optimize.3F">When not to optimize?</h3>
+<p>One should not optimize when the program in question runs fast enough. However, if it also has to run adequately on slower hardware, handle a greater load, or work well with fewer resources at its disposal, then the program might need to be optimized further based on such requirements.</p>
+
+<h4 id="Optimization_vs._Modularity_and_Readability">Optimization vs. Modularity and Readability</h4>
+<p>Optimization may have a bad influence on such code-quality factors as modularity and readability. Consider for example, what the developers of <a href="http://gmplib.org/">GMP (GNU Multi-precision)</a>, wrote in <a href="http://gmplib.org/manual/Introduction-to-GMP.html#Introduction-to-GMP">their manual</a>:</p>
+<blockquote>
+<p>The speed of GMP is achieved by using fullwords as the basic arithmetic type, by using sophisticated algorithms, by including carefully optimized assembly code for the most common inner loops for many different CPUs, and <b>by a general emphasis on speed (as opposed to simplicity or elegance</b>).</p>
+</blockquote>
+<p>Also consider that short <a href="http://en.wikipedia.org/wiki/Subroutine">functions</a> and <a href="http://en.wikipedia.org/wiki/Method_(computer_science)">methods</a> are considered preferable to longer ones, to allow for greater code re-use, testing, and self-documentation. (See the <a href="http://c2.com/cgi/wiki?ExtractMethod">"Extract Method" refactoring pattern</a>.) However, function and method calls are relatively costly operations, and so this extra code quality will probably make the program somewhat slower.</p>
+<p>Therefore, it's not hard to imagine that a developer who wishes to save some extra cycles will merge a function that is used only once or a few times into its caller code (or incorporate its code using a <a href="http://en.wikipedia.org/wiki/Macro_(computer_science)">macro</a>, or a preprocessing stage, if possible), thereby making the code less modular.</p>
+<p>Similarly, highly optimized code may also become much less readable. For example, the C preprocessor's macros are notorious for making code that uses them unreadable. I have heard <a href="http://www.mail-archive.com/linux-il@cs.huji.ac.il/msg15826.html">some complaints</a> that <a href="http://en.wikipedia.org/wiki/OpenSSL">OpenSSL</a>'s code, which relies on preprocessor macros, identically named static functions and a lot of code generation, is very hard to understand and follow for these reasons.</p>
+<p>Similar complaints are constantly voiced against <a href="http://en.wikipedia.org/wiki/XS_(Perl)">perl5's XS</a>, its interface for creating subroutines in ANSI C and other lower-level languages. The API was designed to be as efficient and fast as possible and, as a result, is hard to learn, understand and follow.</p>
+
+<h2 id="Strategy_for_Optimization">Strategy for Optimization</h2>
+<p>The first thing you need to do before optimizing is to make sure that your code has many automated tests with good test coverage. Optimizing the code (like any other kind of modification) may break something, and the automated tests will catch that.</p>
+<p>The next thing you need to do is to make sure your code is modular. Duplicate code and other properties that make code non-modular can prevent a clear analysis of the code's bottlenecks through profiling.</p>
+<p>Afterwards, it is important to <a href="http://en.wikipedia.org/wiki/Performance_analysis">profile your code using a good software profiler</a>. Profiling will help show which code does not take a lot of time to run and therefore it would be best not to invest a lot of optimization effort in it. One should start by optimizing the most time-consuming tasks first.</p>
+<p>It takes some expertise to know how to properly analyze the results given by the profiler, and to see what needs to be optimized. For example, some IO-intensive routines may appear to consume a lot of time, while in fact that time cannot be effectively eliminated because the amount of IO is minimal (while still being relatively time consuming). I feel that discussing this aspect of profiling further is orthogonal to the mission of this article, so I'm not going to cover it here.</p>
+<p>Finally, it is a good idea to have someone review the code and give general remarks on the speed bottlenecks found there. To quote Eric Raymond from <a href="http://www.catb.org/~esr/writings/cathedral-bazaar/">"The Cathedral and the Bazaar"</a>: "Given enough eyeballs, all bugs are shallow".</p>
+
+<h2 id="Order_of_Complexity_Optimizations">Order of Complexity Optimizations</h2>
+
+<h3 id="What_is_order_of_complexity.3F">What is order of complexity?</h3>
+<p>Generally, an algorithm has an <a href="http://en.wikipedia.org/wiki/Computational_complexity_theory">asymptotic computational complexity</a>. Assuming the input is of size N, we can say that the algorithm will finish in O(N), O(N^2), O(N^3), O(N*log(N)) time, etc. This means that the run-time is bounded, up to constant factors, by a certain mathematical expression in the size of the input.</p>
+<p>Generally, the smaller the order of complexity of the program's underlying algorithm, the faster it will run and the better it will scale as the input gets larger. Thus, we should often seek more efficient algorithms in order to reduce the order of complexity.</p>
+
+<h3 id="Some_examples_for_reducing_Order_of_Complexity">Some examples for reducing Order of Complexity</h3>
+
+<h4 id="Lookup">Lookup</h4>
+<p>Let's suppose we're looking for a certain item in a collection of <b>N</b> items. A na&#xEF;ve algorithm would be to go over all the items one after the other, and check whether each of them matches the item we are looking for. We stop when we find the item, or declare that it is not present once we have gone over all of the items.</p>
+<p>This is called a <a href="http://en.wikipedia.org/wiki/Linear_search">linear search</a>, and has an average (and worst-case) complexity of O(N). Now, if we're going to do such a na&#xEF;ve lookup many times, then it will cost us O(N) every time. And this is usually unacceptable.</p>
+<p>A better idea would be to use a more efficient lookup. For example, a <a href="http://en.wikipedia.org/wiki/Binary_search">Binary search</a> is O(log(N)). It assumes we keep the existing items in a sorted array, or in a balanced tree. A <a href="http://en.wikipedia.org/wiki/Hash_table">Hash table</a> is a heuristic that, with a good design of its underlying parameters, provides an <b>average</b> O(1) lookup; but often O(log(N)) is good enough.</p>
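+<p>To make the idea concrete, here is a minimal sketch of a binary search over a sorted array of integers in C. It is an illustration only (the function name and types are made up for this example), not code taken from any of the projects mentioned here:</p>
+<pre>
+  #include &lt;stddef.h&gt;
+
+  /* Returns the index of key in the sorted array arr of length len,
+     or -1 if it is not present. Average and worst case: O(log(N)). */
+  static long binary_search(const int *arr, size_t len, int key)
+  {
+      size_t lo = 0, hi = len;
+
+      while (lo &lt; hi)
+      {
+          size_t mid = lo + (hi - lo) / 2;
+
+          if (arr[mid] == key)
+              return (long)mid;
+          else if (arr[mid] &lt; key)
+              lo = mid + 1;
+          else
+              hi = mid;
+      }
+      return -1;
+  }
+</pre>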
+
+<h5 id="Case_study_for_Lookup_-_the_Freecell_Solver_States_Collection">Case study for Lookup - the Freecell Solver States Collection</h5>
+<p><a href="http://fc-solve.berlios.de/">Freecell Solver</a> is a library written in <a href="http://en.wikipedia.org/wiki/ANSI_C">ANSI C</a> that solves deals of <a href="http://en.wikipedia.org/wiki/FreeCell">FreeCell</a> and similar "full-knowledge" Solitaire games. Freecell Solver used traditional <a href="http://en.wikipedia.org/wiki/Game_artificial_intelligence">Game artificial intelligence</a> heuristics from the beginning such as <a href="http://en.wikipedia.org/wiki/Depth_First_Search">Depth First Search</a> and <a href="http://en.wikipedia.org/wiki/Best_First_Search">Best First Search</a>. As a result, there was a need to maintain a collection of the previously encountered board positions (or "states") so they won't be checked twice.</p>
+<p>The very first version of the solver (written in <a href="http://en.wikipedia.org/wiki/Perl">Perl</a>) used a linear search over an array. That proved too slow to solve even the most elementary deals. Afterwards, the program was re-implemented in C, and used a sorted array with a "sort margin" that was sorted using ANSI C's <a href="http://www.cplusplus.com/reference/clibrary/cstdlib/qsort.html">qsort() function</a>, which performs the <a href="http://en.wikipedia.org/wiki/Quick_Sort">Quick Sort</a> algorithm at an average complexity of O(N*log(N)), giving the program an average lookup of O(log(N)) and an accumulated insertion cost of between O(N*log(N)) and O(N^2).</p>
+<p>Later versions made optional use of two <a href="http://en.wikipedia.org/wiki/Balanced_Binary_Tree">Balanced Binary Trees</a> libraries: <a href="http://www.stanford.edu/~blp/avl/">libavl</a> and <a href="http://libredblack.sourceforge.net/">libredblack</a>, which have a maximal O(log(N)) lookup and insertion and an accumulative O(N*log(N)) run-time. Some time later, a custom hash table was coded, whose run-time was even somewhat faster than that of the balanced binary trees, and which had an <b>average</b> accumulated run-time of O(N). This hash was later optimized further using micro-optimizations.</p>
+
+<h4 id="Counting_Sort_instead_of_Comparison-Based_Sort">Counting Sort instead of Comparison-Based Sort</h4>
+<p>Normal comparison-based sort algorithms such as <a href="http://en.wikipedia.org/wiki/Quick_Sort">Quick Sort</a>, <a href="http://en.wikipedia.org/wiki/Merge_Sort">Merge Sort</a> or <a href="http://en.wikipedia.org/wiki/Heap_Sort">Heap Sort</a> run at O(N*log(N)). This is much better than na&#xEF;ve comparison-based algorithms such as "Insertion Sort", or "Bubble Sort" which run at O(N^2). However, we can improve upon O(N*log(N)) too, in some cases.</p>
+<p>One such case is when the keys in question are, or can be mapped to, integers in a certain range. In this case, we can use an algorithm such as <a href="http://en.wikipedia.org/wiki/Counting_sort">"Counting Sort"</a> to sort in roughly O(N) time. We can also try using <a href="http://en.wikipedia.org/wiki/Radix_sort">"Radix sort"</a> along with counting sort, if we have more than one such "digit" in the radix (as long as their number remains constant).</p>
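+<p>As a hedged illustration of counting sort (not tied to any particular real-world code base), here is a C sketch that sorts an array of unsigned bytes in O(N) time, at the cost of a small auxiliary table of counts:</p>
+<pre>
+  #include &lt;stddef.h&gt;
+
+  /* Sorts an array of unsigned bytes in place by counting how many times
+     each of the 256 possible values occurs - O(N) overall. */
+  static void counting_sort_bytes(unsigned char *arr, size_t len)
+  {
+      size_t counts[256] = {0};
+      size_t i, pos = 0;
+      int v;
+
+      for (i = 0; i &lt; len; i++)
+          counts[arr[i]]++;
+
+      for (v = 0; v &lt; 256; v++)
+      {
+          while (counts[v] &gt; 0)
+          {
+              arr[pos++] = (unsigned char)v;
+              counts[v]--;
+          }
+      }
+  }
+</pre>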
+<p>On the other hand, sometimes using Insertion Sort is preferable over Merge Sort or Quick Sort, if the number of elements being sorted is very small, as the latter algorithms have a lot of overhead.</p>
+
+<h4 id="Joel_Spolsky.27s_Schlemiel_the_Painter.27s_Syndrome">Joel Spolsky's Schlemiel the Painter's Syndrome</h4>
+<p>In his <a href="http://www.joelonsoftware.com/articles/fog0000000319.html">article "Back to Basics"</a>, Joel Spolsky illustrates a common (and unnecessary) pattern that increases the complexity of programs. Essentially, when one does in C:</p>
+<pre>
+  char string[1000];
+  strcpy (string, "One ");
+  strcat (string, "Two ");
+  strcat (string, "Three ");
+  strcat (string, "Four ");
+  .
+  .
+  .
+</pre>
+<p>And so forth. The strcat calls will keep starting from the beginning of the string and scanning for its (ever-increasing) end time and again. As a result, the complexity of appending N strings, each of limited length, becomes O(N^2) instead of O(N).</p>
+<p>Eliminating such problematic mis-implementations in the code can yield a substantial speed-increase.</p>
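+<p>One possible fix (a sketch of the idea, continuing the fragment above - not the only way to do it) is to keep a pointer to the current end of the string, so that each append starts where the previous one stopped, and the whole sequence of appends stays O(N):</p>
+<pre>
+  #include &lt;stdio.h&gt;  /* for sprintf() */
+
+  char string[1000];
+  char *end = string;
+
+  /* sprintf() returns the number of characters written, so end always
+     points at the terminating NUL - no re-scanning of the string. */
+  end += sprintf(end, "One ");
+  end += sprintf(end, "Two ");
+  end += sprintf(end, "Three ");
+  end += sprintf(end, "Four ");
+  .
+  .
+  .
+</pre>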
+
+<h3 id="Note_about_Order-of-Complexity_Reduction">Note about Order-of-Complexity Reduction</h3>
+<p>It should be noted that some algorithms with a provably lower Big-O complexity than equivalent ones are either too complicated to be implemented effectively, or have such a huge constant factor that they are impractical for most reasonable data-sets, or both. For example, there's an algorithm for finding the Median (= middle element) of an array in linear (O(N)) time, but it's very complex, and its linear run-time carries such a huge factor that an efficient O(N*log(N)) sort would normally be faster in practice. Moreover, we are very often interested in the Median precisely in order to optimize sorting, so it would make sense not to use this algorithm in the first place.</p>
+
+<h2 id="Factor_Optimizations">Factor Optimizations</h2>
+
+<h3 id="What_are_Factor_Optimizations.3F">What are Factor Optimizations?</h3>
+<p>Factor Optimizations are the opposite of order of complexity optimizations: they don't change the overall complexity of the algorithm, but rather make it run faster by a certain factor. As an extreme (but not so unrealistic) example, I may reduce the time my program takes to run from 5 seconds times N^2 (where N is the length of the input) to 1 second times N^2. One can see that we have improved the run-time by a constant factor, but still have not handled potential scalability problems.</p>
+
+<h3 id="The_Motivation_for_Factor_Optimizations">The Motivation for Factor Optimizations</h3>
+<p>Are factor optimizations worth your time? Some people don't seem to think so. For example, Eric Raymond has <a href="http://catb.org/esr/writings/taoup/html/ch12s01.html">this to say in "The Art of Unix Programming"</a>:</p>
+<blockquote>
+<p>One is the exponential effect of Moore's Law &#x2014; the smartest, cheapest, and often fastest way to collect performance gains is to wait a few months for your target hardware to become more capable. Given the cost ratio between hardware and programmer time, there are almost always better things to do with your time than to optimize a working system.</p>
+<p>We can get mathematically specific about this. It is almost never worth doing optimizations that reduce resource use by merely a constant factor; it's smarter to concentrate effort on cases in which you can reduce average-case running time or space use from O(n^2) to O(n) or O(n log n),[112] or similarly reduce from a higher order. Linear performance gains tend to be rapidly swamped by Moore's Law.[113]</p>
+</blockquote>
+<p>Randall Hyde presents an opposing view in <a href="http://www.onlamp.com/pub/a/onlamp/2004/05/06/writegreatcode.html">his OnLAMP.com feature, "Why Learning Assembly Language is Still a Good Idea"</a>:</p>
+<blockquote>
+<p>Most of the time you can achieve very good performance boosts by simply improving the implementation of an existing algorithm. A computer scientist may argue that a constant improvement in performance isn't as good as, say, going from an algorithm with O(n^2) performance to one with O(n lg n) performance, but the truth is that most of the time a constant factor of two or three times improvement, applied throughout a piece of software, can make the difference between a practical application and one that is simply too slow to comfortably use. And it is exactly this type of optimization with which most modern programmers have little experience.</p>
+</blockquote>
+<p>So which viewpoint is correct? I tend to think that Raymond is wrong, while Hyde is correct. The constant factors in your program's run-time are still important, and can make a world of difference. Some factor optimizations may yield huge benefits and will make you and your users happier.</p>
+<p>Raymond is also mistaken because one cannot expect one's end-users to upgrade their machines. Furthermore, it seems that we've <a href="http://www.gotw.ca/publications/concurrency-ddj.htm">hit the end of the linear CPU speed increase</a> for semiconductor-based CPUs, and that we cannot expect serial code to become faster with new hardware. Highly parallelized code may become faster, but parallelization is not always possible, or desirable.</p>
+<p><a href="http://www.mail-archive.com/linux-il@cs.huji.ac.il/msg27751.html">Another illustrative story</a> about why it's not a good idea to depend on Moore's law to speed up your code was posted by Gilboa Davara to the Linux-IL mailing list. Quoting from there while editing for coherency:</p>
+<blockquote>
+<p>A couple of years ago I worked for a medical software development company. I was working on the database development side. (We had our own proprietary object oriented database)</p>
+<p>Our database was pretty cool; it could handle an hospital level load on a dual Pentium Pro machine. (Which was a far cry from most big iron machines that were used back then.)</p>
+<p>Our medical software side used PowerBuilder (and later VB) to develop the medical applications. To put it mildly, the medical application itself was, by far, slower and heavier than the medical database that it was built upon. While 50 clients could run easily on a Pentium I 90 MHz with 32MB of RAM, the medical application ran like shit on a Pentium I 166 MHz machine with 64MB of RAM!</p>
+<p>And every-time we pointed this anomaly to the med team, they claimed that "new machines are bound, new CPUs; by the time we are out, CPU power won't be an issue."</p>
+<p>You know what, that med software now runs slower than a dead dog on a top-level Pentium 3 / Pentium 4 / Athlon machine&#x2026; nothing has changed.</p>
+</blockquote>
+
+<h3 id="Examples_for_Factor_Optimizations">Examples for Factor Optimizations</h3>
+
+<h4 id="Managing_Pointers_to_Structs_Instead_of_the_Structs_Themselves">Managing Pointers to Structs Instead of the Structs Themselves</h4>
+<p>If we have a collection of many C/C++ structs (structures that contain a number of adjacent elements of possibly different data types), then swapping two such structs (say, for re-ordering or sorting) will require a lot of memory accesses. On the other hand, if we manage pointers to such structures, which have permanent addresses, then swapping two 32-bit or 64-bit pointers is relatively cheap.</p>
+<p>The first ANSI C release (0.2.0) of Freecell Solver allocated a direct array of large C-structs, and then sorted them and binary searched them. In the 0.4.0 release, an array of pointers to individually-malloc()-ed structs was implemented instead, which resulted in a huge boost of speed. This taught me a valuable lesson on how to make smart use of pointers as an optimization tool.</p>
+
+<h4 id="Reducing_Memory_Consumption">Reducing Memory Consumption</h4>
+<p>The more memory an application, or its individual elements, consumes, the more cache misses it incurs, the more page swapping it requires, and the more probable it becomes that a piece of data will span more than one cache line. As a result, reducing the memory footprint of a program can often lead to speed benefits.</p>
+<p>As <a href="http://www.mail-archive.com/linux-il@cs.huji.ac.il/msg07076.html">documented in a post to Linux-IL</a>, when Freecell Solver was converted from representing cards as 32-bit values to representing them as 8-bit octets, it became much faster. This is likely due to less swapping, fewer cache misses, and the fact that more cards could fit in the Pentium's 32-byte cache line.</p>
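+<p>A hypothetical sketch of the idea (this is not Freecell Solver's actual code): a card's rank and suit can be packed into a single octet instead of two machine words, so a whole board state occupies far fewer cache lines:</p>
+<pre>
+  #include &lt;stdint.h&gt;
+
+  /* A wasteful representation - typically 8 bytes per card. */
+  struct wasteful_card
+  {
+      int rank;   /* 1-13 */
+      int suit;   /* 0-3  */
+  };
+
+  /* A compact representation - one octet per card: the rank in the low
+     4 bits and the suit in the 2 bits above it. */
+  typedef uint8_t compact_card;
+
+  static compact_card make_card(int rank, int suit)
+  {
+      return (compact_card)((suit &lt;&lt; 4) | rank);
+  }
+
+  static int card_rank(compact_card c) { return c &amp; 0x0F; }
+  static int card_suit(compact_card c) { return (c &gt;&gt; 4) &amp; 0x03; }
+</pre>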
+<p><a href="http://lwn.net/Articles/82495/">The "Cost of Inline Functions" LWN.net story</a> is also illustrative. When one function in the Linux kernel was un-inlined, it made the kernel somewhat faster. The reason was that all of the inlined instances of it occupied a (relatively) large amount of memory per-instance, and as a result, the kernel was larger, and there were more cache misses.</p>
+
+<h5 id="Note_about_the_Memory-Speed_Tradeoff">Note about the Memory-Speed Tradeoff</h5>
+<p>After reading what was written here, you may think this contradicts the common Memory-Speed Trade-off "truism". The Memory/Speed Trade-off has its origins in theoretical computer science, where it is shown that for certain tasks, one can reduce the run-time's asymptotic complexity by increasing the asymptotic amount of memory used (and vice versa). For example, we can sometimes reduce the asymptotic run-time from O(N^2) to O(N) by increasing the memory from O(1) to O(N).</p>
+<p>This is all nice and true, but it doesn't contradict the fact that given the architecture of contemporary computer hardware and operating systems, the less memory a program uses (while the logic of the algorithm remains the same) - the faster it will probably run. It's not an asymptotic trade-off, but rather a win-win situation.</p>
+
+<h4 id="Parallelization">Parallelization</h4>
+<p>By <a href="http://en.wikipedia.org/wiki/Parallel_computing">parallelizing</a> a task, one can split it into several lines of execution (processes, threads, tasks on different computers in a cluster, etc.) that each run in parallel. As a result, the complete task will hopefully finish faster. A good example of parallelization is performing the same time-consuming operation on a lot of inputs. If we assign a different subset of the inputs to each process, then the whole task will likely finish faster.</p>
+<p>Lately, parallelization has become especially attractive for making code run faster due to the advancement of multi-core CPUs. However, it should be noted that parallelization is still limited by such factors as locking, serialization and de-serialization of the inter-process data, context switching, CPU and operating system constraints, and the fact that the number of tasks will often exceed the number of available processors. Most importantly, <a href="http://en.wikipedia.org/wiki/Amdahl%27s_law">Amdahl's law</a> dictates that the serial part of the task (by definition) cannot be parallelized, and so limits the gain from parallelization.</p>
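+<p>To get a feel for what Amdahl's law implies, here is a small, self-contained C sketch that computes the theoretical speedup of a task whose parallelizable fraction is p, when run on n processors (speedup = 1 / ((1 - p) + p / n)):</p>
+<pre>
+  #include &lt;stdio.h&gt;
+
+  /* Theoretical speedup according to Amdahl's law: p is the fraction of
+     the task that can be parallelized, n is the number of processors. */
+  static double amdahl_speedup(double p, int n)
+  {
+      return 1.0 / ((1.0 - p) + p / n);
+  }
+
+  int main(void)
+  {
+      /* Even with 16 processors, a task that is 90% parallelizable is
+         sped up by a factor of only 6.4. */
+      printf("%.2f\n", amdahl_speedup(0.9, 16));
+      return 0;
+  }
+</pre>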
+
+<h4 id="Putting_the_Most_Important_struct_Members_First">Putting the Most Important struct Members First</h4>
+<p>In the Linux kernel, struct members are ordered so that the most important members fit within the Pentium architecture's 32-byte cache line. This way, access to the struct in general is sped up, because its most frequently used members remain in the cache line most of the time and can be accessed more quickly.</p>
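+<p>A hypothetical illustration (not an actual kernel struct): the members that are touched on almost every operation are placed first, so that on a CPU with 32-byte cache lines they are all fetched together:</p>
+<pre>
+  #include &lt;stdint.h&gt;
+
+  struct connection
+  {
+      /* "Hot" members, accessed on every operation - together they fit
+         within the first 32 bytes, i.e. a single cache line. */
+      uint32_t state;
+      uint32_t flags;
+      uint64_t bytes_sent;
+      uint64_t bytes_received;
+      uint64_t last_activity;
+
+      /* "Cold" members, rarely touched - placed afterwards. */
+      char peer_name[256];
+      char log_path[256];
+  };
+</pre>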
+
+<h4 id="Copy-on-write">Copy-on-write</h4>
+<p>Another useful optimization is known as <a href="http://en.wikipedia.org/wiki/Copy-on-write">copy-on-write</a>. A good example of this is implementing a virtual machine for a programming language, where one variable is assigned to another. While we could duplicate the contents of the variable on every such assignment, it is cheaper to just increase a reference count, and to wait until one of the variables is modified before actually separating them.</p>
+<p>If the copied contents are large, then copy-on-write can yield significant savings in time.</p>
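+<p>A minimal reference-counting sketch in C (hypothetical, not taken from any particular virtual machine; error checking omitted for brevity): "assignment" only increments a counter, and the actual copy is deferred until one of the sharers is about to write:</p>
+<pre>
+  #include &lt;stdlib.h&gt;
+  #include &lt;string.h&gt;
+
+  struct buffer
+  {
+      long refcount;
+      size_t len;
+      char *data;
+  };
+
+  /* "Assigning" the buffer is just taking another reference - O(1). */
+  static struct buffer *buffer_share(struct buffer *b)
+  {
+      b-&gt;refcount++;
+      return b;
+  }
+
+  /* Called before writing: copy lazily, and only if the data is shared. */
+  static struct buffer *buffer_make_writable(struct buffer *b)
+  {
+      struct buffer *copy;
+
+      if (b-&gt;refcount == 1)
+          return b;            /* sole owner - write in place */
+
+      copy = malloc(sizeof(*copy));
+      copy-&gt;refcount = 1;
+      copy-&gt;len = b-&gt;len;
+      copy-&gt;data = malloc(b-&gt;len);
+      memcpy(copy-&gt;data, b-&gt;data, b-&gt;len);
+      b-&gt;refcount--;
+      return copy;
+  }
+</pre>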
+
+<h4 id="Caching">Caching</h4>
+<p>If we perform many costly queries, then caching commonly-issued queries along with the results we got in memory is likely to increase the overall performance of the program. Caching is implemented in all kinds of software - from operating system kernels that hold recently-accessed file-system entries in cache, to database servers that keep caches of the various stages of the queries given to them.</p>
+<p>There's a variation on caching called <a href="http://en.wikipedia.org/wiki/Memoization">Memoization</a>, in which we never discard our results. It can be demonstrated that by memoizing the na&#xEF;ve (tree-recursive) Fibonacci-number calculation, one can actually reduce its complexity from an exponential one to a linear one.</p>
+<p>One should note that caching or memoization cannot be applied in many cases - for example, if the queries have side-effects.</p>
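+<p>For instance, here is a small sketch in C of memoizing the na&#xEF;ve recursive Fibonacci calculation; the tree recursion stays the same, but each value is computed only once:</p>
+<pre>
+  #include &lt;stdio.h&gt;
+
+  #define MAX_N 90
+
+  /* Cache of already-computed results; 0 marks "not computed yet". */
+  static unsigned long long memo[MAX_N + 1];
+
+  static unsigned long long fib(int n)
+  {
+      if (n &lt;= 1)
+          return (unsigned long long)n;
+      if (memo[n] == 0)
+          memo[n] = fib(n - 1) + fib(n - 2);
+      return memo[n];
+  }
+
+  int main(void)
+  {
+      /* Finishes instantly; without the memo[] array the naive
+         recursion would make an astronomical number of calls. */
+      printf("%llu\n", fib(MAX_N));
+      return 0;
+  }
+</pre>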
+
+<h4 id="Avoiding_Copying_Altogether">Avoiding Copying Altogether</h4>
+<p><a href="http://tech.groups.yahoo.com/group/hackers-il/message/2517">This Hackers-IL Post</a> gave the case for avoiding unnecessary copying of objects in order to increase performance. Calling copy constructors excessively can have a negative impact on performance, and reducing the number of calls to a minimum can improve performance. Just copying a large contiguous area in memory, several times, may have a negative effect on performance and eliminating it, may also turn out to be beneficial.</p>
+
+<h4 id="Inline_Functions">Inline Functions</h4>
+<p>Despite what was said earlier in the context of reducing memory consumption, inlining functions in languages such as C or C++ can often have a positive effect on performance. Function calls are relatively costly operations with a lot of overhead, and avoiding a call by inserting the function's code in-place wherever it is used can increase performance. Inlining becomes more beneficial the shorter the function is, and especially when the memory occupied by the in-place copies is smaller than the memory that would be used without inlining.</p>
+<p>If you are unsure whether inlining a certain function has a positive or negative effect, then benchmark and see.</p>
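+<p>A brief C99 sketch of a function that is a good candidate for inlining - a tiny accessor whose inlined body (a single memory read) is smaller than the call sequence it replaces (again, the names are made up for the example):</p>
+<pre>
+  #include &lt;stddef.h&gt;
+
+  struct point { double x, y; };
+
+  /* Tiny accessor - a prime candidate for inlining. */
+  static inline double point_x(const struct point *p)
+  {
+      return p-&gt;x;
+  }
+
+  static double sum_of_x(const struct point *pts, size_t n)
+  {
+      double sum = 0.0;
+      size_t i;
+      for (i = 0; i &lt; n; i++)
+          sum += point_x(&amp;pts[i]);   /* the call should vanish entirely */
+      return sum;
+  }
+</pre>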
+
+<h4 id="The_Schwartzian_Transform">The Schwartzian Transform</h4>
+<p>The <a href="http://en.wikipedia.org/wiki/Schwartzian_transform">Schwartzian transform</a> is a way to optimize certain kinds of comparison-based sort operations. If the keys of the function that we want to compare take a lot of time to calculate (such as if they require access to the hard-disk), then we can first prepare an equivalent array with pairs of the inputs and their keys, then sort the pairs based on the keys, and finally filter only the original inputs from the pairs.</p>
+<p>It should be noted that the Schwartzian transform only reduces the number of key calculations from O(N*log(N)) to O(N); the overall complexity of the sort remains O(N*log(N)).</p>
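+<p>In Perl, where the idiom originated, the transform is written as a map-sort-map chain. Here is a sketch that sorts files by size, computing the file-size key (a stat call) once per file rather than once per comparison; the file list is just an example:</p>
+<pre>
+use strict;
+use warnings;
+
+my @files = glob("*");
+
+# map:  pair each file with its precomputed key - here, its size
+# sort: compare only the precomputed keys
+# map:  strip the keys, keeping the original file names
+my @sorted =
+    map  { $_->[0] }
+    sort { $a->[1] &lt;=> $b->[1] }
+    map  { [ $_, -s $_ ] }
+        @files;
+
+print "$_\n" for @sorted;
+</pre>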
+
+<h3 id="Call_for_a_Catalog_of_Optimizations">Call for a Catalog of Optimizations</h3>
+<p>I believe it would be a good idea to concentrate all the known optimizations in one place, in a similar fashion to the <a href="http://www.refactoring.com/catalog/">Catalog of Refactorings</a> and the <a href="http://en.wikipedia.org/wiki/Portland_Pattern_Repository">Portland Pattern Repository</a>. This would be a list that people can read for completeness, to realize where their code can be improved, for general inspiration, and to facilitate communication.</p>
+<p>I'm not aware of a similar effort as of this writing, but I believe it would be useful. This section covered only a small subset of the possible optimization strategies, just to give a taste of them, and did not aim to be comprehensive.</p>
+
+<h2 id="Optimization_by_Changing_the_Dependencies">Optimization by Changing the Dependencies</h2>
+<p>Often the original choice of programming language or libraries hurts execution speed, because they are not very optimized, too bloated, or otherwise too slow. Moving to a different programming language or library, possibly including replacing third-party code with your own, may have a positive effect on your program's speed.</p>
+<p>For example, if you're writing a lot of number-crunching code in dynamic, P-code languages such as Perl, Python or Ruby (at least without utilizing such APIs as <a href="http://pdl.perl.org/">PDL</a> or <a href="http://www.scipy.org/">SciPy</a>), or if your code otherwise involves many loops and conditionals, then you can expect it to run slower than if it were written in C or assembly.</p>
+<p>Similarly, I found that <a href="http://en.wikipedia.org/wiki/GLib">GLib</a>'s balanced binary trees performed more poorly than those of libavl and libredblack, and that its hash table performed more poorly than my own custom hash (which has since been optimized further). As a result, eliminating a dependency on GLib's data structures may improve the performance of a program.</p>
+
+<h2 id="Optimizing_by_Reducing_Feature-Set">Optimizing by Reducing Feature-Set</h2>
+<p>"There ain't no such thing as a free lunch". The more features your program has, the more code is required to implement them, which in turn consumes more memory, and slows things down. Furthermore, it requires more conditionals and often even loops, which also require extra processing. Thus, by removing features from one's application, one can have faster code.</p>
+<p>For example, in <a href="http://www.joelonsoftware.com/articles/APIWar.html">"How Microsoft Lost the API War"</a>, Joel Spolsky tells how Microsoft went to great lengths to maintain bug-to-bug (binary) backward compatibility for their Windows line of operating systems. As a result, buggy software that relied on API quirks that were later changed could often still work in later versions of Windows, because Microsoft made sure Windows supplied it with a similar environment. Maintaining all of this backward compatibility meant more code in Microsoft's products, which may have contributed to the common and notorious perception that they became slower with each release.</p>
+
+<h3 id="Optimizing_by_Compromising_on_Security">Optimizing by Compromising on Security</h3>
+<p>I can go further than that and claim that one can often optimize one's code by compromising on its security. The fewer sanity checks, input checks, etc. a program has, the faster it will run. For example, code that does not check for (and gracefully handle) the failure of dynamic memory allocation calls such as <tt>malloc()</tt> (in languages without managed memory, such as C) will run faster - until it runs out of memory, if it ever does, and then crashes.</p>
+<p>I read somewhere that <a href="http://tech.groups.yahoo.com/group/hackers-il/message/1688">programmers complained that checking and handling errors in their programs forced them to write about 4 times as much code</a>, which makes sense. All this extra code slows the program down even when everything runs as expected.</p>
+<p>Note that "Here be dragons". It's probably never a good idea to compromise on security in order to increase speed. But I've mentioned it here for completeness sake.</p>
+
+<h2 id="Conclusion">Conclusion</h2>
+<p>When I mentioned this article to some people on IRC (while it was still incomplete), someone said that "The first rule of optimization is 'Don't!'". I think such an attitude is harmful. While I agree that "premature optimization is the root of all evil", sometimes one's code is simply too slow and needs to be optimized. And if your code is what <a href="http://www.joelonsoftware.com/articles/FiveWorlds.html">Joel Spolsky calls "shrinkwrap"</a> (i.e. code that is sold or otherwise distributed and made available for public consumption), then you often cannot make assumptions about how fast it needs to be, and the faster it is, the better.</p>
+<p>Speed, along with the economical use of other resources, is one important factor <a href="http://www.shlomifish.org/philosophy/computers/high-quality-software/">in the general, abstract quality of programs</a>. If your program is slow, it is likely to make your users frustrated and unhappy, which is a failure in your mission as a software developer. So it is important that your program be fast enough, if not very fast.</p>
+<p>My aim in this document was to explain the "why"s, the "what"s and, to a lesser extent, the "how"s of optimization. I hope I was successful, and that people who read it will be more willing to optimize their software and more clueful about how to do so.</p>
+
+<h2 id="Thanks">Thanks</h2>
+<p>I'd like to thank Daan for expressing a lot of interest in this article, and for his constant encouragement and input. Limbic_Region has given me some useful input for this article by email. Several people have made contributions to this article: <a href="http://en.wikibooks.org/wiki/User:Jguk">Jguk</a>, <a href="http://en.wikibooks.org/w/index.php?title=User:Dallas1278&amp;action=edit&amp;redlink=1">Dallas1278</a> and the IPs 84.27.42.81, 85.146.243.13, 203.196.46.108, 70.176.53.73, and 213.129.10.120. I'd also like to thank all the originators of the sources that I quoted or referenced in the article.</p></div></div></div></div></body></html>