Commits

Shlomi Fish committed 033eddf

Add the initial version of the code.

Comments (0)

Files changed (13)

+This is a maintenance and update version of Peter Sanders’ code from this
+paper:
+
+http://www.mpi-inf.mpg.de/~sanders/papers/spqjea.ps.gz
+
+It was originally found here:
+
+http://www.mpi-inf.mpg.de/~sanders/programs/spq/
+
+Prof. Sanders has kinda licensed it under the Boost Software License
+open source license:
+
+http://en.wikipedia.org/wiki/Boost_Software_License
+
+Note that a modified version of some of this code is available for TPIE
+here:
+
+http://madalgo.au.dk/Trac-tpie/ticket/15
+
+Regards,
+
+    Shlomi Fish, shlomif@cpan.org, http://www.shlomifish.org/ .

peter-sanders-priority-queues/heap-CLR.h

+// Binary heaps using a straightforward implementation a la CLR
+#ifndef HEAP2
+#define HEAP2
+// #include "util.h"
+
+template <class Key, class Value>
+struct KNElement {Key key; Value value;};
+
+// fixed size binary heap
+template <class Key, class Value>
+class Heap2 {
+  //  static const Key infimum  = 4;
+  //static const Key supremum = numeric_limits<Key>.max();
+  typedef KNElement<Key, Value> Element;
+  Element *data;
+  int capacity;
+  int supremum;
+  int size;  // index of last used element
+public:
+  Heap2(Key sup, Key infimum, int cap):size(0),capacity(cap) { 
+    data = new Element[cap + 2];
+    data[0].key = infimum; // sentinel
+    data[capacity + 1].key = sup;
+    supremum = sup;
+    reset();
+  }
+  ~Heap2() { delete data; } 
+  Key getSupremum() { return supremum; }
+  void reset();
+  int   getSize()     const { return size; }
+  Key   getMinKey()   const { return data[1].key; }
+  Value getMinValue() const { return data[1].value; }
+  void  deleteMinBasic();
+  void  deleteMin(Key *key, Value *value) {
+    *key   = getMinKey();
+    *value = getMinValue();
+    deleteMinBasic();
+  }
+  void  insert(Key k, Value v);
+  void  sortTo(Element *to); // sort in increasing order and empty
+  //void  sortInPlace(); // in decreasing order
+  void print() {
+    for (int i = 1;  i <= size;  i++) {
+      cout << data[i].key << " ";
+    }
+    cout << endl;
+  }
+};
+
+
+// reset size to 0 and fill data array with sentinels
+template <class Key, class Value>
+inline void Heap2<Key, Value>::
+reset() {
+  size = 0;
+  Key sup = getSupremum();
+  int cap = capacity;
+  for (int i = 1;  i <= cap;  i++) {
+    data[i].key = sup;
+  }
+  // if this becomes a bottle neck
+  // we might want to replace this by log KNN 
+  // memcpy-s
+}
+
+
+
+template <class Key, class Value>
+inline void Heap2<Key, Value>::
+deleteMinBasic()
+{ int i, l, r, smallest;
+  Assert2(size > 0);
+
+  data[1] = data[size];
+  data[size].key = getSupremum();
+  size--;
+  i = 1;
+  for (;;) {
+    Debug3(print());
+    l = (i << 1);
+    r = (i << 1) + 1;
+    if ((l <= size) && data[l].key < data[i].key) {
+      smallest = l;
+    } else {
+      smallest = i;
+    }
+    if ((r <= size) && data[r].key < data[smallest].key) {
+      smallest = r;
+    }
+    if (smallest == i) break;
+    Element temp = data[i];
+    data[i] = data[smallest];
+    data[smallest] = temp;
+    i = smallest;
+  }
+}
+
+
+// empty the heap and put the element to "to"
+// sorted in increasing order
+template <class Key, class Value>
+inline void Heap2<Key, Value>::
+sortTo(Element *to)
+{
+  const int           sz = size;
+  const Key          sup = getSupremum();
+  Element * const beyond = to + sz;
+  Element * const root   = data + 1;
+  while (to < beyond) {
+    // copy minimun
+    *to = *root;
+    to++;
+
+    // bubble up second smallest as in deleteMin
+    int hole = 1;
+    int succ = 2;
+    while (succ <= sz) {
+      Key key1 = data[succ    ].key;
+      Key key2 = data[succ + 1].key;
+      if (key1 > key2) {
+        succ++;
+        data[hole].key   = key2;
+        data[hole].value = data[succ].value;
+      } else {
+        data[hole].key = key1;
+        data[hole].value = data[succ].value;
+      }
+      hole = succ;
+      succ <<= 1;
+    }
+
+    // just mark hole as deleted
+    data[hole].key = sup;
+  }
+  size = 0;
+}
+
+
+template <class Key, class Value>
+inline void Heap2<Key, Value>::
+insert(Key k, Value v)
+{
+  Assert2(size < capacity);
+  Debug4(cout << "insert(" << k << ", " << v << ")" << endl);
+
+  size++;
+  int hole = size; 
+  while (hole > 1 && data[hole >> 1].key > k) {
+    data[hole] = data[hole >> 1];
+    hole = hole >> 1;
+  }
+  data[hole].key   = k;
+  data[hole].value = v;
+}
+
+#endif

peter-sanders-priority-queues/heap2.h

+// 4ary heap data structure
+#ifndef HEAP2
+#define HEAP2
+#include "util.h"
+
+template <class Key, class Value>
+struct KNElement {Key key; Value value;};
+
+// fixed size binary heap
+template <class Key, class Value>
+class Heap2 {
+  //  static const Key infimum  = 4;
+  //static const Key supremum = numeric_limits<Key>.max();
+  typedef KNElement<Key, Value> Element;
+  Element *data;
+  int capacity;
+  int supremum;
+  int size;  // index of last used element
+public:
+  Heap2(Key sup, Key infimum, int cap):size(0),capacity(cap) { 
+    data = new Element[cap + 2];
+    data[0].key = infimum; // sentinel
+    data[capacity + 1].key = sup;
+    supremum = sup;
+    reset();
+  }
+  ~Heap2() { delete data; } 
+  Key getSupremum() { return supremum; }
+  void reset();
+  int   getSize()     const { return size; }
+  Key   getMinKey()   const { return data[1].key; }
+  Value getMinValue() const { return data[1].value; }
+  void  deleteMinBasic();
+  void  deleteMin(Key *key, Value *value) {
+    *key   = getMinKey();
+    *value = getMinValue();
+    deleteMinBasic();
+  }
+  void  insert(Key k, Value v);
+  void  sortTo(Element *to); // sort in increasing order and empty
+  //void  sortInPlace(); // in decreasing order
+};
+
+
+// reset size to 0 and fill data array with sentinels
+template <class Key, class Value>
+inline void Heap2<Key, Value>::
+reset() {
+  size = 0;
+  Key sup = getSupremum();
+  int cap = capacity;
+  for (int i = 1;  i <= cap;  i++) {
+    data[i].key = sup;
+  }
+  // if this becomes a bottle neck
+  // we might want to replace this by log KNN 
+  // memcpy-s
+}
+
+template <class Key, class Value>
+inline void Heap2<Key, Value>::
+deleteMinBasic()
+{
+  Assert2(size > 0);
+
+  // first move up elements on a min-path
+  int hole = 1; 
+  int succ = 2;
+  int sz   = size;
+  Element *dat = data;
+  while (succ < sz) {
+    Key key1 = dat[succ].key;
+    Key key2 = dat[succ + 1].key;
+    if (key1 > key2) {
+      succ++;
+      dat[hole].key   = key2;
+      dat[hole].value = dat[succ].value;
+    } else {
+      dat[hole].key   = key1;
+      dat[hole].value = dat[succ].value;
+    }
+    hole = succ;
+    succ <<= 1;
+  }
+
+  // bubble up rightmost element
+  Key bubble = dat[sz].key;
+  int pred = hole >> 1;
+  while (dat[pred].key > bubble) { // must terminate since min at root
+    dat[hole] = dat[pred];
+    hole = pred;
+    pred >>= 1;
+  }
+
+  // finally move data to hole
+  dat[hole].key = bubble;
+  dat[hole].value = dat[sz].value;
+
+  dat[size].key = getSupremum(); // mark as deleted
+  size = sz - 1;
+}
+
+
+// empty the heap and put the element to "to"
+// sorted in increasing order
+template <class Key, class Value>
+inline void Heap2<Key, Value>::
+sortTo(Element *to)
+{
+  const int           sz = size;
+  const Key          sup = getSupremum();
+  Element * const beyond = to + sz;
+  Element * const root   = data + 1;
+  while (to < beyond) {
+    // copy minimun
+    *to = *root;
+    to++;
+
+    // bubble up second smallest as in deleteMin
+    int hole = 1;
+    int succ = 2;
+    while (succ <= sz) {
+      Key key1 = data[succ    ].key;
+      Key key2 = data[succ + 1].key;
+      if (key1 > key2) {
+        succ++;
+        data[hole].key   = key2;
+        data[hole].value = data[succ].value;
+      } else {
+        data[hole].key = key1;
+        data[hole].value = data[succ].value;
+      }
+      hole = succ;
+      succ <<= 1;
+    }
+
+    // just mark hole as deleted
+    data[hole].key = sup;
+  }
+  size = 0;
+}
+
+
+template <class Key, class Value>
+inline void Heap2<Key, Value>::
+insert(Key k, Value v)
+{
+  Assert2(size < capacity);
+  Debug4(cout << "insert(" << k << ", " << v << ")" << endl);
+
+  Element *dat = data;
+  size++;
+  int hole = size; 
+  int pred = hole >> 1;
+  Key predKey = dat[pred].key;
+  while (predKey > k) { // must terminate due to sentinel at 0
+    dat[hole].key   = predKey;
+    dat[hole].value = dat[pred].value;
+    hole = pred;
+    pred >>= 1;
+    predKey = dat[pred].key;
+  }
+
+  // finally move data to hole
+  dat[hole].key   = k;
+  dat[hole].value = v;
+}
+
+#endif

peter-sanders-priority-queues/heap4.h

+// 4ary heap data structure
+#ifndef HEAP4
+#define HEAP4
+#include "util.h"
+
+const unsigned long LineSize = 64; // cache line size (or multiple)
+
+template <class Key, class Value>
+struct KNElement {Key key; Value value;};
+
+// align an address
+// require: sz is a power of two
+inline char *knAlign(void *p, unsigned long sz)
+{
+  return (char*)(((unsigned long)p + (sz - 1)) & ~(sz - 1));
+}//////////////////////////////////////////////////////////////////////
+// fixed size 4-ary heap
+template <class Key, class Value>
+class Heap4 {
+  //  static const Key infimum  = 4;
+  //static const Key supremum = numeric_limits<Key>.max();
+  typedef KNElement<Key, Value> Element;
+  int capacity;
+  Element * rawData;
+  Element * const data; // aligned version of rawData
+  int size;  // index of last used element
+  int finalLayerSize; // size of first layer with free space
+  int finalLayerDist; // distance to end of layer
+public:
+  Heap4(Key sup, Key infimum, int cap) :
+    capacity(cap), 
+    rawData(new Element[capacity + 4 + (LineSize-1)/sizeof(Element) + 1]),
+    data((Element*)knAlign(rawData, LineSize))
+  { 
+    data[0].key = infimum; // sentinel
+    data[capacity + 1].key = sup;
+    data[capacity + 2].key = sup;
+    data[capacity + 3].key = sup;
+    reset();
+  }
+
+  ~Heap4() { delete [] rawData; }
+  Key getSupremum() { return data[capacity + 1].key; }
+  void reset();
+  int   getSize()     const { return size; }
+  Key   getMinKey()   const { return data[1].key; }
+  Value getMinValue() const { return data[1].value; }
+  void  deleteMinBasic();
+  void  deleteMin(Key *key, Value *value);
+  void  insert(Key k, Value v);
+  void  print();
+  //void  sortTo(Element *to); // sort in increasing order and empty
+  //void  sortInPlace(); // in decreasing order
+};
+
+
+// reset size to 0 and fill data array with sentinels
+template <class Key, class Value>
+inline void Heap4<Key, Value>::
+reset() {
+  size = 0;
+  finalLayerSize = 1;
+  finalLayerDist = 0; 
+  Key sup = getSupremum();
+  for (int i = 1;  i <= capacity;  i++) {
+    data[i].key = sup;
+  }
+}
+
+template <class Key, class Value>
+inline void Heap4<Key, Value>::
+deleteMin(Key *key, Value *value)
+{
+  *key   = getMinKey();
+  *value = getMinValue();
+  deleteMinBasic();
+}
+
+template <class Key, class Value>
+inline void Heap4<Key, Value>::
+deleteMinBasic()
+{
+  Assert2(size > 0);
+  Key minKey, otherKey;
+  int delta;
+
+  // first move up elements on a min-path
+  int hole = 1; 
+  int succ = 2;
+  int layerSize = 4; // size of succ's layer
+  int layerPos  = 0; // pos of succ within its layer
+  int sz   = size;
+  size = sz - 1;
+  finalLayerDist++;
+  if (finalLayerDist == finalLayerSize) { // layer empty now
+    finalLayerSize >>= 2;
+    finalLayerDist = 0;
+  }
+  while (succ < sz) {
+    minKey = data[succ].key;
+    delta = 0;
+
+    // I could save a few assignments using 
+    // a complete case distincition but
+    // this costs in terms of instruction cache load
+    otherKey = data[succ + 1].key;
+    if (otherKey < minKey) { minKey = otherKey; delta = 1; }
+    otherKey = data[succ + 2].key;
+    if (otherKey < minKey) { minKey = otherKey; delta = 2; }
+    otherKey = data[succ + 3].key;
+    if (otherKey < minKey) { minKey = otherKey; delta = 3; }
+    succ += delta;
+    layerPos += delta;
+
+    // move min successor up
+    data[hole].key = minKey;
+    data[hole].value = data[succ].value;
+
+    // step to next layer
+    hole = succ;
+    succ = succ - layerPos + layerSize; // beginning of next layer
+    layerPos <<= 2;
+    succ += layerPos; // now correct value
+    layerSize <<= 2;
+  }
+
+  // bubble up rightmost element
+  Key bubble = data[sz].key;
+  layerSize >>= 2; // now size of hole's layer
+  layerPos  >>= 2; // now pos of hole within its layer
+  // Assert2(finalLayerSize == layerSize);
+  int layerDist = layerSize - layerPos - 1; // hole's dist to end of lay.
+  int pred = hole + layerDist - layerSize; // end of pred's layer for now
+  layerSize >>= 2; // now size of pred's layer
+  layerDist >>= 2; // now pred's pos in layer
+  pred = pred - layerDist; // finally preds index 
+  while (data[pred].key > bubble) { // must terminate since inf at root
+    data[hole] = data[pred];
+    hole = pred;
+    pred = hole + layerDist - layerSize; // end of hole's layer for now
+    layerSize >>= 2; // now size of pred's layer
+    layerDist >>= 2; 
+    pred = pred - layerDist; // finally preds index 
+  }
+
+  // finally move data to hole
+  data[hole].key = bubble;
+  data[hole].value = data[sz].value;
+
+  data[sz].key = getSupremum(); // mark as deleted
+}
+
+
+template <class Key, class Value>
+void Heap4<Key, Value>::
+insert(Key k, Value v)
+{
+  Assert2(size < capacity);
+  Debug4(cout << "insert(" << k << ", " << v << ")" << endl);
+
+  int layerSize = finalLayerSize;
+  int layerDist  = finalLayerDist;
+  finalLayerDist--;
+  if (finalLayerDist == -1) { // layer full
+    // start next layer
+    finalLayerSize <<= 2;
+    finalLayerDist = finalLayerSize - 1;
+  }
+  size++;
+  int hole = size; 
+  int pred = hole + layerDist - layerSize; // end of preds's layer for now
+  layerSize >>= 2; // now size of pred's layer
+  layerDist >>= 2; 
+  pred = pred - layerDist; // finally preds index 
+  Key predKey = data[pred].key;
+  while (predKey > k) { // must terminate due to sentinel at 0
+    data[hole].key   = predKey;
+    data[hole].value = data[pred].value;
+    hole = pred;
+    pred = hole + layerDist - layerSize; // end of preds's layer for now
+    layerSize >>= 2; // now size of pred's layer
+    layerDist >>= 2; 
+    pred = pred - layerDist; // finally preds index 
+    predKey = data[pred].key;
+  }
+
+  // finally move data to hole
+  data[hole].key   = k;
+  data[hole].value = v;
+}
+
+template <class Key, class Value>
+inline void Heap4<Key, Value>::
+print()
+{
+  int pos = 1;
+  for (int layerSize = 1;  pos < size;  layerSize <<= 2) {
+    for (int i = 0;  i < layerSize && pos + i <= size;  i++) {
+      cout << data[pos + i].key << " ";
+    }
+    pos += layerSize;
+    cout << endl;
+  }
+}
+
+#endif

peter-sanders-priority-queues/hold.C

+// benchmark with (insert insert delMin)^N (insert delMin delMin)^N
+// just in time RNG
+// version 3: use easier to handle binary heaps
+// hold.C: generage inputs in the hold model
+
+#include <limits.h>
+#include <iostream.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define DEBUGLEVEL 0
+#include "util.h"
+//#define KNH
+#define H2
+//#define H4
+//#define HSLOW
+
+#ifdef KNH
+#  include "knheap.C"
+#  define HTYPE KNHeap<int, int> 
+#  define HINIT heap(INT_MAX, -INT_MAX)
+#else
+#  ifdef H4
+#    include "heap4.h"
+#    define HTYPE Heap4<int, int> 
+#    define HINIT heap(INT_MAX, -INT_MAX, n)
+#  else
+#    ifdef H2
+#      include "heap2.h"
+#      define HTYPE Heap2<int, int> 
+#      define HINIT heap(INT_MAX, -INT_MAX, n)
+#    else 
+#      ifdef HSLOW
+#        include "heap-CLR.h"
+#        define HTYPE Heap2<int, int> 
+#        define HINIT heap(INT_MAX, -INT_MAX, n)
+#      endif
+#    endif
+#  endif
+#endif
+
+
+#define rand32() (ran32State = 1664525 * ran32State + 1013904223)
+#define getRandom() ((rand32()), (int)(((unsigned int)ran32State) >> 4))
+
+inline void onePass(HTYPE& heap, int n)
+{ int j, newElem, k, v, old;
+  long long insertSum=0, deleteSum=0;
+  static int ran32State = 42 << 20;
+  
+  Debug3(cout << heap.getSize());
+  Assert(heap.getSize() == 0);
+  // build heap
+  for (j = 0;  j < n;  j++) {   
+    newElem = getRandom();
+    heap.insert(newElem, j);
+    Debug0(insertSum += newElem);
+    Debug3(cout << newElem << "in ");
+  }
+    
+  // hold phase
+  old = 0;
+  for (j = 0;  j < 2*n;  j++) {   
+    heap.deleteMin(&k, &v);
+    Debug0(deleteSum += k);
+    Assert0(k >= old); 
+    Debug0(old = k);
+    Debug3(cout << k << "out ");
+
+    newElem = k + getRandom();
+    heap.insert(newElem, j);
+    Debug0(insertSum += newElem);
+    Debug3(cout << newElem << "in ");
+  }
+
+  Assert0(heap.getSize() == n);
+ 
+  // finish up
+  for (j = 0;  j < n;  j++) {   
+    heap.deleteMin(&k, &v);
+    Debug0(deleteSum += k);
+    Debug3(cout << k << "out ");
+  }
+
+  Assert0(deleteSum == insertSum);
+  Assert(heap.getSize() == 0);
+}
+
+
+int main(int argc, char **argv)
+{ 
+  Assert(argc > 1);
+  int n = atoi(argv[1]);
+  int i;
+  int ran32State = 42 << 20;
+  int repeat = 1;
+  double startTime, endTime;
+  if (argc > 2) repeat = atoi(argv[2]);
+  //#ifdef H2
+  //HTYPE *temp = new HTYPE(INT_MAX, -INT_MAX);
+  //HTYPE& heap =*temp;
+  //#else
+  HTYPE HINIT;
+  //#endif
+
+  // warmup
+  onePass(heap, n);
+
+  startTime = cpuTime();
+  for (i = 0;  i < repeat;  i++) {
+    onePass(heap, n);
+  }
+  endTime = cpuTime();
+
+  // output time per insert-delete-pair and this time over log n
+  double timePerPair = (endTime - startTime) / n / repeat / 3.0;
+  double timePerCompare = timePerPair / (log((double)n) / log(2.0));
+  cout << n << " " << timePerPair * 1e9 << " " << timePerCompare * 1e9 << endl;
+}
+

peter-sanders-priority-queues/index.html

+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+ <head>
+  <title>Index of /~sanders/programs/spq</title>
+ </head>
+ <body>
+<h1>Index of /~sanders/programs/spq</h1>
+<pre><img src="/icons/blank.gif" alt="Icon "> <a href="?C=N;O=D">Name</a>                    <a href="?C=M;O=A">Last modified</a>      <a href="?C=S;O=A">Size</a>  <a href="?C=D;O=A">Description</a><hr><img src="/icons/back.gif" alt="[DIR]"> <a href="/~sanders/programs/">Parent Directory</a>                             -   
+<img src="/icons/text.gif" alt="[TXT]"> <a href="heap-CLR.h">heap-CLR.h</a>              22-Nov-1998 18:12  3.5K  
+<img src="/icons/text.gif" alt="[TXT]"> <a href="heap2.h">heap2.h</a>                 22-Nov-1998 18:12  3.9K  
+<img src="/icons/text.gif" alt="[TXT]"> <a href="heap4.h">heap4.h</a>                 22-Nov-1998 18:12  5.8K  
+<img src="/icons/text.gif" alt="[TXT]"> <a href="hold.C">hold.C</a>                  21-Jun-1999 14:18  2.7K  
+<img src="/icons/text.gif" alt="[TXT]"> <a href="knheap.C">knheap.C</a>                22-Nov-1998 18:11   23K  
+<img src="/icons/text.gif" alt="[TXT]"> <a href="knheap.h">knheap.h</a>                22-Nov-1998 18:10  9.2K  
+<img src="/icons/text.gif" alt="[TXT]"> <a href="knupdown3.C">knupdown3.C</a>             22-Nov-1998 18:10  2.7K  
+<img src="/icons/text.gif" alt="[TXT]"> <a href="knwiggle.C">knwiggle.C</a>              22-Nov-1998 18:12  2.8K  
+<img src="/icons/text.gif" alt="[TXT]"> <a href="multiMergeUnrolled.C">multiMergeUnrolled.C</a>    22-Nov-1998 18:11  1.9K  
+<img src="/icons/unknown.gif" alt="[   ]"> <a href="readme.asc">readme.asc</a>              21-Jun-1999 14:18  741   
+<img src="/icons/text.gif" alt="[TXT]"> <a href="util.h">util.h</a>                  22-Nov-1998 18:13  2.3K  
+<hr></pre>
+<address>Apache/2.0 Server at www.mpi-inf.mpg.de Port 80</address>
+</body></html>

peter-sanders-priority-queues/knheap.C

+#include "knheap.h"
+#include <string.h>
+///////////////////////// LooserTree ///////////////////////////////////
+template <class Key, class Value>
+KNLooserTree<Key, Value>::
+KNLooserTree() : lastFree(0), size(0), logK(0), k(1)
+{
+  empty  [0] = 0;
+  segment[0] = 0;
+  current[0] = &dummy;
+  // entry and dummy are initialized by init
+  // since they need the value of supremum
+}
+
+
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+init(Key sup)
+{
+  dummy.key      = sup;
+  rebuildLooserTree();
+  Assert2(current[entry[0].index] == &dummy);
+}
+
+
+// rebuild looser tree information from the values in current
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+rebuildLooserTree()
+{  
+  int winner = initWinner(1);
+  entry[0].index = winner;
+  entry[0].key   = current[winner]->key;
+}
+
+
+// given any values in the leaves this
+// routing recomputes upper levels of the tree
+// from scratch in linear time
+// initialize entry[root].index and the subtree rooted there
+// return winner index
+template <class Key, class Value>
+int KNLooserTree<Key, Value>::
+initWinner(int root)
+{
+  if (root >= k) { // leaf reached
+    return root - k;
+  } else {
+    int left  = initWinner(2*root    );
+    int right = initWinner(2*root + 1);
+    Key lk    = current[left ]->key;
+    Key rk    = current[right]->key;
+    if (lk <= rk) { // right subtree looses
+      entry[root].index = right;
+      entry[root].key   = rk;
+      return left;
+    } else {
+      entry[root].index = left;
+      entry[root].key   = lk;
+      return right;
+    }
+  }
+}
+
+
+// first go up the tree all the way to the root
+// hand down old winner for the respective subtree
+// based on new value, and old winner and looser 
+// update each node on the path to the root top down.
+// This is implemented recursively
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+updateOnInsert(int node, 
+               Key     newKey, int     newIndex, 
+               Key *winnerKey, int *winnerIndex, // old winner
+               int *mask) // 1 << (ceil(log KNK) - dist-from-root)
+{
+  if (node == 0) { // winner part of root
+    *mask = 1 << (logK - 1);    
+    *winnerKey   = entry[0].key;
+    *winnerIndex = entry[0].index;
+    if (newKey < entry[node].key) {
+      entry[node].key   = newKey;
+      entry[node].index = newIndex;
+    }
+  } else {
+    updateOnInsert(node >> 1, newKey, newIndex, winnerKey, winnerIndex, mask);
+    Key looserKey   = entry[node].key;
+    int looserIndex = entry[node].index;
+    if ((*winnerIndex & *mask) != (newIndex & *mask)) { // different subtrees
+      if (newKey < looserKey) { // newKey will have influence here
+        if (newKey < *winnerKey) { // old winner loses here
+          entry[node].key   = *winnerKey;
+          entry[node].index = *winnerIndex;
+        } else { // new entry looses here
+          entry[node].key   = newKey;
+          entry[node].index = newIndex;
+        }
+      } 
+      *winnerKey   = looserKey;
+      *winnerIndex = looserIndex;
+    }
+    // note that nothing needs to be done if
+    // the winner came from the same subtree
+    // a) newKey <= winnerKey => even more reason for the other tree to loose
+    // b) newKey >  winnerKey => the old winner will beat the new
+    //                           entry further down the tree
+    // also the same old winner is handed down the tree
+
+    *mask >>= 1; // next level
+  }
+}
+
+
+// make the tree two times as wide
+// may only be called if no free slots are left ?? necessary ??
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+doubleK()
+{
+  // make all new entries empty
+  // and push them on the free stack
+  Assert2(lastFree == -1); // stack was empty (probably not needed)
+  Assert2(k < KNKMAX);
+  for (int i = 2*k - 1;  i >= k;  i--) {
+    current[i] = &dummy;
+    lastFree++;
+    empty[lastFree] = i;
+  }
+
+  // double the size
+  k *= 2;  logK++;
+
+  // recompute looser tree information
+  rebuildLooserTree();
+}
+
+
+// compact nonempty segments in the left half of the tree
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+compactTree()
+{
+  Assert2(logK > 0);
+  Key sup = dummy.key;
+
+  // compact all nonempty segments to the left
+  int from = 0;
+  int to   = 0;
+  for(;  from < k;  from++) {
+    if (current[from]->key != sup) {
+      current[to] = current[from];
+      segment[to] = segment[from];
+      to++;
+    } 
+  }
+
+  // half degree as often as possible
+  while (to < k/2) {
+    k /= 2;  logK--;
+  }
+
+  // overwrite garbage and compact the stack of empty segments
+  lastFree = -1; // none free
+  for (;  to < k;  to++) {
+    // push 
+    lastFree++;
+    empty[lastFree] = to;
+
+    current[to] = &dummy;
+  }
+
+  // recompute looser tree information
+  rebuildLooserTree();
+}
+
+
+// insert segment beginning at to
+// require: spaceIsAvailable() == 1 
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+insertSegment(Element *to, int sz)
+{
+  if (sz > 0) {
+    Assert2(to[0   ].key != getSupremum());
+    Assert2(to[sz-1].key != getSupremum());
+    // get a free slot
+    if (lastFree < 0) { // tree is too small
+      doubleK();
+    }
+    int index = empty[lastFree];
+    lastFree--; // pop
+
+
+    // link new segment
+    current[index] = segment[index] = to;
+    size += sz;
+    
+    // propagate new information up the tree
+    Key dummyKey;
+    int dummyIndex;
+    int dummyMask;
+    updateOnInsert((index + k) >> 1, to->key, index, 
+                   &dummyKey, &dummyIndex, &dummyMask);
+  } else {
+    // immediately deallocate
+    // this is not only an optimization 
+    // but also needed to keep empty segments from
+    // clogging up the tree
+    delete [] to;
+  }
+}
+
+
+// free an empty segment
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+deallocateSegment(int index)
+{
+  // reroute current pointer to some empty dummy segment
+  // with a sentinel key
+  current[index] = &dummy;
+
+  // free memory
+  delete [] segment[index];
+  segment[index] = 0;
+  
+  // push on the stack of free segment indices
+  lastFree++;
+  empty[lastFree] = index;
+}
+
+// multi-merge for a fixed K=1<<LogK
+// this looks ugly but the include file explains
+// why this is the most portable choice
+#define LogK 3
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMergeUnrolled3(Element *to, int l)
+#include "multiMergeUnrolled.C"
+#undef LogK
+#define LogK 4
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMergeUnrolled4(Element *to, int l)
+#include "multiMergeUnrolled.C"
+#undef LogK
+#define LogK 5
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMergeUnrolled5(Element *to, int l)
+#include "multiMergeUnrolled.C"
+#undef LogK
+#define LogK 6
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMergeUnrolled6(Element *to, int l)
+#include "multiMergeUnrolled.C"
+#undef LogK
+#define LogK 7
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMergeUnrolled7(Element *to, int l)
+#include "multiMergeUnrolled.C"
+#undef LogK
+#define LogK 8
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMergeUnrolled8(Element *to, int l)
+#include "multiMergeUnrolled.C"
+#undef LogK
+#define LogK 9
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMergeUnrolled9(Element *to, int l)
+#include "multiMergeUnrolled.C"
+#undef LogK
+#define LogK 10
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMergeUnrolled10(Element *to, int l)
+#include "multiMergeUnrolled.C"
+#undef LogK
+
+// delete the l smallest elements and write them to "to"
+// empty segments are deallocated
+// require:
+// - there are at least l elements
+// - segments are ended by sentinels
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMerge(Element *to, int l)
+{
+  switch(logK) {
+  case 0: 
+    Assert2(k == 1);
+    Assert2(entry[0].index == 0);
+    Assert2(lastFree == -1 || l == 0);
+    memcpy(to, current[0], l * sizeof(Element));
+    current[0] += l;
+    entry[0].key = current[0]->key;
+    if (segmentIsEmpty(0)) deallocateSegment(0); 
+    break;
+  case 1:
+    Assert2(k == 2);
+    merge(current + 0, current + 1, to, l);
+    rebuildLooserTree();
+    if (segmentIsEmpty(0)) deallocateSegment(0); 
+    if (segmentIsEmpty(1)) deallocateSegment(1); 
+    break;
+  case 2:
+    Assert2(k == 4);
+    merge4(current + 0, current + 1, current + 2, current + 3, to, l);
+    rebuildLooserTree();
+    if (segmentIsEmpty(0)) deallocateSegment(0); 
+    if (segmentIsEmpty(1)) deallocateSegment(1); 
+    if (segmentIsEmpty(2)) deallocateSegment(2); 
+    if (segmentIsEmpty(3)) deallocateSegment(3);
+    break;
+  case  3: multiMergeUnrolled3(to, l); break;
+  case  4: multiMergeUnrolled4(to, l); break;
+  case  5: multiMergeUnrolled5(to, l); break;
+  case  6: multiMergeUnrolled6(to, l); break;
+  case  7: multiMergeUnrolled7(to, l); break;
+  case  8: multiMergeUnrolled8(to, l); break;
+  case  9: multiMergeUnrolled9(to, l); break;
+  case 10: multiMergeUnrolled10(to, l); break;
+  default: multiMergeK       (to, l); break;
+  }
+  size -= l;
+
+  // compact tree if it got considerably smaller
+  if (k > 1 && lastFree >= 3*k/5 - 1) { 
+    // using k/2 would be worst case inefficient
+    compactTree(); 
+  }
+}
+
+
+// is this segment empty and does not point to dummy yet?
+template <class Key, class Value>
+inline int KNLooserTree<Key, Value>::
+segmentIsEmpty(int i)
+{
+  return current[i]->key == getSupremum() && 
+         current[i] != &dummy;
+}
+
+
+// multi-merge for arbitrary K
+template <class Key, class Value>
+void KNLooserTree<Key, Value>::
+multiMergeK(Element *to, int l)
+{
+  Entry *currentPos;
+  Key currentKey;
+  int currentIndex; // leaf pointed to by current entry
+  int kReg = k;
+  Element *done = to + l;
+  int      winnerIndex = entry[0].index;
+  Key      winnerKey   = entry[0].key;
+  Element *winnerPos;
+  Key sup = dummy.key; // supremum
+  while (to < done) {
+    winnerPos = current[winnerIndex];
+
+    // write result
+    to->key   = winnerKey;
+    to->value = winnerPos->value;
+
+    // advance winner segment
+    winnerPos++;
+    current[winnerIndex] = winnerPos;
+    winnerKey = winnerPos->key;
+
+    // remove winner segment if empty now
+    if (winnerKey == sup) { 
+      deallocateSegment(winnerIndex); 
+    } 
+    
+    // go up the entry-tree
+    for (int i = (winnerIndex + kReg) >> 1;  i > 0;  i >>= 1) {
+      currentPos = entry + i;
+      currentKey = currentPos->key;
+      if (currentKey < winnerKey) {
+        currentIndex      = currentPos->index;
+        currentPos->key   = winnerKey;
+        currentPos->index = winnerIndex;
+        winnerKey         = currentKey;
+        winnerIndex       = currentIndex;
+      }
+    }
+
+    to++;
+  }
+  entry[0].index = winnerIndex;
+  entry[0].key   = winnerKey;  
+}
+
+////////////////////////// KNHeap //////////////////////////////////////
+template <class Key, class Value>
+KNHeap<Key, Value>::
+KNHeap(Key sup, Key infimum) : insertHeap(sup, infimum),
+                                   activeLevels(0), size(0)
+{
+  buffer1[KNBufferSize1].key = sup; // sentinel
+  minBuffer1 = buffer1 + KNBufferSize1; // empty
+  for (int i = 0;  i < KNLevels;  i++) {
+    tree[i].init(sup); // put tree[i] in a consistent state
+    buffer2[i][KNN].key = sup; // sentinel
+    minBuffer2[i] = &(buffer2[i][KNN]); // empty
+  }
+}
+
+
+//--------------------- Buffer refilling -------------------------------
+
+// refill buffer2[j] and return number of elements found
+template <class Key, class Value>
+int KNHeap<Key, Value>::refillBuffer2(int j)
+{
+  Element *oldTarget;
+  int deleteSize;
+  int treeSize = tree[j].getSize();
+  int bufferSize = (&(buffer2[j][0]) + KNN) - minBuffer2[j];
+  if (treeSize + bufferSize >= KNN) { // buffer will be filled
+    oldTarget = &(buffer2[j][0]);
+    deleteSize = KNN - bufferSize;
+  } else {
+    oldTarget = &(buffer2[j][0]) + KNN - treeSize - bufferSize;
+    deleteSize = treeSize;
+  }
+
+  // shift  rest to beginning
+  // possible hack:
+  // - use memcpy if no overlap
+  memmove(oldTarget, minBuffer2[j], bufferSize * sizeof(Element));
+  minBuffer2[j] = oldTarget;
+
+  // fill remaining space from tree
+  tree[j].multiMerge(oldTarget + bufferSize, deleteSize);
+  return deleteSize + bufferSize;
+}
+ 
+ 
+// move elements from the 2nd level buffers 
+// to the delete buffer
+template <class Key, class Value>
+void KNHeap<Key, Value>::refillBuffer1() 
+{
+  int totalSize = 0;
+  int sz;
+  for (int i = activeLevels - 1;  i >= 0;  i--) {
+    if ((&(buffer2[i][0]) + KNN) - minBuffer2[i] < KNBufferSize1) {
+      sz = refillBuffer2(i);
+      // max active level dry now?
+      if (sz == 0 && i == activeLevels - 1) { activeLevels--; }
+      else {totalSize += sz;}
+    } else {
+      totalSize += KNBufferSize1; // actually only a sufficient lower bound
+    }
+  }
+  if (totalSize >= KNBufferSize1) { // buffer can be filled
+    minBuffer1 = buffer1;
+    sz = KNBufferSize1; // amount to be copied
+    size -= KNBufferSize1; // amount left in buffer2
+  } else {
+    minBuffer1 = buffer1 + KNBufferSize1 - totalSize;
+    sz = totalSize;
+    Assert2(size == sz); // trees and buffer2 get empty
+    size = 0;
+  }
+
+  // now call simplified refill routines
+  // which can make the assumption that
+  // they find all they are asked to find in the buffers
+  minBuffer1 = buffer1 + KNBufferSize1 - sz;
+  switch(activeLevels) {
+  case 1: memcpy(minBuffer1, minBuffer2[0], sz * sizeof(Element));
+          minBuffer2[0] += sz;
+          break;
+  case 2: merge(&(minBuffer2[0]), 
+                &(minBuffer2[1]), minBuffer1, sz);
+          break;
+  case 3: merge3(&(minBuffer2[0]), 
+                 &(minBuffer2[1]),
+                 &(minBuffer2[2]), minBuffer1, sz);
+          break;
+  case 4: merge4(&(minBuffer2[0]), 
+                 &(minBuffer2[1]),
+                 &(minBuffer2[2]),
+                 &(minBuffer2[3]), minBuffer1, sz);
+          break;
+//  case 2: refillBuffer12(sz); break;
+//  case 3: refillBuffer13(sz); break;
+//  case 4: refillBuffer14(sz); break;
+  }
+}
+
+
+template <class Key, class Value>
+void KNHeap<Key, Value>::refillBuffer13(int sz) 
+{ Assert(0); // not yet implemented
+}
+
+template <class Key, class Value>
+void KNHeap<Key, Value>::refillBuffer14(int sz) 
+{ Assert(0); // not yet implemented
+}
+
+
+//--------------------------------------------------------------------
+
+// check if space is available on level k and
+// empty this level if necessary leading to a recursive call.
+// return the level where space was finally available
+template <class Key, class Value>
+int KNHeap<Key, Value>::makeSpaceAvailable(int level)
+{
+  int finalLevel;
+
+  Assert2(level <= activeLevels);
+  if (level == activeLevels) { activeLevels++; }
+  if (tree[level].spaceIsAvailable()) { 
+    finalLevel = level;
+  } else {
+    finalLevel = makeSpaceAvailable(level + 1);
+    int segmentSize = tree[level].getSize();
+    Element *newSegment = new Element[segmentSize + 1];
+    tree[level].multiMerge(newSegment, segmentSize); // empty this level
+    //    tree[level].cleanUp();
+    newSegment[segmentSize].key = buffer1[KNBufferSize1].key; // sentinel
+    // for queues where size << #inserts
+    // it might make sense to stay in this level if
+    // segmentSize < alpha * KNN * k^level for some alpha < 1
+    tree[level + 1].insertSegment(newSegment, segmentSize);
+  }
+  return finalLevel;
+}
+
+
+// empty the insert heap into the main data structure
+template <class Key, class Value>
+void KNHeap<Key, Value>::emptyInsertHeap()
+{
+  const Key sup = getSupremum();
+
+  // build new segment
+  Element *newSegment = new Element[KNN + 1];
+  Element *newPos = newSegment;
+
+  // put the new data there for now
+  insertHeap.sortTo(newSegment);
+  newSegment[KNN].key = sup; // sentinel
+
+  // copy the buffer1 and buffer2[0] to temporary storage
+  // (the tomporary can be eliminated using some dirty tricks)
+  const int tempSize = KNN + KNBufferSize1;
+  Element temp[tempSize + 1]; 
+  int sz1 = getSize1();
+  int sz2 = getSize2(0);
+  Element *pos = temp + tempSize - sz1 - sz2;
+  memcpy(pos      , minBuffer1   , sz1 * sizeof(Element)); 
+  memcpy(pos + sz1, minBuffer2[0], sz2 * sizeof(Element));
+  temp[tempSize].key = sup; // sentinel
+
+  // refill buffer1
+  // (using more complicated code it could be made somewhat fuller
+  // in certein circumstances)
+  merge(&pos, &newPos, minBuffer1, sz1);
+
+  // refill buffer2[0]
+  // (as above we might want to take the opportunity
+  // to make buffer2[0] fuller)
+  merge(&pos, &newPos, minBuffer2[0], sz2);
+
+  // merge the rest to the new segment
+  // note that merge exactly trips into the footsteps
+  // of itself
+  merge(&pos, &newPos, newSegment, KNN);
+  
+  // and insert it
+  int freeLevel = makeSpaceAvailable(0);
+  Assert2(freeLevel == 0 || tree[0].getSize() == 0);
+  tree[0].insertSegment(newSegment, KNN);
+
+  // get rid of invalid level 2 buffers
+  // by inserting them into tree 0 (which is almost empty in this case)
+  if (freeLevel > 0) {
+    for (int i = freeLevel;  i >= 0;  i--) { // reverse order not needed 
+      // but would allow immediate refill
+      newSegment = new Element[getSize2(i) + 1]; // with sentinel
+      memcpy(newSegment, minBuffer2[i], (getSize2(i) + 1) * sizeof(Element));
+      tree[0].insertSegment(newSegment, getSize2(i));
+      minBuffer2[i] = buffer2[i] + KNN; // empty
+    }
+  }
+
+  // update size
+  size += KNN; 
+
+  // special case if the tree was empty before
+  if (minBuffer1 == buffer1 + KNBufferSize1) { refillBuffer1(); }
+}
+
+/////////////////////////////////////////////////////////////////////
+// auxiliary functions
+
+// merge sz element from the two sentinel terminated input
+// sequences *f0 and *f1 to "to"
+// advance *fo and *f1 accordingly.
+// require: at least sz nonsentinel elements available in f0, f1
+// require: to may overwrite one of the sources as long as
+//   *fx + sz is before the end of fx
+template <class Key, class Value>
+void merge(KNElement<Key, Value> **f0,
+           KNElement<Key, Value> **f1,
+           KNElement<Key, Value>  *to, int sz) 
+{
+  KNElement<Key, Value> *from0   = *f0;
+  KNElement<Key, Value> *from1   = *f1;
+  KNElement<Key, Value> *done    = to + sz;
+  Key      key0    = from0->key;
+  Key      key1    = from1->key;
+
+  while (to < done) {
+    if (key1 <= key0) {
+      to->key   = key1;
+      to->value = from1->value; // note that this may be the same address
+      from1++; // nach hinten schieben?
+      key1 = from1->key;
+    } else {
+      to->key   = key0;
+      to->value = from0->value; // note that this may be the same address
+      from0++; // nach hinten schieben?
+      key0 = from0->key;
+    }
+    to++;
+  }
+  *f0   = from0;
+  *f1   = from1;
+}
+
+
+// merge sz element from the three sentinel terminated input
+// sequences *f0, *f1 and *f2 to "to"
+// advance *f0, *f1 and *f2 accordingly.
+// require: at least sz nonsentinel elements available in f0, f1 and f2
+// require: to may overwrite one of the sources as long as
+//   *fx + sz is before the end of fx
+template <class Key, class Value>
+void merge3(KNElement<Key, Value> **f0,
+           KNElement<Key, Value> **f1,
+           KNElement<Key, Value> **f2,
+           KNElement<Key, Value>  *to, int sz) 
+{
+  KNElement<Key, Value> *from0   = *f0;
+  KNElement<Key, Value> *from1   = *f1;
+  KNElement<Key, Value> *from2   = *f2;
+  KNElement<Key, Value> *done    = to + sz;
+  Key      key0    = from0->key;
+  Key      key1    = from1->key;
+  Key      key2    = from2->key;
+
+  if (key0 < key1) {
+    if (key1 < key2)   { goto s012; }
+    else { 
+      if (key2 < key0) { goto s201; }
+      else             { goto s021; }
+    }
+  } else {
+    if (key1 < key2) {
+      if (key0 < key2) { goto s102; }
+      else             { goto s120; }
+    } else             { goto s210; }
+  }
+
+#define Merge3Case(a,b,c)\
+  s ## a ## b ## c :\
+  if (to == done) goto finish;\
+  to->key = key ## a;\
+  to->value = from ## a -> value;\
+  to++;\
+  from ## a ++;\
+  key ## a = from ## a -> key;\
+  if (key ## a < key ## b) goto s ## a ## b ## c;\
+  if (key ## a < key ## c) goto s ## b ## a ## c;\
+  goto s ## b ## c ## a;
+
+  // the order is choosen in such a way that 
+  // four of the trailing gotos can be eliminated by the optimizer
+  Merge3Case(0, 1, 2);
+  Merge3Case(1, 2, 0);
+  Merge3Case(2, 0, 1);
+  Merge3Case(1, 0, 2);
+  Merge3Case(0, 2, 1);
+  Merge3Case(2, 1, 0);
+
+ finish:
+  *f0   = from0;
+  *f1   = from1;
+  *f2   = from2;
+}
+
+
+// merge sz element from the three sentinel terminated input
+// sequences *f0, *f1, *f2 and *f3 to "to"
+// advance *f0, *f1, *f2 and *f3 accordingly.
+// require: at least sz nonsentinel elements available in f0, f1, f2 and f2
+// require: to may overwrite one of the sources as long as
+//   *fx + sz is before the end of fx
+template <class Key, class Value>
+void merge4(KNElement<Key, Value> **f0,
+           KNElement<Key, Value> **f1,
+           KNElement<Key, Value> **f2,
+           KNElement<Key, Value> **f3,
+           KNElement<Key, Value>  *to, int sz) 
+{
+  KNElement<Key, Value> *from0   = *f0;
+  KNElement<Key, Value> *from1   = *f1;
+  KNElement<Key, Value> *from2   = *f2;
+  KNElement<Key, Value> *from3   = *f3;
+  KNElement<Key, Value> *done    = to + sz;
+  Key      key0    = from0->key;
+  Key      key1    = from1->key;
+  Key      key2    = from2->key;
+  Key      key3    = from3->key;
+
+#define StartMerge4(a, b, c, d)\
+  if (key##a <= key##b && key##b <= key##c && key##c <= key##d)\
+    goto s ## a ## b ## c ## d;
+
+  StartMerge4(0, 1, 2, 3);
+  StartMerge4(1, 2, 3, 0);
+  StartMerge4(2, 3, 0, 1);
+  StartMerge4(3, 0, 1, 2);
+
+  StartMerge4(0, 3, 1, 2);
+  StartMerge4(3, 1, 2, 0);
+  StartMerge4(1, 2, 0, 3);
+  StartMerge4(2, 0, 3, 1);
+
+  StartMerge4(0, 2, 3, 1);
+  StartMerge4(2, 3, 1, 0);
+  StartMerge4(3, 1, 0, 2);
+  StartMerge4(1, 0, 2, 3);
+
+  StartMerge4(2, 0, 1, 3);
+  StartMerge4(0, 1, 3, 2);
+  StartMerge4(1, 3, 2, 0);
+  StartMerge4(3, 2, 0, 1);
+
+  StartMerge4(3, 0, 2, 1);
+  StartMerge4(0, 2, 1, 3);
+  StartMerge4(2, 1, 3, 0);
+  StartMerge4(1, 3, 0, 2);
+
+  StartMerge4(1, 0, 3, 2);
+  StartMerge4(0, 3, 2, 1);
+  StartMerge4(3, 2, 1, 0);
+  StartMerge4(2, 1, 0, 3);
+
+#define Merge4Case(a, b, c, d)\
+  s ## a ## b ## c ## d:\
+  if (to == done) goto finish;\
+  to->key = key ## a;\
+  to->value = from ## a -> value;\
+  to++;\
+  from ## a ++;\
+  key ## a = from ## a -> key;\
+  if (key ## a < key ## c) {\
+    if (key ## a < key ## b) { goto s ## a ## b ## c ## d; }\
+    else                     { goto s ## b ## a ## c ## d; }\
+  } else {\
+    if (key ## a < key ## d) { goto s ## b ## c ## a ## d; }\
+    else                     { goto s ## b ## c ## d ## a; }\
+  }    
+  
+  Merge4Case(0, 1, 2, 3);
+  Merge4Case(1, 2, 3, 0);
+  Merge4Case(2, 3, 0, 1);
+  Merge4Case(3, 0, 1, 2);
+
+  Merge4Case(0, 3, 1, 2);
+  Merge4Case(3, 1, 2, 0);
+  Merge4Case(1, 2, 0, 3);
+  Merge4Case(2, 0, 3, 1);
+
+  Merge4Case(0, 2, 3, 1);
+  Merge4Case(2, 3, 1, 0);
+  Merge4Case(3, 1, 0, 2);
+  Merge4Case(1, 0, 2, 3);
+
+  Merge4Case(2, 0, 1, 3);
+  Merge4Case(0, 1, 3, 2);
+  Merge4Case(1, 3, 2, 0);
+  Merge4Case(3, 2, 0, 1);
+
+  Merge4Case(3, 0, 2, 1);
+  Merge4Case(0, 2, 1, 3);
+  Merge4Case(2, 1, 3, 0);
+  Merge4Case(1, 3, 0, 2);
+
+  Merge4Case(1, 0, 3, 2);
+  Merge4Case(0, 3, 2, 1);
+  Merge4Case(3, 2, 1, 0);
+  Merge4Case(2, 1, 0, 3);
+
+ finish:
+  *f0   = from0;
+  *f1   = from1;
+  *f2   = from2;
+  *f3   = from3;
+}

peter-sanders-priority-queues/knheap.h

+// hierarchical memory priority queue data structure
+#ifndef KNHEAP
+#define KNHEAP
+#include "util.h"
+
+const int KNBufferSize1 = 32; // equalize procedure call overheads etc. 
+const int KNN = 512; // bandwidth
+const int KNKMAX = 64;  // maximal arity
+const int KNLevels = 4; // overall capacity >= KNN*KNKMAX^KNLevels
+const int LogKNKMAX = 6;  // ceil(log KNK)
+/*
+const int KNBufferSize1 = 3; // equalize procedure call overheads etc. 
+const int KNN = 10; // bandwidth
+const int KNKMAX = 4;  // maximal arity
+const int KNLevels = 4; // overall capacity >= KNN*KNKMAX^KNLevels
+const int LogKNKMAX = 2;  // ceil(log KNK)
+*/
+template <class Key, class Value>
+struct KNElement {Key key; Value value;};
+
+//////////////////////////////////////////////////////////////////////
+// fixed size binary heap
+template <class Key, class Value, int capacity>
+class BinaryHeap {
+  //  static const Key infimum  = 4;
+  //static const Key supremum = numeric_limits<Key>.max();
+  typedef KNElement<Key, Value> Element;
+  Element data[capacity + 2];
+  int size;  // index of last used element
+public:
+  BinaryHeap(Key sup, Key infimum):size(0) { 
+    data[0].key = infimum; // sentinel
+    data[capacity + 1].key = sup;
+    reset();
+  }
+  Key getSupremum() { return data[capacity + 1].key; }
+  void reset();
+  int   getSize()     const { return size; }
+  Key   getMinKey()   const { return data[1].key; }
+  Value getMinValue() const { return data[1].value; }
+  void  deleteMin();
+  void  deleteMinFancy(Key *key, Value *value) {
+    *key   = getMinKey();
+    *value = getMinValue();
+    deleteMin();
+  }
+  void  insert(Key k, Value v);
+  void  sortTo(Element *to); // sort in increasing order and empty
+  //void  sortInPlace(); // in decreasing order
+};
+
+
+// reset size to 0 and fill data array with sentinels
+template <class Key, class Value, int capacity>
+inline void BinaryHeap<Key, Value, capacity>::
+reset() {
+  size = 0;
+  Key sup = getSupremum();
+  for (int i = 1;  i <= capacity;  i++) {
+    data[i].key = sup;
+  }
+  // if this becomes a bottle neck
+  // we might want to replace this by log KNN 
+  // memcpy-s
+}
+
+template <class Key, class Value, int capacity>
+inline void BinaryHeap<Key, Value, capacity>::
+deleteMin()
+{
+  Assert2(size > 0);
+
+  // first move up elements on a min-path
+  int hole = 1; 
+  int succ = 2;
+  int sz   = size;
+  while (succ < sz) {
+    Key key1 = data[succ].key;
+    Key key2 = data[succ + 1].key;
+    if (key1 > key2) {
+      succ++;
+      data[hole].key   = key2;
+      data[hole].value = data[succ].value;
+    } else {
+      data[hole].key   = key1;
+      data[hole].value = data[succ].value;
+    }
+    hole = succ;
+    succ <<= 1;
+  }
+
+  // bubble up rightmost element
+  Key bubble = data[sz].key;
+  int pred = hole >> 1;
+  while (data[pred].key > bubble) { // must terminate since min at root
+    data[hole] = data[pred];
+    hole = pred;
+    pred >>= 1;
+  }
+
+  // finally move data to hole
+  data[hole].key = bubble;
+  data[hole].value = data[sz].value;
+
+  data[size].key = getSupremum(); // mark as deleted
+  size = sz - 1;
+}
+
+
+// empty the heap and put the element to "to"
+// sorted in increasing order
+template <class Key, class Value, int capacity>
+inline void BinaryHeap<Key, Value, capacity>::
+sortTo(Element *to)
+{
+  const int           sz = size;
+  const Key          sup = getSupremum();
+  Element * const beyond = to + sz;
+  Element * const root   = data + 1;
+  while (to < beyond) {
+    // copy minimun
+    *to = *root;
+    to++;
+
+    // bubble up second smallest as in deleteMin
+    int hole = 1;
+    int succ = 2;
+    while (succ <= sz) {
+      Key key1 = data[succ    ].key;
+      Key key2 = data[succ + 1].key;
+      if (key1 > key2) {
+        succ++;
+        data[hole].key   = key2;
+        data[hole].value = data[succ].value;
+      } else {
+        data[hole].key = key1;
+        data[hole].value = data[succ].value;
+      }
+      hole = succ;
+      succ <<= 1;
+    }
+
+    // just mark hole as deleted
+    data[hole].key = sup;
+  }
+  size = 0;
+}
+
+
+template <class Key, class Value, int capacity>
+inline void BinaryHeap<Key, Value, capacity>::
+insert(Key k, Value v)
+{
+  Assert2(size < capacity);
+  Debug4(cout << "insert(" << k << ", " << v << ")" << endl);
+
+  size++;
+  int hole = size; 
+  int pred = hole >> 1;
+  Key predKey = data[pred].key;
+  while (predKey > k) { // must terminate due to sentinel at 0
+    data[hole].key   = predKey;
+    data[hole].value = data[pred].value;
+    hole = pred;
+    pred >>= 1;
+    predKey = data[pred].key;
+  }
+
+  // finally move data to hole
+  data[hole].key   = k;
+  data[hole].value = v;
+}
+
+//////////////////////////////////////////////////////////////////////
+// The data structure from Knuth, "Sorting and Searching", Section 5.4.1
+template <class Key, class Value>
+class KNLooserTree {
+  // public: // should not be here but then I would need a scary
+  // sequence of template friends which I doubt to work
+  // on all compilers
+  typedef KNElement<Key, Value> Element;
+  struct Entry   {
+    Key key;   // Key of Looser element (winner for 0)
+    int index; // number of loosing segment
+  };
+
+  // stack of empty segments
+  int empty[KNKMAX]; // indices of empty segments
+  int lastFree;  // where in "empty" is the last valid entry?
+
+  int size; // total number of elements stored
+  int logK; // log of current tree size
+  int k; // invariant k = 1 << logK
+
+  Element dummy; // target of empty segment pointers
+
+  // upper levels of looser trees
+  // entry[0] contains the winner info
+  Entry entry[KNKMAX]; 
+
+  // leaf information
+  // note that Knuth uses indices k..k-1
+  // while we use 0..k-1
+  Element *current[KNKMAX]; // pointer to actual element
+  Element *segment[KNKMAX]; // start of Segments
+
+  // private member functions
+  int initWinner(int root);
+  void updateOnInsert(int node, Key newKey, int newIndex, 
+                      Key *winnerKey, int *winnerIndex, int *mask);
+  void deallocateSegment(int index);
+  void doubleK();
+  void compactTree();
+  void rebuildLooserTree();
+  int segmentIsEmpty(int i);
+public:
+  KNLooserTree();
+  void init(Key sup); // before, no consistent state is reached :-(
+
+  void multiMergeUnrolled3(Element *to, int l);
+  void multiMergeUnrolled4(Element *to, int l);
+  void multiMergeUnrolled5(Element *to, int l);
+  void multiMergeUnrolled6(Element *to, int l);
+  void multiMergeUnrolled7(Element *to, int l);
+  void multiMergeUnrolled8(Element *to, int l);
+  void multiMergeUnrolled9(Element *to, int l);
+  void multiMergeUnrolled10(Element *to, int l);
+
+  void multiMerge(Element *to, int l); // delete l smallest element to "to"
+  void multiMergeK(Element *to, int l); 
+  int  spaceIsAvailable() { return k < KNKMAX || lastFree >= 0; } 
+     // for new segment
+  void insertSegment(Element *to, int sz); // insert segment beginning at to
+  int  getSize() { return size; }
+  Key getSupremum() { return dummy.key; }
+};  
+
+
+//////////////////////////////////////////////////////////////////////
+// 2 level multi-merge tree
+template <class Key, class Value>
+class KNHeap {
+  typedef KNElement<Key, Value> Element;
+
+  KNLooserTree<Key, Value> tree[KNLevels];
+
+  // one delete buffer for each tree (extra space for sentinel)
+  Element buffer2[KNLevels][KNN + 1]; // tree->buffer2->buffer1
+  Element *minBuffer2[KNLevels];
+
+  // overall delete buffer
+  Element buffer1[KNBufferSize1 + 1];
+  Element *minBuffer1;
+
+  // insert buffer
+  BinaryHeap<Key, Value, KNN> insertHeap;
+
+  // how many levels are active
+  int activeLevels;
+  
+  // total size not counting insertBuffer and buffer1
+  int size;
+
+  // private member functions
+  void refillBuffer1();
+  void refillBuffer11(int sz);
+  void refillBuffer12(int sz);
+  void refillBuffer13(int sz);
+  void refillBuffer14(int sz);
+  int refillBuffer2(int k);
+  int makeSpaceAvailable(int level);
+  void emptyInsertHeap();
+  Key getSupremum() const { return buffer2[0][KNN].key; }
+  int getSize1( ) const { return ( buffer1 + KNBufferSize1) - minBuffer1; }
+  int getSize2(int i) const { return &(buffer2[i][KNN])     - minBuffer2[i]; }
+public:
+  KNHeap(Key sup, Key infimum);
+  int   getSize() const;
+  void  getMin(Key *key, Value *value);
+  void  deleteMin(Key *key, Value *value);
+  void  insert(Key key, Value value);
+};
+
+
+template <class Key, class Value>  
+inline int KNHeap<Key, Value>::getSize() const 
+{ 
+  return 
+    size + 
+    insertHeap.getSize() + 
+    ((buffer1 + KNBufferSize1) - minBuffer1); 
+}
+
+template <class Key, class Value>
+inline void  KNHeap<Key, Value>::getMin(Key *key, Value *value) {
+  Key key1 = minBuffer1->key;
+  Key key2 = insertHeap.getMinKey();
+  if (key2 >= key1) {
+    *key   = key1;
+    *value = minBuffer1->value;
+  } else {
+    *key   = key2;
+    *value = insertHeap.getMinValue();
+  }
+}
+
+template <class Key, class Value>
+inline void  KNHeap<Key, Value>::deleteMin(Key *key, Value *value) {
+  Key key1 = minBuffer1->key;
+  Key key2 = insertHeap.getMinKey();
+  if (key2 >= key1) {
+    *key   = key1;
+    *value = minBuffer1->value;
+    Assert2(minBuffer1 < buffer1 + KNBufferSize1); // no delete from empty
+    minBuffer1++;
+    if (minBuffer1 == buffer1 + KNBufferSize1) {
+      refillBuffer1();
+    }
+  } else {
+    *key = key2;
+    *value = insertHeap.getMinValue();
+    insertHeap.deleteMin();
+  }
+}
+
+template <class Key, class Value>
+inline  void  KNHeap<Key, Value>::insert(Key k, Value v) {
+  if (insertHeap.getSize() == KNN) { emptyInsertHeap(); }
+  insertHeap.insert(k, v);
+}
+#endif

peter-sanders-priority-queues/knupdown3.C

+// benchmark with (insert insert delMin)^N (insert delMin delMin)^N
+// just in time RNG
+// version 3: use easier to handle binary heaps
+
+
+#include <limits.h>
+#include <iostream>
+#include <stdlib.h>
+#include <math.h>
+
+using namespace std;
+
+#define DEBUGLEVEL 0
+#include "util.h"
+#define KNH
+//#define H2
+//#define H4
+//#define HSLOW
+
+#ifdef KNH
+#  include "knheap.C"
+#  define HTYPE KNHeap<int, int> 
+#  define HINIT heap(INT_MAX, -INT_MAX)
+#else
+#  ifdef H4
+#    include "heap4.h"
+#    define HTYPE Heap4<int, int> 
+#    define HINIT heap(INT_MAX, -INT_MAX, n)
+#  else
+#    ifdef H2
+#      include "heap2.h"
+#      define HTYPE Heap2<int, int> 
+#      define HINIT heap(INT_MAX, -INT_MAX, n)
+#    else 
+#      ifdef HSLOW
+#        include "heap-CLR.h"
+#        define HTYPE Heap2<int, int> 
+#        define HINIT heap(INT_MAX, -INT_MAX, n)
+#      endif
+#    endif
+#  endif
+#endif
+
+
+#define rand32() (ran32State = 1664525 * ran32State + 1013904223)
+#define getRandom() ((rand32()), (int)(ran32State >> 2))
+
+inline void onePass(HTYPE& heap, int n)
+{ int j, newElem, k, v;
+  double insertSum=0, deleteSum=0;
+  static int ran32State = 42 << 20;
+  
+  Debug3(cout << heap.getSize());
+  Assert(heap.getSize() == 0);
+  for (j = 0;  j < n;  j++) {   
+    newElem = getRandom();
+    heap.insert(newElem, j);
+    Debug0(insertSum += newElem);
+    Debug3(cout << newElem << "in ");
+    
+    heap.deleteMin(&k, &v);
+    Debug0(deleteSum += k);
+    Debug3(cout << k << "out ");
+    
+    newElem = getRandom();
+    heap.insert(newElem, j + 1);
+    Debug0(insertSum += newElem);
+    Debug3(cout << newElem << "in ");
+  }
+  Assert0(heap.getSize() == n);
+  for (j = 0;  j < n;  j++) {   
+    heap.deleteMin(&k, &v);
+    Debug0(deleteSum += k);
+    Debug3(cout << k << "out ");
+
+    newElem = getRandom();
+    heap.insert(newElem, j);
+    Debug0(insertSum += newElem);
+    Debug3(cout << newElem << "in ");
+    
+    heap.deleteMin(&k, &v);
+    Debug0(deleteSum += k);
+    Debug3(cout << k << "out ");
+  }
+  Assert0(deleteSum == insertSum);
+  Assert(heap.getSize() == 0);
+}
+
+
+int main(int argc, char **argv)
+{ 
+  Assert(argc > 1);
+  int n = atoi(argv[1]);
+  int i;
+  int ran32State = 42 << 20;
+  int repeat = 1;
+  double startTime, endTime;
+  if (argc > 2) repeat = atoi(argv[2]);
+  //#ifdef H2
+  //HTYPE *temp = new HTYPE(INT_MAX, -INT_MAX);
+  //HTYPE& heap =*temp;
+  //#else
+  HTYPE HINIT;
+  //#endif
+
+  // warmup
+  onePass(heap, n);
+
+  startTime = cpuTime();
+  for (i = 0;  i < repeat;  i++) {
+    onePass(heap, n);
+  }
+  endTime = cpuTime();
+
+  // output time per insert-delete-pair and this time over log n
+  double timePerPair = (endTime - startTime) / n / repeat / 3.0;
+  double timePerCompare = timePerPair / (log((double)n) / log(2.0));
+  cout << n << " " << timePerPair * 1e9 << " " << timePerCompare * 1e9 << endl;
+}
+

peter-sanders-priority-queues/knwiggle.C

+// benchmark with (insert insert delMin)^N (insert delMin delMin)^N
+// just in time RNG
+// version 3: use easier to handle binary heaps
+// knwiggle: access sequence is 
+// (insert (insert delete)^k)^N (delete (insert delete)^k)^N
+
+#include <limits.h>
+#include <iostream.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define DEBUGLEVEL 0
+#include "util.h"
+//#define KNH
+#define H2
+//#define H4
+
+#ifdef KNH
+#  include "knheap.C"
+#  define HTYPE KNHeap<int, int> 
+#  define HINIT heap(INT_MAX, -INT_MAX)
+#else
+#  ifdef H4
+#    include "heap4.h"
+#    define HTYPE Heap4<int, int> 
+#    define HINIT heap(INT_MAX, -INT_MAX, n)
+#  else
+#    ifdef H2