Commits

Jens Alfke committed f81a0ba

Got "IncrementalWrite" Tree test running. Overhauled the way tree nodes store children. Lots of other bug fixing.

Comments (0)

Files changed (9)

     }
     
     Node* EdgeNode::insert(KeyType key, Leaf* value) {
-        return (Node*) this->_insertChildAt(key, (Child*)value, _indexForKey(key)+1);
+        return (Node*) this->_insertChildAt(key, Child(value), _indexForKey(key)+1);
     }
     
     // Split this node in half and return the new right sibling
         EdgeNode()                                          { }
         EdgeNode(File* f,ChunkPosition p)                   :Node(f,p) { }
         virtual bool hasLeaves() const                      {return true;}
-        Leaf* _child(int i) const                           {return (Leaf*)Node::_child(i);}
+        Leaf* _child(int i) const                           {return Node::_child(i).leaf;}
         virtual int count() const                           {return _nChildren;}
         virtual Leaf* get(KeyType) const;
         virtual void findFirst(KeyType, Tree::Iterator&) const;
     protected:
         virtual const EdgeNode* _findEdge(KeyType) const    {return this;}
         virtual EdgeNode* _createSibling();
-        void _insertDirectly(KeyType key, Leaf *value, int i);
-        virtual Child* _loadChild(ChunkPosition) const;
+        virtual void _loadChild (Child&) const;
         
         virtual ChunkPosition _write (File*);
     };

src/InteriorNode.cpp

 
     InteriorNode::InteriorNode(Node *left, Node *right)
     {
-        _addChild(left->minKey(), (Child*)left);
-        _addChild(right->minKey(), (Child*)right);
+        _addChild(left->minKey(), Child(left));
+        _addChild(right->minKey(), Child(right));
     }
     
     int InteriorNode::count() const {
         Node *newChild = child->insert(key,value);
         _updateKey(i);
         if (newChild) {
-            return (InteriorNode*) _insertChildAt(newChild->minKey(), (Child*)newChild, i+1);
+            return (InteriorNode*) _insertChildAt(newChild->minKey(), Child(newChild), i+1);
         } else
             return NULL;
     }
     InteriorNode* InteriorNode::_createSibling() {
         return new InteriorNode();
     }
-        
+    
     void InteriorNode::deleteTree() {
-        for (int i=0; i<_nChildren; i++)
-            delete _child(i);
+        _deleteChildNodes();
         delete this;
     }
     

src/InteriorNode.h

     public:
         InteriorNode(Node *left, Node *right);
         InteriorNode(File *f, ChunkPosition p)                  :Node(f,p) { }
-        Node* _child(int i) const                               {return (Node*)Node::_child(i);}
+        Node* _child(int i) const                               {return Node::_child(i).node;}
         virtual int count() const;
         virtual Leaf* get(KeyType) const;
         virtual void findFirst(KeyType, Tree::Iterator&) const;
         InteriorNode()                                          { }
         virtual const EdgeNode* _findEdge(KeyType) const;
         virtual InteriorNode* _createSibling();
-        virtual Child* _loadChild(ChunkPosition) const;
+        virtual void _loadChild (Child&) const;
 
         virtual ChunkPosition _write (File*);
 
     
     
     Node::Node()
-        :_chunk(NULL)
-        ,_nChildren(0)
+    :_chunk(NULL)
+    ,_children(NULL)
+    ,_nChildren(0)
     { }
     
-    Node::Node(int nChildren, KeyType keys[], Child* children[])
-        :_chunk(NULL)
-        ,_nChildren(nChildren)
+    Node::Node(int nChildren, KeyType keys[], Node::Child* children[])
+    :_chunk(NULL)
+    ,_children(new Child[kMaxChildren])
+    ,_nChildren(nChildren)
     {
         memcpy(_keys,keys, nChildren*sizeof(KeyType));
         memcpy(_children,children, nChildren*sizeof(_children[0]));
     }
     
-    Node::~Node() { }
+    Node::~Node() {
+        delete[] _children;
+    }
 
     
     /** Returns the maximum index whose key is <= the given key.
     }
     
     
-    Child* Node::_child(int i) const {
+    Node::Child* Node::children() const {
+        if (!_children && _nChildren > 0) {
+            // Allocate _children on demand:
+            assert(_chunk);
+            allocChildren();
+            for (int i=0; i<_nChildren; i++) {
+                _children[i].pos = _chunk->childPosition[i];
+                _children[i].node = NULL;
+                _children[i].leaf = NULL;
+            }
+        }
+        return _children;
+    }
+    
+    bool Node::_isChildLoaded(int i) const {
         assert(i>=0 && i<_nChildren);
-        if (_isChildLoaded(i))
-            return (Child*) _children[i];
+        return children()[i].node != NULL;
+    }
+    
+    Node::Child& Node::_child(int i) const {
+        assert(i>=0 && i<_nChildren);
+        Child &child = children()[i];
+        if (!child.leaf && child.pos > 0)
+            _loadChild(child);
+        return child;
+    }
+    
+    ChunkPosition Node::_getChildFilePosition (int i) const {
+        assert(i>=0 && i<_nChildren);
+        if (_children)
+            return _children[i].pos;
         else {
-            Child *child = _loadChild(_getChildFilePosition(i));
-            assert(child);
-            _children[i] = child;
-            return child;
+            assert(_chunk);
+            return _chunk->childPosition[i];
         }
     }
     
-    void Node::_addChild (KeyType key, Child* child) {
-        assert(child);
+    void Node::_setChildFilePosition(int i, ChunkPosition pos) {
+        assert(i>=0 && i<_nChildren);
+        children()[i].pos = pos;
+    }
+    
+    
+    void Node::_addChild (KeyType key, const Node::Child& child) {
         assert(_nChildren<kMaxChildren);
         _keys[_nChildren] = key;
+        allocChildren();
         _children[_nChildren++] = child;
         _dirty = true;
     }
             assert(dstIndex>=0 && dstIndex+n <= kMaxChildren);
             assert(srcIndex>=0 && srcIndex+n <=src->_nChildren);
             memmove(&_keys[dstIndex], &src->_keys[srcIndex], n*sizeof(KeyType));
-            memmove(&_children[dstIndex], &src->_children[srcIndex], n*sizeof(Child*));
+            allocChildren();
+            memmove(&_children[dstIndex], &src->children()[srcIndex], n*sizeof(_children[0]));
             _dirty = true;
         }
     }
     }        
     
     
-    Node* Node::_insertChildAt(KeyType key, Child *child, int i) {
+    void Node::_insertDirectly(KeyType key, const Node::Child &child, int i) {
+        this->_slideChildren(i, i+1);
+        _keys[i] = key;
+        allocChildren();
+        _children[i] = child;
+    }
+    
+    Node* Node::_insertChildAt(KeyType key, const Node::Child &child, int i) {
         if (!this->isFull()) {
             // Insert leaf directly:
             this->_insertDirectly(key,child,i);
         }
     }
     
-    void Node::_insertDirectly(KeyType key, Child *child, int i) {
-        this->_slideChildren(i, i+1);
-        _keys[i] = key;
-        _children[i] = child;
+    void Node::_deleteChildNodes() {
+        if (_children)
+            for (int i=0; i<_nChildren; i++)
+                delete _children[i].node;
     }
     
     void Node::describeAs (const char *className) const {
 #include "Base.h"
 #include "Tree.h"
 #include "Chunk.h"
+#include <assert.h>
 
 #ifndef _MOOSEYARD_NODE_
 #define _MOOSEYARD_NODE_
     class InteriorNode;
     class EdgeNode;
     
-    struct Child; // placeholder
-
+    
     
 #pragma pack(2)
 
 #pragma options align=reset
 
     
+    
     class Node : public Serializable {
     public:
         static const int kMinChildren = 10;                 // Can be varied but must be >= 2
 
         virtual bool hasLeaves() const                      {return false;}
         virtual int nChildren() const                       {return _nChildren;}
-        Child* _child(int i) const;
         virtual int count() const = 0;
         KeyType key(int i) const                            {return _keys[i];}
         virtual Leaf* get(KeyType) const =0;
         virtual void dump(int indent) const =0;
 
     protected:
+        struct Child {
+            Child () { }
+            Child (Node *node) :node(node), pos(0) { }
+            Child (KeyValueChunk *leaf) :leaf(leaf), pos(0) { }
+            Child (ChunkPosition pos) :node(NULL), pos(pos) {assert(pos>0);}
+            union {
+                Node *node;
+                KeyValueChunk *leaf;
+            };
+            ChunkPosition pos;
+        };
+        
         Node();
         Node(int keyCount, KeyType keys[], Child* children[]);
         Node(File*,ChunkPosition);
         int _indexForKey(KeyType) const;
 
-        void _setChild (int i, Child* child)         {_children[i] = child;}
-        void _addChild (KeyType, Child* child);
+        Child* children() const;
+        Child& _child(int) const;
+        virtual void _loadChild (Child&) const =0;
+        void _addChild (KeyType, const Child& child);
         void _copyChildren(Node *src, int srcIndex, int dstIndex, int n);
         void _slideChildren(int srcIndex, int dstIndex);
         void _moveChildrenFromLeft(Node *left, int numToMove);
         void _moveChildrenFromRight(Node *right, int numToMove);
-        void _insertDirectly(KeyType, Child *child, int i);
+        void _insertDirectly(KeyType, const Child &child, int i);
         void _removeDirectly(int i)                         {this->_slideChildren(i+1, i);}
         
-        Node* _insertChildAt(KeyType, Child *child, int i);
+        Node* _insertChildAt(KeyType, const Child &child, int i);
         virtual Node* _createSibling() =0;
-        void _removeChild(Child *child);
-        void _improveNChildren(InteriorNode *parent, int indexInParent);
-        virtual bool _eatChildren()                         {return false;}
         bool _eatLeft(Node *left);
         bool _eatRight(Node *right);
         bool _borrowFromLeft(Node *left);
         bool _borrowFromRight(Node *right);
 
-        ChunkPosition _getChildFilePosition (int i) const   {return _chunk ?_chunk->childPosition[i].get() :0L;}
+        ChunkPosition _getChildFilePosition (int i) const;
+        void _setChildFilePosition(int i, ChunkPosition);
         bool _isChildInFile (int i) const                   {return _getChildFilePosition(i) != 0;}
-        virtual Child* _loadChild(ChunkPosition) const =0;
+        bool _isChildLoaded (int i) const;
+        void _deleteChildNodes();
 
         void describeAs(const char *name) const;
 
         ChunkPosition _write (File *out, int chunkType, LittleEndian<ChunkPosition> childPositions[]);
-
-        const NodeChunk* _chunk;
+        
+    private:
+        void allocChildren() const                  {if (!_children) _children = new Child[kMaxChildren];}
+        friend class InteriorNode;
+        
+        const NodeChunk* _chunk;                    // NULL unless I was loaded from file
+        mutable Child* _children;                   // If NULL, consult _chunk->childPosition[]
+    protected:
         int _nChildren;
         KeyType _keys[kMaxChildren];
-        
-    private:
-        friend class InteriorNode;
-        bool _isChildLoaded (int i) const                   {return _children[i] != NULL;}
-        
-        mutable Child* _children[kMaxChildren];
     };
         
 }

src/NodeStorage.cpp

                 throw File::Error("key too long for b-tree");
             keyLengths[i] = _keys[i].length;
             pieces[3+i] = _keys[i];
+            assert(childPositions[i] > 0);
         }
         
         ChunkPosition newPos = (ChunkPosition) out->position();
     
     
     ChunkPosition InteriorNode::_write (File *out) {
+        LittleEndian<ChunkPosition> childPositions[_nChildren];
         // Write children first so they'll have updated file offsets:
-        for (int i=0; i<_nChildren; i++)
-            _child(i)->save(out);
+        for (int i=0; i<_nChildren; i++) {
+            if (_isChildLoaded(i)) {
+                Child &child = Node::_child(i);
+                child.node->save(out);
+                child.pos = child.node->chunkPosition();
+                childPositions[i] =  child.pos;
+            } else {
+                childPositions[i] = _getChildFilePosition(i);
+            }
+        }
 
-        LittleEndian<ChunkPosition> childPositions[_nChildren];
-        for (int i=0; i<_nChildren; i++)
-            childPositions[i] = _child(i)->chunkPosition();
         return Node::_write(out, NodeChunk::kInteriorChunkType, childPositions);
     }
     
         LittleEndian<ChunkPosition> childPositions[_nChildren];
         for (int i=0; i<_nChildren; i++) {
             ChunkPosition pos = _getChildFilePosition(i);
-            if (pos==0) {
-                const KeyValueChunk* child = _child(i);
-                if (child) {
-                    off_t position = out->positionOfMapped(child);
-                    if (position < 0) {
-                        // Hasn't been saved yet, so write it & update pos:
-                        position = out->position();
-                        KeyValueChunk::write(out, (ChunkPosition)position, child->key(), child->value());
-                    }
-                    if (position >= UINT32_MAX)
-                        throw File::Error("Tree value's file position too large");
-                    pos = (ChunkPosition)position;
+            if (pos==0 && _isChildLoaded(i)) {
+                Child &child = Node::_child(i);
+                off_t position = out->positionOfMapped(child.leaf);
+                if (position < 0) {
+                    // Hasn't been saved yet, so write it & update pos:
+                    position = out->position();
+                    KeyValueChunk::write(out, (ChunkPosition)position, 
+                                         child.leaf->key(), child.leaf->value());
                 }
+                if (position >= UINT32_MAX)
+                    throw File::Error("Tree value's file position too large");
+                child.pos = (ChunkPosition)position;
+                pos = child.pos;
             }
             childPositions[i] = pos;
         }
     
     
     Node::Node(File *file, ChunkPosition pos)
-    :Serializable(file,pos),
-     _chunk((NodeChunk*) Chunk::readFrom(file, pos)),
-     _nChildren(_chunk->nChildren),
-     _children()
+    :Serializable(file,pos)
+    ,_chunk((NodeChunk*) Chunk::readFrom(file, pos))
+    ,_children()
+    ,_nChildren(_chunk->nChildren)
     {
         if (_chunk->nChildren > kMaxChildren)
             throw File::Error(ERANGE, "Read bogus child count for node");
     }        
     
     
-    Child* InteriorNode::_loadChild(ChunkPosition pos) const {
-        return (Child*) Node::readNode(file(),pos);
+    void InteriorNode::_loadChild (Child &child) const {
+        child.node = Node::readNode(file(),child.pos);
     }
     
-    Child* EdgeNode::_loadChild(ChunkPosition pos) const {
+    void EdgeNode::_loadChild (Child &child) const {
         /*fprintf(stderr,"...reading Leaf at %i\n", pos);*/
-        const Chunk *child = Chunk::readFrom(file(), pos);
-        if (child->type() != KeyValueChunk::kChunkType)
+        const Chunk *leaf = Chunk::readFrom(file(), child.pos);
+        if (leaf->type() != KeyValueChunk::kChunkType)
             throw File::Error("EdgeNode child is not key-value chunk");
-        return (Child*) child;
+        child.leaf = (KeyValueChunk*)leaf;
     }
     
     
     
     
     ChunkPosition Tree::save (File* file) {
+        file->setPositionToEnd();
+        assert(file->position() > 0);       // Zero is an illegal position for any Chunk
         _root->save(file);
         return _root->chunkPosition();
     }

test/Tree_test.cpp

 }
 
 
-
-#pragma mark -
-#pragma mark PERSISTENCE TESTS:
-    
-    
-static ChunkPosition writeTreeTo(const char *path, int num) {
-    if (verbose) fprintf(stderr, "\n\nWriting a tree of %i nodes to /tmp/btreetest\n", num);
-    Tree tree;
-    for (int i=0; i<num; i++)
-        Insert(tree,i, false);
-    
-    File file(path, O_RDWR | O_CREAT | O_TRUNC);
-    file.setLength(0);
-    ChunkPosition rootPos = tree.save(&file);
-    fprintf(stderr, "Saved to %s -- root at %u\n", path, rootPos);
-    return rootPos;
-}
-
-
-TEST(Tree, Write) {
-    writeTreeTo("/tmp/btreetest", 100);
-}
-    
-TEST(Tree, Read) {
-    const int num = 500;
-    ChunkPosition rootPos = writeTreeTo("/tmp/btreetest", num);
-
-    File file("/tmp/btreetest", false);
-    file.mapRegion(0, (size_t)file.length());
-    Tree tree(&file, rootPos);
-    for (int i=0; i<num; i++) {
-        Leaf *value = tree.get(mkKey(i));
-        ASSERT_TRUE(value != NULL);
-        const char *valueStr = stringOf(value->value());
-        fprintf(stderr,"%i -> %s\n", i, valueStr);
-        char expectedStr[100];
-        sprintf(expectedStr, "value for %s", mkKey(i));
-        EXPECT_EQ(0, strcmp(valueStr, expectedStr));
-    }
-    if (verbose)
-        tree.dump();
-}
-
-/*
-TEST(Tree, IncrementalWrite) {
-    if (verbose) fprintf(stderr, "\n\nWriting a tree of %i nodes to /tmp/btreetest\n", num);
-    Tree tree;
-    for (int i=0; i<num/2; i++)
-        Insert(tree,i, false);
-    
-    File file("/tmp/btreetest", true);
-    file.setLength(0);
-    tree.save(&file);
-    if (verbose) fprintf(stderr, "Saved -- eof=%lld\n", file.length());
-    
-    if (verbose) fprintf(stderr, "Adding the rest of the values\n");
-    for (int i=num/2; i<num; i++)
-        Insert(tree,i, false);
-    
-    tree.save(&file);
-    if (verbose) fprintf(stderr, "Saved -- eof=%lld\n", file.length());
-    
-    if (verbose) fprintf(stderr, "Removing every 3rd value\n");
-    for (int i=0; i<num; i += 3)
-        Remove(tree, i, false);
-    
-    tree.save(&file);
-    if (verbose) fprintf(stderr, "Saved -- eof=%lld\n", file.length());
-}
-
-
 TEST(Tree,Words) {
     const int kIterations = 3;
     readWords();
         {
             Timer t("adding");
             for( int i=0; i<sNWords; i++)
-                tree.insert(sWords[i]->string(), sWords[i]);
-            EXPECT_EQ(sNWords, tree.count());
+                tree.insert(sWords[i], mkKeyValue(sWords[i]));
+            ASSERT_EQ(sNWords, tree.count());
             totalAdd += t.elapsed();
         }        
         {
             Timer t("getting");
             for( int i=0; i<sNWords; i++)
-                EXPECT_EQ( sWords[i] ,  tree.get(sWords[i]->string()) );
+                ASSERT_TRUE( Blob(sWords[i]).equals(tree.get(sWords[i])->value()) );
             totalGet += t.elapsed();
         }        
         {
             Timer t("removing");
             for( int i=0; i<sNWords; i++)
-                EXPECT_TRUE( tree.remove(sWords[i]->string(),NULL) );
+                ASSERT_TRUE( tree.remove(sWords[i],NULL) );
             totalRemove += t.elapsed();
         }        
     }
     printf("Average get   = %.0lfns\n", totalGet/kIterations/sNWords*1e9);
     printf("Average remove= %.0lfns\n", totalRemove/kIterations/sNWords*1e9);
 }
-*/
+
+
+#pragma mark -
+#pragma mark PERSISTENCE TESTS:
+    
+    
+static ChunkPosition writeTreeTo(const char *path, int num) {
+    if (verbose) fprintf(stderr, "\n\nWriting a tree of %i nodes to /tmp/btreetest\n", num);
+    Tree tree;
+    for (int i=0; i<num; i++)
+        Insert(tree,i, false);
+    
+    File file(path, O_RDWR | O_CREAT | O_TRUNC);
+    file.setLength(0);
+    file.write("header", 6);    // Zero is an illegal position for chunks
+    ChunkPosition rootPos = tree.save(&file);
+    fprintf(stderr, "Saved to %s -- root at %u\n", path, rootPos);
+    return rootPos;
+}
+    
+static void verifyEntry (Tree &tree, int i) {
+    Leaf *value = tree.get(mkKey(i));
+    ASSERT_TRUE(value != NULL);
+    const char *valueStr = stringOf(value->value());
+    fprintf(stderr,"%i -> %s\n", i, valueStr);
+    char expectedStr[100];
+    sprintf(expectedStr, "value for %s", mkKey(i));
+    ASSERT_EQ(0, strcmp(valueStr, expectedStr));
+}
+
+
+TEST(Tree, Write) {
+    writeTreeTo("/tmp/btreetest", 100);
+}
+    
+TEST(Tree, Read) {
+    const int num = 100;
+    ChunkPosition rootPos = writeTreeTo("/tmp/btreetest", num);
+
+    File file("/tmp/btreetest", O_RDONLY);
+    file.mapRegion(0, (size_t)file.length());
+    Tree tree(&file, rootPos);
+    for (int i=0; i<num; i++) 
+        verifyEntry(tree, i);
+    if (verbose)
+        tree.dump();
+}
+
+
+TEST(Tree, IncrementalWrite) {
+    fprintf(stderr, "\n\nWriting a tree of %i nodes to /tmp/btreetest\n", num);
+    ChunkPosition rootPos;
+    {
+        File file("/tmp/btreetest", O_RDWR | O_CREAT | O_TRUNC);
+        file.setLength(0);
+        file.write("header", 6);    // Zero is an illegal position for chunks
+
+        Tree tree;
+        for (int i=0; i<num/2; i++)
+            Insert(tree,i, false);
+        
+        rootPos = tree.save(&file);
+        fprintf(stderr, "Saved -- root=%u, eof=%lld\n", rootPos, file.length());
+        
+        fprintf(stderr, "Adding the rest of the values\n");
+        for (int i=num/2; i<num; i++)
+            Insert(tree,i, false);
+        
+        rootPos = tree.save(&file);
+        fprintf(stderr, "Saved -- root=%u, eof=%lld\n", rootPos, file.length());
+    }
+    {
+        fprintf(stderr, "Re-opening tree...\n");
+        File file("/tmp/btreetest", O_RDWR);
+        file.mapRegion(0, (size_t)file.length());
+        Tree tree(&file, rootPos);
+        fprintf(stderr, "Verifying...\n");
+        for (int i=0; i<num; i++)
+            verifyEntry(tree, i);
+        fprintf(stderr, "Removing every 3rd value\n");
+        for (int i=0; i<num; i += 3)
+            Remove(tree, i, false);
+        
+        rootPos = tree.save(&file);
+        fprintf(stderr, "Saved -- root=%u, eof=%lld\n", rootPos, file.length());
+    }
+    {
+        fprintf(stderr, "Re-reading tree...\n");
+        File file("/tmp/btreetest", O_RDONLY);
+        file.mapRegion(0, (size_t)file.length());
+        Tree tree(&file, rootPos);
+        fprintf(stderr, "Verifying...\n");
+        for (int i=0; i<num; i++) {
+            if (i % 3 != 0)
+                verifyEntry(tree, i);
+            else
+                ASSERT_EQ(NULL, tree.get(mkKey(i)));
+        }
+    }
 }
 
 
     }
 }    
 */
+
+}