Source

quechua / modules / data / hashtree.cc

/* 
 * Quechua - the lightweight data mining framework
 *
 * Copyright (C) 2012 Marek Denis <quechua@octogan.net>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "hashtree.h"

bool stdcmp::operator()(const itemset* a, const itemset* b) const {
    return itemset::cmp(a,b)==-1?true:false;
};

hashnode::hashnode() : depth(1), type(leaf) {
    for(int i=0;i<modulo;++i) children[i]=NULL;
}

hashnode::hashnode(int d,TREETYPE t) : depth(d), type(t) {
    for(int i=0;i<modulo;++i) children[i]=NULL;
}

hashnode::~hashnode() {
    items.clear();
    for(int i=0;i<modulo;++i)
        if(children[i]!=NULL)
            delete children[i];
}

bool hashnode::add(itemset* item,int idx) {
    if(type==interior) {
        index_t branch_idx = branch(item->val(idx));
        LOG(CRITIC) << "hashnode::add interior, branch_idx=" << branch_idx;
        return children[branch_idx]->add(item,++idx);
    }
    if(type==leaf) {
        if(item->getsize() == depth) { // there are no items to hash
            items.push_back(item);
            return true;
        }
        else if(items.size() < modulo) {
            items.push_back(item);
            return true;
        }
        else { // we need to split items into children
            type = interior;
            int branch_idx;
            items.push_back(item);
            for(int i=0;i<items.size();++i) {
                branch_idx = branch(items[i]->val(idx));
                children[branch_idx]->add(items[i],idx+1);
            }
            items.clear();
            return true;
        }
    }
};
// TODO(marek) : Erase border variable
set_t hashnode::find(const index_t* txn,int size,int idx,int border) const {
    set_t r_set;
    set_t tmp_set;
    if(type==interior) {
        assert(size>depth-1);
        int branch_idx;
        for(int i=idx;i<size-border;++i) {
            branch_idx = hash(txn[i]);
            if(!children[branch_idx])
                continue;
            tmp_set = children[branch_idx]->find(txn,size,i+1,border);
            r_set.insert(tmp_set.begin(),tmp_set.end());
            tmp_set.clear();
        }
        return r_set;
    }
    if(type==leaf) {
        for(int i=0;i<items.size();++i) {
           if(items[i]->intersection(txn,size)) r_set.insert(items[i]);
        }
        return r_set;
    }
}

int hashnode::getdepth() const {
    return depth;
};

void hashnode::setdepth(int x) {
    depth = x;
}

hashnode* hashnode::getchild(int x) const {
    if(x < 0 || x > modulo-1)
        return children[modulo-1];
    return children[x];
};

void hashnode::clear() {
    for_each(items.begin(),items.end(),delitem);
    for(int i=0;i<modulo;++i) children[i]->clear();
};

void hashnode::delitem(itemset* i) {
    return;
    delete i;
};

index_t hashnode::hash(const index_t& x) const {
    return x % modulo;
};

//returns index of the corresponding hashnode
index_t hashnode::branch(const index_t& x) {
    index_t branch_idx = hash(x);
    if(!children[branch_idx]) {
        children[branch_idx] = new hashnode(depth+1, leaf);
    }

    assert(branch_idx >=0 && branch_idx<modulo);
    return branch_idx;
}