Source

quechua / modules / data / itemsets.h

/* 
 * Quechua - the lightweight data mining framework
 *
 * Copyright (C) 2012 Marek Denis <quechua@octogan.net>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef ITEMSETS_H
#define ITEMSETS_H

#include "types.h" // for index_t
#include "../../../include/log.h"
//#include "hashtree.h"
#include <stdlib.h>
#include <memory>
#include <algorithm>
#include <deque>
#include <cassert>

using std::deque;

class itemset;
class itemsets;
class hashnode;
typedef std::shared_ptr<itemset>  itemset_ptr;
typedef std::shared_ptr<itemsets> itemsets_ptr;


class itemset {
 private:
    int copied;
    int size;
    int count;
    bool generator;
    index_t* values;
 public:
    itemset();
    itemset(int);
    virtual ~itemset();

//    index_t* giveaway(); - rather bad idea

    void debug() const;

    int  getsize() const;
    int  getcount() const;
    void setcount(int x);
    bool getgenerator() const;
    void setgenerator(bool g);
    void inc();
    void dec();
    index_t& val(int x) const;
    int clone(itemset* item,int border,bool cloneall = false) const;
    itemsets* subsets() const;
    bool intersection(const index_t*, int size);

    static int cmp(const itemset* a, const itemset* b);
    static int cmp_part(const itemset* a, const itemset* b, int shift);

    static itemset* merge(const itemset* a, const itemset* b);
    
};

class itemsets {
    protected:
        deque<itemset*> values;
        hashnode* tree;
        
        int find_idx(const itemset* item) const;

        static void delitem(itemset* i);
    public:
        //static itemset* sentinel;
        itemsets();
        itemsets(int);
        virtual ~itemsets();
        void clear();

        void debug() const;

        int getsize() const;
        itemset* find(const itemset* item) const; // try binary search
        itemset* insert(itemset* item); // try binary search like insertinga
        void append(itemset* item);
        bool erase(const itemset* item);
        bool erase(int x);
        itemset* getitem(int x) const;
        itemsets* subsets(itemset* pset);
        void buildtree();
        void prunecandidates(int minsup);
        void batchinc(int x);
        void correlate(itemset* compare,itemsets* ct);
 //       set_t correlate_trad(const itemset* compare,itemsets_ptr ct);
        static const itemset* utilize(const itemset* a, const itemset* b);


        // debugging method
//        void check_sets(set_t& a, set_t& b);
};
#include "hashtree.h"
#endif // ITEMSETS_H