db_reader.h

Go to the documentation of this file.
00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *
00006  *  This program is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU General Public License
00008  *  as published by the Free Software Foundation; either version 2
00009  *  of the License, or (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License along
00017  *  with this program; if not, write to the Free Software Foundation, Inc.,
00018  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00019  */
00021 #ifndef _DB_READER_H
00022 #define _DB_READER_H
00023 
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
00028 
00029 // using namespace std;
00030 
00031 // adding all files manually for now, TODO: improve this include system
00032 
00033 #include "helper_funs.h"
00034 #include "generic_classes.h"
00035 #include "pat_fam.h"      // added later to make .cpp files
00041 template<typename PATTERN, typename TOKENIZER, template <typename> class ALLOC >
00042 class db_reader
00043 {
00044  public:
00045 
00046   typedef vat<typename PATTERN::PAT_PROPS, typename PATTERN::MINE_PROPS, ALLOC, std::vector> VAT; 
00047   typedef tokenizer<PATTERN, TOKENIZER, ALLOC > TKNZ; 
00048 
00053   db_reader(const char* infile_name): _in_db(infile_name)  {}
00054 
00060   db_reader(const char* infile_name, int mem_size): _in_db(infile_name) {filename=strdup(infile_name);_max_mem=mem_size;}
00061 
00065   ~db_reader()
00066     { close();}
00067 
00073   void open(const char* infile_name) { 
00074     if(is_open())
00075       _in_db.close();
00076       _in_db.open(infile_name);
00077     }
00078 
00082   void close() { 
00083     _in_db.close();
00084   }
00085 
00089   bool is_open() { 
00090     return _in_db.is_open();
00091   }
00092 
00099   template<class SM_T>
00100   void get_length_one(pat_fam<PATTERN>& freq_pats, storage_manager<PATTERN, VAT, ALLOC, SM_T>& vat_hmap, const int& minsup) {
00101 
00102     int tid;
00103     VAT* ivat;
00104     typename pat_fam<PATTERN>::IT pf_it;
00105 
00106     if(!is_open()) {
00107       // stream not open
00108       std::cerr<<"db_reader: file stream not open in get_length_one()"<<std::endl;
00109       return;
00110     }
00111 
00112     tid=tknz.parse_next_trans(_in_db, freq_pats, vat_hmap);
00113     while(tid!=-1) {
00114       tid=tknz.parse_next_trans(_in_db, freq_pats, vat_hmap);
00115     }
00116 
00117     // fill in support of level-1, discarding infrequent ones
00118     for(pf_it=freq_pats.begin(); pf_it!=freq_pats.end();++pf_it) {
00119       if(!(ivat=vat_hmap.get_vat(*pf_it))) {
00120         std::cerr<<"db_reader.get_length_one: VAT not found for "<<*pf_it<<std::endl;
00121         return;
00122       }
00123 
00124       //cout << "LEVEL 1 " << *ivat << endl;
00125     
00126       if((ivat->size())>=minsup)
00127         (*pf_it)->set_sup(make_pair(ivat->size(), 0));
00128       else {
00129         // Delete the pattern and the vat.
00130         vat_hmap.delete_vat(*pf_it);
00131         delete (*pf_it);
00132 
00133         freq_pats.erase(pf_it); 
00134         pf_it--;
00135       }
00136     } //end for
00137 
00138     // sort level-1 patterns
00139     //typename pat_fam<PATTERN>::IT b=freq_pats.begin(), e=freq_pats.end();
00140     //sort(b, e, less_than<PATTERN>());
00141 
00142   } //end get_length_one()
00143 
00144  private:
00145   std::ifstream _in_db;
00146   char* filename; // Holds the file name of the dataset
00147   unsigned long _max_mem; 
00148   TKNZ tknz; // An object of Tokenizer class
00149 }; //end class db_reader<itemset>
00150 
00151 #endif

Generated on Wed Jul 26 14:01:08 2006 for DMTL by  doxygen 1.4.7