seq_vat.h

00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *
00006  *  This program is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU General Public License
00008  *  as published by the Free Software Foundation; either version 2
00009  *  of the License, or (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License along
00017  *  with this program; if not, write to the Free Software Foundation, Inc.,
00018  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00019  */
00020 #ifndef _SEQ_VAT_H_
00021 #define _SEQ_VAT_H_
00022 
00023 #include "pattern.h"
00024 #include "seq_instance.h"
00025 #include "generic_classes.h"
00026 
00027 #include "typedefs.h"
00028 
00035 template<class PP, class MP, template <typename> class ALLOC, template <typename, typename> class VAT_ST >
00036 ostream& operator<< (ostream& ostr, const vat<SEQ_PROP, V_Fkk_MINE_PROP, ALLOC, VAT_ST>& svat);
00037 
00038 template<class PP, class MP, template <typename> class ALLOC, template <typename, typename> class VAT_ST >
00039 class vat<SEQ_PROP, V_Fkk_EMB_MINE_PROP, ALLOC, VAT_ST> 
00040 {
00041 public:  
00042   typedef pattern_support<V_Fkk_EMB_MINE_PROP> PAT_SUP;
00043   typedef vat<SEQ_PROP, V_Fkk_EMB_MINE_PROP, ALLOC, VAT_ST> VAT;
00044   typedef seq_instance<V_Fkk_EMB_MINE_PROP> INSTANCE;
00045   typedef VAT_ST<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >, ALLOC<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > > > IDLIST_T;
00046   typedef typename IDLIST_T::const_iterator CONST_IT;
00047   typedef typename IDLIST_T::iterator IT;
00048   
00049   typedef VAT_ST<INSTANCE, ALLOC<INSTANCE> > INSTANCES;
00050   typedef typename INSTANCES::const_iterator CONST_INST_IT;
00052     void* operator new(size_t size) {
00053       ALLOC<VAT> va;
00054       return va.allocate(size);
00055     }  
00056     
00057     void  operator delete(void *p, size_t size) {
00058       if (p) {
00059         ALLOC<VAT> va;
00060         va.deallocate(static_cast<VAT*> (p), size);
00061       }
00062     }
00063     
00064     inline IT begin() {return _idlist.begin();}
00065     inline CONST_IT begin() const {return _idlist.begin();}
00066     inline IT end() {return _idlist.end();}
00067     inline CONST_IT end() const {return _idlist.end();}
00068     
00069     bool empty() const {return _idlist.empty;}
00070     int size() const{return _idlist.size();}
00071     
00072     void clear() {_idlist.clear();}
00073     
00074     void push_back(pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > const& inst) {
00075       _idlist.push_back(inst);
00076     }
00077     
00078     // Return the size of the Vat in bytes  
00079     unsigned long int byte_size() const{
00080       unsigned long int  b_size=0;
00081       CONST_IT it;
00082       for (it = begin(); it!=end();++it){
00083         b_size+=(2+it->second.size())*sizeof(int);
00084       }
00085       return b_size;
00086     }
00087     
00088     //serializing a VAT to an output stream
00089     void write_file(ostream & output_file) {
00090       int ITSZ=sizeof(int);
00091       ostringstream output;
00092       CONST_IT it;
00093       CONST_INST_IT it_inst;
00094       int tid,inst_sz,ts;
00095       for (it=begin();it!=end();it++){
00096         tid=it->first;
00097         inst_sz=it->second.size();
00098         output.write(reinterpret_cast<const char *>(&tid), ITSZ);
00099         output.write(reinterpret_cast<const char *>(&inst_sz), ITSZ);
00100         for (it_inst =it->second.begin(); it_inst!=it->second.end(); ++it_inst){
00101           ts=it_inst->get_ts();  
00102           output.write(reinterpret_cast<const char *>(&ts), ITSZ);
00103         } //for it_inst
00104       }//it
00105       output_file.write(output.str().c_str(), output.str().size());
00106     } //end write_file
00107     
00108     
00109     //deserialize a vat from an input stream 
00110     void read_file (istream & input, unsigned long int size) {
00111       //cout<<"reading from the the file"<<endl;
00112       int ITSZ=sizeof(int);
00113       int buf_size=size/ITSZ;   
00114       int *buf = new int[buf_size];
00115       input.read((char *)buf, (size)); 
00116       int current=0;
00117       while(current<buf_size){
00118         int tid=buf[current++];
00119         int inst_sz=buf[current++];
00120         int inst_end=current+inst_sz;
00121         INSTANCES new_tidlist;
00122         while(current <inst_end){
00123           new_tidlist.push_back(INSTANCE(buf[current++]));
00124         }
00125         _idlist.push_back(make_pair(tid, new_tidlist));
00126       }      
00127       input.clear();
00128       delete [] buf;      
00129     } //read_file
00130     
00136     template<typename PATTERN>
00137       static VAT** intersection(const VAT* const& vat_i, const VAT* const& vat_j, PAT_SUP** cand_sups, PATTERN**, bool& is_l2) {
00138         
00139         VAT** cand_vats = new VAT*[2]; // Max of 2 candidates possible for sequence extension.
00140         
00141         // Determine how many candidate vats.
00142         // If P->A join P->A then results in only one candidate vat.
00143         bool self_join_only = (cand_sups[1] == 0);   
00144         
00145         cand_vats[0]=new VAT; 
00146         if(!self_join_only)
00147           cand_vats[1]=new VAT;
00148         
00149         CONST_IT it_i=vat_i->begin(), it_j=vat_j->begin();
00150         
00151         if(it_i == vat_i->end() || it_j == vat_j->end())
00152           return cand_vats;
00153         
00154         while(it_i != vat_i->end() && it_j != vat_j->end()) {
00155           if(it_i->first < it_j->first) {
00156             it_i++;
00157             continue;
00158           }
00159           if(it_i->first > it_j->first) {
00160             it_j++;
00161             continue;
00162           }
00163           //execution reaches this point only if both TIDs are equal
00164           INSTANCES seqi_inst, seqj_inst;
00165           bool seqi_ind = false, seqj_ind = false, seqi_emb = false, seqj_emb = false;
00166           
00167           //intersect the instances for this tid
00168           INSTANCE::seq_join_seq(it_i->second, it_j->second, seqi_inst, seqj_inst, seqi_ind, 
00169                          seqj_ind, seqi_emb, seqj_emb, self_join_only);
00170           
00171           if(!seqi_inst.empty())
00172             cand_vats[0]->push_back(make_pair(it_i->first, seqi_inst));
00173           
00174           if(!seqj_inst.empty() && !self_join_only)
00175             cand_vats[1]->push_back(make_pair(it_i->first, seqj_inst));
00176           
00177           it_i++;
00178           it_j++;
00179           
00180         } //end while (it_i &* it_j)
00181         
00182         cand_sups[0]->set_sup(make_pair(cand_vats[0]->size(), cand_vats[0]->size()));
00183         if(!self_join_only)
00184           cand_sups[1]->set_sup(make_pair(cand_vats[1]->size(), cand_vats[1]->size()));
00185         
00186         return cand_vats;
00187       }
00188     friend ostream& operator<< <>(ostream&, const VAT&);
00189 private:
00190       IDLIST_T _idlist;
00191 };
00192 
00193 template<class PP, class MP, template <typename> class ALLOC, template <typename, typename> class VAT_ST >
00194 ostream& operator<< (ostream& ostr, const vat<SEQ_PROP, V_Fkk_MINE_PROP, ALLOC, VAT_ST>& svat) {
00195   typedef vat<SEQ_PROP, V_Fkk_MINE_PROP, ALLOC, VAT_ST> VAT;
00196   typename VAT::CONST_IT it=svat.begin();
00197   
00198   while(it!=svat.end()) {
00199     ostr<<it->first<<" ";
00200     typename VAT::CONST_INST_IT inst_it=it->second.begin();
00201     while(inst_it!=it->second.end())
00202       ostr<<(*inst_it++)<<" ";
00203     ostr<<endl;
00204     it++;
00205   }
00206   
00207   return ostr;
00208 }//end friend operator<<
00209 
00210 
00211 
00212 
00219 template<class PP, class MP, template <typename> class ALLOC, template <typename, typename> class VAT_ST >
00220 class vat<SEQ_PROP, V_Fkk_IND_MINE_PROP, ALLOC, VAT_ST> {
00221 public:
00222   
00223   typedef pattern_support<V_Fkk_IND_MINE_PROP> PAT_SUP;
00224   typedef vat<SEQ_PROP, V_Fkk_IND_MINE_PROP, ALLOC, VAT_ST> VAT;
00225   typedef seq_instance<V_Fkk_IND_MINE_PROP> INSTANCE;
00226   typedef VAT_ST<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >, ALLOC<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > > > IDLIST_T;
00227   typedef typename IDLIST_T::const_iterator CONST_IT;
00228   typedef typename IDLIST_T::iterator IT;
00229   typedef typename IDLIST_T::reverse_iterator RIT;
00230   
00231   typedef VAT_ST<INSTANCE, ALLOC<INSTANCE> > INSTANCES;
00232   typedef typename INSTANCES::const_iterator CONST_INST_IT;
00235     void* operator new(size_t size) {
00236       ALLOC<VAT> va;
00237       return va.allocate(size);
00238     }
00239     
00240     void  operator delete(void *p, size_t size) {
00241       if (p) {
00242         ALLOC<VAT> va;
00243         va.deallocate(static_cast<VAT*> (p), size);
00244       }
00245     }
00246     
00247     IT begin() {return _idlist.begin();}
00248     CONST_IT begin() const {return _idlist.begin();}
00249     IT end() {return _idlist.end();}
00250     CONST_IT end() const {return _idlist.end();}
00251     bool empty() const {return _idlist.empty;}
00252     int size() const {return _idlist.size();}
00253     void clear() {_idlist.clear();}
00254     void push_back(pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > const& inst) {
00255       _idlist.push_back(inst);
00256     }
00257     
00258     // Return the size of the Vat in bytes  
00259     unsigned long int byte_size() const{
00260       unsigned long int  b_size=0;
00261       CONST_IT it;
00262       for (it = begin(); it!=end();++it){
00263         b_size+=(2+it->second.size()*2)*sizeof(int);
00264       }
00265       return b_size;
00266     }
00267     
00268     //serializing a VAT to an output stream
00269     void write_file(ostream & output_file) {
00270       int ITSZ=sizeof(int);
00271       ostringstream output;
00272       CONST_IT it;
00273       CONST_INST_IT it_inst;
00274       int tid,inst_sz,induced,pos;
00275       for (it=begin();it!=end();it++){
00276         tid=it->first;
00277         inst_sz=it->second.size();
00278         output.write(reinterpret_cast<const char *>(&tid), ITSZ);
00279         output.write(reinterpret_cast<const char *>(&inst_sz), ITSZ);
00280         for (it_inst =it->second.begin(); it_inst!=it->second.end(); ++it_inst){
00281           induced=(int)it_inst->get_induced();
00282           pos=it_inst->get_pos();  
00283           output.write(reinterpret_cast<const char *>(&pos), ITSZ);
00284           output.write(reinterpret_cast<const char *>(&induced), ITSZ);
00285         } //for it_inst
00286       }//it
00287       output_file.write(output.str().c_str(), output.str().size());
00288     } //end write_file
00289     
00290     //deserialize a vat from an input stream 
00291     void read_file (istream & input, unsigned long int size) {
00292       //cout<<"reading from the the file"<<endl;
00293       int ITSZ=sizeof(int);
00294       int buf_size=size/ITSZ;   
00295       int *buf = new int[buf_size];
00296       input.read((char *)buf, (size)); 
00297       int current=0;
00298       while(current<buf_size){
00299         int tid=buf[current++];
00300         int inst_sz=buf[current++];
00301         int inst_end=current+inst_sz*2;
00302         INSTANCES new_tidlist;
00303         while(current <inst_end){
00304           new_tidlist.push_back(INSTANCE(buf[current], (buf[current+1])?true:false));
00305           current= current+2;
00306 
00307         }
00308         _idlist.push_back(make_pair(tid, new_tidlist));
00309       }      
00310       input.clear();
00311       delete [] buf;      
00312     } //read_file
00313     
00314     
00320     template<typename PATTERN>
00321     static VAT** intersection(const VAT* const& vat_i, const VAT* const& vat_j, PAT_SUP** cand_sups, PATTERN**, bool& is_l2) {
00322       VAT** cand_vats = new VAT*[2]; // Max of 2 candidates possible for sequence extension.
00323                        // Determine how many candidate vats.
00324                        // If P->A join P->A then results in only one candidate vat.
00325       bool skip0 = (cand_sups[0] == 0);
00326       bool skip1 = (cand_sups[1] == 0);   
00327 
00328       cand_vats[0] = 0;
00329       cand_vats[1] = 0;
00330 
00331       if (!skip0)
00332         cand_vats[0]=new VAT; 
00333       if(!skip1)
00334         cand_vats[1]=new VAT;
00335       
00336       if (skip0 && skip1){
00337         return cand_vats;
00338       }
00339       
00340       CONST_IT it_i=vat_i->begin(), it_j=vat_j->begin();
00341       
00342       if(it_i==vat_i->end() || it_j==vat_j->end())
00343         return cand_vats;
00344       
00345       while(it_i!=vat_i->end() && it_j!=vat_j->end()) {
00346         if(it_i->first<it_j->first) {
00347           it_i++;
00348           continue;
00349         }
00350         if(it_i->first>it_j->first) {
00351           it_j++;
00352           continue;
00353         }
00354         //execution reaches this point only if both TIDs are equal
00355         INSTANCES seqi_inst, seqj_inst;
00356         bool seqi_ind = false, seqj_ind = false, seqi_emb = false, seqj_emb = false;
00357         
00358         //intersect the instances this tid
00359         INSTANCE::seq_join_seq(it_i->second, it_j->second, seqi_inst, seqj_inst, seqi_ind, 
00360                                seqj_ind, seqi_emb, seqj_emb, skip1, skip0);
00361         
00362         if (!skip0){
00363           if(seqi_ind)
00364             cand_sups[0]->incr_isup();
00365           // else 
00366           if(seqi_emb)
00367             cand_sups[0]->incr_esup();
00368         }
00369         
00370         if(!skip1) {
00371           if(seqj_ind)
00372             cand_sups[1]->incr_isup();
00373           // else
00374           if(seqj_emb)
00375             cand_sups[1]->incr_esup();
00376         }
00377         
00378         if(!seqi_inst.empty() && !skip0)
00379           cand_vats[0]->push_back(make_pair(it_i->first, seqi_inst));
00380         
00381         if(!seqj_inst.empty() && !skip1)
00382           cand_vats[1]->push_back(make_pair(it_i->first, seqj_inst));
00383         
00384         it_i++;
00385         it_j++;
00386         
00387       } //end while (it_i &* it_j)
00388       
00389       //cout << "ISECT I : " << *vat_i << endl;
00390       //cout << "ISECT J : " << *vat_j << endl;
00391       //if (!skip0)
00392       //cout << "CAND A : " << *cand_vats[0] << endl;  
00393       //if (!skip1)
00394       //  cout << "CAND B: " << *cand_vats[1] << endl;        
00395       return cand_vats;
00396     }
00397     friend ostream& operator<< <>(ostream&, const VAT&);
00398 
00399 private:
00400       IDLIST_T _idlist;
00401 };
00402 #endif
00403 

Generated on Wed Jul 26 14:01:08 2006 for DMTL by  doxygen 1.4.7