tree_vat.h

00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *
00006  *  This program is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU General Public License
00008  *  as published by the Free Software Foundation; either version 2
00009  *  of the License, or (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License along
00017  *  with this program; if not, write to the Free Software Foundation, Inc.,
00018  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00019  */
00020 // tree VAT classes
00021 
00022 #ifndef _TREE_VAT_H
00023 #define _TREE_VAT_H
00024 
00025 #include <vector>
00026 #include <utility>
00027 #include <sstream>
00028 
00029 #include "pattern.h"
00030 #include "pat_support.h"
00031 #include "tree_instance.h"
00032 #include "generic_classes.h"
00033 
00034 using namespace std;
00035 
00036 
00037 /*
00038 template<typename PP, typename MP, template <typename> class ALLOC,
00039          template<typename, typename> class ST>
00040 class vat<TREE_PROP, V_Fkk_EMB_MINE_PROP, ALLOC, ST>;
00041 
00042 template<typename PP, typename MP, template <typename> class ALLOC,
00043          template<typename, typename> class ST>
00044 class vat<TREE_PROP, V_Fkk_IND_MINE_PROP, ALLOC, ST>;
00045 template<typename PP, typename MP, template <typename> class ALLOC,
00046          template<typename, typename> class VAT_ST>
00047 ostream& operator<< (ostream& ostr, const vat<TREE_PROP, V_Fkk_EMB_MINE_PROP, ALLOC, VAT_ST>*);
00048 
00049 template<typename PP, typename MP, template <typename> class ALLOC,
00050          template<typename, typename> class VAT_ST>
00051 ostream& operator<< (ostream& ostr, const vat<TREE_PROP, V_Fkk_IND_MINE_PROP, ALLOC, VAT_ST>*);
00052 */
00053 template<typename PP, typename MP, template <typename> class ALLOC,
00054          template<typename, typename> class VAT_ST>
00055 ostream& operator<< (ostream& ostr, const vat<TREE_PROP, V_Fkk_MINE_PROP, ALLOC, VAT_ST>* );
00063 template<typename PP, typename MP, template <typename> class ALLOC, template<typename, typename> class VAT_ST>
00064 class vat<TREE_PROP, V_Fkk_EMB_MINE_PROP, ALLOC, VAT_ST>
00065 {
00066   
00067  public:
00068   template<typename T1, typename A>
00069     class ST: public VAT_ST<T1, A> {};
00070 
00071   typedef pattern_support<V_Fkk_EMB_MINE_PROP > PAT_SUP;
00072   typedef vat<TREE_PROP, V_Fkk_EMB_MINE_PROP, ALLOC, VAT_ST> VAT;
00073   typedef tree_instance<std::vector, PP> INSTANCE;
00074   typedef VAT_ST<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >, ALLOC<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > > > IDLIST_T;
00075   typedef typename IDLIST_T::const_iterator CONST_IT;
00076   typedef typename IDLIST_T::iterator IT;
00077   typedef typename VAT_ST<INSTANCE, ALLOC<INSTANCE> >::const_iterator CONST_INST_IT;
00078   typedef VAT_ST<INSTANCE, ALLOC<INSTANCE> > INSTANCES;
00079   typedef typename VAT_ST<INSTANCE, ALLOC<INSTANCE> >::iterator INST_IT;
00080   typedef typename std::vector<int>::const_iterator CONST_STORE_IT;
00081   typedef typename std::vector<int>::iterator STORE_IT; 
00082 
00083   void* operator new(size_t size) {
00084     ALLOC<VAT> v;
00085     return (v.allocate(size));
00086   } 
00087 
00088   void  operator delete(void *p, size_t size) {
00089     if (p) {
00090       ALLOC<VAT> v;
00091       v.deallocate(static_cast<VAT*> (p), size);
00092     }
00093   }
00094 
00095   
00096   IT begin() {return _idlist.begin();}
00097   CONST_IT begin() const {return _idlist.begin();}
00098   IT end() {return _idlist.end();}
00099   CONST_IT end() const {return _idlist.end();}
00100   
00101   bool empty() const {return _idlist.empty();}
00102   int size() {return _idlist.size();}
00103   
00105   void push_back(const pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >& val) {
00106     _idlist.push_back(val);
00107   }
00108 
00109   unsigned long int byte_size() const{
00110     unsigned long int  b_size=0;
00111     CONST_IT it;
00112     CONST_INST_IT it_inst;
00113     //for each instance we will store 5 integer (1 for the vector size) and the vector
00114     for (it = begin(); it!=end();++it){
00115       b_size+=2*sizeof(int);
00116       for (it_inst=it->second.begin();it_inst!=it->second.end();++it_inst)
00117         b_size+=(it_inst->get_ml_size()+5)*sizeof(int);
00118     }
00119     return b_size;
00120   }
00121 
00122   //writing a VAT to a binary file
00123   void write_file(ostream & output_file) {
00124     int ITSZ=sizeof(int);
00125     ostringstream output;
00126     CONST_IT it;
00127     CONST_INST_IT it_inst;
00128     CONST_STORE_IT it_store;
00129     int tid,inst_sz,ml_sz,_lb,_ub,_depth,_type;
00130     for (it=begin();it!=end();++it){
00131       tid=it->first;
00132       inst_sz=it->second.size();
00133       output.write(reinterpret_cast<const char *>(&tid), ITSZ);
00134       output.write(reinterpret_cast<const char *>(&inst_sz), ITSZ);
00135       for (it_inst =it->second.begin(); it_inst!=it->second.end(); ++it_inst){
00136         ml_sz=it_inst->get_ml_size();
00137         output.write(reinterpret_cast<const char *>(&ml_sz), ITSZ);
00138         for (it_store=it_inst->get_ml_begin();it_store!=it_inst->get_ml_end();++it_store){
00139           output.write(reinterpret_cast<const char *>(&(*it_store)), ITSZ);
00140         }          
00141         _lb=it_inst->get_lb();  
00142         output.write(reinterpret_cast<const char *>(&_lb), ITSZ);
00143         _ub=it_inst->get_ub();  
00144         output.write(reinterpret_cast<const char *>(&_ub), ITSZ);
00145         _depth=it_inst->get_depth();  
00146         output.write(reinterpret_cast<const char *>(&_depth), ITSZ);
00147         _type=(int)it_inst->induced();  
00148         output.write(reinterpret_cast<const char *>(&_type), ITSZ);
00149       } //for it_inst
00150     }//it
00151      output_file.write(output.str().c_str(), output.str().size());
00152   }
00153     
00154   void read_file (istream & input, unsigned long int size) {
00155     //cout<<"reading from the the file"<<endl;
00156     int ITSZ=sizeof(int);
00157     int buf_size=size/ITSZ;   
00158     // cout<<buf_size;
00159     int *buf = new int[buf_size];
00160     input.read((char *)buf, (size));
00161     int current=0;
00162     while(current<buf_size){
00163       int tid=buf[current++];
00164       int inst_sz=buf[current++];
00165       INSTANCES _new_instlist;
00166       while (inst_sz-->0){
00167         int ml_sz=buf[current++];
00168         std::vector<int> ml;
00169         while(ml_sz-->0)
00170           ml.push_back(buf[current++]);
00171         _new_instlist.push_back(INSTANCE(ml, buf[current],buf[current+1],buf[current+2],buf[current+3]));
00172         current=current+4;
00173       }
00174       _idlist.push_back(make_pair(tid,_new_instlist));
00175     } 
00176     input.clear();
00177     delete [] buf;
00178   } 
00179 
00180   
00184   template<class PAT>
00185   static VAT** intersection(const VAT*const& v1, const VAT*const& v2, PAT_SUP** cand_sups, PAT**, bool) {
00186     VAT** cand_vats=new VAT*[2]; // max of 2 candidates possible
00187     bool do_child, do_cousin;
00188     do_cousin=(cand_sups[0]? 1:0);
00189     do_child=(cand_sups[1]? 1:0);
00190     
00191     if(!do_child && !do_cousin) {
00192       cerr<<"tree_vat: neither child nor cousin intersection is valid"<<endl;
00193       exit(0);
00194     }
00195     
00196     if(do_cousin)
00197       cand_vats[0]=new VAT;
00198     
00199     if(do_child)
00200       cand_vats[1]=new VAT;
00201     
00202     CONST_IT it_v1=v1->begin(), it_v2=v2->begin();
00203     
00204     while(it_v1!=v1->end() && it_v2!=v2->end()) {
00205       if(it_v1->first < it_v2->first) {
00206         it_v1++;
00207         continue;
00208       }
00209       
00210       if(it_v1->first > it_v2->first) {
00211         it_v2++;
00212         continue;
00213       }
00214       
00215       // execution reaches here only if both TIDs are equal
00216       CONST_INST_IT inst_it_v1;
00217       CONST_INST_IT inst_it_v2;
00218       
00219       for(inst_it_v1=it_v1->second.begin(); inst_it_v1!=it_v1->second.end(); inst_it_v1++) {
00220         for(inst_it_v2=it_v2->second.begin(); inst_it_v2!=it_v2->second.end(); inst_it_v2++) {
00221           // check if same tree's same inst are being compared
00222           if(v1==v2 && it_v1==it_v2 && inst_it_v1==inst_it_v2)
00223             continue;
00224           
00225           // cousin test
00226           if(do_cousin && cousin_test(*inst_it_v1, *inst_it_v2)) {
00227             INSTANCE new_inst(*inst_it_v2, inst_it_v1->lower());
00228             
00229             // append new pair if this tid did not exist in vat
00230             if(cand_vats[0]->empty() || cand_vats[0]->back().first != it_v1->first) {
00231               VAT_ST<INSTANCE, ALLOC<INSTANCE> > new_st;
00232               new_st.push_back(new_inst);
00233               cand_vats[0]->push_back(make_pair(it_v1->first, new_st));
00234             }
00235             else
00236               cand_vats[0]->back().second.push_back(new_inst);
00237           }//end if cousin_test
00238           
00239           // child test
00240           if(do_child && inst_it_v1->child_test(*inst_it_v2)) {
00241             INSTANCE new_inst(*inst_it_v2, inst_it_v1->lower());
00242             
00243             // append new pair if this tid did not exist in vat
00244             if(cand_vats[1]->empty() || cand_vats[1]->back().first != it_v1->first) {
00245               VAT_ST<INSTANCE, ALLOC<INSTANCE> > new_st;
00246               new_st.push_back(new_inst);
00247               cand_vats[1]->push_back(make_pair(it_v1->first, new_st));
00248             }
00249             else
00250               cand_vats[1]->back().second.push_back(new_inst);
00251             
00252           }//end if child_test
00253           
00254         }//end for inst_it_v2
00255         
00256       } //end for inst_it_v1  
00257       
00258       // advance to next tid
00259       it_v1++;
00260       it_v2++;
00261       
00262     } //end while 
00263     
00264     // copy the support into cand_sups
00265     if(do_cousin)
00266       cand_sups[0]->set_sup(make_pair(cand_vats[0]->size(), 0));
00267     
00268     if(do_child)
00269       cand_sups[1]->set_sup(make_pair(cand_vats[1]->size(), 0));
00270     
00271     return cand_vats;
00272     
00273   }//end intersect()
00274   
00275   friend ostream& operator<< <>(ostream&, const VAT*);
00276   
00277  private:
00278   IDLIST_T _idlist;
00279   
00281   pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >& back() {
00282     if(empty()) {
00283       cerr<<"tree_vat: call to back in empty vat"<<endl;
00284       exit(0);
00285     }
00286     return _idlist.back();
00287   }
00288   
00289 };//end class vat-embedded tree
00290 
00291 template<typename PP, typename MP, template<typename> class VAT_ST>
00292 ostream& operator<< (ostream& ostr, const vat<TREE_PROP, V_Fkk_MINE_PROP, VAT_ST>* tvat) {
00293   typedef vat<TREE_PROP, V_Fkk_MINE_PROP, VAT_ST> VAT;
00294   typename VAT::CONST_IT it=tvat->begin();
00295   
00296   while(it!=tvat->end()) {
00297     ostr<<it->first<<" ";
00298     typename VAT::CONST_INST_IT inst_it=it->second.begin();
00299     while(inst_it!=it->second.end())
00300       ostr<<(*inst_it++)<<" ";
00301     ostr<<endl;
00302     it++;
00303   }
00304   
00305   return ostr;
00306 }//end friend operator<<
00307 
00308 
00316 template<typename PP, typename MP, template <typename> class ALLOC, template<typename, typename> class VAT_ST>
00317 class vat<TREE_PROP, V_Fkk_IND_MINE_PROP, ALLOC, VAT_ST>
00318 {
00319   
00320  public:
00321   template<typename T1, typename A>
00322     class ST: public VAT_ST<T1, A> {};
00323   
00324   typedef pattern_support<V_Fkk_IND_MINE_PROP> PAT_SUP;
00325   
00326   typedef vat<TREE_PROP, V_Fkk_IND_MINE_PROP, ALLOC, VAT_ST> VAT;
00327   typedef tree_instance<std::vector, PP> INSTANCE;
00328   typedef VAT_ST<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >, ALLOC<pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > > > > IDLIST_T;
00329   typedef typename IDLIST_T::const_iterator CONST_IT;
00330   typedef typename IDLIST_T::iterator IT;
00331   typedef typename VAT_ST<INSTANCE, ALLOC<INSTANCE> >::const_iterator CONST_INST_IT;
00332   typedef VAT_ST<INSTANCE, ALLOC<INSTANCE> > INSTANCES;
00333   typedef typename VAT_ST<INSTANCE, ALLOC<INSTANCE> >::iterator INST_IT;
00334   typedef typename std::vector<int>::const_iterator CONST_STORE_IT;
00335   typedef typename std::vector<int>::iterator STORE_IT; 
00336   IT begin() {return _idlist.begin();}
00337   CONST_IT begin() const {return _idlist.begin();}
00338   IT end() {return _idlist.end();}
00339   CONST_IT end() const {return _idlist.end();}
00340   
00341   bool empty() const {return _idlist.empty();}
00342   int size() {return _idlist.size();}
00343   
00345   void push_back(const pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >& val) {
00346     _idlist.push_back(val);
00347   }
00348   
00349           
00350    unsigned long int byte_size() const{
00351       unsigned long int  b_size=0;
00352       CONST_IT it;
00353       CONST_INST_IT it_inst;
00354       //for each instance we will store 5 integer (1 for the vector size) and the vector
00355       for (it = begin(); it!=end();++it){
00356         b_size+=2*sizeof(int);
00357         for (it_inst=it->second.begin();it_inst!=it->second.end();++it_inst)
00358            b_size+=(it_inst->get_ml_size()+5)*sizeof(int);
00359       }
00360       return b_size;
00361   }
00362 
00363   //writing a VAT to a binary file
00364   void write_file(ostream & output) {
00365       //cout<<"Writing to the file"<<endl;
00366       //cout<< *this ;
00367       //cout<<endl;
00368       int ITSZ=sizeof(int);
00369       CONST_IT it;
00370       CONST_INST_IT it_inst;
00371       CONST_STORE_IT it_store;
00372       int tid,inst_sz,ml_sz,_lb,_ub,_depth,_type;
00373       for (it=begin();it!=end();++it){
00374         tid=it->first;
00375         inst_sz=it->second.size();
00376         output.write(reinterpret_cast<const char *>(&tid), ITSZ);
00377         output.write(reinterpret_cast<const char *>(&inst_sz), ITSZ);
00378         for (it_inst =it->second.begin(); it_inst!=it->second.end(); ++it_inst){
00379           ml_sz=it_inst->get_ml_size();
00380           output.write(reinterpret_cast<const char *>(&ml_sz), ITSZ);
00381           for (it_store=it_inst->get_ml_begin();it_store!=it_inst->get_ml_end();++it_store){
00382             output.write(reinterpret_cast<const char *>(&(*it_store)), ITSZ);
00383           }          
00384           _lb=it_inst->get_lb();  
00385           output.write(reinterpret_cast<const char *>(&_lb), ITSZ);
00386           _ub=it_inst->get_ub();  
00387           output.write(reinterpret_cast<const char *>(&_ub), ITSZ);
00388           _depth=it_inst->get_depth();  
00389           output.write(reinterpret_cast<const char *>(&_depth), ITSZ);
00390           _type=(int)it_inst->induced();  
00391           output.write(reinterpret_cast<const char *>(&_type), ITSZ);
00392         } //for it_inst
00393       }//it
00394 
00395   }
00396     
00397   void read_file (istream & input, unsigned long int size) {
00398      //cout<<"reading from the the file"<<endl;
00399      int ITSZ=sizeof(int);
00400      int buf_size=size/ITSZ;   
00401      int *buf = new int[buf_size];
00402      input.read((char *)buf, (size)); 
00403      int current=0;
00404      while(current<buf_size){
00405        int tid=buf[current++];
00406        int inst_sz=buf[current++];
00407        INSTANCES _new_instlist;
00408        while (inst_sz-->0){
00409          int ml_sz=buf[current++];
00410          std::vector<int> ml;
00411          while(ml_sz-->0)
00412            ml.push_back(buf[current++]);        
00413          _new_instlist.push_back(INSTANCE(ml, buf[current],buf[current+1],buf[current+2],buf[current+3]));
00414          current=current+4;
00415           
00416        }
00417        _idlist.push_back(make_pair(tid,_new_instlist));
00418      } 
00419      input.clear();
00420      delete []  buf;
00421   } 
00422 
00423 
00427   template<class PAT>
00428     static VAT** intersection(const VAT*const& v1, const VAT*const& v2, PAT_SUP** cand_sups, PAT**, bool is_l2) {
00429     VAT** cand_vats=new VAT*[2]; // max of 2 candidates possible
00430     bool do_child, do_cousin;
00431     do_cousin=(cand_sups[0]? 1:0);
00432     do_child=(cand_sups[1]? 1:0);
00433     int last_etid_cousin=-1, last_itid_cousin=-1, last_etid_child=-1, last_itid_child=-1;
00434     int d;
00435     bool iflag;
00436     
00437     if(!do_child && !do_cousin) {
00438       cerr<<"tree_vat: neither child nor cousin intersection is valid"<<endl;
00439       exit(0);
00440     }
00441     
00442     if(do_cousin)
00443       cand_vats[0]=new VAT;
00444     
00445     if(do_child)
00446       cand_vats[1]=new VAT;
00447     
00448     CONST_IT it_v1=v1->begin(), it_v2=v2->begin();
00449     
00450     while(it_v1!=v1->end() && it_v2!=v2->end()) {
00451       if(it_v1->first < it_v2->first) {
00452         it_v1++;
00453         continue;
00454       }
00455       
00456       if(it_v1->first > it_v2->first) {
00457         it_v2++;
00458         continue;
00459       }
00460       
00461       // execution reaches here only if both TIDs are equal
00462       CONST_INST_IT inst_it_v1;
00463       CONST_INST_IT inst_it_v2;
00464       
00465       for(inst_it_v1=it_v1->second.begin(); inst_it_v1!=it_v1->second.end(); inst_it_v1++) {
00466         if(!inst_it_v1->induced())
00467           continue;
00468         for(inst_it_v2=it_v2->second.begin(); inst_it_v2!=it_v2->second.end(); inst_it_v2++) {
00469           // check if same tree's same inst are being compared
00470           if(v1==v2 && it_v1==it_v2 && inst_it_v1==inst_it_v2)
00471             continue;
00472           
00473           // cousin test
00474           if(do_cousin && cousin_test(*inst_it_v1, *inst_it_v2)) {
00475             bool ind=(inst_it_v1->induced() && inst_it_v2->induced());
00476             INSTANCE new_inst(*inst_it_v2, inst_it_v1->lower(), inst_it_v2->depth(), ind);
00477             
00478             if(ind) {
00479               if(last_itid_cousin!=it_v1->first) {
00480                 cand_sups[0]->incr_isup();
00481                 last_itid_cousin=it_v1->first;
00482               }
00483             }
00484             else {
00485               if(last_etid_cousin!=it_v1->first) {
00486                 cand_sups[0]->incr_esup();
00487                 last_etid_cousin=it_v1->first;
00488               }
00489             }
00490             
00491             // append new pair if this tid did not exist in vat
00492             if(cand_vats[0]->empty() || cand_vats[0]->back().first != it_v1->first) {
00493               VAT_ST<INSTANCE, ALLOC<INSTANCE> > new_st;
00494               new_st.push_back(new_inst);
00495               cand_vats[0]->push_back(make_pair(it_v1->first, new_st));
00496             }
00497             else
00498               cand_vats[0]->back().second.push_back(new_inst);
00499           } //end if cousin_test
00500           
00501           // child test
00502           if(do_child && inst_it_v1->child_test(*inst_it_v2)) {
00503             
00504             if(is_l2) {
00505               d=inst_it_v1->depth_diff(*inst_it_v2);
00506               iflag=(d==_diff);
00507             }
00508             else {
00509               d=inst_it_v2->depth();
00510               iflag=(inst_it_v1->induced() && (inst_it_v1->depth_diff(*inst_it_v2)==_diff));
00511             }
00512             
00513             if(iflag) {
00514               if(last_itid_child!=it_v1->first) {
00515                 cand_sups[1]->incr_isup();
00516                 last_itid_child=it_v1->first;
00517               }
00518             }
00519             else {
00520               if(last_etid_child!=it_v1->first) {
00521                 cand_sups[1]->incr_esup();
00522                 last_etid_child=it_v1->first;
00523               }
00524             }
00525             
00526             INSTANCE new_inst(*inst_it_v2, inst_it_v1->lower(), d, iflag);
00527             
00528             // append new pair if this tid did not exist in vat
00529             if(cand_vats[1]->empty() || cand_vats[1]->back().first != it_v1->first) {
00530               VAT_ST<INSTANCE, ALLOC<INSTANCE> > new_st;
00531               new_st.push_back(new_inst);
00532               cand_vats[1]->push_back(make_pair(it_v1->first, new_st));
00533             }
00534             else
00535               cand_vats[1]->back().second.push_back(new_inst);
00536             
00537           }//end if child_test
00538           
00539         }//end for inst_it_v2
00540         
00541       }//end for inst_it_v1  
00542       
00543       // advance to next tid
00544       it_v1++;
00545       it_v2++;
00546       
00547     }//end while 
00548     
00549     return cand_vats;
00550     
00551   } //end intersect()
00552   
00553   friend ostream& operator<< <>(ostream&, const VAT*);
00554   
00555  private:
00556   IDLIST_T _idlist;
00557   static const int _diff=1; 
00562   pair<int, VAT_ST<INSTANCE, ALLOC<INSTANCE> > >& back() {
00563     if(empty()) {
00564       cerr<<"tree_vat: call to back in empty vat"<<endl;
00565       exit(0);
00566     }
00567     return _idlist.back();
00568   }
00569   
00570 };//end class vat-induced tree
00571 
00572 #endif

Generated on Wed Jul 26 14:01:08 2006 for DMTL by  doxygen 1.4.7