graph_vat.h

00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *
00006  *  This program is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU General Public License
00008  *  as published by the Free Software Foundation; either version 2
00009  *  of the License, or (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License along
00017  *  with this program; if not, write to the Free Software Foundation, Inc.,
00018  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00019  */
00020 #ifndef _GRAPH_VAT_H
00021 #define _GRAPH_VAT_H
00022 
00023 #include <ext/hash_set>
00024 #include "graph_evat.h"
00025 #include "pattern.h"
00026 #include "generic_classes.h"
00027 #include "typedefs.h"
00028 
00029 //#include "pat_support.h"
00030 
00031 
00032 time_tracker tt_vat, tt_fwd_isect, tt_back_isect;
00033 int fwd_isect_cnt=0;
00034 int back_isect_cnt=0;
00035 
00036 
00037 template<typename PP, typename MP, template <typename> class ALLOC, 
00038          template<typename, typename> class ST>
00039 class vat<GRAPH_PROP, V_Fk1_MINE_PROP, ALLOC, ST>;
00040 
00041 template<typename PP, typename MP, template <typename> class ALLOC,
00042          template<typename, typename> class ST>
00043 ostream& operator<< (ostream& ostr, const vat<PP, MP, ALLOC, ST>* v);
00044 
00046 // NOTE: ST should model a vector, else this class shall not compile
00047 // vectors are used to make the design efficient
00048 
00058 template<typename PP, typename MP, template <typename> class ALLOC, template<typename, typename> class ST>
00059 class vat<GRAPH_PROP, V_Fk1_MINE_PROP, ALLOC, ST>
00060 {
00061  public:
00062 
00063   typedef evat<ALLOC> EVAT;
00064   typedef vat<GRAPH_PROP, V_Fk1_MINE_PROP, ALLOC, ST> VAT;
00065   typedef ST<EVAT, ALLOC<EVAT> > RMP_VATS;
00066   typedef ST<pair<int, RMP_VATS>, ALLOC<pair<int, RMP_VATS> > > GVAT;
00071   typedef HASHNS::hash_set<int, HASHNS::hash<int>, std::equal_to<int>, ALLOC<int> > VSET; 
00074   typedef vector<vector<VSET, ALLOC<VSET> >, ALLOC<vector<VSET, ALLOC<VSET> > > > VSETS; 
00077   typedef typename GVAT::const_iterator CONST_IT;
00078   typedef typename GVAT::iterator IT;
00079   typedef typename ST<EVAT, ALLOC<EVAT> >::const_iterator CONST_EIT;
00080   typedef typename VSETS::iterator VS_IT;
00081   typedef typename VSETS::const_iterator CONST_VS_IT;
00082 
00083   void* operator new(size_t size) {
00084     ALLOC<VAT> v;
00085     return v.allocate(size);
00086   }
00087 
00088   void  operator delete(void *p, size_t size) {
00089     if (p) {
00090       ALLOC<VAT> v;
00091       v.deallocate(static_cast<VAT*> (p), size);
00092     }
00093   }
00094 
00095   IT begin() { 
00096     return _vat.begin();
00097   }
00098   CONST_IT begin() const { 
00099     return _vat.begin();
00100   }
00101   IT end() { 
00102     return _vat.end();
00103   }
00104   CONST_IT end() const { 
00105     return _vat.end();
00106   }
00107 
00108   VS_IT begin_v() { 
00109     return _vids.begin();
00110   }
00111   CONST_VS_IT begin_v() const { 
00112     return _vids.begin();
00113   }
00114   VS_IT end_v() { 
00115     return _vids.end();
00116   }
00117   CONST_VS_IT end_v() const { 
00118     return _vids.end();
00119   }
00120 
00121   friend ostream& operator<< <>(ostream&, const VAT*);
00122 
00123   int size() const { 
00124     return _vat.size();
00125   }
00126 
00127   bool empty() const { 
00128     return _vat.empty();
00129   }
00130 
00131   const pair<int, ST<EVAT, ALLOC<EVAT> > >& back() const { 
00132     return _vat.back();
00133   }
00134 
00135   void insert_occurrence_tid(const int& tid, const pair<int, int>& new_occurrence) {
00136     ST<EVAT, ALLOC<EVAT> > new_evats;
00137     evat<ALLOC> new_evat;
00138     new_evat.push_back(new_occurrence);
00139     new_evats.push_back(new_evat);
00140     _vat.push_back(make_pair(tid, new_evats));
00141   }//insert_new_occurrence()
00142 
00143 
00144   void insert_occurrence_evat(const pair<int, int>& new_occurrence) {
00145     evat<ALLOC> new_evat;
00146     new_evat.push_back(new_occurrence);
00147     _vat.back().second.push_back(new_evat);
00148   }//insert_occurrence_evat()
00149 
00150   void insert_occurrence(const pair<int, int>& new_occurrence) {
00151     _vat.back().second.back().push_back(new_occurrence);
00152   }
00153 
00154 
00155   void insert_vid_hs(const int& vid) { 
00156     VSET vs;
00157     vs.insert(vid);
00158     _vids.back().push_back(vs);
00159   }
00160 
00161 
00162   void insert_vid(const int& vid) { 
00163     _vids.back().back().insert(vid);
00164   }
00165 
00166 
00167   void insert_vid_tid(const int& vid) {
00168     VSET vset;
00169     vset.insert(vid);
00170     vector<VSET, ALLOC<VSET> > vsets;
00171     vsets.push_back(vset);
00172     _vids.push_back(vsets);      
00173   }//insert_vid_tid()
00174 
00175 
00176   void copy_vats(const pair<int, ST<evat<ALLOC>, ALLOC<evat<ALLOC> > > >& v1, const int& offset, const int& sz, bool swap=0) {
00177     int i;
00178     for(i=0; i<sz; i++)
00179       if(swap)
00180         _vat.back().second[i].push_back(make_pair(v1.second[i][offset].second, v1.second[i][offset].first));
00181       else
00182         _vat.back().second[i].push_back(v1.second[i][offset]);
00183   }//copy_vats()
00184   
00185 
00186   void copy_vats_tid(const pair<int, ST<evat<ALLOC>, ALLOC<evat<ALLOC> > > >& v1, const int& offset, const int& sz, bool swap=0) {
00187     int i;
00188     ST<EVAT, ALLOC<EVAT> > new_entry;
00189   
00190     for(i=0; i<sz; i++) {
00191       evat<ALLOC> tmp_evat;
00192       if(swap)
00193         tmp_evat.push_back(make_pair(v1.second[i][offset].second, v1.second[i][offset].first));
00194       else
00195         tmp_evat.push_back(v1.second[i][offset]);
00196         new_entry.push_back(tmp_evat);
00197     }
00198     _vat.push_back(make_pair(v1.first, new_entry));
00199   }//copy_vats_tid()
00200 
00201 
00202   void copy_vids_hs(const VSET& v1_vids) {
00203     
00204     VSET vs;
00205     typename VSET::const_iterator it;
00206     for(it=v1_vids.begin(); it!=v1_vids.end(); it++)
00207       vs.insert(*it);
00208     _vids.back().push_back(vs);
00209   }//copy_vids()
00210 
00211 
00212   void copy_vids_tid(const VSET& v1_vids) {
00213     VSET vset;
00214     typename VSET::const_iterator it;
00215     for(it=v1_vids.begin(); it!=v1_vids.end(); it++)
00216       vset.insert(*it);
00217     vector<VSET, ALLOC<VSET> > vsets;
00218     vsets.push_back(vset);
00219     _vids.push_back(vsets);
00220   }//copy_vids_tid()
00221 
00222 
00225   // NOTE: only one candidate is generated in a FkxF1 join of graphs, 
00226   // hence only the first value in cand_pats should be inspected
00227   template<typename PATTERN, typename PAT_SUP>
00228   static VAT** intersection(const VAT* v1, const VAT* v2, PAT_SUP** cand_sups, PATTERN** cand_pats, bool) {
00229 #ifdef PRINT
00230     cout<<"VAT intersection entered with v1="<<v1<<endl;
00231     cout<<"v2="<<v2<<endl;
00232 #endif
00233 
00234     tt_vat.start();
00235 
00236     // How it works
00237     // 1. determine which edge of v1 is being extended and get its evat1
00238     // 2. determine right-most-path of candidate (rmp)
00239     // 3. find common occurrences in v2 and evat1
00240     // 4. copy these common occurrences to new_vat, and also copy evats in 
00241     // rmp corresponding to these common occurrences
00242 
00243     VAT* cand_vat=new VAT;
00244     bool is_fwd;
00245     bool is_fwd_chain=false; // flag to denote whether edge appended by 
00246     // intersection is at the root (which is when flag=0)
00247     bool l2_eq=(cand_pats[0]->size()==3) && (cand_pats[0]->label(0)==cand_pats[0]->label(1)); // special case in evat intersection for L-2 with first edge 
00248     // with equal vertex labels
00249 
00250     int rmp_index; // index of last vid on rmp common to cand_pat and its 
00251     // parent's rmp
00252     int new_edge_state=-1; // flag to denote if the new edge to be added has 
00253     // same labeled vertices, of the form A-A (flag=0); or is of the form 
00254     // A-B (flag=1); or is not canonical at all, of the form B-A (flag=2).
00255     // evat intersection needs to take this into account
00256 
00257     int rvid=cand_pats[0]->rmost_vid();
00258     int edge_vid=-1; // vid of the other vertex (other than rvid) connected 
00259     // to rvid as to form the new edge
00260 
00261     typename PATTERN::CONST_EIT_PAIR eit_p=cand_pats[0]->out_edges(rvid);
00262     int back_idx=-1; // this is used only for back extensions. It holds the 
00263     // index of edge_vid in rmp of cand_pats[0]
00264 
00265     if(eit_p.second-eit_p.first>1)
00266       is_fwd=false; // last edge was fwd edge only if outdegree of last vid=1
00267     else
00268       is_fwd=true;
00269 
00270     // get other vertex's vid
00271     if(is_fwd) {
00272       edge_vid=eit_p.first->first;
00273       if(!edge_vid) // rvid is attached to the root
00274         is_fwd_chain=0;
00275       else
00276         is_fwd_chain=1;
00277     }
00278     else {
00279       //prev_vid=eit_p.first->first;
00280       eit_p.second--;
00281       edge_vid=eit_p.second->first;
00282 
00285       // TO DO: this is currently a linear search through rmp, is there a 
00286       // more efficient way??
00287       const typename PATTERN::RMP_T& cand_rmp=cand_pats[0]->rmost_path();
00288       for(unsigned int i=0; i<cand_rmp.size(); i++) {
00289         if(cand_rmp[i]==edge_vid) {
00290           back_idx=i;
00291           break;
00292         }
00293       }
00294 
00295       if(back_idx==-1) {
00296         cerr<<"vat.intersect: back_idx not found for edge_vid="<<edge_vid<<" in rmp.size="<<cand_rmp.size()<<endl;
00297         tt_vat.stop();
00298         return 0;
00299       }
00300     }
00301 
00302     // now determine which of v1's evats need to be copied into cand_vat: 
00303     // if is_fwd, only evats till edge_vid need be copied
00304     // else all evats need to be copied
00305 
00306     if(is_fwd)
00307       rmp_index=cand_pats[0]->rmp_size()-2;
00308     else
00309       rmp_index=cand_pats[0]->rmp_size()-1;
00310 
00311     if(cand_pats[0]->label(edge_vid)==cand_pats[0]->label(rvid))
00312       new_edge_state=0;
00313     else
00314       if(is_fwd)
00315         new_edge_state=(cand_pats[0]->label(edge_vid)>cand_pats[0]->label(rvid))+1;
00316       else
00317         new_edge_state=(cand_pats[0]->label(rvid)>cand_pats[0]->label(edge_vid))+1;
00318       
00319     CONST_IT it_v1=v1->begin();
00320     CONST_IT it_v2=v2->begin();
00321       
00322     // find a common TID
00323     while(it_v1!=v1->end() && it_v2!=v2->end()) {
00324       if(it_v1->first<it_v2->first) {
00325         it_v1++;
00326         continue;
00327       }
00328   
00329       if(it_v1->first>it_v2->first) {
00330         it_v2++;
00331         continue;
00332       }
00333   
00334       // execution reaches here only if both TIDs are equal
00335       const EVAT* v1_evat;
00336       const EVAT* v2_evat=&(it_v2->second[0]);
00337   
00338       if(!is_fwd)    
00339         v1_evat=&(it_v1->second[rmp_index-1]);
00340       else {
00341         if(is_fwd_chain)
00342           v1_evat=&((it_v1->second)[rmp_index-1]);
00343         else
00344           v1_evat=&((it_v1->second)[0]);
00345       }
00346   
00348       // the intersection routines are expected to fill in the new evat in 
00349       // cand_vat
00350       if(is_fwd) {
00351         tt_fwd_isect.start();
00352         evat<ALLOC>::template fwd_intersect<ST, VAT, ALLOC>(*v1, *v1_evat, *v2_evat, *cand_vat, 
00353                                          is_fwd_chain, rmp_index, new_edge_state, 
00354                                          it_v1-v1->begin(), l2_eq);
00355         tt_fwd_isect.stop();
00356         fwd_isect_cnt++;
00357       }
00358       else {
00359         tt_back_isect.start();
00360         evat<ALLOC>::template back_intersect<ST, VAT, ALLOC>(*v1, *v1_evat, *v2_evat, *cand_vat, back_idx, new_edge_state, it_v1-v1->begin());
00361         tt_back_isect.stop();
00362         back_isect_cnt++;
00363       }
00364 
00365       it_v1++;
00366       it_v2++;
00367     }//end while
00368 
00369     cand_sups[0]->set_sup(make_pair(cand_vat->size(), 0));
00370 
00371     VAT** cand_vats=new VAT*;
00372     cand_vats[0]=cand_vat;
00373     tt_vat.stop();
00374     return cand_vats;
00375   }//end intersect()
00376 
00377 
00378     unsigned long int byte_size() const{
00379       unsigned long int  b_size=0;
00380       CONST_IT it;
00381       CONST_EIT eit;
00382       b_size += sizeof(int);
00383       for (it = begin(); it!=end();++it){
00384         b_size += 2*sizeof(int); //tid, number of evats
00385 
00386         for (eit = it->second.begin(); eit != it->second.end(); eit++){
00387           b_size+=(1*sizeof(int))+eit->byte_size(); // n, e[0], e[1] .. e[n]
00388         }
00389       }
00390       // (VIDS GOES HERE)
00391       typename VSETS::const_iterator vit;
00392       b_size += sizeof(int);
00393       for (vit = begin_v(); vit != end_v(); vit++){
00394         b_size += sizeof(int);
00395         typename vector<VSET>::const_iterator vvsetit;
00396         for (vvsetit=vit->begin(); vvsetit!=vit->end(); vvsetit++){
00397           b_size += (vvsetit->size()+1) * sizeof(int);
00398         }//vvsetit
00399       } //vit
00400       return b_size;
00401     }
00402 
00403     void print(){
00404       int ITSZ=sizeof(int);
00405       CONST_IT it;
00406       CONST_EIT eit;
00407       int tid,evat_n,evat_sz;
00408       int gvat_sz=_vat.size();
00409       cout << "size:" <<gvat_sz << endl;
00410       for (it=begin();it!=end();++it){
00411         tid=it->first;
00412         evat_n=it->second.size();
00413         cout << tid << " " << evat_n << endl;
00414         for (eit=it->second.begin(); eit!=it->second.end(); ++eit){
00415           evat_sz = eit->size();
00416           cout << evat_sz << endl;
00417           eit->print();
00418         } //for eit
00419       }//it
00420       // Writing _vids goes here.
00421       typename VSETS::iterator vit;
00422       int vvsetn = _vids.size();
00423       cout << "Vids size: " << vvsetn << endl;
00424       for (vit=begin_v(); vit!=end_v(); vit++){
00425         typename vector<VSET>::iterator vvsetit;
00426         int vsetn = vit->size();
00427         cout << vsetn << endl;
00428         for (vvsetit=vit->begin(); vvsetit!=vit->end(); vvsetit++){
00429           typename VSET::iterator vsetit;
00430           int n = vvsetit->size();
00431           cout << "- " << n << endl;
00432           for (vsetit=vvsetit->begin(); vsetit!=vvsetit->end(); vsetit++){
00433             int v=*vsetit;
00434             cout << "-- " << v << endl;
00435           }//vsetit
00436         }//vvsetit
00437       }//vit
00438 
00439     }
00440 
00441     //writing a VAT to a binary file
00442     void write_file(ostream & output) const{
00443       //ostringstream output;
00444       int ITSZ=sizeof(int);
00445       CONST_IT it;
00446       CONST_EIT eit;
00447       int tid,evat_n,evat_sz;
00448       int gvat_sz=_vat.size();
00449       output.write(reinterpret_cast<const char *>(&gvat_sz), ITSZ);
00450       for (it=begin();it!=end();++it){
00451         tid=it->first;
00452         evat_n=it->second.size();
00453         output.write(reinterpret_cast<const char *>(&tid), ITSZ);
00454         output.write(reinterpret_cast<const char *>(&evat_n), ITSZ);
00455         for (eit=it->second.begin(); eit!=it->second.end(); ++eit){
00456           evat_sz = eit->size();
00457           output.write(reinterpret_cast<const char *>(&evat_sz), ITSZ);
00458           eit->write_file(output);
00459         } //for eit
00460       }//it
00461       // Writing _vids goes here.
00462       typename VSETS::const_iterator vit;
00463       int vvsetn = _vids.size();
00464       output.write(reinterpret_cast<const char *>(&vvsetn), ITSZ);
00465       for (vit=begin_v(); vit!=end_v(); vit++){
00466         typename vector<VSET>::const_iterator vvsetit;
00467         int vsetn = vit->size();
00468         output.write(reinterpret_cast<const char *>(&vsetn), ITSZ);
00469         for (vvsetit=vit->begin(); vvsetit!=vit->end(); vvsetit++){
00470           typename VSET::iterator vsetit;
00471           int n = vvsetit->size();
00472           output.write(reinterpret_cast<const char *>(&n), ITSZ);
00473           for (vsetit=vvsetit->begin(); vsetit!=vvsetit->end(); vsetit++){
00474             int v=*vsetit;
00475             output.write(reinterpret_cast<const char *>(&v), ITSZ);
00476           }//vsetit
00477         }//vvsetit
00478       }//vit
00479       //output_file.write(output.str().c_str(), output.str().size());
00480     } //end write_file
00481     
00482     void read_file (istream & input, unsigned long int size) {
00483       int ITSZ=sizeof(int);
00484       int buf_size=size/ITSZ;   
00485       int *buf = new int[buf_size];
00486       input.read((char *)buf, (size)); 
00487       int current=0;
00488       int vats_size=buf[current++], vats_seen=0;
00489       while(vats_seen++ < vats_size){
00490         int tid=buf[current++];
00491         int evat_n=buf[current++];
00492         int evats_seen=0;
00493         RMP_VATS edges;
00494         while(evats_seen++ < evat_n){
00495           evat<ALLOC> new_evat;
00496           int evat_sz=buf[current++];
00497           while(evat_sz-- > 0){
00498             int f1, f2;
00499             f1 = buf[current++];
00500             f2 = buf[current++];
00501             new_evat.push_back(make_pair(f1, f2));
00502           }
00503           edges.push_back(new_evat);
00504         }
00505         _vat.push_back(make_pair(tid, edges));
00506       }
00507       //Reading _vids goes here.
00508 
00509       int vids_size=buf[current++], vids_seen=0;
00510       while(vids_seen++ < vids_size){
00511         vector <VSET> new_vsetv;
00512         int vsetv_n=buf[current++];
00513         int vsets_seen=0;
00514         while(vsets_seen++ < vsetv_n){
00515           VSET new_vset;
00516           int vset_sz=buf[current++];
00517           while(vset_sz-- > 0){
00518             int i = buf[current++];
00519             new_vset.insert(i);
00520           } // evat_sz
00521           new_vsetv.push_back(new_vset);
00522         }//vsets_seen
00523         _vids.push_back(new_vsetv);
00524       }//vids_seen
00525 
00526       //this->print();
00527       input.clear();
00528       delete [] buf;
00529     } //read_file
00530 
00532   bool is_new_vertex(const int& vid, const int& tid, const int& offset) const {
00533     if(_vids[tid][offset].find(vid)==_vids[tid][offset].end()) {
00534       return true;
00535     }
00536     return false;
00537   }//end is_new_vertex()
00538 
00539 
00540   friend class evat<ALLOC>; // required for intersect functions in evat to work
00541   
00542  private:
00543   GVAT _vat;
00544   VSETS _vids;
00545 
00546 }; //end class vat for graphs
00547 
00548 
00549 template<typename PP, typename MP, template <typename> class ALLOC, template<typename, typename> class ST>
00550   ostream& operator<< (ostream& ostr, const vat<PP, MP, ALLOC, ST>* v) {
00551   typename vat<PP, MP, ALLOC, ST>::CONST_IT it;
00552   typename vat<PP, MP, ALLOC, ST>::RMP_VATS::const_iterator rit;
00553 
00554   ostr<<"VAT:"<<endl;
00555   for(it=v->begin(); it!=v->end(); it++) {
00556     ostr<<"tid="<<it->first<<endl;
00557     for(rit=it->second.begin(); rit!=it->second.end(); rit++)
00558       ostr<<*rit<<endl;
00559   }
00560 
00561   // These lines print out the vid-sets
00562   typename vat<PP, MP, ALLOC, ST>::VSETS::const_iterator vit1;
00563   // typename vector<typename vat<PP, MP, ALLOC, ST>::VSET, ALLOC<typename vat<PP, MP, ALLOC, ST>::VSET> >::const_iterator vit2;
00564   typename vector<typename vat<PP, MP, ALLOC, ST>::VSET >::const_iterator vit2;
00565   typename vat<PP, MP, ALLOC, ST>::VSET::const_iterator vit3;
00566 
00567   ostr<<"Vertices are"<<endl;
00568   for(vit1=v->begin_v(), it=v->begin(); vit1!=v->end_v(); vit1++, it++) {
00569     ostr<<"tid="<<it->first<<endl;
00570     for(vit2=vit1->begin(); vit2!=vit1->end(); vit2++) {
00571       for(vit3=vit2->begin(); vit3!=vit2->end(); vit3++)
00572         ostr<<*vit3<<" ";
00573         ostr<<endl;
00574     }
00575   }
00576       
00577   return ostr;
00578 }//operator<< for vat*
00579 
00580 #endif

Generated on Wed Jul 26 14:01:08 2006 for DMTL by  doxygen 1.4.7