graph_cand_gen.h

00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *
00006  *  This program is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU General Public License
00008  *  as published by the Free Software Foundation; either version 2
00009  *  of the License, or (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License along
00017  *  with this program; if not, write to the Free Software Foundation, Inc.,
00018  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00019  */
00020 #ifndef _GRAPH_CAND_GEN_H
00021 #define _GRAPH_CAND_GEN_H
00022 
00023 #include "level_one_hmap.h"
00024 #include "typedefs.h"
00025 
00026 extern unsigned long int freq_pats_count;
00027 extern bool print;
00028 
00029 // candidate generation for graphs
00030 template<typename PP, class MP, class PAT_ST, template<class, typename, typename, template <typename> class > class CC, template <typename> class ALLOC, class EDGE_MAP, class SM_TYPE>
00031 void cand_gen(GRAPH_PATTERN* pat, const EDGE_MAP& emap, const int& minsup, pat_fam<GRAPH_PATTERN >& freq_pats, 
00032               count_support<GRAPH_PROP, V_Fk1_MINE_PROP, PAT_ST, CC, ALLOC, SM_TYPE>& cs) {
00033 
00034 #ifdef PRINT
00035   cout<<"In call to cand_gen"<<endl;  
00036 #endif
00037   typedef pat_fam<GRAPH_PATTERN> PAT_FAM;
00038  
00040   back_extensions(pat, emap, minsup, freq_pats, cs);
00041 
00043   fwd_extensions(pat, emap, minsup, freq_pats, cs);
00044 
00045 }//end cand_gen()
00046 
00047 
00048 template<typename PP, class MP, class PAT_ST, template<class, typename, typename, template <typename> class> class CC, template <typename> class ALLOC, class EDGE_MAP, class SM_TYPE>
00049 void back_extensions(GRAPH_PATTERN* pat, const EDGE_MAP& emap, const int& minsup, pat_fam<GRAPH_PATTERN >& freq_pats, 
00050                      count_support<GRAPH_PROP, V_Fk1_MINE_PROP, PAT_ST, CC, ALLOC, SM_TYPE>& cs) {
00051 
00052   if(pat->rmp_size()<3) // back extensions only for graphs with rmost path 
00053     return;             // with atleast two edges
00054 #ifdef PRINT
00055   cout<<"Back extension for "<<pat<<endl;
00056 #endif
00057   typedef pat_fam<GRAPH_PATTERN> PAT_FAM;
00058 
00059   const typename GRAPH_PATTERN::VERTEX_T& last_v=pat->rmost_vertex();
00060   const typename GRAPH_PATTERN::RMP_T& rmp=pat->rmost_path();
00061   typename GRAPH_PATTERN::RMP_T::const_iterator rmp_it;
00062   typename GRAPH_PATTERN::EDGE_T e;
00063   GRAPH_PATTERN* edge=0;
00064   GRAPH_PATTERN* cand_pat=0;
00065   int rvid=pat->rmost_vid();
00066   int vid;
00067 
00068   // determine vid on rmp from where to start adding back edges
00069   rmp_it=rmp.end()-3;
00070   while(true) {
00071     if(rmp_it<rmp.begin())
00072       break;
00073     if(pat->get_out_edge(rvid, *rmp_it, e)) {
00074       rmp_it++;
00075       break;
00076     }
00077     rmp_it--;
00078   }
00079 
00080   if(rmp_it<rmp.begin())
00081     rmp_it++;
00082 
00083   while(rmp_it<rmp.end()-2) {
00084     vid=*rmp_it;
00085     const typename GRAPH_PATTERN::VERTEX_T& back_v=pat->label(vid);
00086 
00087     const typename EDGE_MAP::LABELS& lbls=emap.get_labels(last_v, back_v);
00088     typename EDGE_MAP::CONST_LIT lit=lbls.begin();
00089 
00090     // get all possible labels for this back edge
00091     while(lit!=lbls.end()) {
00092 
00093       // construct new candidate with this label
00094       cand_pat=pat->clone();
00095       cand_pat->add_out_edge(rvid, vid, *lit);
00096       cand_pat->add_out_edge(vid, rvid, *lit);
00097 
00098       if(!check_isomorphism(cand_pat)) {
00099         delete cand_pat;
00100         cand_pat=0;
00101         lit++;
00102         continue;
00103       }
00104 
00105       // create the one edged-pattern
00106       edge=new GRAPH_PATTERN;
00107       if(last_v<back_v)
00108         make_edge(edge, last_v, back_v, *lit);
00109       else
00110         make_edge(edge, back_v, last_v, *lit);
00111 #ifdef PRINT
00112       cout<<"Trying with back edge="<<edge<<endl;
00113       cout<<"New candidate="<<cand_pat<<endl;
00114 #endif
00115       // count support
00116       cs.count(pat, edge, &cand_pat, minsup, 1);
00117 
00118       delete edge;
00119       edge=0;
00120 
00121       // recursive call for frequent graph
00122       if(cand_pat->is_valid(minsup)) {
00123         //freq_pats.push_back(cand_pat);
00124         freq_pats_count++;
00125         if(print)
00126           cout << cand_pat;
00127         cand_gen(cand_pat, emap, minsup, freq_pats, cs);
00128         cs.delete_vat(cand_pat);
00129       }
00130       // else {
00131       delete cand_pat;
00132       cand_pat=0;
00133       // }
00134 
00135       lit++;
00136     }//end while lit
00137 
00138     rmp_it++;
00139   }//end while rmp_it
00140 
00141 }//end back_extensions()
00142 
00143 
00144 template<typename PP, class MP, class PAT_ST, template<typename, typename, typename, template <typename> class > class CC, template <typename> class ALLOC, class EDGE_MAP, class SM_TYPE>
00145 void fwd_extensions(GRAPH_PATTERN* pat, const EDGE_MAP& emap, const int& minsup, pat_fam<GRAPH_PATTERN >& freq_pats, 
00146                     count_support<GRAPH_PROP, V_Fk1_MINE_PROP, PAT_ST, CC, ALLOC, SM_TYPE>& cs) {
00147 #ifdef PRINT
00148   cout<<"Fwd extension for "<<pat<<endl;
00149 #endif
00150   typedef pat_fam<GRAPH_PATTERN> PAT_FAM;
00151 
00152   const typename GRAPH_PATTERN::RMP_T& rmp=pat->rmost_path();
00153   typename GRAPH_PATTERN::RMP_T::const_iterator rmp_it;
00154   
00155   typename EDGE_MAP::CONST_NIT nit;
00156   typename EDGE_MAP::CONST_LIT lit;  
00157   GRAPH_PATTERN* edge=0;
00158   GRAPH_PATTERN* cand_pat=0;
00159   int lvid;
00160   
00161   for(rmp_it=rmp.end()-1; rmp_it>=rmp.begin(); rmp_it--) {
00162     const typename GRAPH_PATTERN::VERTEX_T& src_v=pat->label(*rmp_it);
00163     const typename EDGE_MAP::NEIGHBORS& nbrs=emap.get_neighbors(src_v);
00164 
00165     for(nit=nbrs.begin(); nit!=nbrs.end(); nit++) {
00166       const typename GRAPH_PATTERN::VERTEX_T& dest_v=nit->first;
00167 
00168       for(lit=nit->second.begin(); lit!=nit->second.end(); lit++) {
00169         // construct new candidate
00170         cand_pat=pat->clone();
00171         lvid=cand_pat->add_vertex(dest_v);
00172         cand_pat->add_out_edge(*rmp_it, lvid, *lit);
00173         cand_pat->add_out_edge(lvid, *rmp_it, *lit);
00174 #ifdef PRINT
00175         // cout<<"New candidate="<<cand_pat<<endl;
00176 #endif
00177         if(!check_isomorphism(cand_pat)) {
00178 #ifdef PRINT
00179           cout<<"New candidate="<<cand_pat<<endl;
00180           cout<<"candidate did NOT pass isomorphism"<<endl;
00181 #endif
00182           delete cand_pat;
00183           cand_pat=0;
00184           continue;
00185         } 
00186         else {
00187 #ifdef PRINT
00188           cout<<"New candidate="<<cand_pat<<endl;
00189           cout<<"candidate passed isomorphism"<<endl;
00190 #endif
00191         }
00192 
00193         // create edge
00194         edge=new GRAPH_PATTERN;
00195         if(src_v<dest_v)
00196           make_edge(edge, src_v, dest_v, *lit);
00197         else
00198           make_edge(edge, dest_v, src_v, *lit);
00199 
00200 #ifdef PRINT
00201         cout<<"Trying with edge="<<edge<<endl;
00202 #endif
00203         // count support
00204         cs.count(pat, edge, &cand_pat, minsup, 1);
00205      
00206         delete edge;
00207         edge=0;
00208       
00209         // recursive call for frequent graph
00210         if(cand_pat->is_valid(minsup)) {
00211           // freq_pats.push_back(cand_pat);
00212           if(print)
00213             cout << cand_pat;
00214           freq_pats_count++;
00215           cand_gen(cand_pat, emap, minsup, freq_pats, cs);
00216           cs.delete_vat(cand_pat);
00217         }
00218         // else {
00219         delete cand_pat;
00220         cand_pat=0;
00221         // }
00222 
00223       }//end for lit
00224     }//end for nit      
00225   }//end for rmp_it
00226 
00227 }//end fwd_extensions()
00228 
00229 
00233 // it is assumed that v1<=v2 for VAT intersection to work, this function  does 
00234 // not verify it;
00235 // in particular, storage_manager for a single undirected edge (A-B) stores it 
00236 // as canonical code A-B and not B-A.
00237 
00238 template<typename pattern, typename V_T, typename E_T>
00239 void make_edge(pattern* p, const V_T& v1,const V_T& v2, const E_T& e) {
00240   p->add_vertex(v1);
00241   p->add_vertex(v2);
00242   p->add_out_edge(0, 1, e);
00243   p->add_out_edge(1, 0, e);
00244   p->init_canonical_code(five_tuple<V_T, E_T>(0, 1, p->label(0), e, p->label(1)));
00245 } //end make_edge()
00246 
00247 #endif

Generated on Wed Jul 26 14:01:08 2006 for DMTL by  doxygen 1.4.7