tree_cand_gen.h

00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *
00006  *  This program is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU General Public License
00008  *  as published by the Free Software Foundation; either version 2
00009  *  of the License, or (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License along
00017  *  with this program; if not, write to the Free Software Foundation, Inc.,
00018  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00019  */
00020 // candidate generation for trees
00021 
00022 #ifndef _TREE_CAND_GEN_H
00023 #define _TREE_CAND_GEN_H
00024 
00025 #include <algorithm>
00026 
00027 #include "tree_iso_check.h"
00028 #include "tree_operators.h"
00029 #include "typedefs.h"
00030 
00031 extern unsigned long freq_pats_count;
00032 extern bool print;
00033 
00034 template<class PP, class MP, class PAT_ST, template<typename, typename, typename, template <typename> class> class CC, 
00035 template<typename> class ALLOC, class SM_TYPE>
00036 
00037 void cand_gen(const pat_fam<TREE_PATTERN >& Fk, const pat_fam<TREE_PATTERN >&, const int& minsup, 
00038               pat_fam<TREE_PATTERN >& freq_pats, count_support<TREE_PROP, V_Fkk_MINE_PROP, PAT_ST, CC, ALLOC, SM_TYPE>& cs) {
00039   
00040   typedef pat_fam<TREE_PATTERN> PAT_FAM;
00041   typedef typename PAT_FAM::CONST_IT PF_CIT;
00042   
00043   PF_CIT it_i, it_j;
00044   const int num_cands=2;
00045   
00046   for(it_i=Fk.begin(); it_i!=Fk.end(); it_i++) {
00047     
00048     PAT_FAM pat_fam_i;
00049     TREE_PATTERN* pat_i=*it_i;
00050     
00051     // check if this pattern should be extended
00052     if(!pat_i->is_freq(minsup) || !pat_i->is_canonical())
00053       continue;
00054     
00055     int pos_i=compute_pos(pat_i);
00056     for(it_j=Fk.begin(); it_j!=Fk.end(); it_j++) {
00057       
00058       TREE_PATTERN* pat_j=*it_j;
00059       int i;
00060       
00061       // check if valid candidate can result
00062       int pos_j=compute_pos(pat_j);
00063       
00064       if(pos_i<pos_j) {
00065         continue;
00066       }
00067       
00068       // generate new candidates
00069       TREE_PATTERN** cand_pats=join(pat_i, pat_j, pos_i, pos_j);
00070       
00071       for(i=0; i<num_cands; i++)
00072         if(cand_pats[i])
00073           check_isomorphism(cand_pats[i]);
00074       
00075       // count support of candidates
00076       cs.count(pat_i, pat_j, cand_pats, minsup, num_cands);
00077       
00078       // discard infrequent patterns
00079       for(i=0; i<num_cands; i++) {
00080         if(!cand_pats[i])
00081           continue;
00082         
00083         if(!cand_pats[i]->is_valid(minsup)) {
00084           delete cand_pats[i];
00085           cand_pats[i]=0;
00086         }
00087         else {
00088           pat_fam_i.push_back(cand_pats[i]);
00089           if(cand_pats[i]->is_canonical() && cand_pats[i]->is_freq(minsup)) {
00090             // freq_pats.push_back(cand_pats[i]);
00091             freq_pats_count++;
00092             if(print)
00093               cout << cand_pats[i];
00094           }
00095         }
00096       }
00097       
00098       // cleanup for this iteration
00099       delete[] cand_pats;
00100       
00101     }//end for it_j
00102     
00103     // recursive call
00104     if(!pat_fam_i.empty())
00105       cand_gen(pat_fam_i, pat_fam_i, minsup, freq_pats, cs);
00106     
00107   }//end for it_i
00108   
00109   // cleanup VATs for entire equivalence class
00110   for(it_i=Fk.begin(); it_i!=Fk.end(); it_i++) {
00111     cs.delete_vat(*it_i);
00112     TREE_PATTERN* pat_i=*it_i;
00113     if(!pat_i->is_freq(minsup) || !pat_i->is_canonical())
00114       delete pat_i;
00115   }
00116 }//end cand_gen()
00117 
00118 template<class PP, class MP, class PAT_ST, template<typename, typename, typename, template <typename> class > class CC, template <typename> class ALLOC>
00119 TREE_PATTERN** join(const TREE_PATTERN* pat_i, TREE_PATTERN* pat_j, const int& pos_i, const int& pos_j) {
00120   // NOTE: follows scheme of TreeMinerV, perhaps SLEUTH may be more elegant ??
00121   
00122   // NOTE: convention is that cand_pats[0] is cousin extension, 
00123   // cand_pats[1] is child extension
00124   // This convention is used by vat::intersect
00125   const typename TREE_PATTERN::EDGE_T E_LBL=0; //default, defunt edge label for trees
00126   TREE_PATTERN** cand_pats=new TREE_PATTERN*[2];
00127   
00128   if(pos_i==pos_j && pat_i->size()>1) {
00129     // case Ia of paper
00130     // construct first candidate - cousin extension
00131     cand_pats[0]=pat_i->clone();
00132     const typename TREE_PATTERN::VERTEX_T& new_v=pat_j->rmost_vertex();
00133     int rvid=cand_pats[0]->add_vertex(new_v);
00134     cand_pats[0]->add_out_edge(pos_j, rvid, E_LBL);
00135     cand_pats[0]->add_in_edge(rvid, pos_j, E_LBL);
00136     
00137     //construct second candidate - child extension
00138     cand_pats[1]=pat_i->clone();
00139     rvid=cand_pats[1]->add_vertex(new_v);
00140     cand_pats[1]->add_out_edge(pat_i->rmost_vid(), rvid, E_LBL);
00141     cand_pats[1]->add_in_edge(rvid, pat_i->rmost_vid(), E_LBL);
00142     
00143     return cand_pats;
00144   }//end if case Ia
00145   
00146   // execution reaches this point iff num_cands=1
00147   if(pos_i>pos_j) {
00148     // case II - cousin extension
00149     // construct candidate
00150     cand_pats[0]=pat_i->clone();
00151     const typename TREE_PATTERN::VERTEX_T& new_v=pat_j->rmost_vertex();
00152     int rvid=cand_pats[0]->add_vertex(new_v);
00153     cand_pats[0]->add_out_edge(pos_j, rvid, E_LBL);
00154     cand_pats[0]->add_in_edge(rvid, pos_j, E_LBL);
00155     cand_pats[1]=0;
00156     return cand_pats;
00157   }
00158   
00159   // assert: this is case Ib - child extension
00160   cand_pats[0]=0;
00161   cand_pats[1]=pat_i->clone();
00162   const typename TREE_PATTERN::VERTEX_T& new_v=pat_j->rmost_vertex();
00163   int rvid=cand_pats[1]->add_vertex(new_v);
00164   cand_pats[1]->add_out_edge(pat_i->rmost_vid(), rvid, E_LBL);
00165   cand_pats[1]->add_in_edge(rvid, pat_i->rmost_vid(), E_LBL);
00166   return cand_pats;
00167 }//end join()
00168 
00169 #endif

Generated on Wed Jul 26 14:01:08 2006 for DMTL by  doxygen 1.4.7