iset_cand_gen.h

00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *
00006  *  This program is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU General Public License
00008  *  as published by the Free Software Foundation; either version 2
00009  *  of the License, or (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License along
00017  *  with this program; if not, write to the Free Software Foundation, Inc.,
00018  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00019  */
00020 #ifndef _ISET_CAND_GEN_H_
00021 #define _ISET_CAND_GEN_H_
00022 
00023 #include "pattern.h"
00024 #include "count_support.h"
00025 #include "iset_iso_check.h"
00026 #include "iset_vat.h"
00027 #include "typedefs.h"
00028 
00029 extern unsigned long int freq_pats_count;
00030 extern bool print;
00031 
00050 template<class PP, class MP, class PAT_ST, 
00051          template<class, typename, typename, template <typename> class> class CC, 
00052          template <typename> class ALLOC, class SM_TYPE>
00053 void cand_gen(const pat_fam<ISET_PATTERN>& Fk_one, const pat_fam<ISET_PATTERN>& Fk_two, int minsup, 
00054               count_support<ISET_PROP, V_Fkk_MINE_PROP, PAT_ST, CC, ALLOC, SM_TYPE >& cnt_sup, pat_fam<ISET_PATTERN>& freq_pats) {
00055 
00056   typedef pat_fam<ISET_PATTERN> PAT_FAM;
00057   typedef typename PAT_FAM::CONST_IT PF_CIT;
00058   
00059   PF_CIT it_i, it_j;
00060   int num_cands = 1;
00061   
00062   for(it_i=Fk_one.begin(); it_i!=Fk_one.end(); it_i++) {
00063     
00064     PAT_FAM pat_fam_i;  // new equivalent class 
00065     ISET_PATTERN* pat_i=*it_i;
00066     
00067     for(it_j=it_i+1; it_j!=Fk_one.end(); it_j++) {
00068       
00069       ISET_PATTERN* pat_j=*it_j;
00070       
00071       // Generate candidates from equivalent class Fk_one.
00072       ISET_PATTERN** cand_pats = join(pat_i, pat_j);
00073       
00074       // Isomorphism check, for itemset every candidate patterns pass the iso-check.
00075       check_isomorphism(cand_pats[0]);
00076       
00077       // Count support of candidates, checking whether new patterns in cand_pats are frequent or not.
00078       cnt_sup.count(pat_i, pat_j, cand_pats, minsup, num_cands);
00079       
00080       // Get rid of infrequent candidate. This is itemset
00081       // so, we have only one candidate pattern.
00082       if(!cand_pats[0]->is_freq(minsup)) {
00083         delete cand_pats[0];
00084       } else {
00085         // pushing new and frequent candidates in new equivalent class pat_fam_i
00086         pat_fam_i.push_back(cand_pats[0]);
00087         // pushing new and frequent candidates in freq_pats for output
00088         //freq_pats.push_back(cand_pats[0]);
00089         // number of frequent patterns increase by 1
00090         freq_pats_count++;
00091         if(print)
00092           cout << cand_pats[0];
00093       }
00094       // delete the array of pointers holding the cand_pats 
00095       delete[] cand_pats;
00096     }
00097     
00098     cnt_sup.delete_vat(*it_i);
00099     
00100     // Recursively mine the new pattern family. Depth first pattern generation
00101     if(!pat_fam_i.empty()) {
00102       cand_gen(pat_fam_i, pat_fam_i, minsup, cnt_sup, freq_pats);
00103    
00104       // Deleting individual elements (which are pointers) in the vector. 
00105       for(unsigned int c=0; c < pat_fam_i.size(); c++) {
00106         delete pat_fam_i[c];
00107       }
00108 
00109       // Deleting the pat_fam_i after the recursion calls processed all its
00110       // descendent patterns.
00111       pat_fam_i.clear();
00112     }
00113     
00114   } //end for i
00115   
00116 } //end cand_gen()
00117 
00134 template<class PP, class MP, class PAT_ST, 
00135          template<class, typename, typename, template <typename> class> class CC, 
00136          template <typename> class ALLOC >
00137 ISET_PATTERN** join(const ISET_PATTERN* pat_i, ISET_PATTERN* pat_j) {
00138   
00139   ISET_PATTERN** cand_pats=new ISET_PATTERN*[1];  // an array of one element that holds pointer to a pattern
00140                                         // this allocation of memory is released in cand_gen() routine
00141                                         // after we either delete the candidate, if it is infrequent
00142                                         // or store it in freq_pats data structure if it is frequent 
00143   cand_pats[0] = pat_i->clone();   
00144   const typename ISET_PATTERN::VERTEX_T& rmv = pat_j->rmost_vertex();
00145   cand_pats[0]->add_vertex(rmv);
00146   
00147   return cand_pats;
00148 }
00149 
00150 #endif

Generated on Wed Jul 26 14:01:08 2006 for DMTL by  doxygen 1.4.7