mset_cand_gen.h

00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *
00006  *  This program is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU General Public License
00008  *  as published by the Free Software Foundation; either version 2
00009  *  of the License, or (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License along
00017  *  with this program; if not, write to the Free Software Foundation, Inc.,
00018  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00019  */
00020 #ifndef _MSET_CAND_GEN_H_
00021 #define _MSET_CAND_GEN_H_
00022 
00023 #include "pattern.h"
00024 #include "count_support.h"
00025 #include "mset_iso_check.h"
00026 #include "mset_vat.h"
00027 #include "typedefs.h"
00028 
00029 extern unsigned long int freq_pats_count;
00030 extern bool print;
00031 
00050 template<class PP, class MP, class PAT_ST, template<typename, typename, typename, template <typename> class> class CC, 
00051          template <typename> class ALLOC, class SM_TYPE>
00052 void cand_gen(const pat_fam<MSET_PATTERN>& Fk_one, const pat_fam<MSET_PATTERN>& Fk_two, int minsup, 
00053               count_support<MSET_PROP, V_Fkk_MINE_PROP, PAT_ST, CC, ALLOC, SM_TYPE >& cnt_sup, pat_fam<MSET_PATTERN>& freq_pats) {
00054 
00055   typedef pat_fam<MSET_PATTERN> PAT_FAM;
00056   typedef typename PAT_FAM::CONST_IT PF_CIT;
00057   
00058   PF_CIT it_i, it_j;
00059   int num_cands = 1;
00060   
00061   for(it_i=Fk_one.begin(); it_i!=Fk_one.end(); it_i++) {
00062     
00063     PAT_FAM pat_fam_i;  // new equivalent class 
00064     MSET_PATTERN* pat_i=*it_i;
00065     
00066     for(it_j=it_i; it_j!=Fk_one.end(); it_j++) {
00067       
00068       MSET_PATTERN* pat_j=*it_j;
00069 
00070       // cout << "Joining = " << pat_i << " and " << pat_j << endl;
00071       
00072       // Generate candidates from equivalent class Fk_one.
00073       MSET_PATTERN** cand_pats = join(pat_i, pat_j);
00074       
00075       // Isomorphism check, for itemset every candidate patterns pass the iso-check.
00076       check_isomorphism(cand_pats[0]);
00077       
00078       // Count support of candidates, checking whether new patterns in cand_pats are frequent or not.
00079       cnt_sup.count(pat_i, pat_j, cand_pats, minsup, num_cands);
00080       // cout << "Cand pattern = " << cand_pats[0] << endl;
00081       
00082       // Get rid of infrequent candidate. This is itemset
00083       // so, we have only one candidate pattern.
00084       if(!cand_pats[0]->is_freq(minsup)) {
00085         delete cand_pats[0];
00086       } else {
00087         // pushing new and frequent candidates in new equivalent class pat_fam_i
00088         pat_fam_i.push_back(cand_pats[0]);
00089         // pushing new and frequent candidates in freq_pats for output
00090         //freq_pats.push_back(cand_pats[0]);
00091         // number of frequent patterns increase by 1
00092         freq_pats_count++;
00093         if(print)
00094           cout << cand_pats[0];
00095       }
00096       // delete the array of pointers holding the cand_pats 
00097       delete[] cand_pats;
00098     }
00099     
00100     cnt_sup.delete_vat(*it_i);
00101     
00102     // Recursively mine the new pattern family. Depth first pattern generation
00103     if(!pat_fam_i.empty()) {
00104       cand_gen(pat_fam_i, pat_fam_i, minsup, cnt_sup, freq_pats);
00105     
00106       // Deleting the pat_fam_i after the recursion calls processed all its
00107       // descendent patterns.
00108       pat_fam_i.clear();
00109     }
00110     
00111   } //end for i
00112   
00113 } //end cand_gen()
00114 
00131 template<class PP, class MP, class PAT_ST, template<typename, typename, typename, template <typename> class> class CC, 
00132          template <typename> class ALLOC >
00133 MSET_PATTERN** join(const MSET_PATTERN* pat_i, MSET_PATTERN* pat_j) {
00134   
00135   MSET_PATTERN** cand_pats=new MSET_PATTERN*[1];  // an array of one element that holds pointer to a pattern
00136                                         // this allocation of memory is released in cand_gen() routine
00137                                         // after we either delete the candidate, if it is infrequent
00138                                         // or store it in freq_pats data structure if it is frequent 
00139   cand_pats[0] = pat_i->clone();   
00140   const typename MSET_PATTERN::VERTEX_T& rmv = pat_j->rmost_vertex();
00141   cand_pats[0]->add_vertex(rmv);
00142   
00143   return cand_pats;
00144 }
00145 
00146 #endif

Generated on Wed Jul 26 14:01:08 2006 for DMTL by  doxygen 1.4.7