00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef _SEQ_CAND_GEN_H_
00023 #define _SEQ_CAND_GEN_H_
00024
00025 #include "count_support.h"
00026 #include "seq_iso_check.h"
00027 #include "seq_can_code.h"
00028 #include "seq_operators.h"
00029 #include "helper_funs.h"
00030
00031 extern unsigned long freq_pats_count;
00032 extern bool print;
00033
00038 template<class PP, class MP, class PAT_ST, template<typename, typename, typename, template <typename> class> class CC, template <typename> class ALLOC, class SM_TYPE>
00039 void cand_gen(const pat_fam<SEQ_PATTERN>& Fk, const pat_fam<SEQ_PATTERN>&, int min_sup,
00040 pat_fam<SEQ_PATTERN>& freq_pats, count_support<SEQ_PROP, V_Fkk_MINE_PROP, PAT_ST, CC, ALLOC, SM_TYPE >& cs, int depth) {
00041
00042 typedef pat_fam<SEQ_PATTERN> PAT_FAM;
00043 typedef typename PAT_FAM::CONST_IT PF_CIT;
00044 typedef HASHNS::hash_map<int, int, HASHNS::hash<int> > INT_TO_INT;
00045
00046 PF_CIT it_i, it_j;
00047
00048 int sz=Fk.end()-Fk.begin();
00049 PAT_FAM pat_fams[sz];
00050 int patfam_count=0;
00051 INT_TO_INT patfam_index;
00052
00053 for(it_i=Fk.begin(); it_i!=Fk.end(); it_i++) {
00054 bool skip0 = false;
00055 SEQ_PATTERN* pat_i = *it_i;
00056
00057
00058
00059 if (!pat_i->is_freq(min_sup)) skip0 = true;
00060
00061
00062 if(patfam_index.find(pat_i->pat_id())==patfam_index.end())
00063 if(patfam_index.insert(make_pair(pat_i->pat_id(), patfam_count)).second)
00064 patfam_count++;
00065 else
00066 cerr<<"sequence-cand_gen(): patfam_index.insert failed"<<endl;
00067
00068 for(it_j=it_i; it_j!=Fk.end(); it_j++) {
00069 bool skip1 = false;
00070
00071 SEQ_PATTERN* pat_j = *it_j;
00072
00073
00074
00075 if (!pat_j->is_freq(min_sup)) skip1 = true;
00076
00077
00078 if (skip0 && skip1) continue;
00079
00080 if(patfam_index.find(pat_j->pat_id())==patfam_index.end())
00081 if(patfam_index.insert(make_pair(pat_j->pat_id(), patfam_count)).second)
00082 patfam_count++;
00083 else
00084 cout<<"spade.dfs_mine: patfam_index.insert failed"<<endl;
00085
00086
00087 int num_cands = 2;
00088
00089
00090 SEQ_PATTERN** cand_pats=join(pat_i, pat_j, skip0, skip1);
00091
00092
00093 for(int i=0; i<num_cands; i++){
00094 if(cand_pats[i]){
00095 check_isomorphism(cand_pats[i]);
00096 }
00097 }
00098
00099
00100
00101 if (cand_pats[0] || cand_pats[1])
00102 cs.count(pat_i, pat_j, cand_pats, min_sup, num_cands);
00103
00104
00105
00106
00107
00108
00109
00110
00111 for(int i=0; i<num_cands; i++) {
00112 if(!cand_pats[i])
00113 continue;
00114
00115 if(!cand_pats[i]->is_valid(min_sup)) {
00116 delete cand_pats[i];
00117 cand_pats[i]=0;
00118 }
00119 else {
00120 int idx=0;
00121 if(i == 0)
00122 idx = patfam_index[pat_i->pat_id()];
00123 else if(i == 1)
00124 idx = patfam_index[pat_j->pat_id()];
00125
00126 pat_fams[idx].push_back(cand_pats[i]);
00127
00128
00129 if(cand_pats[i]->is_freq(min_sup)) {
00130
00131 if(print){
00132 cout << cand_pats[i];
00133 }
00134 freq_pats_count++;
00135 }
00136 }
00137 }
00138
00139 delete[] cand_pats;
00140
00141 }
00142
00143
00144 cs.delete_vat(*it_i);
00145
00146 }
00147
00148 for(int i=0; i<sz; i++) {
00149
00150 if(!pat_fams[i].empty()) {
00151
00152
00153
00154
00155 cand_gen(pat_fams[i], pat_fams[i], min_sup, freq_pats, cs, depth+1);
00156 }
00157 }
00158
00159 }
00160
00161 template<class PP, class MP, class PAT_ST, template<typename, typename, typename, template <typename> class > class CC, template <typename> class ALLOC >
00162 SEQ_PATTERN** join(const SEQ_PATTERN* pat_i, SEQ_PATTERN* pat_j,
00163 bool skip0=false, bool skip1 =false) {
00164
00165
00166
00167
00168
00169 const typename SEQ_PATTERN::EDGE_T DEF_E_LBL=0;
00170 SEQ_PATTERN** cand_pats=new SEQ_PATTERN*[2];
00171
00172
00173
00174
00175 if (skip0){
00176 cand_pats[0] = 0;
00177 }
00178 else{
00179 cand_pats[0] = pat_i->clone();
00180 const typename SEQ_PATTERN::VERTEX_T& new_v=pat_j->rmost_vertex();
00181 int old_rmv = pat_i->rmost_vid();
00182 int new_rmv = cand_pats[0]->add_vertex(new_v);
00183 cand_pats[0]->add_out_edge(old_rmv, new_rmv, DEF_E_LBL);
00184 cand_pats[0]->add_in_edge(new_rmv, old_rmv, DEF_E_LBL);
00185 }
00186
00187
00188 if(skip1 || pat_i->rmost_vertex() == pat_j->rmost_vertex()) {
00189 cand_pats[1] = 0;
00190 }
00191 else {
00192
00193
00194 cand_pats[1] = pat_j->clone();
00195 const typename SEQ_PATTERN::VERTEX_T& new_v=pat_i->rmost_vertex();
00196 int old_rmv = pat_j->rmost_vid();
00197 int new_rmv = cand_pats[1]->add_vertex(new_v);
00198 cand_pats[1]->add_out_edge(old_rmv, new_rmv, DEF_E_LBL);
00199 cand_pats[1]->add_in_edge(new_rmv, old_rmv, DEF_E_LBL);
00200 }
00201
00202 return cand_pats;
00203 }
00204
00205 #endif