00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef _GRAPH_VAT_H
00021 #define _GRAPH_VAT_H
00022
00023 #include <ext/hash_set>
00024 #include "graph_evat.h"
00025 #include "pattern.h"
00026 #include "generic_classes.h"
00027 #include "typedefs.h"
00028
00029
00030
00031
00032 time_tracker tt_vat, tt_fwd_isect, tt_back_isect;
00033 int fwd_isect_cnt=0;
00034 int back_isect_cnt=0;
00035
00036
00037 template<typename PP, typename MP, template <typename> class ALLOC,
00038 template<typename, typename> class ST>
00039 class vat<GRAPH_PROP, V_Fk1_MINE_PROP, ALLOC, ST>;
00040
00041 template<typename PP, typename MP, template <typename> class ALLOC,
00042 template<typename, typename> class ST>
00043 ostream& operator<< (ostream& ostr, const vat<PP, MP, ALLOC, ST>* v);
00044
00046
00047
00048
00058 template<typename PP, typename MP, template <typename> class ALLOC, template<typename, typename> class ST>
00059 class vat<GRAPH_PROP, V_Fk1_MINE_PROP, ALLOC, ST>
00060 {
00061 public:
00062
00063 typedef evat<ALLOC> EVAT;
00064 typedef vat<GRAPH_PROP, V_Fk1_MINE_PROP, ALLOC, ST> VAT;
00065 typedef ST<EVAT, ALLOC<EVAT> > RMP_VATS;
00066 typedef ST<pair<int, RMP_VATS>, ALLOC<pair<int, RMP_VATS> > > GVAT;
00071 typedef HASHNS::hash_set<int, HASHNS::hash<int>, std::equal_to<int>, ALLOC<int> > VSET;
00074 typedef vector<vector<VSET, ALLOC<VSET> >, ALLOC<vector<VSET, ALLOC<VSET> > > > VSETS;
00077 typedef typename GVAT::const_iterator CONST_IT;
00078 typedef typename GVAT::iterator IT;
00079 typedef typename ST<EVAT, ALLOC<EVAT> >::const_iterator CONST_EIT;
00080 typedef typename VSETS::iterator VS_IT;
00081 typedef typename VSETS::const_iterator CONST_VS_IT;
00082
00083 void* operator new(size_t size) {
00084 ALLOC<VAT> v;
00085 return v.allocate(size);
00086 }
00087
00088 void operator delete(void *p, size_t size) {
00089 if (p) {
00090 ALLOC<VAT> v;
00091 v.deallocate(static_cast<VAT*> (p), size);
00092 }
00093 }
00094
00095 IT begin() {
00096 return _vat.begin();
00097 }
00098 CONST_IT begin() const {
00099 return _vat.begin();
00100 }
00101 IT end() {
00102 return _vat.end();
00103 }
00104 CONST_IT end() const {
00105 return _vat.end();
00106 }
00107
00108 VS_IT begin_v() {
00109 return _vids.begin();
00110 }
00111 CONST_VS_IT begin_v() const {
00112 return _vids.begin();
00113 }
00114 VS_IT end_v() {
00115 return _vids.end();
00116 }
00117 CONST_VS_IT end_v() const {
00118 return _vids.end();
00119 }
00120
00121 friend ostream& operator<< <>(ostream&, const VAT*);
00122
00123 int size() const {
00124 return _vat.size();
00125 }
00126
00127 bool empty() const {
00128 return _vat.empty();
00129 }
00130
00131 const pair<int, ST<EVAT, ALLOC<EVAT> > >& back() const {
00132 return _vat.back();
00133 }
00134
00135 void insert_occurrence_tid(const int& tid, const pair<int, int>& new_occurrence) {
00136 ST<EVAT, ALLOC<EVAT> > new_evats;
00137 evat<ALLOC> new_evat;
00138 new_evat.push_back(new_occurrence);
00139 new_evats.push_back(new_evat);
00140 _vat.push_back(make_pair(tid, new_evats));
00141 }
00142
00143
00144 void insert_occurrence_evat(const pair<int, int>& new_occurrence) {
00145 evat<ALLOC> new_evat;
00146 new_evat.push_back(new_occurrence);
00147 _vat.back().second.push_back(new_evat);
00148 }
00149
00150 void insert_occurrence(const pair<int, int>& new_occurrence) {
00151 _vat.back().second.back().push_back(new_occurrence);
00152 }
00153
00154
00155 void insert_vid_hs(const int& vid) {
00156 VSET vs;
00157 vs.insert(vid);
00158 _vids.back().push_back(vs);
00159 }
00160
00161
00162 void insert_vid(const int& vid) {
00163 _vids.back().back().insert(vid);
00164 }
00165
00166
00167 void insert_vid_tid(const int& vid) {
00168 VSET vset;
00169 vset.insert(vid);
00170 vector<VSET, ALLOC<VSET> > vsets;
00171 vsets.push_back(vset);
00172 _vids.push_back(vsets);
00173 }
00174
00175
00176 void copy_vats(const pair<int, ST<evat<ALLOC>, ALLOC<evat<ALLOC> > > >& v1, const int& offset, const int& sz, bool swap=0) {
00177 int i;
00178 for(i=0; i<sz; i++)
00179 if(swap)
00180 _vat.back().second[i].push_back(make_pair(v1.second[i][offset].second, v1.second[i][offset].first));
00181 else
00182 _vat.back().second[i].push_back(v1.second[i][offset]);
00183 }
00184
00185
00186 void copy_vats_tid(const pair<int, ST<evat<ALLOC>, ALLOC<evat<ALLOC> > > >& v1, const int& offset, const int& sz, bool swap=0) {
00187 int i;
00188 ST<EVAT, ALLOC<EVAT> > new_entry;
00189
00190 for(i=0; i<sz; i++) {
00191 evat<ALLOC> tmp_evat;
00192 if(swap)
00193 tmp_evat.push_back(make_pair(v1.second[i][offset].second, v1.second[i][offset].first));
00194 else
00195 tmp_evat.push_back(v1.second[i][offset]);
00196 new_entry.push_back(tmp_evat);
00197 }
00198 _vat.push_back(make_pair(v1.first, new_entry));
00199 }
00200
00201
00202 void copy_vids_hs(const VSET& v1_vids) {
00203
00204 VSET vs;
00205 typename VSET::const_iterator it;
00206 for(it=v1_vids.begin(); it!=v1_vids.end(); it++)
00207 vs.insert(*it);
00208 _vids.back().push_back(vs);
00209 }
00210
00211
00212 void copy_vids_tid(const VSET& v1_vids) {
00213 VSET vset;
00214 typename VSET::const_iterator it;
00215 for(it=v1_vids.begin(); it!=v1_vids.end(); it++)
00216 vset.insert(*it);
00217 vector<VSET, ALLOC<VSET> > vsets;
00218 vsets.push_back(vset);
00219 _vids.push_back(vsets);
00220 }
00221
00222
00225
00226
00227 template<typename PATTERN, typename PAT_SUP>
00228 static VAT** intersection(const VAT* v1, const VAT* v2, PAT_SUP** cand_sups, PATTERN** cand_pats, bool) {
00229 #ifdef PRINT
00230 cout<<"VAT intersection entered with v1="<<v1<<endl;
00231 cout<<"v2="<<v2<<endl;
00232 #endif
00233
00234 tt_vat.start();
00235
00236
00237
00238
00239
00240
00241
00242
00243 VAT* cand_vat=new VAT;
00244 bool is_fwd;
00245 bool is_fwd_chain=false;
00246
00247 bool l2_eq=(cand_pats[0]->size()==3) && (cand_pats[0]->label(0)==cand_pats[0]->label(1));
00248
00249
00250 int rmp_index;
00251
00252 int new_edge_state=-1;
00253
00254
00255
00256
00257 int rvid=cand_pats[0]->rmost_vid();
00258 int edge_vid=-1;
00259
00260
00261 typename PATTERN::CONST_EIT_PAIR eit_p=cand_pats[0]->out_edges(rvid);
00262 int back_idx=-1;
00263
00264
00265 if(eit_p.second-eit_p.first>1)
00266 is_fwd=false;
00267 else
00268 is_fwd=true;
00269
00270
00271 if(is_fwd) {
00272 edge_vid=eit_p.first->first;
00273 if(!edge_vid)
00274 is_fwd_chain=0;
00275 else
00276 is_fwd_chain=1;
00277 }
00278 else {
00279
00280 eit_p.second--;
00281 edge_vid=eit_p.second->first;
00282
00285
00286
00287 const typename PATTERN::RMP_T& cand_rmp=cand_pats[0]->rmost_path();
00288 for(unsigned int i=0; i<cand_rmp.size(); i++) {
00289 if(cand_rmp[i]==edge_vid) {
00290 back_idx=i;
00291 break;
00292 }
00293 }
00294
00295 if(back_idx==-1) {
00296 cerr<<"vat.intersect: back_idx not found for edge_vid="<<edge_vid<<" in rmp.size="<<cand_rmp.size()<<endl;
00297 tt_vat.stop();
00298 return 0;
00299 }
00300 }
00301
00302
00303
00304
00305
00306 if(is_fwd)
00307 rmp_index=cand_pats[0]->rmp_size()-2;
00308 else
00309 rmp_index=cand_pats[0]->rmp_size()-1;
00310
00311 if(cand_pats[0]->label(edge_vid)==cand_pats[0]->label(rvid))
00312 new_edge_state=0;
00313 else
00314 if(is_fwd)
00315 new_edge_state=(cand_pats[0]->label(edge_vid)>cand_pats[0]->label(rvid))+1;
00316 else
00317 new_edge_state=(cand_pats[0]->label(rvid)>cand_pats[0]->label(edge_vid))+1;
00318
00319 CONST_IT it_v1=v1->begin();
00320 CONST_IT it_v2=v2->begin();
00321
00322
00323 while(it_v1!=v1->end() && it_v2!=v2->end()) {
00324 if(it_v1->first<it_v2->first) {
00325 it_v1++;
00326 continue;
00327 }
00328
00329 if(it_v1->first>it_v2->first) {
00330 it_v2++;
00331 continue;
00332 }
00333
00334
00335 const EVAT* v1_evat;
00336 const EVAT* v2_evat=&(it_v2->second[0]);
00337
00338 if(!is_fwd)
00339 v1_evat=&(it_v1->second[rmp_index-1]);
00340 else {
00341 if(is_fwd_chain)
00342 v1_evat=&((it_v1->second)[rmp_index-1]);
00343 else
00344 v1_evat=&((it_v1->second)[0]);
00345 }
00346
00348
00349
00350 if(is_fwd) {
00351 tt_fwd_isect.start();
00352 evat<ALLOC>::template fwd_intersect<ST, VAT, ALLOC>(*v1, *v1_evat, *v2_evat, *cand_vat,
00353 is_fwd_chain, rmp_index, new_edge_state,
00354 it_v1-v1->begin(), l2_eq);
00355 tt_fwd_isect.stop();
00356 fwd_isect_cnt++;
00357 }
00358 else {
00359 tt_back_isect.start();
00360 evat<ALLOC>::template back_intersect<ST, VAT, ALLOC>(*v1, *v1_evat, *v2_evat, *cand_vat, back_idx, new_edge_state, it_v1-v1->begin());
00361 tt_back_isect.stop();
00362 back_isect_cnt++;
00363 }
00364
00365 it_v1++;
00366 it_v2++;
00367 }
00368
00369 cand_sups[0]->set_sup(make_pair(cand_vat->size(), 0));
00370
00371 VAT** cand_vats=new VAT*;
00372 cand_vats[0]=cand_vat;
00373 tt_vat.stop();
00374 return cand_vats;
00375 }
00376
00377
00378 unsigned long int byte_size() const{
00379 unsigned long int b_size=0;
00380 CONST_IT it;
00381 CONST_EIT eit;
00382 b_size += sizeof(int);
00383 for (it = begin(); it!=end();++it){
00384 b_size += 2*sizeof(int);
00385
00386 for (eit = it->second.begin(); eit != it->second.end(); eit++){
00387 b_size+=(1*sizeof(int))+eit->byte_size();
00388 }
00389 }
00390
00391 typename VSETS::const_iterator vit;
00392 b_size += sizeof(int);
00393 for (vit = begin_v(); vit != end_v(); vit++){
00394 b_size += sizeof(int);
00395 typename vector<VSET>::const_iterator vvsetit;
00396 for (vvsetit=vit->begin(); vvsetit!=vit->end(); vvsetit++){
00397 b_size += (vvsetit->size()+1) * sizeof(int);
00398 }
00399 }
00400 return b_size;
00401 }
00402
00403 void print(){
00404 int ITSZ=sizeof(int);
00405 CONST_IT it;
00406 CONST_EIT eit;
00407 int tid,evat_n,evat_sz;
00408 int gvat_sz=_vat.size();
00409 cout << "size:" <<gvat_sz << endl;
00410 for (it=begin();it!=end();++it){
00411 tid=it->first;
00412 evat_n=it->second.size();
00413 cout << tid << " " << evat_n << endl;
00414 for (eit=it->second.begin(); eit!=it->second.end(); ++eit){
00415 evat_sz = eit->size();
00416 cout << evat_sz << endl;
00417 eit->print();
00418 }
00419 }
00420
00421 typename VSETS::iterator vit;
00422 int vvsetn = _vids.size();
00423 cout << "Vids size: " << vvsetn << endl;
00424 for (vit=begin_v(); vit!=end_v(); vit++){
00425 typename vector<VSET>::iterator vvsetit;
00426 int vsetn = vit->size();
00427 cout << vsetn << endl;
00428 for (vvsetit=vit->begin(); vvsetit!=vit->end(); vvsetit++){
00429 typename VSET::iterator vsetit;
00430 int n = vvsetit->size();
00431 cout << "- " << n << endl;
00432 for (vsetit=vvsetit->begin(); vsetit!=vvsetit->end(); vsetit++){
00433 int v=*vsetit;
00434 cout << "-- " << v << endl;
00435 }
00436 }
00437 }
00438
00439 }
00440
00441
00442 void write_file(ostream & output) const{
00443
00444 int ITSZ=sizeof(int);
00445 CONST_IT it;
00446 CONST_EIT eit;
00447 int tid,evat_n,evat_sz;
00448 int gvat_sz=_vat.size();
00449 output.write(reinterpret_cast<const char *>(&gvat_sz), ITSZ);
00450 for (it=begin();it!=end();++it){
00451 tid=it->first;
00452 evat_n=it->second.size();
00453 output.write(reinterpret_cast<const char *>(&tid), ITSZ);
00454 output.write(reinterpret_cast<const char *>(&evat_n), ITSZ);
00455 for (eit=it->second.begin(); eit!=it->second.end(); ++eit){
00456 evat_sz = eit->size();
00457 output.write(reinterpret_cast<const char *>(&evat_sz), ITSZ);
00458 eit->write_file(output);
00459 }
00460 }
00461
00462 typename VSETS::const_iterator vit;
00463 int vvsetn = _vids.size();
00464 output.write(reinterpret_cast<const char *>(&vvsetn), ITSZ);
00465 for (vit=begin_v(); vit!=end_v(); vit++){
00466 typename vector<VSET>::const_iterator vvsetit;
00467 int vsetn = vit->size();
00468 output.write(reinterpret_cast<const char *>(&vsetn), ITSZ);
00469 for (vvsetit=vit->begin(); vvsetit!=vit->end(); vvsetit++){
00470 typename VSET::iterator vsetit;
00471 int n = vvsetit->size();
00472 output.write(reinterpret_cast<const char *>(&n), ITSZ);
00473 for (vsetit=vvsetit->begin(); vsetit!=vvsetit->end(); vsetit++){
00474 int v=*vsetit;
00475 output.write(reinterpret_cast<const char *>(&v), ITSZ);
00476 }
00477 }
00478 }
00479
00480 }
00481
00482 void read_file (istream & input, unsigned long int size) {
00483 int ITSZ=sizeof(int);
00484 int buf_size=size/ITSZ;
00485 int *buf = new int[buf_size];
00486 input.read((char *)buf, (size));
00487 int current=0;
00488 int vats_size=buf[current++], vats_seen=0;
00489 while(vats_seen++ < vats_size){
00490 int tid=buf[current++];
00491 int evat_n=buf[current++];
00492 int evats_seen=0;
00493 RMP_VATS edges;
00494 while(evats_seen++ < evat_n){
00495 evat<ALLOC> new_evat;
00496 int evat_sz=buf[current++];
00497 while(evat_sz-- > 0){
00498 int f1, f2;
00499 f1 = buf[current++];
00500 f2 = buf[current++];
00501 new_evat.push_back(make_pair(f1, f2));
00502 }
00503 edges.push_back(new_evat);
00504 }
00505 _vat.push_back(make_pair(tid, edges));
00506 }
00507
00508
00509 int vids_size=buf[current++], vids_seen=0;
00510 while(vids_seen++ < vids_size){
00511 vector <VSET> new_vsetv;
00512 int vsetv_n=buf[current++];
00513 int vsets_seen=0;
00514 while(vsets_seen++ < vsetv_n){
00515 VSET new_vset;
00516 int vset_sz=buf[current++];
00517 while(vset_sz-- > 0){
00518 int i = buf[current++];
00519 new_vset.insert(i);
00520 }
00521 new_vsetv.push_back(new_vset);
00522 }
00523 _vids.push_back(new_vsetv);
00524 }
00525
00526
00527 input.clear();
00528 delete [] buf;
00529 }
00530
00532 bool is_new_vertex(const int& vid, const int& tid, const int& offset) const {
00533 if(_vids[tid][offset].find(vid)==_vids[tid][offset].end()) {
00534 return true;
00535 }
00536 return false;
00537 }
00538
00539
00540 friend class evat<ALLOC>;
00541
00542 private:
00543 GVAT _vat;
00544 VSETS _vids;
00545
00546 };
00547
00548
00549 template<typename PP, typename MP, template <typename> class ALLOC, template<typename, typename> class ST>
00550 ostream& operator<< (ostream& ostr, const vat<PP, MP, ALLOC, ST>* v) {
00551 typename vat<PP, MP, ALLOC, ST>::CONST_IT it;
00552 typename vat<PP, MP, ALLOC, ST>::RMP_VATS::const_iterator rit;
00553
00554 ostr<<"VAT:"<<endl;
00555 for(it=v->begin(); it!=v->end(); it++) {
00556 ostr<<"tid="<<it->first<<endl;
00557 for(rit=it->second.begin(); rit!=it->second.end(); rit++)
00558 ostr<<*rit<<endl;
00559 }
00560
00561
00562 typename vat<PP, MP, ALLOC, ST>::VSETS::const_iterator vit1;
00563
00564 typename vector<typename vat<PP, MP, ALLOC, ST>::VSET >::const_iterator vit2;
00565 typename vat<PP, MP, ALLOC, ST>::VSET::const_iterator vit3;
00566
00567 ostr<<"Vertices are"<<endl;
00568 for(vit1=v->begin_v(), it=v->begin(); vit1!=v->end_v(); vit1++, it++) {
00569 ostr<<"tid="<<it->first<<endl;
00570 for(vit2=vit1->begin(); vit2!=vit1->end(); vit2++) {
00571 for(vit3=vit2->begin(); vit3!=vit2->end(); vit3++)
00572 ostr<<*vit3<<" ";
00573 ostr<<endl;
00574 }
00575 }
00576
00577 return ostr;
00578 }
00579
00580 #endif