graph_iso_check.h

00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *
00006  *  This program is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU General Public License
00008  *  as published by the Free Software Foundation; either version 2
00009  *  of the License, or (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License along
00017  *  with this program; if not, write to the Free Software Foundation, Inc.,
00018  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00019  */
00025 #ifndef _GRAPH_ISO_CHECK_H
00026 #define _GRAPH_ISO_CHECK_H
00027 
00028 #include<vector>
00029 #include<iostream>
00030 #include "typedefs.h"
00031 #include "time_tracker.h"
00032 
00033 using namespace std;
00034 #include "element_parser.h"
00035 
00036 #define INF_LABEL "99999999" // max label initializer for edge & vertex labels
00037 
00038 template<class PP, class MP, class ST, template<class, typename, typename, template <typename> class > class CC,
00039          template <typename> class ALLOC >
00040 class pattern;
00041 
00042 class undirected;
00043 
00044 template<class prop, class next_property>
00045 class proplist;
00046 
00047 
00048 int  num_startup=0;  
00049 int  num_urmp=0;  
00050 time_tracker tt_iso_chk;
00051 time_tracker tt_iso_startup;
00052 time_tracker tt_update_rmp;
00053 time_tracker tt_minimal;
00054 time_tracker  tt_rest;
00055 void print_rmp(const vector<int>& rmp) {
00056   cout<<"rmp is:"<<endl;
00057   for(unsigned int i=0; i<rmp.size(); i++)
00058     cout<<rmp[i]<<" ";
00059   cout<<endl;
00060 }
00061 
00064 // Note: it is assumed that this function is called for graphs of atleast 
00065 // size=2
00066 template<class PP, class MP, class PAT_ST, template<class, typename, typename, template <typename> class > class CC, 
00067          template <typename> class ALLOC >
00068 void update_rmost_path(GRAPH_PATTERN*const& cand_pat) {
00069   // How it works: no change in rmost-path (rmp) if last edge added was back-
00070   // edge - only append to canonical code. 
00071   // In case of fwd edge, rmp is modified as well
00072 
00073   num_urmp++;  
00074   tt_update_rmp.start();
00075 
00076   int new_vid=cand_pat->rmost_vid(); // vertex added to generate this candidate
00077   typename GRAPH_PATTERN::RMP_T& rmost_path=cand_pat->_rmost_path;
00078 
00079   // no change to rmp if this candidate was formed by back extension
00080   // but canonical code must be updated
00081   if(new_vid==*(rmost_path.end()-1)) {
00082     typename GRAPH_PATTERN::CONST_EIT_PAIR eit_p=cand_pat->out_edges(new_vid);
00083     eit_p.second--;
00084     int back_vid=eit_p.second->first; // vid at back vertex of edge
00085     typename GRAPH_PATTERN::CAN_CODE::FIVE_TUPLE new_tuple(new_vid, back_vid, cand_pat->label(new_vid), eit_p.second->second, cand_pat->label(back_vid));
00086     cand_pat->_canonical_code.push_back(new_tuple);
00087     return;
00088   }
00089 
00091   int pvid=(cand_pat->out_edges(new_vid)).first->first; // parent of new_vid
00092   typename GRAPH_PATTERN::RMP_T::iterator it=rmost_path.end()-1;
00093 
00094   while(it!=rmost_path.begin()) {
00095     if(*it==pvid)
00096     break;
00097     it=rmost_path.erase(it);
00098     it--;
00099   }
00100 
00101   rmost_path.push_back(new_vid);
00102   // update canonical code
00103   typename GRAPH_PATTERN::CONST_EIT_PAIR eit_p=cand_pat->out_edges(new_vid);
00104   typename GRAPH_PATTERN::CAN_CODE::FIVE_TUPLE new_tuple(pvid, new_vid, cand_pat->label(pvid), eit_p.first->second, cand_pat->label(new_vid));
00105   cand_pat->_canonical_code.push_back(new_tuple);
00106   tt_update_rmp.stop();
00107 
00108 }//end update_rmost_path()
00109 
00112 template<class PP, class MP, class PAT_ST, template<class, typename, typename, template <typename> class> class CC,
00113          template <typename> class ALLOC >
00114 bool check_isomorphism(GRAPH_PATTERN*const& cand_pat) {
00115   // How it works: it is a recursive test. At each recursive call, the 
00116   // minimal edge according to DFS ordering (gSpan) is determined. If this 
00117   // minimal edge is what cand_pat has at that level, then cand_pat passes 
00118   // test at that call and further recursive calls have to be made. This 
00119   // recursion continues till i) test failed at some level or ii) all edges 
00120   // are exhausted, when cand_pat is declared to have passes isomorphsim test
00121 
00122   tt_iso_chk.start();
00123 
00124   typedef CC<GRAPH_PROP, typename GRAPH_PATTERN::VERTEX_T, typename GRAPH_PATTERN::EDGE_T, ALLOC > CAN_CODE;
00125 
00126   // first step is get rmp updated
00127   update_rmost_path(cand_pat);
00128 
00129 #ifdef PRINT
00130   cout<<"checking isomorphism for: "<<endl<<cand_pat->canonical_code()<<endl;
00131 #endif
00132 
00133   typename GRAPH_PATTERN::EDGE_T e; // least label of edge
00134   typename GRAPH_PATTERN::VERTEX_T src_v, dest_v; // least labelled vertices of an edge
00135   vector<pair<int, int> > ids; // ids with least labelled edges
00136   vector<CAN_CODE> new_codes;
00137 
00138   tt_rest.start();
00139   // default startup values
00140   src_v=cand_pat->label(0);
00141   dest_v=cand_pat->label(1);
00142   if(!cand_pat->get_out_edge(0, 1, e)) {
00143     cerr<<"check_iso(graph): get_out_edge failed in startup"<<endl;
00144     tt_iso_chk.stop();
00145     return false;
00146   }
00147   tt_rest.stop();
00148 
00149   iso_startup(cand_pat, src_v, dest_v, e, ids);
00150 
00151   // create separate codes for each pair in ids
00152   // each such pair shall have to be tested for isomorphism
00153   
00154   tt_rest.start();
00155   for(unsigned int i=0; i<ids.size(); i++) {
00156     CAN_CODE new_cc(typename CAN_CODE::FIVE_TUPLE(0, 1, cand_pat->label(ids[i].first), e, cand_pat->label(ids[i].second)), ids[i].first, ids[i].second);
00157     new_cc.init_rmp();
00158     new_codes.push_back(new_cc);
00159   }
00160 
00161   // codes in new_codes are all equivalent
00162   // check if any one is < current one
00163 
00164 
00165   if(new_codes[0][0]<cand_pat->_canonical_code[0]) {
00166 #ifdef PRINT
00167       cout<<"Isomorphism decision made at first level: new_tuple="<<new_codes[0][0]<<endl;
00168       cout<<"current_code="<<cand_pat->_canonical_code[0]<<endl;
00169 #endif
00170       cand_pat->_is_canonical=false;
00171       tt_iso_chk.stop();
00172       return false;
00173     }
00174   tt_rest.stop();
00175 
00176   for(unsigned int i=0; i<new_codes.size(); i++) {
00177     tt_minimal.start();
00178     bool r = minimal(cand_pat, new_codes[i]);
00179     if(!r) {
00180       cand_pat->_is_canonical=false;
00181       tt_iso_chk.stop();
00182       return false;
00183     }
00184     tt_minimal.stop();
00185   }
00186 
00187   // execution reaches here iff all codes were found to be >= current one
00188   cand_pat->_is_canonical=true;
00189   tt_iso_chk.stop();
00190   return true;
00191 
00192 }//end check_isomorphism()
00193 
00194 
00195 // find minimal pair, acc to DFS ordering
00196 template<typename PATTERN>
00197 void iso_startup(const PATTERN* cand_pat, typename PATTERN::VERTEX_T& src_v, typename PATTERN::VERTEX_T& dest_v, typename PATTERN::EDGE_T& e, vector<pair<int, int> >& ids) {  
00198   num_startup++;  
00199   tt_iso_startup.start();
00200   // find minimal pair, acc to DFS ordering
00201   typename PATTERN::CONST_IT it;
00202   typename PATTERN::CONST_EIT_PAIR eit_p;
00203   for(it=cand_pat->begin(); it!=cand_pat->end(); it++) {
00204     if(it->v>src_v)
00205     continue;
00206 
00207     eit_p=cand_pat->out_edges(it->id);
00208       
00209     if(it->v<src_v) {
00210       // new minimal src label found  
00211       // find edge with least label      
00212       ids.clear();
00213       e=eit_p.first->second;
00214       src_v=it->v;
00215       dest_v=cand_pat->label(eit_p.first->first);
00216       ids.push_back(make_pair(it->id, eit_p.first->first));
00217       eit_p.first++;
00218     }
00219       
00220     while(eit_p.first!=eit_p.second) {
00221       if(eit_p.first->second<=e) {
00222         if(eit_p.first->second<e) {
00223           // new minimal edge found
00224           ids.clear();
00225           e=eit_p.first->second;
00226           dest_v=cand_pat->label(eit_p.first->first);
00227           ids.push_back(make_pair(it->id, eit_p.first->first));
00228           eit_p.first++;
00229           continue;
00230         }
00231 
00232         if(cand_pat->label(eit_p.first->first)<=dest_v) {
00233           if(cand_pat->label(eit_p.first->first)<dest_v) {
00234             // new least dest label found
00235             ids.clear();
00236             dest_v=cand_pat->label(eit_p.first->first);
00237           }
00238           ids.push_back(make_pair(it->id, eit_p.first->first));
00239         }
00240       }//end if eit_p.first->second<=e
00241       eit_p.first++;
00242     }  
00243   }//end for
00244 
00245   tt_iso_startup.stop();
00246 
00247 }//end iso_startup()
00248 
00249 
00252 template<typename PATTERN, typename CAN_CODE>
00253 bool minimal(const PATTERN* cand_pat, CAN_CODE& new_code) {
00254 
00255   element_parser<typename PATTERN::EDGE_T> ed_prsr;
00256   element_parser<typename PATTERN::VERTEX_T> vt_prsr;
00257 
00258   // starts by checking if a back-edge can be added
00259   // else, the minimal fwd edge is added and minimality checked again
00260 
00262   // if all edges have been added to new_code, then no new edges can be added
00263   // hence cand_pat is minimal
00264   if(cand_pat->canonical_code().size()==new_code.size())
00265     return true;
00266 
00267   const CAN_CODE& current_code=cand_pat->canonical_code();
00268   typename CAN_CODE::CONST_IT cc_it=new_code.end()-1;
00269   bool is_last_fwd=(cc_it->_i<cc_it->_j); // denotes if last edge in new_code was a fwd edge
00270   int last_vid=(is_last_fwd)? cc_it->_j: cc_it->_i; // vid to which edge shall be added  
00271   int last_vid_g=new_code.gid(last_vid);
00272   typename PATTERN::CONST_EIT_PAIR eit_p=cand_pat->out_edges(last_vid_g);
00273   int code_index=new_code.size();
00274   typename CAN_CODE::RMP_T& rmp=new_code.rmost_path();
00275   typename CAN_CODE::RMP_T::iterator rmp_it;
00276 
00278   int back_vid, last_back_vid;
00279   back_vid=last_vid;
00280   typename PATTERN::EDGE_T e=typename PATTERN::EDGE_T();
00281 
00282   if(is_last_fwd)
00283     // relatively straight forward - add the farthest back edge you find
00284     last_back_vid=-1;
00285   else
00286     // add a back edge only if it's after the last back edge present
00287     last_back_vid=cc_it->_j;
00288   
00289   while(eit_p.first!=eit_p.second) {
00290     rmp_it=rmp.end()-3;
00291 
00292       // check if this vid occurs on the rmp of can code
00293     while(rmp_it>=rmp.begin()) {
00294       if(*rmp_it==new_code.cid(eit_p.first->first))
00295         break;
00296 
00297       if(*rmp_it<new_code.cid(eit_p.first->first))
00298         rmp_it=rmp.begin();
00299   
00300       rmp_it--;
00301     }
00302 
00303     if(rmp_it<rmp.begin()) {
00304       eit_p.first++;
00305       continue;
00306     }
00307 
00308     // atleast 2 nodes prior to last one
00309     if(new_code.cid(eit_p.first->first)<back_vid // this vertex is farthest one seen so far
00310        && new_code.cid(eit_p.first->first)>last_back_vid) { // a valid back edge must end  
00311                                                               // at a vertex after last_back_vid
00312        back_vid=new_code.cid(eit_p.first->first);
00313        e=eit_p.first->second;
00314     }
00315     eit_p.first++;
00316   }
00317       
00318   if(back_vid!=last_vid) {
00319     // valid back edge found
00320     typename PATTERN::VERTEX_T lbl1=cand_pat->label(new_code.gid(last_vid));
00321     typename PATTERN::VERTEX_T lbl2=cand_pat->label(new_code.gid(back_vid));
00322     typename CAN_CODE::FIVE_TUPLE new_tuple(last_vid, back_vid, lbl1, e, lbl2);
00323     if(new_tuple<current_code[code_index]) {
00324 #ifdef PRINT
00325       cout<<"decision at back-edge: new tuple is more minimal"<<endl;
00326       cout<<"new_tuple="<<new_tuple<<endl;
00327       cout<<"curent_tuple="<<current_code[code_index]<<endl;
00328       cout<<"current_code"<<current_code<<endl;
00329 #endif
00330       return false;
00331     }
00332 
00333     if(current_code[code_index]<new_tuple) {
00334 #ifdef PRINT
00335       cout<<"decision at back-edge: current tuple is more minimal"<<endl;
00336       cout<<"new_tuple="<<new_tuple<<endl;
00337       cout<<"curent_tuple="<<current_code[code_index]<<endl;
00338       cout<<"current_code"<<current_code<<endl;
00339 #endif
00340       return true;
00341     }
00342     else {
00343       new_code.append(new_tuple);
00344       // no changes to new_code's rmp, nor to the cid-gid mappings since
00345       // back edge implies both vertices were already present in it
00346       return minimal(cand_pat, new_code);
00347     }
00348   }
00349   
00350   // execution gets here iff back-edge add failed
00352   // how to: find the deepest outgoing edge, and among all such edges find the 
00353   // minimal one, i.e. least edge-label + least dest-label
00354 
00355   bool fwd_found=false;
00356   rmp_it=rmp.end()-1;
00357   last_vid=*rmp_it;
00358   vector<int> new_vids; // equivalent minimal vertices whose minimality has to be checked recursively
00359   e=ed_prsr.parse_element(INF_LABEL);
00360   typename PATTERN::VERTEX_T dest_v=vt_prsr.parse_element(INF_LABEL);
00361 
00362   while(rmp_it>=rmp.begin()) {
00363     // get neighbors of current vertex
00364     eit_p=cand_pat->out_edges(new_code.gid(*rmp_it));
00365 
00366     // try to find minimal fwd edge
00367     while(eit_p.first!=eit_p.second) {
00368       if(new_code.cid(eit_p.first->first)!=-1) {
00369         // this vertex is already part of minimal code (CHECK THIS ??)
00370         eit_p.first++;
00371         continue;
00372       }
00373 
00374       if(eit_p.first->second<=e) {
00375         // minimal edge
00376         typename PATTERN::VERTEX_T curr_lbl=cand_pat->label(eit_p.first->first);
00377         if(eit_p.first->second<e) {
00378           // new minimal edge found
00379           new_vids.clear();
00380           e=eit_p.first->second;
00381           dest_v=curr_lbl;
00382         }
00383 
00384         if(curr_lbl<=dest_v) {
00385           // minimal dest label
00386           if(curr_lbl<dest_v) {
00387             new_vids.clear();
00388             dest_v=curr_lbl;
00389           }
00390           new_vids.push_back(eit_p.first->first);
00391         }
00392       }
00393       eit_p.first++;
00394     }//end while eit_p.first..
00395 
00396     if(!new_vids.empty()) {
00397       // fwd extension found at this level
00398       fwd_found=true;
00399       break;
00400     }
00401 
00402     rmp_it--;
00403     rmp.pop_back();
00404   }//end while !fwd_found..
00405 
00406   if(!fwd_found || rmp_it<rmp.begin()) {
00407     // no fwd edge extension could be found i.e. all fwd edges have been added
00408     // so this code is minimal
00409     //cout<<"Isomorphism decision at fwd-edge: all edges exhausted"<<endl;
00410     return true;
00411   }
00412 
00413   typename CAN_CODE::FIVE_TUPLE new_tuple(*rmp_it, last_vid+1, cand_pat->label(new_code.gid(*rmp_it)), e, cand_pat->label(new_vids[0]));
00414   if(new_tuple<current_code[code_index]) {
00415     // new edge is more minimal
00416 #ifdef PRINT
00417     cout<<"decision at fwd-edge: new_edge is more minimal"<<endl;
00418     cout<<"new_tuple: "<<new_tuple<<endl;
00419     cout<<"current_tuple: "<<current_code[code_index]<<endl;
00420     cout<<"current_code="<<current_code<<endl;
00421 #endif
00422     return false;
00423   }
00424 
00425   if(current_code[code_index]<new_tuple) {
00426     // current tuple is more minimal
00427 #ifdef PRINT
00428     cout<<"decision at fwd-edge: current tuple is more minimal"<<endl;
00429     cout<<"new_tuple: "<<new_tuple<<endl;
00430     cout<<"current_tuple: "<<current_code[code_index]<<endl;
00431     cout<<"current_code="<<current_code<<endl;
00432 #endif
00433     return true;
00434   }
00435 
00436   // check minimality against each new code
00437   for(unsigned int i=0; i<new_vids.size(); i++) {
00438     CAN_CODE next_code=new_code;
00439     next_code.append(new_tuple, new_code.gid(*rmp_it), new_vids[i]);
00440     next_code.append_rmp(last_vid+1);
00441     if(!minimal(cand_pat, next_code))
00442       return false;
00443   }
00444 
00445   // all codes were greater than cand_pat's code
00446 #ifdef PRINT
00447   cout<<"decision at fwd-edge: this code more minimal than all codes"<<endl;
00448 #endif
00449   return true;
00450 
00451 }//end minimal()
00452 
00453 #endif

Generated on Wed Jul 26 14:01:08 2006 for DMTL by  doxygen 1.4.7