MayaChemTools

   1 package Graph::CyclesDetection;
   2 #
   3 # $RCSfile: CyclesDetection.pm,v $
   4 # $Date: 2015/02/28 20:49:06 $
   5 # $Revision: 1.27 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Graph;
  33 use Graph::Path;
  34 use Graph::PathGraph;
  35 use BitVector;
  36 
  37 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  38 
  39 @ISA = qw(Exporter);
  40 @EXPORT = qw();
  41 @EXPORT_OK = qw();
  42 
  43 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  44 
  45 # Setup class variables...
  46 my($ClassName);
  47 _InitializeClass();
  48 
  49 # Overload Perl functions...
  50 use overload '""' => 'StringifyCyclesDetection';
  51 
  52 # Class constructor...
  53 sub new {
  54   my($Class, $Graph) = @_;
  55 
  56   # Initialize object...
  57   my $This = {};
  58   bless $This, ref($Class) || $Class;
  59   $This->_InitializeCyclesDetection($Graph);
  60 
  61   return $This;
  62 }
  63 
  64 # Initialize object data...
  65 sub _InitializeCyclesDetection {
  66   my($This, $Graph) = @_;
  67 
  68   $This->{Graph} = $Graph;
  69 
  70   # Cycles list...
  71   @{$This->{AllCyclicPaths}} = ();
  72 
  73   # Cyclic paths which are not part of any other cycle...
  74   @{$This->{IndependentCyclicPaths}} = ();
  75 
  76   return $This;
  77 }
  78 
  79 # Initialize class ...
  80 sub _InitializeClass {
  81   #Class name...
  82   $ClassName = __PACKAGE__;
  83 }
  84 
  85 # Detect all cycles in graph...
  86 #
  87 sub DetectCycles {
  88   my($This) = @_;
  89 
  90   return $This->DetectCyclesUsingCollapsingPathGraphMethodology();
  91 }
  92 
  93 # Detect all cycles in the graph using collapsing path graph [Ref 31] methodology...
  94 #
  95 # Note:
  96 #   . For topologically complex graphs containing large number of cycles,
  97 #     CollapseVertexAndCollectCyclicPathsDetectCycles method implemented in
  98 #     PathGraph can time out in which case no cycles are detected or
  99 #     assigned.
 100 #
 101 sub DetectCyclesUsingCollapsingPathGraphMethodology {
 102   my($This) = @_;
 103   my($PathGraph);
 104 
 105   # Create a path graph...
 106   $PathGraph = new Graph::PathGraph($This->{Graph});
 107 
 108   # For a vertex to be in a cycle, its degree must be >=2. So delete vertices recursively
 109   # till all vertices with degree less than 2 have been deleted...
 110   $PathGraph->DeleteVerticesWithDegreeLessThan(2);
 111 
 112   # Setup a VertexID and EdgeID to index map usage during retrieval of independent cycles to
 113   # avoid going over all vertices in all cylic paths later...
 114   #
 115   my($VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex);
 116   ($VertexIDsToIndicesRef, $LargestVertexIndex) = $This->_SetupVertexIDsToIndicesMap($PathGraph);
 117   ($EdgeIDsToIndicesRef, $LargestEdgeIDIndex) = $This->_SetupEdgeIDsToIndicesMap($PathGraph);
 118 
 119   # Recursively collapse vertices with lowest degree...
 120   my($VertexID, $CycleVertexID);
 121   while ($VertexID = $PathGraph->GetVertexWithSmallestDegree()) {
 122       if (!$PathGraph->CollapseVertexAndCollectCyclicPaths($VertexID)) {
 123         # Cycles detection didn't finish...
 124         return undef;
 125       }
 126   }
 127 
 128   # Get detected cycles and save 'em sorted by size...
 129   push @{$This->{AllCyclicPaths}}, sort { $a->GetLength() <=> $b->GetLength() } $PathGraph->GetCyclicPaths();
 130 
 131   # Retrieve independent cyclic paths...
 132   return $This->_RetrieveIndependentCycles($VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex);
 133 }
 134 
 135 # Retrieve and save independent cyclic paths...
 136 #
 137 # Set of independent cycles identified using this method doesn't correspond to basis set of
 138 # rings or smallest set of smallest rings (SSSR) [ Refs 29-30 ]; instead, set of cycles identified
 139 # as independent cycles simply correspond to cycles which contain no other cycle as their
 140 # subcycles and can't be described as linear combination of smaller cycles. And it also happen
 141 # to contain all the rings in basis set of rings and SSSR. In other words, it's a superset of basis set
 142 # of cycles and SSSR. For example, six four membered cycles are identified for cubane which is one
 143 # more than the basis set of cycles.
 144 #
 145 sub _RetrieveIndependentCycles {
 146   my($This, $VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex) = @_;
 147 
 148   # Only 1 or 0 cyclic paths...
 149   if (@{$This->{AllCyclicPaths}} <= 1) {
 150     push @{$This->{IndependentCyclicPaths}}, @{$This->{AllCyclicPaths}};
 151     return $This;
 152   }
 153 
 154   # Setup bit vectors for each cyclic path with size of each bit vector corresponding to
 155   # maximum vertex index plus one...
 156   my($CyclicPath, $BitVector, @BitNums, @CyclicPathBitVectors, @CyclicPathEdgeIDsBitVectors);
 157 
 158   @CyclicPathBitVectors = (); @CyclicPathEdgeIDsBitVectors = ();
 159 
 160   # Set bits corresponding to VertexIDs EdgeIDs using their indices...
 161   for $CyclicPath (@{$This->{AllCyclicPaths}}) {
 162     $BitVector = new BitVector($LargestVertexIndex);
 163     @BitNums = map { $VertexIDsToIndicesRef->{$_} } $CyclicPath->GetVertices();
 164     $BitVector->SetBits(@BitNums);
 165     push @CyclicPathBitVectors, $BitVector;
 166 
 167     $BitVector = new BitVector($LargestEdgeIDIndex);
 168     @BitNums = map { $EdgeIDsToIndicesRef->{$_} } $This->_GetPathEdgeIDs($CyclicPath);
 169     $BitVector->SetBits(@BitNums);
 170     push @CyclicPathEdgeIDsBitVectors, $BitVector;
 171   }
 172 
 173   # First smallest cyclic path always ends up as an independent cyclic path...
 174   push @{$This->{IndependentCyclicPaths}}, $This->{AllCyclicPaths}[0];
 175 
 176   # Retrieve other independent cyclic paths...
 177   my($CurrentIndex, $PreviousIndex, $CurrentCyclicPath, $PreviousCyclicPath, $CurrentPathLength, $PreviousPathLength, $CurrentBitVector, $PreviousBitVector, $CurrentAndPreviousBitVectpor, $AllPreviousSmallerPathsBitVector, $AllPreviousSmallerPathsEdgeIDsBitVector, $CurrentEdgeIDsBitVector, $AndBitVector, %SmallerPathAlreadyAdded, %SkipPath);
 178 
 179   %SkipPath = ();
 180   %SmallerPathAlreadyAdded = ();
 181   $AllPreviousSmallerPathsBitVector = new BitVector($LargestVertexIndex);
 182   $AllPreviousSmallerPathsEdgeIDsBitVector = new BitVector($LargestEdgeIDIndex);
 183 
 184   CURRENT: for $CurrentIndex (1 .. $#{$This->{AllCyclicPaths}}) {
 185     if (exists $SkipPath{$CurrentIndex}) {
 186       next CURRENT;
 187     }
 188     $CurrentCyclicPath = $This->{AllCyclicPaths}[$CurrentIndex];
 189     $CurrentBitVector = $CyclicPathBitVectors[$CurrentIndex];
 190     $CurrentPathLength = $CurrentCyclicPath->GetLength();
 191 
 192     PREVIOUS: for $PreviousIndex (0 .. ($CurrentIndex - 1)) {
 193       if (exists $SkipPath{$PreviousIndex}) {
 194         next PREVIOUS;
 195       }
 196       $PreviousCyclicPath = $This->{AllCyclicPaths}[$PreviousIndex];
 197       $PreviousBitVector = $CyclicPathBitVectors[$PreviousIndex];
 198 
 199       # Is previous path a subset of current path?
 200       $CurrentAndPreviousBitVectpor = $PreviousBitVector &  $CurrentBitVector;
 201       if ($PreviousBitVector->GetNumOfSetBits() == $CurrentAndPreviousBitVectpor->GetNumOfSetBits()) {
 202         # Current path doesn't qualify an independent path...
 203         $SkipPath{$CurrentIndex} = 1;
 204         next CURRENT;
 205       }
 206 
 207       $PreviousPathLength = $PreviousCyclicPath->GetLength();
 208       if ($PreviousPathLength < $CurrentPathLength) {
 209         # Mark cycle vertices seen in cyclic paths with length smaller than current path...
 210         if (! exists $SmallerPathAlreadyAdded{$PreviousIndex}) {
 211           $SmallerPathAlreadyAdded{$PreviousIndex} = 1;
 212           $AllPreviousSmallerPathsBitVector = $AllPreviousSmallerPathsBitVector | $PreviousBitVector;
 213           $AllPreviousSmallerPathsEdgeIDsBitVector = $AllPreviousSmallerPathsEdgeIDsBitVector | $CyclicPathEdgeIDsBitVectors[$PreviousIndex];
 214         }
 215       }
 216     }
 217     if ($AllPreviousSmallerPathsBitVector->GetNumOfSetBits()) {
 218       # Is current path a linear combination of smaller paths?
 219       $AndBitVector = $AllPreviousSmallerPathsBitVector &  $CurrentBitVector;
 220       if ($CurrentBitVector->GetNumOfSetBits() == $AndBitVector->GetNumOfSetBits()) {
 221         # Are all edges in the current path already present in smaller paths?
 222         $CurrentEdgeIDsBitVector = $CyclicPathEdgeIDsBitVectors[$CurrentIndex];
 223         $AndBitVector = $AllPreviousSmallerPathsEdgeIDsBitVector &  $CurrentEdgeIDsBitVector;
 224 
 225         if ($CurrentEdgeIDsBitVector->GetNumOfSetBits() == $AndBitVector->GetNumOfSetBits()) {
 226           # Current path doesn't qualify an independent path...
 227           $SkipPath{$CurrentIndex} = 1;
 228           next CURRENT;
 229         }
 230       }
 231     }
 232     # It's an independent cyclic path...
 233     push @{$This->{IndependentCyclicPaths}}, $CurrentCyclicPath;
 234   }
 235   return $This;
 236 }
 237 
 238 # Setup a VertexID to index map...
 239 #
 240 sub _SetupVertexIDsToIndicesMap {
 241   my($This, $PathGraph) = @_;
 242   my($LargestVertexIndex, @VertexIDs, %VertexIDsMap);
 243 
 244   %VertexIDsMap = (); @VertexIDs = (); $LargestVertexIndex = 0;
 245 
 246   @VertexIDs = $PathGraph->GetVertices();
 247   if (! @VertexIDs) {
 248     return (\%VertexIDsMap, $LargestVertexIndex);
 249   }
 250   @VertexIDsMap{ @VertexIDs } = (0 .. $#VertexIDs);
 251   $LargestVertexIndex = scalar @VertexIDs;
 252 
 253   return (\%VertexIDsMap, $LargestVertexIndex);
 254 }
 255 
 256 # Setup a Edge to index map using paths associated to egdes in an intial
 257 # path graph...
 258 #
 259 sub _SetupEdgeIDsToIndicesMap {
 260   my($This, $PathGraph) = @_;
 261   my($Path, $LargestEdgeIndex, @EdgeIDs, %EdgeIDsMap);
 262 
 263   %EdgeIDsMap = (); @EdgeIDs = (); $LargestEdgeIndex = 0;
 264 
 265   @EdgeIDs = ();
 266   for $Path ($PathGraph->GetPaths()) {
 267     push @EdgeIDs, $This->_GetPathEdgeIDs($Path);
 268   }
 269 
 270   if (! @EdgeIDs) {
 271     return (\%EdgeIDsMap, $LargestEdgeIndex);
 272   }
 273 
 274   @EdgeIDsMap{ @EdgeIDs } = (0 .. $#EdgeIDs);
 275   $LargestEdgeIndex = scalar @EdgeIDs;
 276 
 277   return (\%EdgeIDsMap, $LargestEdgeIndex);
 278 }
 279 
 280 # Get path edge IDs or number of edges. The edge IDs are generated from
 281 # edge vertices and correpond to VertexID1-VertexID2 where VertexID1 <=
 282 # VertexID2.
 283 #
 284 sub _GetPathEdgeIDs {
 285   my($This, $Path) = @_;
 286   my(@EdgesVertexIDs, @EdgeIDs);
 287 
 288   @EdgesVertexIDs = (); @EdgeIDs = ();
 289   @EdgesVertexIDs = $Path->GetEdges();
 290   if (!@EdgesVertexIDs) {
 291     return wantarray ? @EdgeIDs : (scalar @EdgeIDs);
 292   }
 293 
 294   # Set up edge IDs...
 295   my($Index, $VertexID1, $VertexID2, $EdgeID);
 296 
 297   for ($Index = 0; $Index < $#EdgesVertexIDs; $Index += 2) {
 298     $VertexID1 = $EdgesVertexIDs[$Index]; $VertexID2 = $EdgesVertexIDs[$Index + 1];
 299     $EdgeID = ($VertexID1 <= $VertexID2) ? "$VertexID1-$VertexID2" : "$VertexID2-$VertexID1";
 300     push @EdgeIDs, $EdgeID;
 301   }
 302 
 303   return wantarray ? @EdgeIDs : (scalar @EdgeIDs);
 304 }
 305 
 306 # Return an array containing references to cyclic paths or number of cylic paths...
 307 #
 308 sub GetAllCyclicPaths {
 309   my($This) = @_;
 310 
 311   return wantarray ? @{$This->{AllCyclicPaths}} : scalar @{$This->{AllCyclicPaths}};
 312 }
 313 
 314 # Get cyclic paths which are independent. In otherwords, cycles which don't contain any other
 315 # cycle as their subset...
 316 #
 317 sub GetIndependentCyclicPaths {
 318   my($This) = @_;
 319 
 320   return wantarray ? @{$This->{IndependentCyclicPaths}} : scalar @{$This->{IndependentCyclicPaths}};
 321 }
 322 
 323 # Return a string containg data for CyclesDetection object...
 324 sub StringifyCyclesDetection {
 325   my($This) = @_;
 326   my($CyclesDetectionString, $CyclesCount, $CyclicPath);
 327 
 328   $CyclesCount = @{$This->{AllCyclicPaths}};
 329   $CyclesDetectionString = "AllCycles: Count - $CyclesCount";
 330   for $CyclicPath (@{$This->{AllCyclicPaths}}) {
 331     $CyclesDetectionString .= "; Cycle: " . join('-', $CyclicPath->GetVertices());
 332   }
 333 
 334   $CyclesCount = @{$This->{IndependentCyclicPaths}};
 335   $CyclesDetectionString .= "\nIndependentCycles: Count - $CyclesCount";
 336   for $CyclicPath (@{$This->{IndependentCyclicPaths}}) {
 337     $CyclesDetectionString .= "; Cycle: " . join('-', $CyclicPath->GetVertices());
 338   }
 339 
 340   return $CyclesDetectionString;
 341 }
 342 
 343 # Return a reference to new cycle detection object...
 344 sub Copy {
 345   my($This) = @_;
 346   my($NewCyclesDetection);
 347 
 348   $NewCyclesDetection = Storable::dclone($This);
 349 
 350   return $NewCyclesDetection;
 351 }
 352