comparison lib/Graph/CyclesDetection.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package Graph::CyclesDetection;
2 #
3 # $RCSfile: CyclesDetection.pm,v $
4 # $Date: 2015/02/28 20:49:06 $
5 # $Revision: 1.27 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Graph;
33 use Graph::Path;
34 use Graph::PathGraph;
35 use BitVector;
36
37 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
38
39 @ISA = qw(Exporter);
40 @EXPORT = qw();
41 @EXPORT_OK = qw();
42
43 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
44
45 # Setup class variables...
46 my($ClassName);
47 _InitializeClass();
48
49 # Overload Perl functions...
50 use overload '""' => 'StringifyCyclesDetection';
51
52 # Class constructor...
53 sub new {
54 my($Class, $Graph) = @_;
55
56 # Initialize object...
57 my $This = {};
58 bless $This, ref($Class) || $Class;
59 $This->_InitializeCyclesDetection($Graph);
60
61 return $This;
62 }
63
64 # Initialize object data...
65 sub _InitializeCyclesDetection {
66 my($This, $Graph) = @_;
67
68 $This->{Graph} = $Graph;
69
70 # Cycles list...
71 @{$This->{AllCyclicPaths}} = ();
72
73 # Cyclic paths which are not part of any other cycle...
74 @{$This->{IndependentCyclicPaths}} = ();
75
76 return $This;
77 }
78
79 # Initialize class ...
80 sub _InitializeClass {
81 #Class name...
82 $ClassName = __PACKAGE__;
83 }
84
85 # Detect all cycles in graph...
86 #
87 sub DetectCycles {
88 my($This) = @_;
89
90 return $This->DetectCyclesUsingCollapsingPathGraphMethodology();
91 }
92
93 # Detect all cycles in the graph using collapsing path graph [Ref 31] methodology...
94 #
95 # Note:
96 # . For topologically complex graphs containing large number of cycles,
97 # CollapseVertexAndCollectCyclicPathsDetectCycles method implemented in
98 # PathGraph can time out in which case no cycles are detected or
99 # assigned.
100 #
101 sub DetectCyclesUsingCollapsingPathGraphMethodology {
102 my($This) = @_;
103 my($PathGraph);
104
105 # Create a path graph...
106 $PathGraph = new Graph::PathGraph($This->{Graph});
107
108 # For a vertex to be in a cycle, its degree must be >=2. So delete vertices recursively
109 # till all vertices with degree less than 2 have been deleted...
110 $PathGraph->DeleteVerticesWithDegreeLessThan(2);
111
112 # Setup a VertexID and EdgeID to index map usage during retrieval of independent cycles to
113 # avoid going over all vertices in all cylic paths later...
114 #
115 my($VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex);
116 ($VertexIDsToIndicesRef, $LargestVertexIndex) = $This->_SetupVertexIDsToIndicesMap($PathGraph);
117 ($EdgeIDsToIndicesRef, $LargestEdgeIDIndex) = $This->_SetupEdgeIDsToIndicesMap($PathGraph);
118
119 # Recursively collapse vertices with lowest degree...
120 my($VertexID, $CycleVertexID);
121 while ($VertexID = $PathGraph->GetVertexWithSmallestDegree()) {
122 if (!$PathGraph->CollapseVertexAndCollectCyclicPaths($VertexID)) {
123 # Cycles detection didn't finish...
124 return undef;
125 }
126 }
127
128 # Get detected cycles and save 'em sorted by size...
129 push @{$This->{AllCyclicPaths}}, sort { $a->GetLength() <=> $b->GetLength() } $PathGraph->GetCyclicPaths();
130
131 # Retrieve independent cyclic paths...
132 return $This->_RetrieveIndependentCycles($VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex);
133 }
134
135 # Retrieve and save independent cyclic paths...
136 #
137 # Set of independent cycles identified using this method doesn't correspond to basis set of
138 # rings or smallest set of smallest rings (SSSR) [ Refs 29-30 ]; instead, set of cycles identified
139 # as independent cycles simply correspond to cycles which contain no other cycle as their
140 # subcycles and can't be described as linear combination of smaller cycles. And it also happen
141 # to contain all the rings in basis set of rings and SSSR. In other words, it's a superset of basis set
142 # of cycles and SSSR. For example, six four membered cycles are identified for cubane which is one
143 # more than the basis set of cycles.
144 #
145 sub _RetrieveIndependentCycles {
146 my($This, $VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex) = @_;
147
148 # Only 1 or 0 cyclic paths...
149 if (@{$This->{AllCyclicPaths}} <= 1) {
150 push @{$This->{IndependentCyclicPaths}}, @{$This->{AllCyclicPaths}};
151 return $This;
152 }
153
154 # Setup bit vectors for each cyclic path with size of each bit vector corresponding to
155 # maximum vertex index plus one...
156 my($CyclicPath, $BitVector, @BitNums, @CyclicPathBitVectors, @CyclicPathEdgeIDsBitVectors);
157
158 @CyclicPathBitVectors = (); @CyclicPathEdgeIDsBitVectors = ();
159
160 # Set bits corresponding to VertexIDs EdgeIDs using their indices...
161 for $CyclicPath (@{$This->{AllCyclicPaths}}) {
162 $BitVector = new BitVector($LargestVertexIndex);
163 @BitNums = map { $VertexIDsToIndicesRef->{$_} } $CyclicPath->GetVertices();
164 $BitVector->SetBits(@BitNums);
165 push @CyclicPathBitVectors, $BitVector;
166
167 $BitVector = new BitVector($LargestEdgeIDIndex);
168 @BitNums = map { $EdgeIDsToIndicesRef->{$_} } $This->_GetPathEdgeIDs($CyclicPath);
169 $BitVector->SetBits(@BitNums);
170 push @CyclicPathEdgeIDsBitVectors, $BitVector;
171 }
172
173 # First smallest cyclic path always ends up as an independent cyclic path...
174 push @{$This->{IndependentCyclicPaths}}, $This->{AllCyclicPaths}[0];
175
176 # Retrieve other independent cyclic paths...
177 my($CurrentIndex, $PreviousIndex, $CurrentCyclicPath, $PreviousCyclicPath, $CurrentPathLength, $PreviousPathLength, $CurrentBitVector, $PreviousBitVector, $CurrentAndPreviousBitVectpor, $AllPreviousSmallerPathsBitVector, $AllPreviousSmallerPathsEdgeIDsBitVector, $CurrentEdgeIDsBitVector, $AndBitVector, %SmallerPathAlreadyAdded, %SkipPath);
178
179 %SkipPath = ();
180 %SmallerPathAlreadyAdded = ();
181 $AllPreviousSmallerPathsBitVector = new BitVector($LargestVertexIndex);
182 $AllPreviousSmallerPathsEdgeIDsBitVector = new BitVector($LargestEdgeIDIndex);
183
184 CURRENT: for $CurrentIndex (1 .. $#{$This->{AllCyclicPaths}}) {
185 if (exists $SkipPath{$CurrentIndex}) {
186 next CURRENT;
187 }
188 $CurrentCyclicPath = $This->{AllCyclicPaths}[$CurrentIndex];
189 $CurrentBitVector = $CyclicPathBitVectors[$CurrentIndex];
190 $CurrentPathLength = $CurrentCyclicPath->GetLength();
191
192 PREVIOUS: for $PreviousIndex (0 .. ($CurrentIndex - 1)) {
193 if (exists $SkipPath{$PreviousIndex}) {
194 next PREVIOUS;
195 }
196 $PreviousCyclicPath = $This->{AllCyclicPaths}[$PreviousIndex];
197 $PreviousBitVector = $CyclicPathBitVectors[$PreviousIndex];
198
199 # Is previous path a subset of current path?
200 $CurrentAndPreviousBitVectpor = $PreviousBitVector & $CurrentBitVector;
201 if ($PreviousBitVector->GetNumOfSetBits() == $CurrentAndPreviousBitVectpor->GetNumOfSetBits()) {
202 # Current path doesn't qualify an independent path...
203 $SkipPath{$CurrentIndex} = 1;
204 next CURRENT;
205 }
206
207 $PreviousPathLength = $PreviousCyclicPath->GetLength();
208 if ($PreviousPathLength < $CurrentPathLength) {
209 # Mark cycle vertices seen in cyclic paths with length smaller than current path...
210 if (! exists $SmallerPathAlreadyAdded{$PreviousIndex}) {
211 $SmallerPathAlreadyAdded{$PreviousIndex} = 1;
212 $AllPreviousSmallerPathsBitVector = $AllPreviousSmallerPathsBitVector | $PreviousBitVector;
213 $AllPreviousSmallerPathsEdgeIDsBitVector = $AllPreviousSmallerPathsEdgeIDsBitVector | $CyclicPathEdgeIDsBitVectors[$PreviousIndex];
214 }
215 }
216 }
217 if ($AllPreviousSmallerPathsBitVector->GetNumOfSetBits()) {
218 # Is current path a linear combination of smaller paths?
219 $AndBitVector = $AllPreviousSmallerPathsBitVector & $CurrentBitVector;
220 if ($CurrentBitVector->GetNumOfSetBits() == $AndBitVector->GetNumOfSetBits()) {
221 # Are all edges in the current path already present in smaller paths?
222 $CurrentEdgeIDsBitVector = $CyclicPathEdgeIDsBitVectors[$CurrentIndex];
223 $AndBitVector = $AllPreviousSmallerPathsEdgeIDsBitVector & $CurrentEdgeIDsBitVector;
224
225 if ($CurrentEdgeIDsBitVector->GetNumOfSetBits() == $AndBitVector->GetNumOfSetBits()) {
226 # Current path doesn't qualify an independent path...
227 $SkipPath{$CurrentIndex} = 1;
228 next CURRENT;
229 }
230 }
231 }
232 # It's an independent cyclic path...
233 push @{$This->{IndependentCyclicPaths}}, $CurrentCyclicPath;
234 }
235 return $This;
236 }
237
238 # Setup a VertexID to index map...
239 #
240 sub _SetupVertexIDsToIndicesMap {
241 my($This, $PathGraph) = @_;
242 my($LargestVertexIndex, @VertexIDs, %VertexIDsMap);
243
244 %VertexIDsMap = (); @VertexIDs = (); $LargestVertexIndex = 0;
245
246 @VertexIDs = $PathGraph->GetVertices();
247 if (! @VertexIDs) {
248 return (\%VertexIDsMap, $LargestVertexIndex);
249 }
250 @VertexIDsMap{ @VertexIDs } = (0 .. $#VertexIDs);
251 $LargestVertexIndex = scalar @VertexIDs;
252
253 return (\%VertexIDsMap, $LargestVertexIndex);
254 }
255
256 # Setup a Edge to index map using paths associated to egdes in an intial
257 # path graph...
258 #
259 sub _SetupEdgeIDsToIndicesMap {
260 my($This, $PathGraph) = @_;
261 my($Path, $LargestEdgeIndex, @EdgeIDs, %EdgeIDsMap);
262
263 %EdgeIDsMap = (); @EdgeIDs = (); $LargestEdgeIndex = 0;
264
265 @EdgeIDs = ();
266 for $Path ($PathGraph->GetPaths()) {
267 push @EdgeIDs, $This->_GetPathEdgeIDs($Path);
268 }
269
270 if (! @EdgeIDs) {
271 return (\%EdgeIDsMap, $LargestEdgeIndex);
272 }
273
274 @EdgeIDsMap{ @EdgeIDs } = (0 .. $#EdgeIDs);
275 $LargestEdgeIndex = scalar @EdgeIDs;
276
277 return (\%EdgeIDsMap, $LargestEdgeIndex);
278 }
279
280 # Get path edge IDs or number of edges. The edge IDs are generated from
281 # edge vertices and correpond to VertexID1-VertexID2 where VertexID1 <=
282 # VertexID2.
283 #
284 sub _GetPathEdgeIDs {
285 my($This, $Path) = @_;
286 my(@EdgesVertexIDs, @EdgeIDs);
287
288 @EdgesVertexIDs = (); @EdgeIDs = ();
289 @EdgesVertexIDs = $Path->GetEdges();
290 if (!@EdgesVertexIDs) {
291 return wantarray ? @EdgeIDs : (scalar @EdgeIDs);
292 }
293
294 # Set up edge IDs...
295 my($Index, $VertexID1, $VertexID2, $EdgeID);
296
297 for ($Index = 0; $Index < $#EdgesVertexIDs; $Index += 2) {
298 $VertexID1 = $EdgesVertexIDs[$Index]; $VertexID2 = $EdgesVertexIDs[$Index + 1];
299 $EdgeID = ($VertexID1 <= $VertexID2) ? "$VertexID1-$VertexID2" : "$VertexID2-$VertexID1";
300 push @EdgeIDs, $EdgeID;
301 }
302
303 return wantarray ? @EdgeIDs : (scalar @EdgeIDs);
304 }
305
306 # Return an array containing references to cyclic paths or number of cylic paths...
307 #
308 sub GetAllCyclicPaths {
309 my($This) = @_;
310
311 return wantarray ? @{$This->{AllCyclicPaths}} : scalar @{$This->{AllCyclicPaths}};
312 }
313
314 # Get cyclic paths which are independent. In otherwords, cycles which don't contain any other
315 # cycle as their subset...
316 #
317 sub GetIndependentCyclicPaths {
318 my($This) = @_;
319
320 return wantarray ? @{$This->{IndependentCyclicPaths}} : scalar @{$This->{IndependentCyclicPaths}};
321 }
322
323 # Return a string containg data for CyclesDetection object...
324 sub StringifyCyclesDetection {
325 my($This) = @_;
326 my($CyclesDetectionString, $CyclesCount, $CyclicPath);
327
328 $CyclesCount = @{$This->{AllCyclicPaths}};
329 $CyclesDetectionString = "AllCycles: Count - $CyclesCount";
330 for $CyclicPath (@{$This->{AllCyclicPaths}}) {
331 $CyclesDetectionString .= "; Cycle: " . join('-', $CyclicPath->GetVertices());
332 }
333
334 $CyclesCount = @{$This->{IndependentCyclicPaths}};
335 $CyclesDetectionString .= "\nIndependentCycles: Count - $CyclesCount";
336 for $CyclicPath (@{$This->{IndependentCyclicPaths}}) {
337 $CyclesDetectionString .= "; Cycle: " . join('-', $CyclicPath->GetVertices());
338 }
339
340 return $CyclesDetectionString;
341 }
342
343 # Return a reference to new cycle detection object...
344 sub Copy {
345 my($This) = @_;
346 my($NewCyclesDetection);
347
348 $NewCyclesDetection = Storable::dclone($This);
349
350 return $NewCyclesDetection;
351 }
352
353 1;
354
355 __END__
356
357 =head1 NAME
358
359 CyclesDetection
360
361 =head1 SYNOPSIS
362
363 use Graph::CyclesDetection;
364
365 use Graph::CyclesDetection qw(:all);
366
367 =head1 DESCRIPTION
368
369 B<CyclesDetection> class provides the following methods:
370
371 new, Copy, DetectCycles, DetectCyclesUsingCollapsingPathGraphMethodology,
372 GetAllCyclicPaths, GetIndependentCyclicPaths, StringifyCyclesDetection
373
374 Cycles in a B<Graph> are detected using collapsing path graph [Ref 31]
375 methodology.
376
377 =head2 METHODS
378
379 =over 4
380
381 =item B<new>
382
383 $NewCyclesDetection = new Graph::CyclesDetection($Graph);
384
385 Using specified I<Graph>, B<new> method creates a new B<CyclesDetection> object and returns
386 newly created B<CyclesDetection> object.
387
388 =item B<Copy>
389
390 $NewCyclesDetection = $CyclesDetection->Copy();
391
392 Copies I<CyclesDetection> and its associated data using B<Storable::dclone> and returns a new
393 B<CyclesDetection> object.
394
395 =item B<DetectCycles>
396
397 $CyclesDetection->DetectCycles();
398
399 Detects all cycles in a graph and returns I<CyclesDetection>.
400
401 =item B<DetectCyclesUsingCollapsingPathGraphMethodology>
402
403 $CyclesDetection->DetectCyclesUsingCollapsingPathGraphMethodology();
404
405 Detects all cycles in a graph using collapsing path graph [Ref 31] methodology
406 and returns I<CyclesDetection>.
407
408 =item B<GetAllCyclicPaths>
409
410 @AllCyclicPaths = $CyclesDetection->GetAllCyclicPaths();
411 $NumOfAllCyclicPaths = $CyclesDetection->GetAllCyclicPaths();
412
413 Returns an array containing references to all cyclic paths identified during cycles
414 detection. In scalar text, number of cycles is returned.
415
416 =item B<GetIndependentCyclicPaths>
417
418 @IndependentCyclicPaths = $CyclesDetection->GetAllCyclicPaths();
419 $NumOfIndependentCyclicPaths = $CyclesDetection->GetAllCyclicPaths();
420
421 Returns an array containing references to independent cyclic paths identified during cycles
422 detection. In scalar text, number of cycles is returned.
423
424 A set of independent cycles identified during cycles detection doesn't correspond to the basis set of
425 rings or smallest set of smallest rings (SSSR) [ Refs 29-30 ]; instead, set of cycles indentified
426 as independent cycles simply correpond to cycles which contain no other cycle as their
427 subcycles and can't be described as a linear combination of smaller cycles. And it also happens
428 to contain all the rings in basis set of rings and SSSR. In other words, it's a superset of a basis set
429 of cycles and SSSR. For example, six four membered cycles are indentified for cubane, which is one
430 more than the basis set of cycles.
431
432 =item B<StringifyCyclesDetection>
433
434 $String = $CyclesDetection->StringifyCyclesDetection();
435
436 Returns a string containing information about I<CyclesDetection> object.
437
438 =back
439
440 =head1 AUTHOR
441
442 Manish Sud <msud@san.rr.com>
443
444 =head1 SEE ALSO
445
446 Graph.pm, Path.pm, PathGraph.pm
447
448 =head1 COPYRIGHT
449
450 Copyright (C) 2015 Manish Sud. All rights reserved.
451
452 This file is part of MayaChemTools.
453
454 MayaChemTools is free software; you can redistribute it and/or modify it under
455 the terms of the GNU Lesser General Public License as published by the Free
456 Software Foundation; either version 3 of the License, or (at your option)
457 any later version.
458
459 =cut