Mercurial > repos > deepakjadmin > mayatool3_test2
diff lib/Graph/CyclesDetection.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/Graph/CyclesDetection.pm Wed Jan 20 09:23:18 2016 -0500 @@ -0,0 +1,459 @@ +package Graph::CyclesDetection; +# +# $RCSfile: CyclesDetection.pm,v $ +# $Date: 2015/02/28 20:49:06 $ +# $Revision: 1.27 $ +# +# Author: Manish Sud <msud@san.rr.com> +# +# Copyright (C) 2015 Manish Sud. All rights reserved. +# +# This file is part of MayaChemTools. +# +# MayaChemTools is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation; either version 3 of the License, or (at your option) any +# later version. +# +# MayaChemTools is distributed in the hope that it will be useful, but without +# any warranty; without even the implied warranty of merchantability of fitness +# for a particular purpose. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or +# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, +# Boston, MA, 02111-1307, USA. +# + +use strict; +use Carp; +use Exporter; +use Graph; +use Graph::Path; +use Graph::PathGraph; +use BitVector; + +use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + +@ISA = qw(Exporter); +@EXPORT = qw(); +@EXPORT_OK = qw(); + +%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); + +# Setup class variables... +my($ClassName); +_InitializeClass(); + +# Overload Perl functions... +use overload '""' => 'StringifyCyclesDetection'; + +# Class constructor... +sub new { + my($Class, $Graph) = @_; + + # Initialize object... + my $This = {}; + bless $This, ref($Class) || $Class; + $This->_InitializeCyclesDetection($Graph); + + return $This; +} + +# Initialize object data... +sub _InitializeCyclesDetection { + my($This, $Graph) = @_; + + $This->{Graph} = $Graph; + + # Cycles list... + @{$This->{AllCyclicPaths}} = (); + + # Cyclic paths which are not part of any other cycle... + @{$This->{IndependentCyclicPaths}} = (); + + return $This; +} + +# Initialize class ... +sub _InitializeClass { + #Class name... + $ClassName = __PACKAGE__; +} + +# Detect all cycles in graph... +# +sub DetectCycles { + my($This) = @_; + + return $This->DetectCyclesUsingCollapsingPathGraphMethodology(); +} + +# Detect all cycles in the graph using collapsing path graph [Ref 31] methodology... +# +# Note: +# . For topologically complex graphs containing large number of cycles, +# CollapseVertexAndCollectCyclicPathsDetectCycles method implemented in +# PathGraph can time out in which case no cycles are detected or +# assigned. +# +sub DetectCyclesUsingCollapsingPathGraphMethodology { + my($This) = @_; + my($PathGraph); + + # Create a path graph... + $PathGraph = new Graph::PathGraph($This->{Graph}); + + # For a vertex to be in a cycle, its degree must be >=2. So delete vertices recursively + # till all vertices with degree less than 2 have been deleted... + $PathGraph->DeleteVerticesWithDegreeLessThan(2); + + # Setup a VertexID and EdgeID to index map usage during retrieval of independent cycles to + # avoid going over all vertices in all cylic paths later... + # + my($VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex); + ($VertexIDsToIndicesRef, $LargestVertexIndex) = $This->_SetupVertexIDsToIndicesMap($PathGraph); + ($EdgeIDsToIndicesRef, $LargestEdgeIDIndex) = $This->_SetupEdgeIDsToIndicesMap($PathGraph); + + # Recursively collapse vertices with lowest degree... + my($VertexID, $CycleVertexID); + while ($VertexID = $PathGraph->GetVertexWithSmallestDegree()) { + if (!$PathGraph->CollapseVertexAndCollectCyclicPaths($VertexID)) { + # Cycles detection didn't finish... + return undef; + } + } + + # Get detected cycles and save 'em sorted by size... + push @{$This->{AllCyclicPaths}}, sort { $a->GetLength() <=> $b->GetLength() } $PathGraph->GetCyclicPaths(); + + # Retrieve independent cyclic paths... + return $This->_RetrieveIndependentCycles($VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex); +} + +# Retrieve and save independent cyclic paths... +# +# Set of independent cycles identified using this method doesn't correspond to basis set of +# rings or smallest set of smallest rings (SSSR) [ Refs 29-30 ]; instead, set of cycles identified +# as independent cycles simply correspond to cycles which contain no other cycle as their +# subcycles and can't be described as linear combination of smaller cycles. And it also happen +# to contain all the rings in basis set of rings and SSSR. In other words, it's a superset of basis set +# of cycles and SSSR. For example, six four membered cycles are identified for cubane which is one +# more than the basis set of cycles. +# +sub _RetrieveIndependentCycles { + my($This, $VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex) = @_; + + # Only 1 or 0 cyclic paths... + if (@{$This->{AllCyclicPaths}} <= 1) { + push @{$This->{IndependentCyclicPaths}}, @{$This->{AllCyclicPaths}}; + return $This; + } + + # Setup bit vectors for each cyclic path with size of each bit vector corresponding to + # maximum vertex index plus one... + my($CyclicPath, $BitVector, @BitNums, @CyclicPathBitVectors, @CyclicPathEdgeIDsBitVectors); + + @CyclicPathBitVectors = (); @CyclicPathEdgeIDsBitVectors = (); + + # Set bits corresponding to VertexIDs EdgeIDs using their indices... + for $CyclicPath (@{$This->{AllCyclicPaths}}) { + $BitVector = new BitVector($LargestVertexIndex); + @BitNums = map { $VertexIDsToIndicesRef->{$_} } $CyclicPath->GetVertices(); + $BitVector->SetBits(@BitNums); + push @CyclicPathBitVectors, $BitVector; + + $BitVector = new BitVector($LargestEdgeIDIndex); + @BitNums = map { $EdgeIDsToIndicesRef->{$_} } $This->_GetPathEdgeIDs($CyclicPath); + $BitVector->SetBits(@BitNums); + push @CyclicPathEdgeIDsBitVectors, $BitVector; + } + + # First smallest cyclic path always ends up as an independent cyclic path... + push @{$This->{IndependentCyclicPaths}}, $This->{AllCyclicPaths}[0]; + + # Retrieve other independent cyclic paths... + my($CurrentIndex, $PreviousIndex, $CurrentCyclicPath, $PreviousCyclicPath, $CurrentPathLength, $PreviousPathLength, $CurrentBitVector, $PreviousBitVector, $CurrentAndPreviousBitVectpor, $AllPreviousSmallerPathsBitVector, $AllPreviousSmallerPathsEdgeIDsBitVector, $CurrentEdgeIDsBitVector, $AndBitVector, %SmallerPathAlreadyAdded, %SkipPath); + + %SkipPath = (); + %SmallerPathAlreadyAdded = (); + $AllPreviousSmallerPathsBitVector = new BitVector($LargestVertexIndex); + $AllPreviousSmallerPathsEdgeIDsBitVector = new BitVector($LargestEdgeIDIndex); + + CURRENT: for $CurrentIndex (1 .. $#{$This->{AllCyclicPaths}}) { + if (exists $SkipPath{$CurrentIndex}) { + next CURRENT; + } + $CurrentCyclicPath = $This->{AllCyclicPaths}[$CurrentIndex]; + $CurrentBitVector = $CyclicPathBitVectors[$CurrentIndex]; + $CurrentPathLength = $CurrentCyclicPath->GetLength(); + + PREVIOUS: for $PreviousIndex (0 .. ($CurrentIndex - 1)) { + if (exists $SkipPath{$PreviousIndex}) { + next PREVIOUS; + } + $PreviousCyclicPath = $This->{AllCyclicPaths}[$PreviousIndex]; + $PreviousBitVector = $CyclicPathBitVectors[$PreviousIndex]; + + # Is previous path a subset of current path? + $CurrentAndPreviousBitVectpor = $PreviousBitVector & $CurrentBitVector; + if ($PreviousBitVector->GetNumOfSetBits() == $CurrentAndPreviousBitVectpor->GetNumOfSetBits()) { + # Current path doesn't qualify an independent path... + $SkipPath{$CurrentIndex} = 1; + next CURRENT; + } + + $PreviousPathLength = $PreviousCyclicPath->GetLength(); + if ($PreviousPathLength < $CurrentPathLength) { + # Mark cycle vertices seen in cyclic paths with length smaller than current path... + if (! exists $SmallerPathAlreadyAdded{$PreviousIndex}) { + $SmallerPathAlreadyAdded{$PreviousIndex} = 1; + $AllPreviousSmallerPathsBitVector = $AllPreviousSmallerPathsBitVector | $PreviousBitVector; + $AllPreviousSmallerPathsEdgeIDsBitVector = $AllPreviousSmallerPathsEdgeIDsBitVector | $CyclicPathEdgeIDsBitVectors[$PreviousIndex]; + } + } + } + if ($AllPreviousSmallerPathsBitVector->GetNumOfSetBits()) { + # Is current path a linear combination of smaller paths? + $AndBitVector = $AllPreviousSmallerPathsBitVector & $CurrentBitVector; + if ($CurrentBitVector->GetNumOfSetBits() == $AndBitVector->GetNumOfSetBits()) { + # Are all edges in the current path already present in smaller paths? + $CurrentEdgeIDsBitVector = $CyclicPathEdgeIDsBitVectors[$CurrentIndex]; + $AndBitVector = $AllPreviousSmallerPathsEdgeIDsBitVector & $CurrentEdgeIDsBitVector; + + if ($CurrentEdgeIDsBitVector->GetNumOfSetBits() == $AndBitVector->GetNumOfSetBits()) { + # Current path doesn't qualify an independent path... + $SkipPath{$CurrentIndex} = 1; + next CURRENT; + } + } + } + # It's an independent cyclic path... + push @{$This->{IndependentCyclicPaths}}, $CurrentCyclicPath; + } + return $This; +} + +# Setup a VertexID to index map... +# +sub _SetupVertexIDsToIndicesMap { + my($This, $PathGraph) = @_; + my($LargestVertexIndex, @VertexIDs, %VertexIDsMap); + + %VertexIDsMap = (); @VertexIDs = (); $LargestVertexIndex = 0; + + @VertexIDs = $PathGraph->GetVertices(); + if (! @VertexIDs) { + return (\%VertexIDsMap, $LargestVertexIndex); + } + @VertexIDsMap{ @VertexIDs } = (0 .. $#VertexIDs); + $LargestVertexIndex = scalar @VertexIDs; + + return (\%VertexIDsMap, $LargestVertexIndex); +} + +# Setup a Edge to index map using paths associated to egdes in an intial +# path graph... +# +sub _SetupEdgeIDsToIndicesMap { + my($This, $PathGraph) = @_; + my($Path, $LargestEdgeIndex, @EdgeIDs, %EdgeIDsMap); + + %EdgeIDsMap = (); @EdgeIDs = (); $LargestEdgeIndex = 0; + + @EdgeIDs = (); + for $Path ($PathGraph->GetPaths()) { + push @EdgeIDs, $This->_GetPathEdgeIDs($Path); + } + + if (! @EdgeIDs) { + return (\%EdgeIDsMap, $LargestEdgeIndex); + } + + @EdgeIDsMap{ @EdgeIDs } = (0 .. $#EdgeIDs); + $LargestEdgeIndex = scalar @EdgeIDs; + + return (\%EdgeIDsMap, $LargestEdgeIndex); +} + +# Get path edge IDs or number of edges. The edge IDs are generated from +# edge vertices and correpond to VertexID1-VertexID2 where VertexID1 <= +# VertexID2. +# +sub _GetPathEdgeIDs { + my($This, $Path) = @_; + my(@EdgesVertexIDs, @EdgeIDs); + + @EdgesVertexIDs = (); @EdgeIDs = (); + @EdgesVertexIDs = $Path->GetEdges(); + if (!@EdgesVertexIDs) { + return wantarray ? @EdgeIDs : (scalar @EdgeIDs); + } + + # Set up edge IDs... + my($Index, $VertexID1, $VertexID2, $EdgeID); + + for ($Index = 0; $Index < $#EdgesVertexIDs; $Index += 2) { + $VertexID1 = $EdgesVertexIDs[$Index]; $VertexID2 = $EdgesVertexIDs[$Index + 1]; + $EdgeID = ($VertexID1 <= $VertexID2) ? "$VertexID1-$VertexID2" : "$VertexID2-$VertexID1"; + push @EdgeIDs, $EdgeID; + } + + return wantarray ? @EdgeIDs : (scalar @EdgeIDs); +} + +# Return an array containing references to cyclic paths or number of cylic paths... +# +sub GetAllCyclicPaths { + my($This) = @_; + + return wantarray ? @{$This->{AllCyclicPaths}} : scalar @{$This->{AllCyclicPaths}}; +} + +# Get cyclic paths which are independent. In otherwords, cycles which don't contain any other +# cycle as their subset... +# +sub GetIndependentCyclicPaths { + my($This) = @_; + + return wantarray ? @{$This->{IndependentCyclicPaths}} : scalar @{$This->{IndependentCyclicPaths}}; +} + +# Return a string containg data for CyclesDetection object... +sub StringifyCyclesDetection { + my($This) = @_; + my($CyclesDetectionString, $CyclesCount, $CyclicPath); + + $CyclesCount = @{$This->{AllCyclicPaths}}; + $CyclesDetectionString = "AllCycles: Count - $CyclesCount"; + for $CyclicPath (@{$This->{AllCyclicPaths}}) { + $CyclesDetectionString .= "; Cycle: " . join('-', $CyclicPath->GetVertices()); + } + + $CyclesCount = @{$This->{IndependentCyclicPaths}}; + $CyclesDetectionString .= "\nIndependentCycles: Count - $CyclesCount"; + for $CyclicPath (@{$This->{IndependentCyclicPaths}}) { + $CyclesDetectionString .= "; Cycle: " . join('-', $CyclicPath->GetVertices()); + } + + return $CyclesDetectionString; +} + +# Return a reference to new cycle detection object... +sub Copy { + my($This) = @_; + my($NewCyclesDetection); + + $NewCyclesDetection = Storable::dclone($This); + + return $NewCyclesDetection; +} + +1; + +__END__ + +=head1 NAME + +CyclesDetection + +=head1 SYNOPSIS + +use Graph::CyclesDetection; + +use Graph::CyclesDetection qw(:all); + +=head1 DESCRIPTION + +B<CyclesDetection> class provides the following methods: + +new, Copy, DetectCycles, DetectCyclesUsingCollapsingPathGraphMethodology, +GetAllCyclicPaths, GetIndependentCyclicPaths, StringifyCyclesDetection + +Cycles in a B<Graph> are detected using collapsing path graph [Ref 31] +methodology. + +=head2 METHODS + +=over 4 + +=item B<new> + + $NewCyclesDetection = new Graph::CyclesDetection($Graph); + +Using specified I<Graph>, B<new> method creates a new B<CyclesDetection> object and returns +newly created B<CyclesDetection> object. + +=item B<Copy> + + $NewCyclesDetection = $CyclesDetection->Copy(); + +Copies I<CyclesDetection> and its associated data using B<Storable::dclone> and returns a new +B<CyclesDetection> object. + +=item B<DetectCycles> + + $CyclesDetection->DetectCycles(); + +Detects all cycles in a graph and returns I<CyclesDetection>. + +=item B<DetectCyclesUsingCollapsingPathGraphMethodology> + + $CyclesDetection->DetectCyclesUsingCollapsingPathGraphMethodology(); + +Detects all cycles in a graph using collapsing path graph [Ref 31] methodology +and returns I<CyclesDetection>. + +=item B<GetAllCyclicPaths> + + @AllCyclicPaths = $CyclesDetection->GetAllCyclicPaths(); + $NumOfAllCyclicPaths = $CyclesDetection->GetAllCyclicPaths(); + +Returns an array containing references to all cyclic paths identified during cycles +detection. In scalar text, number of cycles is returned. + +=item B<GetIndependentCyclicPaths> + + @IndependentCyclicPaths = $CyclesDetection->GetAllCyclicPaths(); + $NumOfIndependentCyclicPaths = $CyclesDetection->GetAllCyclicPaths(); + +Returns an array containing references to independent cyclic paths identified during cycles +detection. In scalar text, number of cycles is returned. + +A set of independent cycles identified during cycles detection doesn't correspond to the basis set of +rings or smallest set of smallest rings (SSSR) [ Refs 29-30 ]; instead, set of cycles indentified +as independent cycles simply correpond to cycles which contain no other cycle as their +subcycles and can't be described as a linear combination of smaller cycles. And it also happens +to contain all the rings in basis set of rings and SSSR. In other words, it's a superset of a basis set +of cycles and SSSR. For example, six four membered cycles are indentified for cubane, which is one +more than the basis set of cycles. + +=item B<StringifyCyclesDetection> + + $String = $CyclesDetection->StringifyCyclesDetection(); + +Returns a string containing information about I<CyclesDetection> object. + +=back + +=head1 AUTHOR + +Manish Sud <msud@san.rr.com> + +=head1 SEE ALSO + +Graph.pm, Path.pm, PathGraph.pm + +=head1 COPYRIGHT + +Copyright (C) 2015 Manish Sud. All rights reserved. + +This file is part of MayaChemTools. + +MayaChemTools is free software; you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. + +=cut