| 1 | 1 <html> | 
|  | 2 <head> <!-- Document metadata for the syntax-highlighted SequenceFileUtil.pm listing. --> | 
|  | 3 <meta charset="utf-8"> <!-- Declared ahead of the title so the encoding is known before any text is parsed. --> | 
|  | 4 <title>MayaChemTools:Code:SequenceFileUtil.pm</title> | 
|  | 5 <link rel="stylesheet" href="../../../css/MayaChemToolsCode.css"> | 
|  | 6 </head> | 
|  | 7 <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10"> | 
|  | 8 <br/> | 
|  | 9 <center> <!-- Logo banner: the image links back to the MayaChemTools home page. NOTE(review): the center element and the border attribute are obsolete presentational markup; moving them to CSS needs the external stylesheet, which is not visible here. --> | 
|  | 10 <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a> | 
|  | 11 </center> | 
|  | 12 <br/> | 
|  | 13 <pre> | 
|  | 14 <a name="package-SequenceFileUtil-"></a>   1 <span class="k">package </span><span class="i">SequenceFileUtil</span><span class="sc">;</span> | 
|  | 15    2 <span class="c">#</span> | 
|  | 16    3 <span class="c"># $RCSfile: SequenceFileUtil.pm,v $</span> | 
|  | 17    4 <span class="c"># $Date: 2015/02/28 20:47:18 $</span> | 
|  | 18    5 <span class="c"># $Revision: 1.33 $</span> | 
|  | 19    6 <span class="c">#</span> | 
|  | 20    7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span> | 
|  | 21    8 <span class="c">#</span> | 
|  | 22    9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span> | 
|  | 23   10 <span class="c">#</span> | 
|  | 24   11 <span class="c"># This file is part of MayaChemTools.</span> | 
|  | 25   12 <span class="c">#</span> | 
|  | 26   13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span> | 
|  | 27   14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span> | 
|  | 28   15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span> | 
|  | 29   16 <span class="c"># later version.</span> | 
|  | 30   17 <span class="c">#</span> | 
|  | 31   18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span> | 
|  | 32   19 <span class="c"># any warranty; without even the implied warranty of merchantability of fitness</span> | 
|  | 33   20 <span class="c"># for a particular purpose.  See the GNU Lesser General Public License for more</span> | 
|  | 34   21 <span class="c"># details.</span> | 
|  | 35   22 <span class="c">#</span> | 
|  | 36   23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span> | 
|  | 37   24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span> | 
|  | 38   25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span> | 
|  | 39   26 <span class="c"># Boston, MA, 02111-1307, USA.</span> | 
|  | 40   27 <span class="c">#</span> | 
|  | 41   28 | 
|  | 42   29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span> | 
|  | 43   30 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span> | 
|  | 44   31 <span class="k">use</span> <span class="w">Text::ParseWords</span><span class="sc">;</span> | 
|  | 45   32 <span class="k">use</span> <span class="w">TextUtil</span><span class="sc">;</span> | 
|  | 46   33 <span class="k">use</span> <span class="w">FileUtil</span><span class="sc">;</span> | 
|  | 47   34 | 
|  | 48   35 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span> | 
|  | 49   36 | 
|  | 50   37 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span> | 
|  | 51   38 <span class="i">@EXPORT</span> = <span class="q">qw(AreSequenceLengthsIdentical CalcuatePercentSequenceIdentity CalculatePercentSequenceIdentityMatrix GetLongestSequence GetShortestSequence GetSequenceLength IsGapResidue IsSupportedSequenceFile IsClustalWSequenceFile IsPearsonFastaSequenceFile IsMSFSequenceFile ReadSequenceFile RemoveSequenceGaps RemoveSequenceAlignmentGapColumns WritePearsonFastaSequenceFile)</span><span class="sc">;</span> | 
|  | 52   39 <span class="i">@EXPORT_OK</span> = <span class="q">qw()</span><span class="sc">;</span> | 
|  | 53   40 | 
|  | 54   41 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span>  <span class="cm">=></span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span> | 
|  | 55   42 | 
|  | 56   43 <span class="c"># Compare lengths of all sequences...</span> | 
|  | 57 <a name="AreSequenceLengthsIdentical-"></a>  44 <span class="k">sub </span><span class="m">AreSequenceLengthsIdentical</span> <span class="s">{</span> <span class="c"># Returns 1 when all sequences listed in {IDs} have equal length, 0 otherwise.</span> | 
|  | 58   45   <span class="k">my</span><span class="s">(</span><span class="i">$SequencesDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> <span class="c"># Hash reference read through its {IDs} and {Sequence}{$ID} entries.</span> | 
|  | 59   46   <span class="k">my</span><span class="s">(</span><span class="i">$Status</span><span class="cm">,</span> <span class="i">$ID</span><span class="cm">,</span> <span class="i">$FirstID</span><span class="cm">,</span> <span class="i">$FirstSeqLen</span><span class="cm">,</span> <span class="i">$SeqLen</span><span class="s">)</span><span class="sc">;</span> | 
|  | 60   47 | 
|  | 61   48   <span class="i">$Status</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 62   49   <span class="c"># undef, rather than '', marks "no baseline yet" so that a false ID such as '0' still works.</span> | 
|  | 63   50   <span class="i">$FirstID</span> = <span class="k">undef</span><span class="sc">;</span> | 
|  | 64   51 | 
|  | 65   52   <span class="j">ID:</span> <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequencesDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 66   53     <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span><span class="s">(</span><span class="i">$FirstID</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> <span class="c"># The first ID supplies the reference length.</span> | 
|  | 67   54       <span class="i">$FirstID</span> = <span class="i">$ID</span><span class="sc">;</span> | 
|  | 68   55       <span class="i">$FirstSeqLen</span> = <span class="k">length</span><span class="s">(</span><span class="i">$SequencesDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 69   56       <span class="k">next</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 70   57     <span class="s">}</span> | 
|  | 71   58     <span class="i">$SeqLen</span> = <span class="k">length</span><span class="s">(</span><span class="i">$SequencesDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 72   59     <span class="k">if</span> <span class="s">(</span><span class="i">$SeqLen</span> != <span class="i">$FirstSeqLen</span><span class="s">)</span> <span class="s">{</span> | 
|  | 73   60       <span class="i">$Status</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 74   61       <span class="c"># One mismatch settles the answer; stop scanning.</span> | 
|  | 75   62       <span class="k">last</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 76   63     <span class="s">}</span> | 
|  | 77   64   <span class="s">}</span> | 
|  | 78   65   <span class="k">return</span> <span class="s">(</span><span class="i">$Status</span><span class="s">)</span><span class="sc">;</span> | 
|  | 79   66 <span class="s">}</span> | 
|  | 80   67 | 
|  | 81   68 <span class="c"># Calculate percent identity between two sequences. By default, gaps are ignored.</span> | 
|  | 82 <a name="CalcuatePercentSequenceIdentity-"></a>  69 <span class="k">sub </span><span class="m">CalcuatePercentSequenceIdentity</span> <span class="s">{</span> <span class="c"># Usage: (Sequence1, Sequence2 [, IgnoreGaps [, Precision]]); returns a formatted percentage, or '' on unusable input.</span> | 
|  | 83   70   <span class="k">my</span><span class="s">(</span><span class="i">$Sequence1</span><span class="cm">,</span> <span class="i">$Sequence2</span><span class="cm">,</span> <span class="i">$PercentIdentity</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="cm">,</span> <span class="i">$Precision</span><span class="s">)</span><span class="sc">;</span> | 
|  | 84   71 | 
|  | 85   72   <span class="i">$PercentIdentity</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 86   73   <span class="i">$Precision</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 87   74   <span class="i">$IgnoreGaps</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 88   75   <span class="k">if</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">4</span><span class="s">)</span> <span class="s">{</span> | 
|  | 89   76     <span class="s">(</span><span class="i">$Sequence1</span><span class="cm">,</span> <span class="i">$Sequence2</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="cm">,</span> <span class="i">$Precision</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 90   77   <span class="s">}</span> | 
|  | 91   78   <span class="k">elsif</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> | 
|  | 92   79     <span class="s">(</span><span class="i">$Sequence1</span><span class="cm">,</span> <span class="i">$Sequence2</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 93   80   <span class="s">}</span> | 
|  | 94   81   <span class="k">elsif</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 95   82     <span class="s">(</span><span class="i">$Sequence1</span><span class="cm">,</span> <span class="i">$Sequence2</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 96   83   <span class="s">}</span> | 
|  | 97   84   <span class="k">else</span> <span class="s">{</span> | 
|  | 98   85     <span class="k">return</span> <span class="i">$PercentIdentity</span><span class="sc">;</span> <span class="c"># Any other argument count is rejected.</span> | 
|  | 99   86   <span class="s">}</span> | 
|  | 100   87   <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Sequence1</span><span class="s">)</span> && <span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Sequence2</span><span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> <span class="c"># IsNotEmpty is defined outside this listing; presumably it rejects undefined or empty strings.</span> | 
|  | 101   88     <span class="k">return</span> <span class="i">$PercentIdentity</span><span class="sc">;</span> | 
|  | 102   89   <span class="s">}</span> | 
|  | 103   90   <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span class="cm">,</span> <span class="i">$Identity</span><span class="cm">,</span> <span class="i">$Sequence1Len</span><span class="cm">,</span> <span class="i">$Sequence2Len</span><span class="cm">,</span> <span class="i">$Residue1</span><span class="cm">,</span> <span class="i">$Residue2</span><span class="cm">,</span> <span class="i">$ResMatchCount</span><span class="cm">,</span> <span class="i">$ResCount</span><span class="s">)</span><span class="sc">;</span> | 
|  | 104   91 | 
|  | 105   92   <span class="i">$Sequence1Len</span> = <span class="k">length</span><span class="s">(</span><span class="i">$Sequence1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 106   93   <span class="i">$Sequence2Len</span> = <span class="k">length</span><span class="s">(</span><span class="i">$Sequence2</span><span class="s">)</span><span class="sc">;</span> | 
|  | 107   94 | 
|  | 108   95   <span class="i">$ResMatchCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 109   96   <span class="i">$ResCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 110   97   <span class="j">RESIDUE:</span> <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. <span class="s">(</span><span class="i">$Sequence1Len</span> - <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> <span class="c"># Only positions that exist in Sequence1 are scanned.</span> | 
|  | 111   98     <span class="i">$Residue1</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Sequence1</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 112   99     <span class="i">$Residue2</span> = <span class="s">(</span><span class="i">$Index</span> &lt; <span class="i">$Sequence2Len</span><span class="s">)</span> ? <span class="k">substr</span><span class="s">(</span><span class="i">$Sequence2</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span> <span class="co">:</span> <span class="q">''</span><span class="sc">;</span> <span class="c"># '' stands in once Sequence2 has run out.</span> | 
|  | 113  100     <span class="k">if</span> <span class="s">(</span><span class="i">$IgnoreGaps</span><span class="s">)</span> <span class="s">{</span> | 
|  | 114  101       <span class="k">if</span> <span class="s">(</span><span class="i">$Residue1</span> !~ <span class="q">/[A-Z]/i</span> || <span class="i">$Residue2</span> !~ <span class="q">/[A-Z]/i</span><span class="s">)</span> <span class="s">{</span> <span class="c"># With gaps ignored, a position counts only if both residues are letters.</span> | 
|  | 115  102         <span class="k">next</span> <span class="j">RESIDUE</span><span class="sc">;</span> | 
|  | 116  103       <span class="s">}</span> | 
|  | 117  104     <span class="s">}</span> | 
|  | 118  105     <span class="k">if</span> <span class="s">(</span><span class="i">$Residue1</span> <span class="k">eq</span> <span class="i">$Residue2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 119  106       <span class="i">$ResMatchCount</span>++<span class="sc">;</span> | 
|  | 120  107     <span class="s">}</span> | 
|  | 121  108     <span class="i">$ResCount</span>++<span class="sc">;</span> | 
|  | 122  109   <span class="s">}</span> | 
|  | 123  110   <span class="i">$Identity</span> = <span class="i">$ResCount</span> ? <span class="s">(</span><span class="i">$ResMatchCount</span>/<span class="i">$ResCount</span><span class="s">)</span> <span class="co">:</span> <span class="n">0.0</span><span class="sc">;</span> <span class="c"># Guard against dividing by zero when no position was counted.</span> | 
|  | 124  111   <span class="i">$PercentIdentity</span> = <span class="k">sprintf</span><span class="s">(</span><span class="q">"%.${Precision}f"</span><span class="cm">,</span> <span class="s">(</span><span class="i">$Identity</span> * <span class="n">100</span><span class="s">)</span><span class="s">)</span><span class="sc">;</span> | 
|  | 125  112 | 
|  | 126  113   <span class="k">return</span> <span class="i">$PercentIdentity</span><span class="sc">;</span> | 
|  | 127  114 <span class="s">}</span> | 
|  | 128  115 | 
|  | 129  116 <span class="c"># Calculate pairwise identity matrix for all the sequences and return a reference</span> | 
|  | 130  117 <span class="c"># to a hash with the following keys:</span> | 
|  | 131  118 <span class="c">#</span> | 
|  | 132  119 <span class="c"># {IDs} - Sequence IDs</span> | 
|  | 133  120 <span class="c"># {Count} - Number of IDs</span> | 
|  | 134  121 <span class="c"># {PercentIdentity}{$RowID}{$ColID} - Percent identity for a pair of sequences</span> | 
|  | 135  122 <span class="c">#</span> | 
|  | 136 <a name="CalculatePercentSequenceIdentityMatrix-"></a> 123 <span class="k">sub </span><span class="m">CalculatePercentSequenceIdentityMatrix</span> <span class="s">{</span> <span class="c"># Usage: (SequencesDataRef [, IgnoreGaps [, Precision]]); returns a reference to a hash with keys IDs, Count and PercentIdentity.</span> | 
|  | 137  124   <span class="k">my</span><span class="s">(</span><span class="i">$SequencesDataRef</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="cm">,</span> <span class="i">$Precision</span><span class="cm">,</span> <span class="i">$ID</span><span class="cm">,</span> <span class="i">$RowID</span><span class="cm">,</span> <span class="i">$ColID</span><span class="cm">,</span> <span class="i">$RowIDSeq</span><span class="cm">,</span> <span class="i">$ColIDSeq</span><span class="cm">,</span> <span class="i">$PercentIdentity</span><span class="cm">,</span> <span class="i">%IdentityMatrixData</span><span class="s">)</span><span class="sc">;</span> | 
|  | 138  125 | 
|  | 139  126   <span class="i">$IgnoreGaps</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 140  127   <span class="i">$Precision</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 141  128   <span class="k">if</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> | 
|  | 142  129     <span class="s">(</span><span class="i">$SequencesDataRef</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="cm">,</span> <span class="i">$Precision</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 143  130   <span class="s">}</span> | 
|  | 144  131   <span class="k">elsif</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 145  132     <span class="s">(</span><span class="i">$SequencesDataRef</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 146  133   <span class="s">}</span> | 
|  | 147  134   <span class="k">else</span> <span class="s">{</span> | 
|  | 148  135     <span class="s">(</span><span class="i">$SequencesDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 149  136   <span class="s">}</span> | 
|  | 150  137 | 
|  | 151  138   <span class="i">%IdentityMatrixData</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 152  139   <span class="i">@</span>{<span class="i">$IdentityMatrixData</span>{<span class="w">IDs</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 153  140   <span class="i">%</span>{<span class="i">$IdentityMatrixData</span>{<span class="w">PercentIdentity</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 154  141   <span class="i">$IdentityMatrixData</span>{<span class="w">Count</span>} = <span class="n">0</span><span class="sc">;</span> | 
|  | 155  142 | 
|  | 156  143   <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequencesDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> <span class="c"># Copy the IDs in order and count them.</span> | 
|  | 157  144     <span class="k">push</span> <span class="i">@</span>{<span class="i">$IdentityMatrixData</span>{<span class="w">IDs</span>}}<span class="cm">,</span> <span class="i">$ID</span><span class="sc">;</span> | 
|  | 158  145     <span class="i">$IdentityMatrixData</span>{<span class="w">Count</span>} += <span class="n">1</span><span class="sc">;</span> | 
|  | 159  146   <span class="s">}</span> | 
|  | 160  147   <span class="c"># Initialize and calculate percent identity data values...</span> | 
|  | 161  148   <span class="k">for</span> <span class="i">$RowID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequencesDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 162  149     <span class="i">%</span>{<span class="i">$IdentityMatrixData</span>{<span class="w">PercentIdentity</span>}{<span class="i">$RowID</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 163  150     <span class="i">$RowIDSeq</span> = <span class="i">$SequencesDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$RowID</span>}<span class="sc">;</span> | 
|  | 164  151     <span class="k">for</span> <span class="i">$ColID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequencesDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 165  152       <span class="i">$IdentityMatrixData</span>{<span class="w">PercentIdentity</span>}{<span class="i">$RowID</span>}{<span class="i">$ColID</span>} = <span class="q">''</span><span class="sc">;</span> <span class="c"># The placeholder belongs under {PercentIdentity}; a top-level key per ID would pollute the result.</span> | 
|  | 166  153       <span class="i">$ColIDSeq</span> = <span class="i">$SequencesDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ColID</span>}<span class="sc">;</span> | 
|  | 167  154       <span class="i">$PercentIdentity</span> = <span class="i">CalcuatePercentSequenceIdentity</span><span class="s">(</span><span class="i">$RowIDSeq</span><span class="cm">,</span> <span class="i">$ColIDSeq</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="cm">,</span> <span class="i">$Precision</span><span class="s">)</span><span class="sc">;</span> | 
|  | 168  155       <span class="i">$IdentityMatrixData</span>{<span class="w">PercentIdentity</span>}{<span class="i">$RowID</span>}{<span class="i">$ColID</span>} = <span class="i">$PercentIdentity</span><span class="sc">;</span> | 
|  | 169  156     <span class="s">}</span> | 
|  | 170  157   <span class="s">}</span> | 
|  | 171  158   <span class="k">return</span> \<span class="i">%IdentityMatrixData</span><span class="sc">;</span> | 
|  | 172  159 <span class="s">}</span> | 
|  | 173  160 | 
|  | 174  161 <span class="c"># Retrieve information about shortest sequence...</span> | 
|  | 175 <a name="GetShortestSequence-"></a> 162 <span class="k">sub </span><span class="m">GetShortestSequence</span> <span class="s">{</span> | 
|  | 176  163   <span class="c"># Returns the four values that _GetShortestOrLongestSequence yields in 'Shortest' mode.</span> | 
|  | 177  164   <span class="k">my</span><span class="s">(</span><span class="i">$SequencesDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 178  165   <span class="k">my</span><span class="s">(</span><span class="i">$IgnoreGaps</span><span class="cm">,</span> <span class="i">@SeqInfo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 179  166 | 
|  | 180  167   <span class="c"># Gaps are ignored unless exactly two arguments arrive; the second one is then the flag.</span> | 
|  | 181  168   <span class="i">$IgnoreGaps</span> = <span class="s">(</span><span class="i">@_</span> == <span class="n">2</span><span class="s">)</span> ? <span class="i">$_</span>[<span class="n">1</span>] <span class="co">:</span> <span class="n">1</span><span class="sc">;</span> | 
|  | 182  169 | 
|  | 183  170   <span class="c"># The search itself is delegated to the private helper.</span> | 
|  | 184  171   <span class="i">@SeqInfo</span> = <span class="i">_GetShortestOrLongestSequence</span><span class="s">(</span><span class="i">$SequencesDataRef</span><span class="cm">,</span> <span class="q">'Shortest'</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="s">)</span><span class="sc">;</span> | 
|  | 185  172 | 
|  | 186  173   <span class="c"># Pass on exactly four values (presumably ID, sequence, length and description), in order.</span> | 
|  | 187  174   <span class="k">return</span> <span class="i">@SeqInfo</span>[<span class="n">0</span> .. <span class="n">3</span>]<span class="sc">;</span> | 
|  | 188  175 <span class="s">}</span> | 
|  | 189  176 | 
|  | 190  177 <span class="c"># Retrieve information about longest sequence..</span> | 
|  | 191 <a name="GetLongestSequence-"></a> 178 <span class="k">sub </span><span class="m">GetLongestSequence</span> <span class="s">{</span> | 
|  | 192  179   <span class="c"># Returns the four values that _GetShortestOrLongestSequence yields in 'Longest' mode.</span> | 
|  | 193  180   <span class="k">my</span><span class="s">(</span><span class="i">$SequencesDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 194  181   <span class="k">my</span><span class="s">(</span><span class="i">$IgnoreGaps</span><span class="cm">,</span> <span class="i">@SeqInfo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 195  182 | 
|  | 196  183   <span class="c"># Gaps are ignored unless exactly two arguments arrive; the second one is then the flag.</span> | 
|  | 197  184   <span class="i">$IgnoreGaps</span> = <span class="s">(</span><span class="i">@_</span> == <span class="n">2</span><span class="s">)</span> ? <span class="i">$_</span>[<span class="n">1</span>] <span class="co">:</span> <span class="n">1</span><span class="sc">;</span> | 
|  | 198  185 | 
|  | 199  186   <span class="c"># The search itself is delegated to the private helper.</span> | 
|  | 200  187   <span class="i">@SeqInfo</span> = <span class="i">_GetShortestOrLongestSequence</span><span class="s">(</span><span class="i">$SequencesDataRef</span><span class="cm">,</span> <span class="q">'Longest'</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="s">)</span><span class="sc">;</span> | 
|  | 201  188 | 
|  | 202  189   <span class="c"># Pass on exactly four values (presumably ID, sequence, length and description), in order.</span> | 
|  | 203  190   <span class="k">return</span> <span class="i">@SeqInfo</span>[<span class="n">0</span> .. <span class="n">3</span>]<span class="sc">;</span> | 
|  | 204  191 <span class="s">}</span> | 
|  | 205  192 | 
|  | 206  193 <span class="c"># Get sequence length...</span> | 
|  | 207 <a name="GetSequenceLength-"></a> 194 <span class="k">sub </span><span class="m">GetSequenceLength</span> <span class="s">{</span> | 
|  | 208  195   <span class="c"># Returns the length of a sequence string.</span> | 
|  | 209  196   <span class="c">#</span> | 
|  | 210  197   <span class="c"># Arguments:</span> | 
|  | 211  198   <span class="c">#   $Seq         sequence string</span> | 
|  | 212  199   <span class="c">#   $IgnoreGaps  optional flag, used only when exactly two arguments are</span> | 
|  | 213  200   <span class="c">#                passed; defaults to 1</span> | 
|  | 214  201   <span class="c">#</span> | 
|  | 215  202   <span class="c"># With the flag on only letters are counted; with it off every character is.</span> | 
|  | 216  203 | 
|  | 217  204   <span class="k">my</span><span class="s">(</span><span class="i">$Seq</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 218  205   <span class="k">my</span><span class="s">(</span><span class="i">$IgnoreGaps</span><span class="cm">,</span> <span class="i">@Residues</span><span class="cm">,</span> <span class="i">$LetterCount</span><span class="s">)</span><span class="sc">;</span> | 
|  | 219  206 | 
|  | 220  207   <span class="i">$IgnoreGaps</span> = <span class="s">(</span><span class="i">@_</span> == <span class="n">2</span><span class="s">)</span> ? <span class="i">$_</span>[<span class="n">1</span>] <span class="co">:</span> <span class="n">1</span><span class="sc">;</span> | 
|  | 221  208 | 
|  | 222  209   <span class="c"># Flag off: gaps count too, so the raw string length is the answer.</span> | 
|  | 223  210   <span class="k">if</span> <span class="s">(</span>!<span class="i">$IgnoreGaps</span><span class="s">)</span> <span class="s">{</span> | 
|  | 224  211     <span class="k">return</span> <span class="k">length</span><span class="s">(</span><span class="i">$Seq</span><span class="s">)</span><span class="sc">;</span> | 
|  | 225  212   <span class="s">}</span> | 
|  | 226  213 | 
|  | 227  214   <span class="c"># Flag on: split into single characters and count the ones that are letters.</span> | 
|  | 228  215   <span class="i">@Residues</span> = <span class="k">split</span><span class="s">(</span><span class="q">//</span><span class="cm">,</span> <span class="i">$Seq</span><span class="s">)</span><span class="sc">;</span> | 
|  | 229  216   <span class="i">$LetterCount</span> = <span class="k">grep</span> <span class="s">{</span> <span class="q">/[A-Z]/i</span> <span class="s">}</span> <span class="i">@Residues</span><span class="sc">;</span> | 
|  | 230  217 | 
|  | 231  218   <span class="k">return</span> <span class="i">$LetterCount</span><span class="sc">;</span> | 
|  | 232  219 <span class="s">}</span> | 
|  | 233  220 | 
|  | 234  221 <span class="c"># Is it a gap residue...</span> | 
|  | 235 <a name="IsGapResidue-"></a> 222 <span class="k">sub </span><span class="m">IsGapResidue</span> <span class="s">{</span> | 
|  | 236  223   <span class="c"># Returns 1 when the residue holds no letter, i.e. is treated as a gap, and 0 otherwise.</span> | 
|  | 237  224   <span class="k">my</span><span class="s">(</span><span class="i">$Residue</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 238  225 | 
|  | 239  226   <span class="c"># Any letter, in either case, rules out a gap.</span> | 
|  | 240  227   <span class="k">return</span> <span class="n">0</span> <span class="k">if</span> <span class="s">(</span><span class="i">$Residue</span> =~ <span class="q">/[A-Z]/i</span><span class="s">)</span><span class="sc">;</span> | 
|  | 241  228   <span class="k">return</span> <span class="n">1</span><span class="sc">;</span> | 
|  | 242  229 <span class="s">}</span> | 
|  | 243  230 | 
|  | 244  231 <span class="c"># Is it a supported sequence file?</span> | 
|  | 245  232 <span class="c">#</span> | 
|  | 246  233 <span class="c"># Supported sequence formats are:</span> | 
|  | 247  234 <span class="c">#</span> | 
|  | 248  235 <span class="c"># ALN/ClustalW   .aln</span> | 
|  | 249  236 <span class="c"># GCG/MSF         .msf</span> | 
|  | 250  237 <span class="c"># PILEUP/MSF     .msf</span> | 
|  | 251  238 <span class="c"># Fasta(Pearson) .fasta, .fta</span> | 
|  | 252  239 <span class="c"># NBRF/PIR         .pir</span> | 
|  | 253  240 <span class="c">#</span> | 
|  | 254 <a name="IsSupportedSequenceFile-"></a> 241 <span class="k">sub </span><span class="m">IsSupportedSequenceFile</span> <span class="s">{</span> | 
|  | 255  242   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 256  243   <span class="k">my</span><span class="s">(</span><span class="i">$Status</span><span class="cm">,</span> <span class="i">$SequenceFormat</span><span class="s">)</span><span class="sc">;</span> | 
|  | 257  244   <span class="i">$Status</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$SequenceFormat</span> = <span class="q">'NotSupported'</span><span class="sc">;</span> | 
|  | 258  245 | 
|  | 259  246   <span class="j">SEQFORMAT:</span> <span class="s">{</span> | 
|  | 260  247       <span class="k">if</span> <span class="s">(</span><span class="i">IsClustalWSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span><span class="i">$Status</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$SequenceFormat</span> = <span class="q">'ClustalW'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SEQFORMAT</span><span class="s">}</span> | 
|  | 261  248       <span class="k">if</span> <span class="s">(</span><span class="i">IsPearsonFastaSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span><span class="i">$Status</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$SequenceFormat</span> = <span class="q">'Pearson'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SEQFORMAT</span><span class="s">}</span> | 
|  | 262  249       <span class="k">if</span> <span class="s">(</span><span class="i">IsPIRFastaSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span><span class="i">$Status</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$SequenceFormat</span> = <span class="q">'PIR'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SEQFORMAT</span><span class="s">}</span> | 
|  | 263  250       <span class="k">if</span> <span class="s">(</span><span class="i">IsMSFSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span><span class="i">$Status</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$SequenceFormat</span> = <span class="q">'MSF'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SEQFORMAT</span><span class="s">}</span> | 
|  | 264  251       <span class="i">$Status</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$SequenceFormat</span> = <span class="q">'NotSupported'</span><span class="sc">;</span> | 
|  | 265  252   <span class="s">}</span> | 
|  | 266  253   <span class="k">return</span> <span class="s">(</span><span class="i">$Status</span><span class="cm">,</span> <span class="i">$SequenceFormat</span><span class="s">)</span><span class="sc">;</span> | 
|  | 267  254 <span class="s">}</span> | 
|  | 268  255 | 
|  | 269  256 <span class="c"># Is it a ClustalW multiple sequence alignment file...</span> | 
|  | 270 <a name="IsClustalWSequenceFile-"></a> 257 <span class="k">sub </span><span class="m">IsClustalWSequenceFile</span> <span class="s">{</span> | 
|  | 271  258   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 272  259   <span class="k">my</span><span class="s">(</span><span class="i">$Status</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 273  260 | 
|  | 274  261   <span class="i">$Status</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 275  262 | 
|  | 276  263   <span class="k">open</span> <span class="w">SEQUENCEFILE</span><span class="cm">,</span> <span class="q">"$SequenceFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Couldn't open $SequenceFile: $!\n"</span><span class="sc">;</span> | 
|  | 277  264   <span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*SEQUENCEFILE</span><span class="s">)</span><span class="sc">;</span> | 
|  | 278  265   <span class="i">$Status</span> = <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/(ClustalW|Clustal W|Clustal)/i</span> <span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 279  266   <span class="k">close</span> <span class="w">SEQUENCEFILE</span><span class="sc">;</span> | 
|  | 280  267 | 
|  | 281  268   <span class="k">return</span> <span class="i">$Status</span><span class="sc">;</span> | 
|  | 282  269 <span class="s">}</span> | 
|  | 283  270 | 
|  | 284  271 <span class="c"># Is it a valid Pearson fasta sequence or alignment file?</span> | 
|  | 285  272 <span class="c">#</span> | 
|  | 286 <a name="IsPearsonFastaSequenceFile-"></a> 273 <span class="k">sub </span><span class="m">IsPearsonFastaSequenceFile</span> <span class="s">{</span> | 
|  | 287  274   <span class="k">my</span><span class="s">(</span><span class="i">$FastaFile</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$Status</span><span class="s">)</span><span class="sc">;</span> | 
|  | 288  275 | 
|  | 289  276   <span class="s">(</span><span class="i">$FastaFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 290  277   <span class="i">$Status</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 291  278 | 
|  | 292  279   <span class="k">open</span> <span class="w">FASTAFILE</span><span class="cm">,</span> <span class="q">"$FastaFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Couldn't open $FastaFile: $!\n"</span><span class="sc">;</span> | 
|  | 293  280   <span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*FASTAFILE</span><span class="s">)</span><span class="sc">;</span> | 
|  | 294  281 | 
|  | 295  282   <span class="c"># First line starts with > and the fourth character is not ';'; otherwise, it's</span> | 
|  | 296  283   <span class="c"># PIR FASTA format.</span> | 
|  | 297  284   <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^>/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 298  285     <span class="k">my</span><span class="s">(</span><span class="i">$FourthChar</span><span class="s">)</span><span class="sc">;</span> | 
|  | 299  286     <span class="i">$FourthChar</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="n">3</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 300  287     <span class="i">$Status</span> = <span class="s">(</span><span class="i">$FourthChar</span> !~ <span class="q">/\;/</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 301  288   <span class="s">}</span> | 
|  | 302  289   <span class="k">close</span> <span class="w">FASTAFILE</span><span class="sc">;</span> | 
|  | 303  290 | 
|  | 304  291   <span class="k">return</span> <span class="i">$Status</span><span class="sc">;</span> | 
|  | 305  292 <span class="s">}</span> | 
|  | 306  293 | 
|  | 307  294 <span class="c"># Is it a valid NBRF/PIR fasta sequence or alignment file?</span> | 
|  | 308  295 <span class="c">#</span> | 
|  | 309 <a name="IsPIRFastaSequenceFile-"></a> 296 <span class="k">sub </span><span class="m">IsPIRFastaSequenceFile</span> <span class="s">{</span> | 
|  | 310  297   <span class="k">my</span><span class="s">(</span><span class="i">$FastaFile</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$Status</span><span class="s">)</span><span class="sc">;</span> | 
|  | 311  298 | 
|  | 312  299   <span class="s">(</span><span class="i">$FastaFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 313  300   <span class="i">$Status</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 314  301 | 
|  | 315  302   <span class="k">open</span> <span class="w">FASTAFILE</span><span class="cm">,</span> <span class="q">"$FastaFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Couldn't open $FastaFile: $!\n"</span><span class="sc">;</span> | 
|  | 316  303   <span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*FASTAFILE</span><span class="s">)</span><span class="sc">;</span> | 
|  | 317  304 | 
|  | 318  305   <span class="c"># First line starts with > and the fourth character is ';'; otherwise, it's</span> | 
|  | 319  306   <span class="c"># a Pearson FASTA format.</span> | 
|  | 320  307   <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^>/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 321  308     <span class="k">my</span><span class="s">(</span><span class="i">$FourthChar</span><span class="s">)</span><span class="sc">;</span> | 
|  | 322  309     <span class="i">$FourthChar</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="n">3</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 323  310     <span class="i">$Status</span> = <span class="s">(</span><span class="i">$FourthChar</span> =~ <span class="q">/\;/</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 324  311   <span class="s">}</span> | 
|  | 325  312   <span class="k">close</span> <span class="w">FASTAFILE</span><span class="sc">;</span> | 
|  | 326  313 | 
|  | 327  314   <span class="k">return</span> <span class="i">$Status</span><span class="sc">;</span> | 
|  | 328  315 <span class="s">}</span> | 
|  | 329  316 | 
|  | 330  317 <span class="c"># Is it a valid MSF sequence or alignment file?</span> | 
|  | 331  318 <span class="c">#</span> | 
|  | 332 <a name="IsMSFSequenceFile-"></a> 319 <span class="k">sub </span><span class="m">IsMSFSequenceFile</span> <span class="s">{</span> | 
|  | 333  320   <span class="k">my</span><span class="s">(</span><span class="i">$MSFFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 334  321 | 
|  | 335  322   <span class="k">open</span> <span class="w">MSFFILE</span><span class="cm">,</span> <span class="q">"$MSFFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Couldn't open $MSFFile: $!\n"</span><span class="sc">;</span> | 
|  | 336  323 | 
|  | 337  324   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$Status</span><span class="s">)</span><span class="sc">;</span> | 
|  | 338  325 | 
|  | 339  326   <span class="i">$Status</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 340  327   <span class="c"># Find a line that contains MSF: keyword and ends with '..'</span> | 
|  | 341  328   <span class="j">LINE:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*MSFFILE</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 342  329     <span class="i">$Line</span> = <span class="i">RemoveLeadingWhiteSpaces</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 343  330     <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/MSF:/i</span> && <span class="i">$Line</span> =~ <span class="q">/\.\.[ ]*$/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 344  331       <span class="i">$Status</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 345  332       <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 346  333     <span class="s">}</span> | 
|  | 347  334     <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/(!!AA_MULTIPLE_ALIGNMENT|!!NA_MULTIPLE_ALIGNMENT|PILEUP)/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 348  335       <span class="c"># Pileup MSF...</span> | 
|  | 349  336       <span class="i">$Status</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 350  337       <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 351  338     <span class="s">}</span> | 
|  | 352  339   <span class="s">}</span> | 
|  | 353  340   <span class="k">close</span> <span class="w">MSFFILE</span><span class="sc">;</span> | 
|  | 354  341 | 
|  | 355  342   <span class="k">return</span> <span class="i">$Status</span><span class="sc">;</span> | 
|  | 356  343 <span class="s">}</span> | 
|  | 357  344 | 
|  | 358  345 <span class="c"># Read sequence or sequence alignment file...</span> | 
|  | 359 <a name="ReadSequenceFile-"></a> 346 <span class="k">sub </span><span class="m">ReadSequenceFile</span> <span class="s">{</span> | 
|  | 360  347   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 361  348 | 
|  | 362  349   <span class="k">if</span> <span class="s">(</span><span class="i">IsPearsonFastaSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 363  350     <span class="k">return</span> <span class="i">ReadPearsonFastaSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 364  351   <span class="s">}</span> | 
|  | 365  352   <span class="k">elsif</span> <span class="s">(</span><span class="i">IsPIRFastaSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 366  353     <span class="k">return</span> <span class="i">ReadPIRFastaSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 367  354   <span class="s">}</span> | 
|  | 368  355   <span class="k">elsif</span> <span class="s">(</span><span class="i">IsMSFSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 369  356     <span class="k">return</span> <span class="i">ReadMSFSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 370  357   <span class="s">}</span> | 
|  | 371  358   <span class="k">elsif</span> <span class="s">(</span><span class="i">IsClustalWSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 372  359     <span class="k">return</span> <span class="i">ReadClustalWSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 373  360   <span class="s">}</span> | 
|  | 374  361   <span class="k">else</span> <span class="s">{</span> | 
|  | 375  362     <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span> | 
|  | 376  363   <span class="s">}</span> | 
|  | 377  364 <span class="s">}</span> | 
|  | 378  365 | 
|  | 379  366 <span class="c"># Read file and setup alignment data...</span> | 
|  | 380 <a name="ReadClustalWSequenceFile-"></a> 367 <span class="k">sub </span><span class="m">ReadClustalWSequenceFile</span> <span class="s">{</span> | 
|  | 381  368   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 382  369 | 
|  | 383  370   <span class="k">return</span> <span class="i">_ReadFileAndSetupSequencesData</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="cm">,</span> <span class="q">'ClustalW'</span><span class="s">)</span><span class="sc">;</span> | 
|  | 384  371 <span class="s">}</span> | 
|  | 385  372 | 
|  | 386  373 <span class="c"># Read file and setup alignment data...</span> | 
|  | 387 <a name="ReadPearsonFastaSequenceFile-"></a> 374 <span class="k">sub </span><span class="m">ReadPearsonFastaSequenceFile</span> <span class="s">{</span> | 
|  | 388  375   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 389  376 | 
|  | 390  377   <span class="k">return</span> <span class="i">_ReadFileAndSetupSequencesData</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="cm">,</span> <span class="q">'Pearson'</span><span class="s">)</span><span class="sc">;</span> | 
|  | 391  378 <span class="s">}</span> | 
|  | 392  379 | 
|  | 393  380 <span class="c"># Read file and setup alignment data...</span> | 
|  | 394 <a name="ReadPIRFastaSequenceFile-"></a> 381 <span class="k">sub </span><span class="m">ReadPIRFastaSequenceFile</span> <span class="s">{</span> | 
|  | 395  382   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 396  383 | 
|  | 397  384   <span class="k">return</span> <span class="i">_ReadFileAndSetupSequencesData</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="cm">,</span> <span class="q">'PIR'</span><span class="s">)</span><span class="sc">;</span> | 
|  | 398  385 <span class="s">}</span> | 
|  | 399  386 | 
|  | 400  387 | 
|  | 401  388 <span class="c"># Read file and setup sequence data...</span> | 
|  | 402 <a name="ReadMSFSequenceFile-"></a> 389 <span class="k">sub </span><span class="m">ReadMSFSequenceFile</span> <span class="s">{</span> | 
|  | 403  390   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 404  391 | 
|  | 405  392   <span class="k">return</span> <span class="i">_ReadFileAndSetupSequencesData</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="cm">,</span> <span class="q">'MSF'</span><span class="s">)</span><span class="sc">;</span> | 
|  | 406  393 <span class="s">}</span> | 
|  | 407  394 | 
|  | 408  395 <span class="c"># Write out a Pearson FASTA file...</span> | 
|  | 409 <a name="WritePearsonFastaSequenceFile-"></a> 396 <span class="k">sub </span><span class="m">WritePearsonFastaSequenceFile</span> <span class="s">{</span> | 
|  | 410  397   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFileName</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="cm">,</span> <span class="i">$MaxLength</span><span class="cm">,</span> <span class="i">$ID</span><span class="cm">,</span> <span class="i">$Description</span><span class="cm">,</span> <span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$WrappedSequence</span><span class="s">)</span><span class="sc">;</span> | 
|  | 411  398 | 
|  | 412  399   <span class="i">$MaxLength</span> = <span class="n">80</span><span class="sc">;</span> | 
|  | 413  400   <span class="k">if</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> | 
|  | 414  401     <span class="s">(</span><span class="i">$SequenceFileName</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="cm">,</span> <span class="i">$MaxLength</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 415  402   <span class="s">}</span> | 
|  | 416  403   <span class="k">elsif</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 417  404     <span class="s">(</span><span class="i">$SequenceFileName</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 418  405   <span class="s">}</span> | 
|  | 419  406   <span class="k">open</span> <span class="w">SEQUENCEFILE</span><span class="cm">,</span> <span class="q">">$SequenceFileName"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Can't open $SequenceFileName: $!\n"</span><span class="sc">;</span> | 
|  | 420  407   <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 421  408     <span class="i">$Description</span> = <span class="i">$SequenceDataRef</span>->{<span class="w">Description</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 422  409     <span class="i">$Sequence</span> = <span class="i">$SequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 423  410     <span class="i">$WrappedSequence</span> = <span class="i">WrapText</span><span class="s">(</span><span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$MaxLength</span><span class="cm">,</span> <span class="q">"\n"</span><span class="s">)</span><span class="sc">;</span> | 
|  | 424  411 | 
|  | 425  412     <span class="c"># Description also contains ID...</span> | 
|  | 426  413     <span class="k">print</span> <span class="i">SEQUENCEFILE</span> <span class="q">">$Description\n"</span><span class="sc">;</span> | 
|  | 427  414     <span class="k">print</span> <span class="i">SEQUENCEFILE</span> <span class="q">"$WrappedSequence\n"</span><span class="sc">;</span> | 
|  | 428  415   <span class="s">}</span> | 
|  | 429  416   <span class="k">close</span> <span class="w">SEQUENCEFILE</span><span class="sc">;</span> | 
|  | 430  417 <span class="s">}</span> | 
|  | 431  418 | 
|  | 432  419 <span class="c"># Get ID, Sequence and Length for shortest or longest sequence</span> | 
|  | 433 <a name="_GetShortestOrLongestSequence-"></a> 420 <span class="k">sub </span><span class="m">_GetShortestOrLongestSequence</span> <span class="s">{</span> | 
|  | 434  421   <span class="k">my</span><span class="s">(</span><span class="i">$SequencesDataRef</span><span class="cm">,</span> <span class="i">$SequenceType</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 435  422   <span class="k">my</span><span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Seq</span><span class="cm">,</span> <span class="i">$SeqLen</span><span class="cm">,</span> <span class="i">$Description</span><span class="cm">,</span> <span class="i">$FirstID</span><span class="cm">,</span> <span class="i">$FirstSeqLen</span><span class="cm">,</span> <span class="i">$CurrentID</span><span class="cm">,</span> <span class="i">$CurrentSeq</span><span class="cm">,</span> <span class="i">$CurrentSeqLen</span><span class="cm">,</span> <span class="i">$CurrentDescription</span><span class="s">)</span><span class="sc">;</span> | 
|  | 436  423 | 
|  | 437  424   <span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Seq</span><span class="cm">,</span> <span class="i">$SeqLen</span><span class="s">)</span> = <span class="s">(</span><span class="q">''</span><span class="cm">,</span> <span class="q">''</span><span class="cm">,</span> <span class="q">''</span><span class="s">)</span><span class="sc">;</span> | 
|  | 438  425   <span class="i">$FirstID</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 439  426 | 
|  | 440  427   <span class="j">ID:</span> <span class="k">for</span> <span class="i">$CurrentID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequencesDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 441  428     <span class="i">$CurrentSeq</span> = <span class="i">$IgnoreGaps</span> ? <span class="i">RemoveSequenceGaps</span><span class="s">(</span><span class="i">$SequencesDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$CurrentID</span>}<span class="s">)</span> <span class="co">:</span> <span class="i">$SequencesDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$CurrentID</span>}<span class="sc">;</span> | 
|  | 442  429     <span class="i">$CurrentSeqLen</span> = <span class="i">GetSequenceLength</span><span class="s">(</span><span class="i">$CurrentSeq</span><span class="cm">,</span> <span class="i">$IgnoreGaps</span><span class="s">)</span><span class="sc">;</span> | 
|  | 443  430     <span class="i">$CurrentDescription</span> = <span class="i">$SequencesDataRef</span>->{<span class="w">Description</span>}{<span class="i">$CurrentID</span>}<span class="sc">;</span> | 
|  | 444  431     <span class="k">if</span> <span class="s">(</span>!<span class="i">$FirstID</span><span class="s">)</span> <span class="s">{</span> | 
|  | 445  432       <span class="i">$FirstID</span> = <span class="i">$ID</span><span class="sc">;</span> <span class="i">$FirstSeqLen</span> = <span class="i">$CurrentSeqLen</span><span class="sc">;</span> | 
|  | 446  433       <span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Seq</span><span class="cm">,</span> <span class="i">$SeqLen</span><span class="cm">,</span> <span class="i">$Description</span><span class="s">)</span> = <span class="s">(</span><span class="i">$CurrentID</span><span class="cm">,</span> <span class="i">$CurrentSeq</span><span class="cm">,</span> <span class="i">$CurrentSeqLen</span><span class="cm">,</span> <span class="i">$CurrentDescription</span><span class="s">)</span><span class="sc">;</span> | 
|  | 447  434       <span class="k">next</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 448  435     <span class="s">}</span> | 
|  | 449  436     <span class="k">if</span> <span class="s">(</span><span class="i">$CurrentSeqLen</span> != <span class="i">$SeqLen</span><span class="s">)</span> <span class="s">{</span> | 
|  | 450  437       <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$SequenceType</span> =~ <span class="q">/Shortest/i</span><span class="s">)</span> && <span class="s">(</span><span class="i">$CurrentSeqLen</span> < <span class="i">$SeqLen</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 451  438         <span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Seq</span><span class="cm">,</span> <span class="i">$SeqLen</span><span class="cm">,</span> <span class="i">$Description</span><span class="s">)</span> = <span class="s">(</span><span class="i">$CurrentID</span><span class="cm">,</span> <span class="i">$CurrentSeq</span><span class="cm">,</span> <span class="i">$CurrentSeqLen</span><span class="cm">,</span> <span class="i">$CurrentDescription</span><span class="s">)</span><span class="sc">;</span> | 
|  | 452  439       <span class="s">}</span> | 
|  | 453  440       <span class="k">elsif</span> <span class="s">(</span><span class="s">(</span><span class="i">$SequenceType</span> =~ <span class="q">/Longest/i</span><span class="s">)</span> && <span class="s">(</span><span class="i">$CurrentSeqLen</span> > <span class="i">$SeqLen</span><span class="s">)</span> <span class="s">)</span> <span class="s">{</span> | 
|  | 454  441         <span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Seq</span><span class="cm">,</span> <span class="i">$SeqLen</span><span class="cm">,</span> <span class="i">$Description</span><span class="s">)</span> = <span class="s">(</span><span class="i">$CurrentID</span><span class="cm">,</span> <span class="i">$CurrentSeq</span><span class="cm">,</span> <span class="i">$CurrentSeqLen</span><span class="cm">,</span> <span class="i">$CurrentDescription</span><span class="s">)</span><span class="sc">;</span> | 
|  | 455  442       <span class="s">}</span> | 
|  | 456  443     <span class="s">}</span> | 
|  | 457  444   <span class="s">}</span> | 
|  | 458  445   <span class="k">return</span> <span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Seq</span><span class="cm">,</span> <span class="i">$SeqLen</span><span class="cm">,</span> <span class="i">$Description</span><span class="s">)</span><span class="sc">;</span> | 
|  | 459  446 <span class="s">}</span> | 
|  | 460  447 | 
|  | 461  448 <span class="c"># Remove gaps in the sequence and return new sequence...</span> | 
|  | 462 <a name="RemoveSequenceGaps-"></a> 449 <span class="k">sub </span><span class="m">RemoveSequenceGaps</span> <span class="s">{</span> | 
|  | 463  450   <span class="k">my</span><span class="s">(</span><span class="i">$Seq</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 464  451   <span class="k">my</span><span class="s">(</span><span class="i">$SeqWithoutGaps</span><span class="cm">,</span> <span class="i">$SeqLen</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="i">$Residue</span><span class="s">)</span><span class="sc">;</span> | 
|  | 465  452 | 
|  | 466  453   <span class="i">$SeqWithoutGaps</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 467  454   <span class="i">$SeqLen</span> = <span class="k">length</span><span class="s">(</span><span class="i">$Seq</span><span class="s">)</span><span class="sc">;</span> | 
|  | 468  455   <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. <span class="s">(</span><span class="i">$SeqLen</span> - <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 469  456     <span class="i">$Residue</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Seq</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 470  457     <span class="k">if</span> <span class="s">(</span><span class="i">$Residue</span> =~ <span class="q">/[A-Z]/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 471  458       <span class="i">$SeqWithoutGaps</span> .= <span class="i">$Residue</span><span class="sc">;</span> | 
|  | 472  459     <span class="s">}</span> | 
|  | 473  460   <span class="s">}</span> | 
|  | 474  461 | 
|  | 475  462   <span class="k">return</span> <span class="i">$SeqWithoutGaps</span><span class="sc">;</span> | 
|  | 476  463 <span class="s">}</span> | 
|  | 477  464 | 
|  | 478  465 <span class="c"># Using input alignment data map ref containing following keys, generate</span> | 
|  | 479  466 <span class="c"># a new hash with same set of keys after residue columns containing only</span> | 
|  | 480  467 <span class="c"># gaps have been removed:</span> | 
|  | 481  468 <span class="c">#</span> | 
|  | 482  469 <span class="c"># {IDs} : Array of IDs in order as they appear in file</span> | 
|  | 483  470 <span class="c"># {Count}: ID count...</span> | 
|  | 484  471 <span class="c"># {Description}{$ID} : Description data...</span> | 
|  | 485  472 <span class="c"># {Sequence}{$ID} : Sequence data...</span> | 
|  | 486  473 <span class="c">#</span> | 
|  | 487 <a name="RemoveSequenceAlignmentGapColumns-"></a> 474 <span class="k">sub </span><span class="m">RemoveSequenceAlignmentGapColumns</span> <span class="s">{</span> | 
|  | 488  475   <span class="k">my</span><span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$AlignmentDataMapRef</span><span class="cm">,</span> <span class="i">%NewAlignmentDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 489  476 | 
|  | 490  477   <span class="s">(</span><span class="i">$AlignmentDataMapRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 491  478 | 
|  | 492  479   <span class="i">%NewAlignmentDataMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 493  480   <span class="i">@</span>{<span class="i">$NewAlignmentDataMap</span>{<span class="w">IDs</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 494  481   <span class="i">%</span>{<span class="i">$NewAlignmentDataMap</span>{<span class="w">Description</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 495  482   <span class="i">%</span>{<span class="i">$NewAlignmentDataMap</span>{<span class="w">Sequence</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 496  483   <span class="i">$NewAlignmentDataMap</span>{<span class="w">Count</span>} = <span class="n">0</span><span class="sc">;</span> | 
|  | 497  484 | 
|  | 498  485   <span class="c"># Transfer ID and count information...</span> | 
|  | 499  486   <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$AlignmentDataMapRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 500  487     <span class="k">push</span> <span class="i">@</span>{<span class="i">$NewAlignmentDataMap</span>{<span class="w">IDs</span>}}<span class="cm">,</span> <span class="i">$ID</span><span class="sc">;</span> | 
|  | 501  488     <span class="i">$NewAlignmentDataMap</span>{<span class="w">Description</span>}{<span class="i">$ID</span>} = <span class="i">$AlignmentDataMapRef</span>->{<span class="w">Description</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 502  489     <span class="i">$NewAlignmentDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} = <span class="q">''</span><span class="sc">;</span> | 
|  | 503  490     <span class="i">$NewAlignmentDataMap</span>{<span class="w">Count</span>} += <span class="n">1</span><span class="sc">;</span> | 
|  | 504  491   <span class="s">}</span> | 
|  | 505  492 | 
|  | 506  493   <span class="c"># Go over residue columns and transfer the data...</span> | 
|  | 507  494   <span class="k">my</span><span class="s">(</span><span class="i">$FirstID</span><span class="cm">,</span> <span class="i">$FirstSeq</span><span class="cm">,</span> <span class="i">$FirstSeqLen</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="i">$Res</span><span class="cm">,</span> <span class="i">$GapColumn</span><span class="s">)</span><span class="sc">;</span> | 
|  | 508  495 | 
|  | 509  496   <span class="i">$FirstID</span> = <span class="i">$AlignmentDataMapRef</span>->{<span class="w">IDs</span>}[<span class="n">0</span>]<span class="sc">;</span> | 
|  | 510  497   <span class="i">$FirstSeq</span> = <span class="i">$AlignmentDataMapRef</span>->{<span class="w">Sequence</span>}{<span class="i">$FirstID</span>}<span class="sc">;</span> | 
|  | 511  498   <span class="i">$FirstSeqLen</span> = <span class="k">length</span><span class="s">(</span><span class="i">$FirstSeq</span><span class="s">)</span><span class="sc">;</span> | 
|  | 512  499 | 
|  | 513  500   <span class="j">RES:</span> <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. <span class="s">(</span><span class="i">$FirstSeqLen</span> - <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 514  501     <span class="c"># Is this a gap column?</span> | 
|  | 515  502     <span class="i">$GapColumn</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 516  503     <span class="j">ID:</span> <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$AlignmentDataMapRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 517  504       <span class="i">$Res</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$AlignmentDataMapRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 518  505       <span class="k">if</span> <span class="s">(</span><span class="i">$Res</span> =~ <span class="q">/[A-Z]/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 519  506         <span class="i">$GapColumn</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 520  507         <span class="k">last</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 521  508       <span class="s">}</span> | 
|  | 522  509     <span class="s">}</span> | 
|  | 523  510     <span class="k">if</span> <span class="s">(</span><span class="i">$GapColumn</span><span class="s">)</span> <span class="s">{</span> | 
|  | 524  511       <span class="k">next</span> <span class="j">RES</span><span class="sc">;</span> | 
|  | 525  512     <span class="s">}</span> | 
|  | 526  513     <span class="c"># Transfer this residue...</span> | 
|  | 527  514     <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$AlignmentDataMapRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 528  515       <span class="i">$Res</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$AlignmentDataMapRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 529  516       <span class="i">$NewAlignmentDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} .= <span class="i">$Res</span><span class="sc">;</span> | 
|  | 530  517     <span class="s">}</span> | 
|  | 531  518   <span class="s">}</span> | 
|  | 532  519 | 
|  | 533  520   <span class="k">return</span> <span class="s">(</span>\<span class="i">%NewAlignmentDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 534  521 <span class="s">}</span> | 
|  | 535  522 | 
|  | 536  523 <span class="c">#</span> | 
|  | 537  524 <span class="c"># Read sequences file and return a reference to hash with the following keys:</span> | 
|  | 538  525 <span class="c">#</span> | 
|  | 539  526 <span class="c"># {IDs} - Array of sequence IDs</span> | 
|  | 540  527 <span class="c"># {Count} - Number of sequences</span> | 
|  | 541  528 <span class="c"># {Description}{$ID} - Sequence description</span> | 
|  | 542  529 <span class="c"># {Sequence}{$ID} - Sequence for a specific ID</span> | 
|  | 543  530 <span class="c"># {InputFileType} - Sequence file format</span> | 
|  | 544  531 <span class="c"># {ConservedAnnotation} - Conserved residue annotation</span> | 
|  | 545  532 <span class="c">#</span> | 
|  | 546  533 <span class="c"># Note:</span> | 
|  | 547  534 <span class="c">#   . Conserved residue annotation either exists in the input sequence alignment file or is set</span> | 
|  | 548  535 <span class="c">#     for a file containing same number of residues for all the sequences using the following</span> | 
|  | 549  536 <span class="c">#     notation: * - Residue conserved; ' ' - Residue not conserved.</span> | 
|  | 550  537 <span class="c">#</span> | 
|  | 551 <a name="_ReadFileAndSetupSequencesData-"></a> 538 <span class="k">sub </span><span class="m">_ReadFileAndSetupSequencesData</span> <span class="s">{</span> | 
|  | 552  539   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="cm">,</span> <span class="i">$SequenceType</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 553  540   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceDataMapRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 554  541 | 
|  | 555  542   <span class="i">$SequenceDataMapRef</span> = <span class="k">undef</span><span class="sc">;</span> | 
|  | 556  543 | 
|  | 557  544   <span class="c"># Read sequence file...</span> | 
|  | 558  545   <span class="i">$SequenceDataMapRef</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 559  546   <span class="k">if</span> <span class="s">(</span><span class="i">$SequenceType</span> =~ <span class="q">/^ClustalW$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 560  547     <span class="i">$SequenceDataMapRef</span> = <span class="i">_ReadClustalWFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 561  548   <span class="s">}</span> | 
|  | 562  549   <span class="k">elsif</span> <span class="s">(</span><span class="i">$SequenceType</span> =~ <span class="q">/^Pearson$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 563  550     <span class="i">$SequenceDataMapRef</span> = <span class="i">_ReadPearsonFastaFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 564  551   <span class="s">}</span> | 
|  | 565  552   <span class="k">elsif</span> <span class="s">(</span><span class="i">$SequenceType</span> =~ <span class="q">/^PIR$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 566  553     <span class="i">$SequenceDataMapRef</span> = <span class="i">_ReadPIRFastaFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 567  554   <span class="s">}</span> | 
|  | 568  555   <span class="k">elsif</span> <span class="s">(</span><span class="i">$SequenceType</span> =~ <span class="q">/^MSF$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 569  556     <span class="i">$SequenceDataMapRef</span> = <span class="i">_ReadMSFFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 570  557   <span class="s">}</span> | 
|  | 571  558   <span class="k">else</span> <span class="s">{</span> | 
|  | 572  559     <span class="k">return</span> <span class="i">$SequenceDataMapRef</span><span class="sc">;</span> | 
|  | 573  560   <span class="s">}</span> | 
|  | 574  561 | 
|  | 575  562   <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$SequenceDataMapRef</span>->{<span class="w">ConservedAnnotation</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 576  563     <span class="k">return</span> <span class="s">(</span><span class="i">$SequenceDataMapRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 577  564   <span class="s">}</span> | 
|  | 578  565   <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="s">(</span><span class="i">$SequenceDataMapRef</span>->{<span class="w">Count</span>} > <span class="n">1</span><span class="s">)</span> && <span class="s">(</span><span class="i">AreSequenceLengthsIdentical</span><span class="s">(</span><span class="i">$SequenceDataMapRef</span><span class="s">)</span><span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 579  566     <span class="k">return</span> <span class="s">(</span><span class="i">$SequenceDataMapRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 580  567   <span class="s">}</span> | 
|  | 581  568 | 
|  | 582  569   <span class="c"># Use the first sequence to setup an empty ConservedAnnotation key...</span> | 
|  | 583  570   <span class="c"># And mark fully conserved residues...</span> | 
|  | 584  571   <span class="c">#</span> | 
|  | 585  572   <span class="k">my</span><span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$FirstSequence</span><span class="cm">,</span> <span class="i">$FirstSeqLen</span><span class="cm">,</span> <span class="i">$Res</span><span class="cm">,</span> <span class="i">$FirstRes</span><span class="cm">,</span> <span class="i">$ResConserved</span><span class="cm">,</span> <span class="i">$Index</span><span class="s">)</span><span class="sc">;</span> | 
|  | 586  573   <span class="i">$ID</span> = <span class="i">$SequenceDataMapRef</span>->{<span class="w">IDs</span>}[<span class="n">0</span>]<span class="sc">;</span> | 
|  | 587  574   <span class="i">$FirstSequence</span> = <span class="i">$SequenceDataMapRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 588  575   <span class="i">$FirstSeqLen</span> = <span class="k">length</span><span class="s">(</span><span class="i">$FirstSequence</span><span class="s">)</span><span class="sc">;</span> | 
|  | 589  576   <span class="i">$SequenceDataMapRef</span>->{<span class="w">ConservedAnnotation</span>} = <span class="q">''</span><span class="sc">;</span> | 
|  | 590  577   <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. <span class="s">(</span><span class="i">$FirstSeqLen</span> - <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 591  578     <span class="i">$FirstRes</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 592  579     <span class="i">$ResConserved</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 593  580     <span class="j">ID:</span> <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceDataMapRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 594  581       <span class="i">$Sequence</span> = <span class="i">$SequenceDataMapRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 595  582       <span class="i">$Res</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 596  583       <span class="k">if</span> <span class="s">(</span>!<span class="i">$FirstRes</span><span class="s">)</span> <span class="s">{</span> | 
|  | 597  584         <span class="i">$FirstRes</span> = <span class="i">$Res</span><span class="sc">;</span> | 
|  | 598  585         <span class="k">next</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 599  586       <span class="s">}</span> | 
|  | 600  587       <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$Res</span> !~ <span class="q">/[A-Z]/i</span><span class="s">)</span> || <span class="s">(</span><span class="i">$Res</span> <span class="k">ne</span> <span class="i">$FirstRes</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 601  588         <span class="i">$ResConserved</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 602  589         <span class="k">last</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 603  590       <span class="s">}</span> | 
|  | 604  591     <span class="s">}</span> | 
|  | 605  592     <span class="k">if</span> <span class="s">(</span><span class="i">$ResConserved</span><span class="s">)</span> <span class="s">{</span> | 
|  | 606  593       <span class="i">$SequenceDataMapRef</span>->{<span class="w">ConservedAnnotation</span>} .= <span class="q">'*'</span><span class="sc">;</span> | 
|  | 607  594     <span class="s">}</span> | 
|  | 608  595     <span class="k">else</span> <span class="s">{</span> | 
|  | 609  596       <span class="i">$SequenceDataMapRef</span>->{<span class="w">ConservedAnnotation</span>} .= <span class="q">' '</span><span class="sc">;</span> | 
|  | 610  597     <span class="s">}</span> | 
|  | 611  598   <span class="s">}</span> | 
|  | 612  599 | 
|  | 613  600   <span class="k">return</span> <span class="s">(</span><span class="i">$SequenceDataMapRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 614  601 <span class="s">}</span> | 
|  | 615  602 | 
|  | 616  603 <span class="c"># Read sequence data in ClustalW multiple sequence alignment file and</span> | 
|  | 617  604 <span class="c"># return a reference to hash with these keys and values:</span> | 
|  | 618  605 <span class="c">#</span> | 
|  | 619  606 <span class="c"># {IDs} - Array of sequence IDs</span> | 
|  | 620  607 <span class="c"># {Count} - Number of sequences</span> | 
|  | 621  608 <span class="c"># {Description}{$ID} - Sequence description</span> | 
|  | 622  609 <span class="c"># {Sequence}{$ID} - Sequence for a specific ID</span> | 
|  | 623  610 <span class="c"># {InputFileType} - Sequence file format</span> | 
|  | 624  611 <span class="c"># {ConservedAnnotation} - Conserved residue annotations: space, *, : , .</span> | 
|  | 625  612 <span class="c">#</span> | 
|  | 626  613 <span class="c">#</span> | 
|  | 627  614 <span class="c">#</span> | 
|  | 628  615 <span class="c"># And based on ClustalW/X manual, here is what the ConservedAnnotations mean:</span> | 
|  | 629  616 <span class="c">#</span> | 
|  | 630  617 <span class="c"># '*' indicates positions which have a single, fully conserved residue</span> | 
|  | 631  618 <span class="c">#</span> | 
|  | 632  619 <span class="c"># ':' indicates that one of the following 'strong' groups is fully conserved: STA</span> | 
|  | 633  620 <span class="c">#    NEQK NHQK NDEQ QHRK MILV MILF HY FYW</span> | 
|  | 634  621 | 
|  | 635  622 <span class="c"># '.' indicates that one of the following 'weaker' groups is fully conserved:</span> | 
|  | 636  623 <span class="c">#     CSA ATV SAG STNK STPA SGND SNDEQK NDEQHK NEQHRK FVLIM HFY</span> | 
|  | 637  624 <span class="c">#</span> | 
|  | 638  625 <span class="c"># These are all the positively scoring groups that occur in the Gonnet Pam250</span> | 
|  | 639  626 <span class="c"># matrix. The strong and weak groups are defined as strong score >0.5 and weak</span> | 
|  | 640  627 <span class="c"># score =<0.5 respectively.</span> | 
|  | 641  628 <span class="c">#</span> | 
|  | 642 <a name="_ReadClustalWFile-"></a> 629 <span class="k">sub </span><span class="m">_ReadClustalWFile</span> <span class="s">{</span> | 
|  | 643  630   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 644  631   <span class="k">my</span><span class="s">(</span><span class="i">%SequencesDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 645  632 | 
|  | 646  633   <span class="c"># Initialize data...</span> | 
|  | 647  634   <span class="i">%SequencesDataMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 648  635   <span class="i">@</span>{<span class="i">$SequencesDataMap</span>{<span class="w">IDs</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 649  636   <span class="i">%</span>{<span class="i">$SequencesDataMap</span>{<span class="w">Description</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 650  637   <span class="i">%</span>{<span class="i">$SequencesDataMap</span>{<span class="w">Sequence</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 651  638   <span class="i">$SequencesDataMap</span>{<span class="w">Count</span>} = <span class="n">0</span><span class="sc">;</span> | 
|  | 652  639   <span class="i">$SequencesDataMap</span>{<span class="w">ConservedAnnotation</span>} = <span class="q">''</span><span class="sc">;</span> | 
|  | 653  640   <span class="i">$SequencesDataMap</span>{<span class="w">InputFileType</span>} = <span class="q">'ClustalW'</span><span class="sc">;</span> | 
|  | 654  641 | 
|  | 655  642   <span class="k">open</span> <span class="w">SEQUENCEFILE</span><span class="cm">,</span> <span class="q">"$SequenceFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Couldn't open $SequenceFile: $!\n"</span><span class="sc">;</span> | 
|  | 656  643 | 
|  | 657  644   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$LineLength</span><span class="cm">,</span> <span class="i">$AnnotationStart</span><span class="cm">,</span> <span class="i">$AnnotationLength</span><span class="cm">,</span> <span class="i">$Annotation</span><span class="cm">,</span> <span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$SequenceLength</span><span class="cm">,</span> <span class="i">$ID</span><span class="cm">,</span> <span class="i">$IDIndex</span><span class="s">)</span><span class="sc">;</span> | 
|  | 658  645 | 
|  | 659  646   <span class="c"># Ignore the header line...</span> | 
|  | 660  647   <span class="i">$Line</span> = <span class="q"><SEQUENCEFILE></span><span class="sc">;</span> | 
|  | 661  648 | 
|  | 662  649   <span class="j">LINE:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*SEQUENCEFILE</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 663  650     <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^[ \*\:\.]/</span><span class="s">)</span> && <span class="s">(</span><span class="i">$Line</span> !~ <span class="q">/[A-Z]/i</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 664  651       <span class="c"># Annotation for sequences: fully conserved, weaker or stronger group conserved.</span> | 
|  | 665  652       <span class="c"># Extract it and save...</span> | 
|  | 666  653       <span class="i">$LineLength</span> = <span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 667  654       <span class="i">$AnnotationStart</span> = <span class="i">$LineLength</span> - <span class="i">$SequenceLength</span><span class="sc">;</span> | 
|  | 668  655       <span class="i">$AnnotationLength</span> = <span class="i">$SequenceLength</span><span class="sc">;</span> | 
|  | 669  656       <span class="i">$Annotation</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$AnnotationStart</span><span class="cm">,</span> <span class="i">$AnnotationLength</span><span class="s">)</span><span class="sc">;</span> | 
|  | 670  657       <span class="i">$SequencesDataMap</span>{<span class="w">ConservedAnnotation</span>} .= <span class="i">$Annotation</span><span class="sc">;</span> | 
|  | 671  658     <span class="s">}</span> | 
|  | 672  659     <span class="k">else</span> <span class="s">{</span> | 
|  | 673  660       <span class="c"># Extract ID and sequences...</span> | 
|  | 674  661       <span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Sequence</span><span class="s">)</span>= <span class="i">$Line</span> =~ <span class="q">/^[ ]*(.*?)[ ]+(.*?)[ 01-9]*$/</span><span class="sc">;</span> | 
|  | 675  662       <span class="i">$Sequence</span> =~ <span class="q">s/ //g</span><span class="sc">;</span> | 
|  | 676  663       <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="i">$ID</span> && <span class="i">$Sequence</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 677  664         <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 678  665       <span class="s">}</span> | 
|  | 679  666 | 
|  | 680  667       <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$SequencesDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 681  668         <span class="c"># Append to existing alignment value...</span> | 
|  | 682  669         <span class="i">$SequenceLength</span> = <span class="k">length</span><span class="s">(</span><span class="i">$Sequence</span><span class="s">)</span><span class="sc">;</span> | 
|  | 683  670         <span class="i">$SequencesDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} .= <span class="i">$Sequence</span><span class="sc">;</span> | 
|  | 684  671       <span class="s">}</span> | 
|  | 685  672       <span class="k">else</span> <span class="s">{</span> | 
|  | 686  673         <span class="c"># New alignment data...</span> | 
|  | 687  674         <span class="i">$SequencesDataMap</span>{<span class="w">Count</span>} += <span class="n">1</span><span class="sc">;</span> | 
|  | 688  675         <span class="k">push</span> <span class="i">@</span>{<span class="i">$SequencesDataMap</span>{<span class="w">IDs</span>}}<span class="cm">,</span> <span class="i">$ID</span><span class="sc">;</span> | 
|  | 689  676         <span class="i">$SequencesDataMap</span>{<span class="w">Description</span>}{<span class="i">$ID</span>} = <span class="i">$ID</span><span class="sc">;</span> | 
|  | 690  677         <span class="i">$SequencesDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} = <span class="i">$Sequence</span><span class="sc">;</span> | 
|  | 691  678         <span class="i">$SequenceLength</span> = <span class="k">length</span><span class="s">(</span><span class="i">$Sequence</span><span class="s">)</span><span class="sc">;</span> | 
|  | 692  679       <span class="s">}</span> | 
|  | 693  680     <span class="s">}</span> | 
|  | 694  681   <span class="s">}</span> | 
|  | 695  682   <span class="k">close</span> <span class="w">SEQUENCEFILE</span><span class="sc">;</span> | 
|  | 696  683   <span class="k">return</span> <span class="s">(</span>\<span class="i">%SequencesDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 697  684 <span class="s">}</span> | 
|  | 698  685 | 
|  | 699  686 <span class="c"># Read Pearson fasta file and return a reference to hash with these keys:</span> | 
|  | 700  687 <span class="c">#</span> | 
|  | 701  688 <span class="c"># {IDs} - Array of sequence IDs</span> | 
|  | 702  689 <span class="c"># {Count} - Number of sequences</span> | 
|  | 703  690 <span class="c"># {Description}{$ID} - Sequence description</span> | 
|  | 704  691 <span class="c"># {Sequence}{$ID} - Sequence for a specific ID</span> | 
|  | 705  692 <span class="c"># {InputFileType} - Sequence file format</span> | 
|  | 706  693 <span class="c"># {ConservedAnnotation} - Conserved residue annotation</span> | 
|  | 707  694 <span class="c">#</span> | 
|  | 708 <a name="_ReadPearsonFastaFile-"></a> 695 <span class="k">sub </span><span class="m">_ReadPearsonFastaFile</span> <span class="s">{</span> | 
|  | 709  696   <span class="k">my</span><span class="s">(</span><span class="i">$FastaFileName</span><span class="cm">,</span> <span class="i">$ID</span><span class="cm">,</span> <span class="i">$Description</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$IgnoreID</span><span class="cm">,</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">%FastaDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 710  697 | 
|  | 711  698   <span class="s">(</span><span class="i">$FastaFileName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 712  699 | 
|  | 713  700   <span class="i">%FastaDataMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 714  701   <span class="i">@</span>{<span class="i">$FastaDataMap</span>{<span class="w">IDs</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 715  702   <span class="i">%</span>{<span class="i">$FastaDataMap</span>{<span class="w">Description</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 716  703   <span class="i">%</span>{<span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 717  704   <span class="i">$FastaDataMap</span>{<span class="w">Count</span>} = <span class="n">0</span><span class="sc">;</span> | 
|  | 718  705   <span class="i">$FastaDataMap</span>{<span class="w">InputFileType</span>} = <span class="q">'Pearson'</span><span class="sc">;</span> | 
|  | 719  706 | 
|  | 720  707   <span class="k">open</span> <span class="w">FASTAFILE</span><span class="cm">,</span> <span class="q">"$FastaFileName"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Couldn't open $FastaFileName: $!\n"</span><span class="sc">;</span> | 
|  | 721  708   <span class="i">$ID</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 722  709   <span class="i">$IgnoreID</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 723  710   <span class="j">LINE:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*FASTAFILE</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 724  711     <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^\>/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 725  712       <span class="c"># Start of a new ID...</span> | 
|  | 726  713       <span class="i">$Line</span> =~ <span class="q">s/^\>//</span><span class="sc">;</span> | 
|  | 727  714       <span class="i">$Line</span> = <span class="i">RemoveLeadingWhiteSpaces</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 728  715       <span class="i">@LineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 729  716       <span class="i">@LineWords</span> = <span class="k">split</span> <span class="q">/ /</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> | 
|  | 730  717 | 
|  | 731  718       <span class="i">$ID</span> = <span class="i">$LineWords</span>[<span class="n">0</span>]<span class="sc">;</span> | 
|  | 732  719       <span class="i">$ID</span> =~ <span class="q">s/ //g</span><span class="sc">;</span> | 
|  | 733  720       <span class="i">$Description</span> = <span class="i">$Line</span><span class="sc">;</span> | 
|  | 734  721 | 
|  | 735  722       <span class="i">$IgnoreID</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 736  723       <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 737  724         <span class="i">$IgnoreID</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 738  725         <span class="k">warn</span> <span class="q">"Warning: ID, $ID, in Fasta file already exists. Ignoring ID and sequence data...\n"</span><span class="sc">;</span> | 
|  | 739  726         <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 740  727       <span class="s">}</span> | 
|  | 741  728       <span class="k">push</span> <span class="i">@</span>{<span class="i">$FastaDataMap</span>{<span class="w">IDs</span>}}<span class="cm">,</span> <span class="i">$ID</span><span class="sc">;</span> | 
|  | 742  729       <span class="i">$FastaDataMap</span>{<span class="w">Description</span>}{<span class="i">$ID</span>} = <span class="i">$Description</span><span class="sc">;</span> | 
|  | 743  730       <span class="i">$FastaDataMap</span>{<span class="w">Count</span>} += <span class="n">1</span><span class="sc">;</span> | 
|  | 744  731       <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 745  732     <span class="s">}</span> | 
|  | 746  733     <span class="k">if</span> <span class="s">(</span><span class="i">$IgnoreID</span><span class="s">)</span> <span class="s">{</span> <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> <span class="s">}</span> | 
|  | 747  734 | 
|  | 748  735     <span class="c"># Remove any spaces in the sequence...</span> | 
|  | 749  736     <span class="i">$Line</span> =~ <span class="q">s/ //g</span><span class="sc">;</span> | 
|  | 750  737     <span class="c"># Sequence data for active ID...</span> | 
|  | 751  738     <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 752  739       <span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} .= <span class="i">$Line</span><span class="sc">;</span> | 
|  | 753  740     <span class="s">}</span> | 
|  | 754  741     <span class="k">else</span> <span class="s">{</span> | 
|  | 755  742       <span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} = <span class="i">$Line</span><span class="sc">;</span> | 
|  | 756  743     <span class="s">}</span> | 
|  | 757  744   <span class="s">}</span> | 
|  | 758  745   <span class="k">close</span> <span class="w">FASTAFILE</span><span class="sc">;</span> | 
|  | 759  746   <span class="k">return</span> \<span class="i">%FastaDataMap</span><span class="sc">;</span> | 
|  | 760  747 <span class="s">}</span> | 
|  | 761  748 | 
|  | 762  749 <span class="c"># Read PIR fasta file and return a reference to hash with these keys:</span> | 
|  | 763  750 <span class="c">#</span> | 
|  | 764  751 <span class="c"># {IDs} - Array of sequence IDs</span> | 
|  | 765  752 <span class="c"># {Count} - Number of sequences</span> | 
|  | 766  753 <span class="c"># {Description}{$ID} - Sequence description</span> | 
|  | 767  754 <span class="c"># {Sequence}{$ID} - Sequence for a specific ID</span> | 
|  | 768  755 <span class="c"># {InputFileType} - Sequence file format</span> | 
|  | 769  756 <span class="c"># {ConservedAnnotation} - Conserved residue annotation</span> | 
|  | 770  757 <span class="c">#</span> | 
|  | 771  758 <span class="c"># Format:</span> | 
|  | 772  759 <span class="c"># A sequence in PIR format consists of:</span> | 
|  | 773  760 <span class="c"># One line starting with</span> | 
|  | 774  761 <span class="c">#   a ">" (greater-than) sign, followed by</span> | 
|  | 775  762 <span class="c">#   a two-letter code describing the sequence type code (P1, F1, DL, DC, RL, RC, N3, N1 or XX), followed by</span> | 
|  | 776  763 <span class="c">#   a semicolon, followed by</span> | 
|  | 777  764 <span class="c">#   the sequence identification code (the database ID-code).</span> | 
|  | 778  765 <span class="c"># One line containing a textual description of the sequence.</span> | 
|  | 779  766 <span class="c"># One or more lines containing the sequence itself. The end of the</span> | 
|  | 780  767 <span class="c"># sequence is marked by a "*" (asterisk) character.</span> | 
|  | 781  768 <span class="c">#</span> | 
|  | 782  769 <span class="c"># A file in PIR format may comprise more than one sequence.</span> | 
|  | 783  770 <span class="c">#</span> | 
|  | 784  771 <span class="c"># The PIR format is also often referred to as the NBRF format.</span> | 
|  | 785  772 <span class="c">#</span> | 
|  | 786  773 <span class="c"># Code SequenceType</span> | 
|  | 787  774 <span class="c"># P1    Protein (complete)</span> | 
|  | 788  775 <span class="c"># F1    Protein (fragment)</span> | 
|  | 789  776 <span class="c"># DL    DNA (linear)</span> | 
|  | 790  777 <span class="c"># DC    DNA (circular)</span> | 
|  | 791  778 <span class="c"># RL    RNA (linear)</span> | 
|  | 792  779 <span class="c"># RC   RNA (circular)</span> | 
|  | 793  780 <span class="c"># N3    tRNA</span> | 
|  | 794  781 <span class="c"># N1    Other functional RNA</span> | 
|  | 795  782 <span class="c">#</span> | 
|  | 796  783 | 
|  | 797 <a name="_ReadPIRFastaFile-"></a> 784 <span class="k">sub </span><span class="m">_ReadPIRFastaFile</span> <span class="s">{</span> | 
|  | 798  785   <span class="k">my</span><span class="s">(</span><span class="i">$FastaFileName</span><span class="cm">,</span> <span class="i">$ID</span><span class="cm">,</span> <span class="i">$Description</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$SequenceTypeCode</span><span class="cm">,</span> <span class="i">$ReadingSequenceData</span><span class="cm">,</span> <span class="i">%FastaDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 799  786   <span class="c"># Only argument: name of the PIR file to read...</span> | 
|  | 800  787   <span class="s">(</span><span class="i">$FastaFileName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 801  788   <span class="c"># Result hash; SequenceTypeCode maps each ID to the type code found in its header line...</span> | 
|  | 802  789   <span class="i">%FastaDataMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 803  790   <span class="i">@</span>{<span class="i">$FastaDataMap</span>{<span class="w">IDs</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 804  791   <span class="i">%</span>{<span class="i">$FastaDataMap</span>{<span class="w">Description</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 805  792   <span class="i">%</span>{<span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 806  793   <span class="i">%</span>{<span class="i">$FastaDataMap</span>{<span class="w">SequenceTypeCode</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 807  794   <span class="i">$FastaDataMap</span>{<span class="w">Count</span>} = <span class="n">0</span><span class="sc">;</span> | 
|  | 808  795   <span class="i">$FastaDataMap</span>{<span class="w">InputFileType</span>} = <span class="q">'PIR'</span><span class="sc">;</span> | 
|  | 809  796 | 
|  | 810  797   <span class="k">open</span> <span class="w">FASTAFILE</span><span class="cm">,</span> <span class="q">"$FastaFileName"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Couldn't open $FastaFileName: $!\n"</span><span class="sc">;</span> | 
|  | 811  798   <span class="i">$ID</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 812  799   <span class="i">$ReadingSequenceData</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 813  800   <span class="j">LINE:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*FASTAFILE</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 814  801     <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^\>/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 815  802       <span class="c"># Start of a new ID: header is ">TypeCode;ID"; parse $Line itself, whose ">" is removed first...</span> | 
|  | 816  803       <span class="i">$Line</span> =~ <span class="q">s/^\>//</span><span class="sc">;</span> | 
|  | 817  804       <span class="i">$Line</span> = <span class="i">RemoveLeadingWhiteSpaces</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 818  805       <span class="s">(</span><span class="i">$SequenceTypeCode</span><span class="cm">,</span> <span class="i">$ID</span><span class="s">)</span> = <span class="i">$Line</span> =~ <span class="q">/^(.*?)\;(.*?)$/</span><span class="sc">;</span> | 
|  | 819  806       <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$ID</span><span class="s">)</span> <span class="s">{</span> <span class="k">warn</span> <span class="q">"Warning: Ignoring malformed PIR header line: $Line\n"</span><span class="sc">;</span> <span class="i">$ReadingSequenceData</span> = <span class="n">0</span><span class="sc">;</span> <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> <span class="s">}</span> | 
|  | 820  807       <span class="c"># Use next line to retrieve sequence description...</span> | 
|  | 821  808       <span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*FASTAFILE</span><span class="s">)</span><span class="sc">;</span> | 
|  | 822  809       <span class="i">$Line</span> = <span class="i">RemoveLeadingWhiteSpaces</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 823  810       <span class="i">$Description</span> = <span class="i">$Line</span><span class="sc">;</span> | 
|  | 824  811 | 
|  | 825  812       <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 826  813         <span class="k">warn</span> <span class="q">"Warning: ID, $ID, in Fasta file already exists. Ignoring ID and sequence data...\n"</span><span class="sc">;</span> | 
|  | 827  814         <span class="i">$ReadingSequenceData</span> = <span class="n">0</span><span class="sc">;</span> <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> <span class="c"># Stop collecting so the duplicate's residues are not appended to the earlier record...</span> | 
|  | 828  815       <span class="s">}</span> | 
|  | 829  816       <span class="i">$ReadingSequenceData</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 830  817       <span class="k">push</span> <span class="i">@</span>{<span class="i">$FastaDataMap</span>{<span class="w">IDs</span>}}<span class="cm">,</span> <span class="i">$ID</span><span class="sc">;</span> | 
|  | 831  818       <span class="i">$FastaDataMap</span>{<span class="w">SequenceTypeCode</span>}{<span class="i">$ID</span>} = <span class="i">$SequenceTypeCode</span><span class="sc">;</span> | 
|  | 832  819       <span class="i">$FastaDataMap</span>{<span class="w">Description</span>}{<span class="i">$ID</span>} = <span class="i">$Description</span><span class="sc">;</span> | 
|  | 833  820       <span class="i">$FastaDataMap</span>{<span class="w">Count</span>} += <span class="n">1</span><span class="sc">;</span> | 
|  | 834  821       <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 835  822     <span class="s">}</span> | 
|  | 836  823     <span class="k">if</span> <span class="s">(</span>!<span class="i">$ReadingSequenceData</span><span class="s">)</span> <span class="s">{</span> <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> <span class="s">}</span> | 
|  | 837  824 | 
|  | 838  825     <span class="c"># Remove any spaces in the sequence...</span> | 
|  | 839  826     <span class="i">$Line</span> =~ <span class="q">s/ //g</span><span class="sc">;</span> | 
|  | 840  827     <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/[\*]$/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 841  828       <span class="c"># End of sequence...</span> | 
|  | 842  829       <span class="i">$ReadingSequenceData</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 843  830       <span class="i">$Line</span> =~ <span class="q">s/[\*]$//</span><span class="sc">;</span> | 
|  | 844  831     <span class="s">}</span> | 
|  | 845  832     <span class="c"># Sequence data for active ID...</span> | 
|  | 846  833     <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 847  834       <span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} .= <span class="i">$Line</span><span class="sc">;</span> | 
|  | 848  835     <span class="s">}</span> | 
|  | 849  836     <span class="k">else</span> <span class="s">{</span> | 
|  | 850  837       <span class="i">$FastaDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} = <span class="i">$Line</span><span class="sc">;</span> | 
|  | 851  838     <span class="s">}</span> | 
|  | 852  839   <span class="s">}</span> | 
|  | 853  840   <span class="k">close</span> <span class="w">FASTAFILE</span><span class="sc">;</span> | 
|  | 854  841   <span class="k">return</span> \<span class="i">%FastaDataMap</span><span class="sc">;</span> | 
|  | 855  842 <span class="s">}</span> | 
|  | 856  843 | 
|  | 857  844 <span class="c"># Read MSF file and return a reference to hash with these keys:</span> | 
|  | 858  845 <span class="c">#</span> | 
|  | 859  846 <span class="c"># {IDs} : Array of IDs in order as they appear in file</span> | 
|  | 860  847 <span class="c"># {Count}: ID count...</span> | 
|  | 861  848 <span class="c"># {Description}{$ID} : Description data...</span> | 
|  | 862  849 <span class="c"># {Sequence}{$ID} : Sequence data...</span> | 
|  | 863  850 <span class="c">#</span> | 
|  | 864 <a name="_ReadMSFFile-"></a> 851 <span class="k">sub </span><span class="m">_ReadMSFFile</span> <span class="s">{</span> | 
|  | 865  852   <span class="k">my</span><span class="s">(</span><span class="i">$MSFFileName</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">%MSFDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 866  853   <span class="c"># Only argument: name of the MSF file to read...</span> | 
|  | 867  854   <span class="s">(</span><span class="i">$MSFFileName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 868  855 | 
|  | 869  856   <span class="i">%MSFDataMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 870  857   <span class="i">@</span>{<span class="i">$MSFDataMap</span>{<span class="w">IDs</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 871  858   <span class="i">%</span>{<span class="i">$MSFDataMap</span>{<span class="w">Description</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 872  859   <span class="i">%</span>{<span class="i">$MSFDataMap</span>{<span class="w">Sequence</span>}} =<span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 873  860   <span class="i">$MSFDataMap</span>{<span class="w">Count</span>} = <span class="n">0</span><span class="sc">;</span> | 
|  | 874  861   <span class="i">$MSFDataMap</span>{<span class="w">InputFileType</span>} = <span class="q">'MSF'</span><span class="sc">;</span> | 
|  | 875  862 | 
|  | 876  863   <span class="k">open</span> <span class="w">MSFFILE</span><span class="cm">,</span> <span class="q">"$MSFFileName"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Couldn't open $MSFFileName: $!\n"</span><span class="sc">;</span> | 
|  | 877  864 | 
|  | 878  865   <span class="c"># Collect sequences and IDs...</span> | 
|  | 879  866   <span class="c">#</span> | 
|  | 880  867   <span class="c"># '//' after the name fields indicates end of header list and start of sequence data.</span> | 
|  | 881  868   <span class="c">#</span> | 
|  | 882  869   <span class="k">my</span><span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Len</span><span class="cm">,</span> <span class="i">$Check</span><span class="cm">,</span> <span class="i">$Weight</span><span class="cm">,</span> <span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$NameFieldsFound</span><span class="cm">,</span> <span class="i">%MSFIDsMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 883  870   <span class="i">%MSFIDsMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 884  871   <span class="i">$NameFieldsFound</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 885  872   <span class="j">LINE:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*MSFFILE</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 886  873     <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/Name:/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 887  874       <span class="i">$NameFieldsFound</span>++<span class="sc">;</span> | 
|  | 888  875       <span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Len</span><span class="cm">,</span> <span class="i">$Check</span><span class="cm">,</span> <span class="i">$Weight</span><span class="s">)</span> = <span class="i">$Line</span> =~ <span class="q">/^[ ]*Name:[ ]+(.*?)[ ]+Len:[ ]+(.*?)[ ]+Check:[ ]+(.*?)[ ]+Weight:[ ]+(.*?)[ ]*$/</span><span class="sc">;</span> | 
|  | 889  876       <span class="c"># Skip a Name: line lacking the Len:, Check: and Weight: fields; otherwise keep only the first word of the name...</span> | 
|  | 890  877       <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$ID</span><span class="s">)</span> <span class="s">{</span> <span class="k">warn</span> <span class="q">"Warning: Ignoring malformed Name line in MSF file: $Line\n"</span><span class="sc">;</span> <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> <span class="s">}</span> | 
|  | 891  878       <span class="k">if</span> <span class="s">(</span><span class="i">$ID</span> =~ <span class="q">/ /</span><span class="s">)</span> <span class="s">{</span> <span class="s">(</span><span class="i">$ID</span><span class="s">)</span> = <span class="i">$ID</span> =~ <span class="q">/^(.*?)[ ]+/</span><span class="sc">;</span> <span class="s">}</span> | 
|  | 892  879       <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$MSFIDsMap</span>{<span class="i">$ID</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 893  880         <span class="k">warn</span> <span class="q">"Warning: ID, $ID, in MSF file already exists. Ignoring ID and sequence data...\n"</span><span class="sc">;</span> | 
|  | 894  881         <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 895  882       <span class="s">}</span> | 
|  | 896  883       <span class="i">$MSFIDsMap</span>{<span class="i">$ID</span>} = <span class="i">$ID</span><span class="sc">;</span> | 
|  | 897  884       <span class="k">push</span> <span class="i">@</span>{<span class="i">$MSFDataMap</span>{<span class="w">IDs</span>}}<span class="cm">,</span> <span class="i">$ID</span><span class="sc">;</span> | 
|  | 898  885       <span class="i">$MSFDataMap</span>{<span class="w">Description</span>}{<span class="i">$ID</span>} = <span class="i">$ID</span><span class="sc">;</span> | 
|  | 899  886       <span class="i">$MSFDataMap</span>{<span class="w">Count</span>} += <span class="n">1</span><span class="sc">;</span> | 
|  | 900  887     <span class="s">}</span> | 
|  | 901  888     <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/\/\//</span> && <span class="i">$NameFieldsFound</span><span class="s">)</span> <span class="s">{</span> | 
|  | 902  889       <span class="c"># End of header list...</span> | 
|  | 903  890       <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 904  891     <span class="s">}</span> | 
|  | 905  892   <span class="s">}</span> | 
|  | 906  893   <span class="c"># Collect all sequences...</span> | 
|  | 907  894   <span class="c"># Each data row is "ID residues"; rows whose first field is a known ID are concatenated in file order...</span> | 
|  | 908  895   <span class="k">my</span><span class="s">(</span><span class="i">$FirstField</span><span class="cm">,</span> <span class="i">$SecondField</span><span class="s">)</span><span class="sc">;</span> | 
|  | 909  896   <span class="k">while</span> <span class="s">(</span><span class="i">$Line</span> = <span class="i">GetTextLine</span><span class="s">(</span>\<span class="i">*MSFFILE</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 910  897     <span class="s">(</span><span class="i">$FirstField</span><span class="cm">,</span> <span class="i">$SecondField</span><span class="s">)</span> = <span class="i">$Line</span> =~ <span class="q">/^[ ]*(.*?)[ ]+(.*?)$/</span><span class="sc">;</span> | 
|  | 911  898     <span class="k">if</span> <span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$FirstField</span><span class="s">)</span> && <span class="k">exists</span> <span class="i">$MSFIDsMap</span>{<span class="i">$FirstField</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 912  899       <span class="c"># It's ID and sequence data...</span> | 
|  | 913  900       <span class="i">$ID</span> = <span class="i">$FirstField</span><span class="sc">;</span> | 
|  | 914  901       <span class="i">$Sequence</span> = <span class="i">$SecondField</span><span class="sc">;</span> | 
|  | 915  902       <span class="c"># Take out spaces and leave the gap characters...</span> | 
|  | 916  903       <span class="i">$Sequence</span> =~ <span class="q">s/ //g</span><span class="sc">;</span> | 
|  | 917  904       <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$MSFDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 918  905         <span class="i">$MSFDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} .= <span class="i">$Sequence</span><span class="sc">;</span> | 
|  | 919  906       <span class="s">}</span> | 
|  | 920  907       <span class="k">else</span> <span class="s">{</span> | 
|  | 921  908         <span class="i">$MSFDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} = <span class="i">$Sequence</span><span class="sc">;</span> | 
|  | 922  909       <span class="s">}</span> | 
|  | 923  910     <span class="s">}</span> | 
|  | 924  911   <span class="s">}</span> | 
|  | 925  912 | 
|  | 926  913   <span class="k">close</span> <span class="w">MSFFILE</span><span class="sc">;</span> | 
|  | 927  914   <span class="k">return</span> \<span class="i">%MSFDataMap</span><span class="sc">;</span> | 
|  | 928  915 <span class="s">}</span> | 
|  | 929  916 | 
|  | 930  917 | 
|  | 931 <a name="EOF-"></a></pre> | 
|  | 932 <p> </p> | 
|  | 933 <br /> | 
|  | 934 <center> | 
|  | 935 <img src="../../../images/h2o2.png"> | 
|  | 936 </center> | 
|  | 937 </body> | 
|  | 938 </html> |