| 0 | 1 <html> | 
|  | 2 <head> | 
|  | 3 <title>MayaChemTools:Code:AnalyzeSequenceFilesData.pl</title> | 
|  | 4 <meta http-equiv="content-type" content="text/html;charset=utf-8"> | 
|  | 5 <link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css"> | 
|  | 6 </head> | 
|  | 7 <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10"> | 
|  | 8 <br/> | 
|  | 9 <center> | 
|  | 10 <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a> | 
|  | 11 </center> | 
|  | 12 <br/> | 
|  | 13 <pre> | 
|  | 14    1 #!/usr/bin/perl -w | 
|  | 15    2 <span class="c">#</span> | 
|  | 16    3 <span class="c"># $RCSfile: AnalyzeSequenceFilesData.pl,v $</span> | 
|  | 17    4 <span class="c"># $Date: 2015/02/28 20:46:04 $</span> | 
|  | 18    5 <span class="c"># $Revision: 1.33 $</span> | 
|  | 19    6 <span class="c">#</span> | 
|  | 20    7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span> | 
|  | 21    8 <span class="c">#</span> | 
|  | 22    9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span> | 
|  | 23   10 <span class="c">#</span> | 
|  | 24   11 <span class="c"># This file is part of MayaChemTools.</span> | 
|  | 25   12 <span class="c">#</span> | 
|  | 26   13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span> | 
|  | 27   14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span> | 
|  | 28   15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span> | 
|  | 29   16 <span class="c"># later version.</span> | 
|  | 30   17 <span class="c">#</span> | 
|  | 31   18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span> | 
|  | 32   19 <span class="c"># any warranty; without even the implied warranty of merchantability or fitness</span> | 
|  | 33   20 <span class="c"># for a particular purpose.  See the GNU Lesser General Public License for more</span> | 
|  | 34   21 <span class="c"># details.</span> | 
|  | 35   22 <span class="c">#</span> | 
|  | 36   23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span> | 
|  | 37   24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span> | 
|  | 38   25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span> | 
|  | 39   26 <span class="c"># Boston, MA, 02111-1307, USA.</span> | 
|  | 40   27 <span class="c">#</span> | 
|  | 41   28 | 
|  | 42   29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span> | 
|  | 43   30 <span class="k">use</span> <span class="w">FindBin</span><span class="sc">;</span> <span class="k">use</span> <span class="w">lib</span> <span class="q">"$FindBin::Bin/../lib"</span><span class="sc">;</span> | 
|  | 44   31 <span class="k">use</span> <span class="w">Getopt::Long</span><span class="sc">;</span> | 
|  | 45   32 <span class="k">use</span> <span class="w">File::Basename</span><span class="sc">;</span> | 
|  | 46   33 <span class="k">use</span> <span class="w">Text::ParseWords</span><span class="sc">;</span> | 
|  | 47   34 <span class="k">use</span> <span class="w">Benchmark</span><span class="sc">;</span> | 
|  | 48   35 <span class="k">use</span> <span class="w">FileUtil</span><span class="sc">;</span> | 
|  | 49   36 <span class="k">use</span> <span class="w">TextUtil</span><span class="sc">;</span> | 
|  | 50   37 <span class="k">use</span> <span class="w">SequenceFileUtil</span><span class="sc">;</span> | 
|  | 51   38 <span class="k">use</span> <span class="w">AminoAcids</span><span class="sc">;</span> | 
|  | 52   39 <span class="k">use</span> <span class="w">NucleicAcids</span><span class="sc">;</span> | 
|  | 53   40 | 
|  | 54   41 <span class="k">my</span><span class="s">(</span><span class="i">$ScriptName</span><span class="cm">,</span> <span class="i">%Options</span><span class="cm">,</span> <span class="i">$StartTime</span><span class="cm">,</span> <span class="i">$EndTime</span><span class="cm">,</span> <span class="i">$TotalTime</span><span class="s">)</span><span class="sc">;</span> | 
|  | 55   42 | 
|  | 56   43 <span class="c"># Autoflush STDOUT</span> | 
|  | 57   44 <span class="i">$|</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 58   45 | 
|  | 59   46 <span class="c"># Starting message...</span> | 
|  | 60   47 <span class="i">$ScriptName</span> = <span class="i">basename</span><span class="s">(</span><span class="i">$0</span><span class="s">)</span><span class="sc">;</span> | 
|  | 61   48 <span class="k">print</span> <span class="q">"\n$ScriptName: Starting...\n\n"</span><span class="sc">;</span> | 
|  | 62   49 <span class="i">$StartTime</span> = <span class="w">new</span> <span class="w">Benchmark</span><span class="sc">;</span> | 
|  | 63   50 | 
|  | 64   51 <span class="c"># Setup script usage message...</span> | 
|  | 65   52 <span class="i">SetupScriptUsage</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 66   53 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">help</span>} || <span class="i">@ARGV</span> &lt; <span class="n">1</span><span class="s">)</span> <span class="s">{</span> | 
|  | 67   54   <span class="k">die</span> <span class="i">GetUsageFromPod</span><span class="s">(</span><span class="q">"$FindBin::Bin/$ScriptName"</span><span class="s">)</span><span class="sc">;</span> | 
|  | 68   55 <span class="s">}</span> | 
|  | 69   56 | 
|  | 70   57 <span class="c"># Expand wild card file names...</span> | 
|  | 71   58 <span class="k">my</span><span class="s">(</span><span class="i">@SequenceFilesList</span><span class="s">)</span><span class="sc">;</span> | 
|  | 72   59 <span class="i">@SequenceFilesList</span> = <span class="i">ExpandFileNames</span><span class="s">(</span>\<span class="i">@ARGV</span><span class="cm">,</span> <span class="q">"aln msf fasta fta pir"</span><span class="s">)</span><span class="sc">;</span> | 
|  | 73   60 | 
|  | 74   61 <span class="k">print</span> <span class="q">"Processing options...\n"</span><span class="sc">;</span> | 
|  | 75   62 <span class="k">my</span><span class="s">(</span><span class="i">%OptionsInfo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 76   63 <span class="i">ProcessOptions</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 77   64 | 
|  | 78   65 <span class="c"># Set up information about input files...</span> | 
|  | 79   66 <span class="k">print</span> <span class="q">"Checking input sequence file(s)...\n"</span><span class="sc">;</span> | 
|  | 80   67 <span class="k">my</span><span class="s">(</span><span class="i">%SequenceFilesInfo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 81   68 <span class="i">RetrieveSequenceFilesInfo</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 82   69 <span class="i">SetupSequenceRegionsData</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 83   70 | 
|  | 84   71 <span class="c"># Process input files..</span> | 
|  | 85   72 <span class="k">my</span><span class="s">(</span><span class="i">$FileIndex</span><span class="s">)</span><span class="sc">;</span> | 
|  | 86   73 <span class="k">if</span> <span class="s">(</span><span class="i">@SequenceFilesList</span> > <span class="n">1</span><span class="s">)</span> <span class="s">{</span> | 
|  | 87   74   <span class="k">print</span> <span class="q">"\nProcessing sequence files...\n"</span><span class="sc">;</span> | 
|  | 88   75 <span class="s">}</span> | 
|  | 89   76 <span class="k">for</span> <span class="i">$FileIndex</span> <span class="s">(</span><span class="n">0</span> .. <span class="i">$#SequenceFilesList</span><span class="s">)</span> <span class="s">{</span> | 
|  | 90   77   <span class="k">if</span> <span class="s">(</span><span class="i">$SequenceFilesInfo</span>{<span class="w">FilesOkay</span>}[<span class="i">$FileIndex</span>]<span class="s">)</span> <span class="s">{</span> | 
|  | 91   78     <span class="k">print</span> <span class="q">"\nProcessing file $SequenceFilesList[$FileIndex]...\n"</span><span class="sc">;</span> | 
|  | 92   79     <span class="i">AnalyzeSequenceFileData</span><span class="s">(</span><span class="i">$FileIndex</span><span class="s">)</span><span class="sc">;</span> | 
|  | 93   80   <span class="s">}</span> | 
|  | 94   81 <span class="s">}</span> | 
|  | 95   82 <span class="k">print</span> <span class="q">"\n$ScriptName:Done...\n\n"</span><span class="sc">;</span> | 
|  | 96   83 | 
|  | 97   84 <span class="i">$EndTime</span> = <span class="w">new</span> <span class="w">Benchmark</span><span class="sc">;</span> | 
|  | 98   85 <span class="i">$TotalTime</span> = <span class="w">timediff</span> <span class="s">(</span><span class="i">$EndTime</span><span class="cm">,</span> <span class="i">$StartTime</span><span class="s">)</span><span class="sc">;</span> | 
|  | 99   86 <span class="k">print</span> <span class="q">"Total time: "</span><span class="cm">,</span> <span class="i">timestr</span><span class="s">(</span><span class="i">$TotalTime</span><span class="s">)</span><span class="cm">,</span> <span class="q">"\n"</span><span class="sc">;</span> | 
|  | 100   87 | 
|  | 101   88 <span class="c">###############################################################################</span> | 
|  | 102   89 | 
|  | 103   90 <span class="c"># Analyze sequence file...</span> | 
|  | 104 <a name="AnalyzeSequenceFileData-"></a>  91 <span class="k">sub </span><span class="m">AnalyzeSequenceFileData</span> <span class="s">{</span> | 
|  | 105   92   <span class="k">my</span><span class="s">(</span><span class="i">$FileIndex</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 106   93   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 107   94 | 
|  | 108   95   <span class="i">$SequenceFile</span> = <span class="i">$SequenceFilesList</span>[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 109   96 | 
|  | 110   97   <span class="k">open</span> <span class="w">SEQUENCEFILE</span><span class="cm">,</span> <span class="q">"$SequenceFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Can't open $SequenceFile: $! \n"</span><span class="sc">;</span> | 
|  | 111   98   <span class="i">$SequenceDataRef</span> = <span class="i">ReadSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 112   99   <span class="k">close</span> <span class="w">SEQUENCEFILE</span><span class="sc">;</span> | 
|  | 113  100 | 
|  | 114  101   <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">CalculatePercentIdentityMatrix</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 115  102     <span class="i">CalculatePercentIdentityMatrix</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 116  103   <span class="s">}</span> | 
|  | 117  104   <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">PerformResidueFrequencyAnalysis</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 118  105     <span class="i">PerformResidueFrequencyAnalysis</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 119  106   <span class="s">}</span> | 
|  | 120  107 <span class="s">}</span> | 
|  | 121  108 | 
|  | 122  109 <span class="c"># Calculate percent identity matrix...</span> | 
|  | 123 <a name="CalculatePercentIdentityMatrix-"></a> 110 <span class="k">sub </span><span class="m">CalculatePercentIdentityMatrix</span> <span class="s">{</span> | 
|  | 124  111   <span class="k">my</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 125  112   <span class="k">my</span><span class="s">(</span><span class="i">$PercentIdentity</span><span class="cm">,</span> <span class="i">$PercentIdentityMatrixFile</span><span class="cm">,</span> <span class="i">$PercentIdentityMatrixRef</span><span class="cm">,</span> <span class="i">$RowID</span><span class="cm">,</span> <span class="i">$ColID</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">@LineWords</span><span class="s">)</span><span class="sc">;</span> | 
|  | 126  113 | 
|  | 127  114   <span class="i">$PercentIdentityMatrixFile</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileRoot</span>}[<span class="i">$FileIndex</span>] . <span class="q">'PercentIdentityMatrix.'</span> . <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileExt</span>}[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 128  115   <span class="i">$PercentIdentityMatrixRef</span> = <span class="i">CalculatePercentSequenceIdentityMatrix</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">Precision</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 129  116 | 
|  | 130  117   <span class="k">print</span> <span class="q">"Generating percent identity matrix file $PercentIdentityMatrixFile...\n"</span><span class="sc">;</span> | 
|  | 131  118   <span class="k">open</span> <span class="w">OUTFILE</span><span class="cm">,</span> <span class="q">">$PercentIdentityMatrixFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Can't open $PercentIdentityMatrixFile: $!\n"</span><span class="sc">;</span> | 
|  | 132  119 | 
|  | 133  120   <span class="c"># Write out column labels...</span> | 
|  | 134  121   <span class="i">@LineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 135  122   <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="q">''</span><span class="sc">;</span> | 
|  | 136  123   <span class="k">for</span> <span class="i">$ColID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$PercentIdentityMatrixRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 137  124     <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$ColID</span><span class="sc">;</span> | 
|  | 138  125   <span class="s">}</span> | 
|  | 139  126   <span class="i">$Line</span> = <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 140  127   <span class="k">print</span> <span class="i">OUTFILE</span> <span class="q">"$Line\n"</span><span class="sc">;</span> | 
|  | 141  128 | 
|  | 142  129   <span class="c"># Write out rows...</span> | 
|  | 143  130   <span class="k">for</span> <span class="i">$RowID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$PercentIdentityMatrixRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 144  131     <span class="i">@LineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 145  132     <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$RowID</span><span class="sc">;</span> | 
|  | 146  133     <span class="k">for</span> <span class="i">$ColID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$PercentIdentityMatrixRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 147  134       <span class="i">$PercentIdentity</span> = <span class="i">$PercentIdentityMatrixRef</span>->{<span class="w">PercentIdentity</span>}{<span class="i">$RowID</span>}{<span class="i">$ColID</span>}<span class="sc">;</span> | 
|  | 148  135       <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$PercentIdentity</span><span class="sc">;</span> | 
|  | 149  136     <span class="s">}</span> | 
|  | 150  137     <span class="i">$Line</span> = <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 151  138     <span class="k">print</span> <span class="i">OUTFILE</span> <span class="q">"$Line\n"</span><span class="sc">;</span> | 
|  | 152  139   <span class="s">}</span> | 
|  | 153  140   <span class="k">close</span> <span class="w">OUTFILE</span><span class="sc">;</span> | 
|  | 154  141 <span class="s">}</span> | 
|  | 155  142 | 
|  | 156  143 <span class="c"># Perform frequency analysis...</span> | 
|  | 157 <a name="PerformResidueFrequencyAnalysis-"></a> 144 <span class="k">sub </span><span class="m">PerformResidueFrequencyAnalysis</span> <span class="s">{</span> | 
|  | 158  145   <span class="k">my</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 159  146 | 
|  | 160  147   <span class="i">CountResiduesInRegions</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 161  148   <span class="i">CalculatePercentResidueFrequencyInRegions</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 162  149   <span class="i">GeneratePercentResidueFrequencyOutFilesForRegions</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 163  150 <span class="s">}</span> | 
|  | 164  151 | 
|  | 165  152 <span class="c"># Count residues...</span> | 
|  | 166 <a name="CountResiduesInRegions-"></a> 153 <span class="k">sub </span><span class="m">CountResiduesInRegions</span> <span class="s">{</span> | 
|  | 167  154   <span class="k">my</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 168  155 | 
|  | 169  156   <span class="c"># Setup reference sequence data...</span> | 
|  | 170  157   <span class="k">my</span><span class="s">(</span><span class="i">$RefereceSequenceID</span><span class="cm">,</span> <span class="i">$RefereceSequence</span><span class="s">)</span><span class="sc">;</span> | 
|  | 171  158   <span class="i">$RefereceSequenceID</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceID</span>}[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 172  159   <span class="i">$RefereceSequence</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequence</span>}[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 173  160 | 
|  | 174  161   <span class="c"># Count residues...</span> | 
|  | 175  162   <span class="k">my</span><span class="s">(</span><span class="i">$RegionID</span><span class="cm">,</span> <span class="i">$StartResNum</span><span class="cm">,</span> <span class="i">$EndResNum</span><span class="cm">,</span> <span class="i">$ResNum</span><span class="cm">,</span> <span class="i">$ResIndex</span><span class="cm">,</span> <span class="i">$ID</span><span class="cm">,</span> <span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$Residue</span><span class="s">)</span><span class="sc">;</span> | 
|  | 176  163   <span class="k">for</span> <span class="i">$RegionID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="w">RegionIDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 177  164     <span class="i">$StartResNum</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">StartResNum</span>}<span class="sc">;</span> | 
|  | 178  165     <span class="i">$EndResNum</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">EndResNum</span>}<span class="sc">;</span> | 
|  | 179  166     <span class="j">RESNUM:</span> <span class="k">for</span> <span class="i">$ResNum</span> <span class="s">(</span><span class="i">$StartResNum</span> .. <span class="i">$EndResNum</span><span class="s">)</span> <span class="s">{</span> | 
|  | 180  167       <span class="i">$ResIndex</span> = <span class="i">$ResNum</span> - <span class="n">1</span><span class="sc">;</span> | 
|  | 181  168       <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>} && <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$FileIndex</span>]{<span class="w">IsGap</span>}{<span class="i">$ResNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 182  169         <span class="k">next</span> <span class="j">RESNUM</span><span class="sc">;</span> | 
|  | 183  170       <span class="s">}</span> | 
|  | 184  171       <span class="c"># Go over residues in column $ResNum in all the sequences...</span> | 
|  | 185  172       <span class="j">ID:</span> <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 186  173         <span class="i">$Sequence</span> = <span class="i">$SequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 187  174         <span class="i">$Residue</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$ResIndex</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 188  175         <span class="k">if</span> <span class="s">(</span><span class="i">IsGapResidue</span><span class="s">(</span><span class="i">$Residue</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 189  176           <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">Count</span>}{<span class="i">$ResNum</span>}{<span class="w">Gap</span>} += <span class="n">1</span><span class="sc">;</span> | 
|  | 190  177         <span class="s">}</span> | 
|  | 191  178         <span class="k">else</span> <span class="s">{</span> | 
|  | 192  179           <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">Count</span>}{<span class="i">$ResNum</span>}{<span class="i">$Residue</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 193  180             <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">Count</span>}{<span class="i">$ResNum</span>}{<span class="i">$Residue</span>} += <span class="n">1</span><span class="sc">;</span> | 
|  | 194  181           <span class="s">}</span> | 
|  | 195  182           <span class="k">else</span> <span class="s">{</span> | 
|  | 196  183             <span class="c"># Internal error...</span> | 
|  | 197  184             <span class="k">print</span> <span class="q">"Warning: Ignoring residue $Residue in sequence $ID during ResidueFrequencyAnalysis calculation: Unknown residue...\n"</span><span class="sc">;</span> | 
|  | 198  185           <span class="s">}</span> | 
|  | 199  186         <span class="s">}</span> | 
|  | 200  187       <span class="s">}</span> | 
|  | 201  188     <span class="s">}</span> | 
|  | 202  189   <span class="s">}</span> | 
|  | 203  190 <span class="s">}</span> | 
|  | 204  191 | 
|  | 205  192 <span class="c"># Calculate percent frequency for various residues in the sequence regions...</span> | 
|  | 206 <a name="CalculatePercentResidueFrequencyInRegions-"></a> 193 <span class="k">sub </span><span class="m">CalculatePercentResidueFrequencyInRegions</span> <span class="s">{</span> | 
|  | 207  194   <span class="k">my</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 208  195   <span class="k">my</span><span class="s">(</span><span class="i">$RegionID</span><span class="cm">,</span> <span class="i">$StartResNum</span><span class="cm">,</span> <span class="i">$EndResNum</span><span class="cm">,</span> <span class="i">$ResNum</span><span class="cm">,</span> <span class="i">$Residue</span><span class="cm">,</span> <span class="i">$Count</span><span class="cm">,</span> <span class="i">$PercentCount</span><span class="cm">,</span> <span class="i">$MaxResiduesCount</span><span class="cm">,</span> <span class="i">$Precision</span><span class="s">)</span><span class="sc">;</span> | 
|  | 209  196 | 
|  | 210  197   <span class="i">$MaxResiduesCount</span> = <span class="i">$SequenceDataRef</span>->{<span class="w">Count</span>}<span class="sc">;</span> | 
|  | 211  198   <span class="i">$Precision</span> = <span class="i">$OptionsInfo</span>{<span class="w">Precision</span>}<span class="sc">;</span> | 
|  | 212  199   <span class="k">for</span> <span class="i">$RegionID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="w">RegionIDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 213  200     <span class="i">$StartResNum</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">StartResNum</span>}<span class="sc">;</span> | 
|  | 214  201     <span class="i">$EndResNum</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">EndResNum</span>}<span class="sc">;</span> | 
|  | 215  202     <span class="j">RESNUM:</span> <span class="k">for</span> <span class="i">$ResNum</span> <span class="s">(</span><span class="i">$StartResNum</span> .. <span class="i">$EndResNum</span><span class="s">)</span> <span class="s">{</span> | 
|  | 216  203       <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>} && <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$FileIndex</span>]{<span class="w">IsGap</span>}{<span class="i">$ResNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 217  204         <span class="k">next</span> <span class="j">RESNUM</span><span class="sc">;</span> | 
|  | 218  205       <span class="s">}</span> | 
|  | 219  206       <span class="k">for</span> <span class="i">$Residue</span> <span class="s">(</span><span class="k">keys</span> <span class="i">%</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">Count</span>}{<span class="i">$ResNum</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 220  207         <span class="i">$Count</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">Count</span>}{<span class="i">$ResNum</span>}{<span class="i">$Residue</span>}<span class="sc">;</span> | 
|  | 221  208         <span class="i">$PercentCount</span> = <span class="s">(</span><span class="i">$Count</span> / <span class="i">$MaxResiduesCount</span><span class="s">)</span> * <span class="n">100</span><span class="sc">;</span> | 
|  | 222  209         <span class="i">$PercentCount</span> = <span class="k">sprintf</span><span class="s">(</span><span class="q">"%.${Precision}f"</span><span class="cm">,</span> <span class="i">$PercentCount</span><span class="s">)</span> + <span class="n">0</span><span class="sc">;</span> | 
|  | 223  210         <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">PercentCount</span>}{<span class="i">$ResNum</span>}{<span class="i">$Residue</span>} = <span class="i">$PercentCount</span><span class="sc">;</span> | 
|  | 224  211       <span class="s">}</span> | 
|  | 225  212     <span class="s">}</span> | 
|  | 226  213   <span class="s">}</span> | 
|  | 227  214 <span class="s">}</span> | 
|  | 228  215 | 
|  | 229  216 <span class="c"># Generate output files...</span> | 
|  | 230 <a name="GeneratePercentResidueFrequencyOutFilesForRegions-"></a> 217 <span class="k">sub </span><span class="m">GeneratePercentResidueFrequencyOutFilesForRegions</span> <span class="s">{</span> | 
|  | 231  218   <span class="k">my</span><span class="s">(</span><span class="i">$FileIndex</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 232  219 | 
|  | 233  220   <span class="c"># Setup reference sequence data...</span> | 
|  | 234  221   <span class="k">my</span><span class="s">(</span><span class="i">$RefereceSequenceID</span><span class="cm">,</span> <span class="i">$RefereceSequence</span><span class="s">)</span><span class="sc">;</span> | 
|  | 235  222   <span class="i">$RefereceSequenceID</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceID</span>}[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 236  223   <span class="i">$RefereceSequence</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequence</span>}[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 237  224 | 
|  | 238  225   <span class="k">my</span><span class="s">(</span><span class="i">$RegionID</span><span class="cm">,</span> <span class="i">$StartResNum</span><span class="cm">,</span> <span class="i">$EndResNum</span><span class="cm">,</span> <span class="i">$ResNum</span><span class="cm">,</span> <span class="i">$Count</span><span class="cm">,</span> <span class="i">$PercentCount</span><span class="cm">,</span> <span class="i">$Residue</span><span class="cm">,</span> <span class="i">$RegionNum</span><span class="cm">,</span> <span class="i">$RegionOutFile</span><span class="cm">,</span> <span class="i">$PercentRegionOutFile</span><span class="cm">,</span> <span class="i">$OutFileRoot</span><span class="cm">,</span> <span class="i">$OutFileExt</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">@PercentLineWords</span><span class="s">)</span><span class="sc">;</span> | 
|  | 239  226 | 
|  | 240  227   <span class="i">$OutFileRoot</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileRoot</span>}[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 241  228   <span class="i">$OutFileExt</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileExt</span>}[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 242  229   <span class="i">$RegionNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 243  230   <span class="k">for</span> <span class="i">$RegionID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="w">RegionIDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 244  231     <span class="i">$RegionNum</span>++<span class="sc">;</span> | 
|  | 245  232     <span class="i">$StartResNum</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">StartResNum</span>}<span class="sc">;</span> | 
|  | 246  233     <span class="i">$EndResNum</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">EndResNum</span>}<span class="sc">;</span> | 
|  | 247  234 | 
|  | 248  235     <span class="i">$RegionOutFile</span> = <span class="q">"${OutFileRoot}ResidueFrequencyAnalysisRegion${RegionNum}.${OutFileExt}"</span><span class="sc">;</span> | 
|  | 249  236     <span class="i">$PercentRegionOutFile</span> = <span class="q">"${OutFileRoot}PercentResidueFrequencyAnalysisRegion${RegionNum}.${OutFileExt}"</span><span class="sc">;</span> | 
|  | 250  237 | 
|  | 251  238     <span class="k">print</span> <span class="q">"Generating $RegionOutFile and $PercentRegionOutFile...\n"</span><span class="sc">;</span> | 
|  | 252  239     <span class="k">open</span> <span class="w">REGIONOUTFILE</span><span class="cm">,</span> <span class="q">">$RegionOutFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Can't open $RegionOutFile: $! \n"</span><span class="sc">;</span> | 
|  | 253  240     <span class="k">open</span> <span class="w">PERCENTREGIONOUTFILE</span><span class="cm">,</span> <span class="q">">$PercentRegionOutFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Can't open $PercentRegionOutFile: $! \n"</span><span class="sc">;</span> | 
|  | 254  241 | 
|  | 255  242     <span class="c"># Write out reference residue positions as column values....</span> | 
|  | 256  243     <span class="i">@LineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 257  244     <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="q">''</span><span class="sc">;</span> | 
|  | 258  245     <span class="j">RESNUM:</span> <span class="k">for</span> <span class="i">$ResNum</span> <span class="s">(</span><span class="i">$StartResNum</span> .. <span class="i">$EndResNum</span><span class="s">)</span> <span class="s">{</span> | 
|  | 259  246       <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>} && <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$FileIndex</span>]{<span class="w">IsGap</span>}{<span class="i">$ResNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 260  247         <span class="k">next</span> <span class="j">RESNUM</span><span class="sc">;</span> | 
|  | 261  248       <span class="s">}</span> | 
|  | 262  249       <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$ResNum</span><span class="sc">;</span> | 
|  | 263  250     <span class="s">}</span> | 
|  | 264  251     <span class="i">$Line</span> = <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 265  252     <span class="k">print</span> <span class="i">REGIONOUTFILE</span> <span class="q">"$Line\n"</span><span class="sc">;</span> | 
|  | 266  253     <span class="k">print</span> <span class="i">PERCENTREGIONOUTFILE</span> <span class="q">"$Line\n"</span><span class="sc">;</span> | 
|  | 267  254 | 
|  | 268  255 | 
|  | 269  256     <span class="c"># Write out row data for each residue; Gap residue is written last...</span> | 
|  | 270  257     <span class="j">RESIDUE:</span> <span class="k">for</span> <span class="i">$Residue</span> <span class="s">(</span><span class="k">sort</span> <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">ResidueCodes</span>}[<span class="i">$FileIndex</span>]}<span class="s">)</span> <span class="s">{</span> | 
|  | 271  258       <span class="k">if</span> <span class="s">(</span><span class="i">$Residue</span> =~ <span class="q">/^Gap$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 272  259         <span class="k">next</span> <span class="j">RESIDUE</span><span class="sc">;</span> | 
|  | 273  260       <span class="s">}</span> | 
|  | 274  261       <span class="i">@LineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 275  262       <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$Residue</span><span class="sc">;</span> | 
|  | 276  263       <span class="i">@PercentLineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 277  264       <span class="k">push</span> <span class="i">@PercentLineWords</span><span class="cm">,</span> <span class="i">$Residue</span><span class="sc">;</span> | 
|  | 278  265 | 
|  | 279  266       <span class="j">RESNUM:</span> <span class="k">for</span> <span class="i">$ResNum</span> <span class="s">(</span><span class="i">$StartResNum</span> .. <span class="i">$EndResNum</span><span class="s">)</span> <span class="s">{</span> | 
|  | 280  267         <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>} && <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$FileIndex</span>]{<span class="w">IsGap</span>}{<span class="i">$ResNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 281  268           <span class="k">next</span> <span class="j">RESNUM</span><span class="sc">;</span> | 
|  | 282  269         <span class="s">}</span> | 
|  | 283  270         <span class="i">$Count</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">Count</span>}{<span class="i">$ResNum</span>}{<span class="i">$Residue</span>}<span class="sc">;</span> | 
|  | 284  271         <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$Count</span><span class="sc">;</span> | 
|  | 285  272         <span class="i">$PercentCount</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">PercentCount</span>}{<span class="i">$ResNum</span>}{<span class="i">$Residue</span>}<span class="sc">;</span> | 
|  | 286  273         <span class="k">push</span> <span class="i">@PercentLineWords</span><span class="cm">,</span> <span class="i">$PercentCount</span><span class="sc">;</span> | 
|  | 287  274       <span class="s">}</span> | 
|  | 288  275       <span class="i">$Line</span> = <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 289  276       <span class="k">print</span> <span class="i">REGIONOUTFILE</span> <span class="q">"$Line\n"</span><span class="sc">;</span> | 
|  | 290  277 | 
|  | 291  278       <span class="i">$Line</span> = <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@PercentLineWords</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 292  279       <span class="k">print</span> <span class="i">PERCENTREGIONOUTFILE</span> <span class="q">"$Line\n"</span><span class="sc">;</span> | 
|  | 293  280     <span class="s">}</span> | 
|  | 294  281 | 
|  | 295  282     <span class="c"># Write out data for gap...</span> | 
|  | 296  283     <span class="i">$Residue</span> = <span class="q">'Gap'</span><span class="sc">;</span> | 
|  | 297  284     <span class="i">@LineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 298  285     <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$Residue</span><span class="sc">;</span> | 
|  | 299  286     <span class="i">@PercentLineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 300  287     <span class="k">push</span> <span class="i">@PercentLineWords</span><span class="cm">,</span> <span class="i">$Residue</span><span class="sc">;</span> | 
|  | 301  288 | 
|  | 302  289     <span class="j">RESNUM:</span> <span class="k">for</span> <span class="i">$ResNum</span> <span class="s">(</span><span class="i">$StartResNum</span> .. <span class="i">$EndResNum</span><span class="s">)</span> <span class="s">{</span> | 
|  | 303  290       <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>} && <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$FileIndex</span>]{<span class="w">IsGap</span>}{<span class="i">$ResNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 304  291         <span class="k">next</span> <span class="j">RESNUM</span><span class="sc">;</span> | 
|  | 305  292       <span class="s">}</span> | 
|  | 306  293       <span class="i">$Count</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">Count</span>}{<span class="i">$ResNum</span>}{<span class="i">$Residue</span>}<span class="sc">;</span> | 
|  | 307  294       <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$Count</span><span class="sc">;</span> | 
|  | 308  295 | 
|  | 309  296       <span class="i">$PercentCount</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$FileIndex</span>]{<span class="i">$RegionID</span>}{<span class="w">PercentCount</span>}{<span class="i">$ResNum</span>}{<span class="i">$Residue</span>}<span class="sc">;</span> | 
|  | 310  297       <span class="k">push</span> <span class="i">@PercentLineWords</span><span class="cm">,</span> <span class="i">$PercentCount</span><span class="sc">;</span> | 
|  | 311  298     <span class="s">}</span> | 
|  | 312  299     <span class="i">$Line</span> = <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 313  300     <span class="k">print</span> <span class="i">REGIONOUTFILE</span> <span class="q">"$Line\n"</span><span class="sc">;</span> | 
|  | 314  301 | 
|  | 315  302     <span class="i">$Line</span> = <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@PercentLineWords</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 316  303     <span class="k">print</span> <span class="i">PERCENTREGIONOUTFILE</span> <span class="q">"$Line\n"</span><span class="sc">;</span> | 
|  | 317  304 | 
|  | 318  305     <span class="k">close</span> <span class="w">REGIONOUTFILE</span><span class="sc">;</span> | 
|  | 319  306     <span class="k">close</span> <span class="w">PERCENTREGIONOUTFILE</span><span class="sc">;</span> | 
|  | 320  307   <span class="s">}</span> | 
|  | 321  308 <span class="s">}</span> | 
|  | 322  309 | 
|  | 323  310 <span class="c"># Process option values...</span> | 
|  | 324 <a name="ProcessOptions-"></a> 311 <span class="k">sub </span><span class="m">ProcessOptions</span> <span class="s">{</span> | 
|  | 325  312   <span class="i">%OptionsInfo</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 326  313 | 
|  | 327  314   <span class="c"># Setup analysis mode...</span> | 
|  | 328  315   <span class="i">$OptionsInfo</span>{<span class="w">CalculatePercentIdentityMatrix</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} =~ <span class="q">/^(PercentIdentityMatrix|All)$/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 329  316   <span class="i">$OptionsInfo</span>{<span class="w">PerformResidueFrequencyAnalysis</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} =~ <span class="q">/^(ResidueFrequencyAnalysis|All)$/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 330  317 | 
|  | 331  318   <span class="c"># Setup delimiter and quotes...</span> | 
|  | 332  319   <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">outdelim</span>} =~ <span class="q">/tab/i</span> <span class="s">)</span> ? <span class="q">"\t"</span> <span class="co">:</span> <span class="s">(</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">outdelim</span>} =~ <span class="q">/semicolon/i</span><span class="s">)</span> ? <span class="q">"\;"</span> <span class="co">:</span> <span class="q">"\,"</span><span class="s">)</span><span class="sc">;</span> | 
|  | 333  320   <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">quote</span>} =~ <span class="q">/yes/i</span> <span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 334  321 | 
|  | 335  322   <span class="c"># Setup reference sequence and regions for residue frequency analysis...</span> | 
|  | 336  323   <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedRefereceSequence</span>} = <span class="i">$Options</span>{<span class="w">referencesequence</span>}<span class="sc">;</span> | 
|  | 337  324   <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedRegion</span>} = <span class="i">$Options</span>{<span class="w">region</span>}<span class="sc">;</span> | 
|  | 338  325   <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedRegions</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 339  326 | 
|  | 340  327   <span class="k">my</span><span class="s">(</span><span class="i">@SpecifiedRegions</span><span class="s">)</span><span class="sc">;</span> | 
|  | 341  328   <span class="i">@SpecifiedRegions</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 342  329   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">region</span>} =~ <span class="q">/\,/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 343  330     <span class="i">@SpecifiedRegions</span> = <span class="k">split</span> <span class="q">/\,/</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedRegion</span>}<span class="sc">;</span> | 
|  | 344  331     <span class="k">if</span> <span class="s">(</span><span class="i">@SpecifiedRegions</span> % <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 345  332       <span class="k">die</span> <span class="q">"Error: The value specified, $Options{region}, for option \"--region\" is not valid. Allowed values: \"StartResNum,EndResNum,[StartResNum,EndResNum...]\" or UseCompleteSequence\n"</span><span class="sc">;</span> | 
|  | 346  333     <span class="s">}</span> | 
|  | 347  334     <span class="c"># Make sure EndResNum > StartResNum...</span> | 
|  | 348  335     <span class="k">my</span><span class="s">(</span><span class="i">$StartResNum</span><span class="cm">,</span> <span class="i">$EndResNum</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="i">$RegionNum</span><span class="s">)</span><span class="sc">;</span> | 
|  | 349  336     <span class="i">$RegionNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 350  337     <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> &lt;= <span class="i">$#SpecifiedRegions</span><span class="sc">;</span> <span class="i">$Index</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 351  338       <span class="i">$StartResNum</span> = <span class="i">$SpecifiedRegions</span>[<span class="i">$Index</span>]<span class="sc">;</span> | 
|  | 352  339       <span class="i">$EndResNum</span> = <span class="i">$SpecifiedRegions</span>[<span class="i">$Index</span> + <span class="n">1</span>]<span class="sc">;</span> | 
|  | 353  340       <span class="i">$RegionNum</span>++<span class="sc">;</span> | 
|  | 354  341       <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$StartResNum</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 355  342         <span class="k">die</span> <span class="q">"Error: The value specified, $Options{region}, for option \"--region\" is not valid: The start residue number, $StartResNum, must be a positive integer for region $RegionNum.\n"</span><span class="sc">;</span> | 
|  | 356  343       <span class="s">}</span> | 
|  | 357  344       <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$EndResNum</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 358  345         <span class="k">die</span> <span class="q">"Error: The value specified, $Options{region}, for option \"--region\" is not valid: The start residue number, $EndResNum, must be a positive integer for region $RegionNum.\n"</span><span class="sc">;</span> | 
|  | 359  346       <span class="s">}</span> | 
|  | 360  347       <span class="k">if</span> <span class="s">(</span><span class="i">$StartResNum</span> >= <span class="i">$EndResNum</span><span class="s">)</span> <span class="s">{</span> | 
|  | 361  348         <span class="k">die</span> <span class="q">"Error: The value specified, $Options{region}, for option \"--region\" is not valid: The start residue number, $StartResNum, must be smaller than end residue number, $EndResNum, for region $RegionNum.\n"</span><span class="sc">;</span> | 
|  | 362  349       <span class="s">}</span> | 
|  | 363  350     <span class="s">}</span> | 
|  | 364  351   <span class="s">}</span> | 
|  | 365  352   <span class="k">else</span> <span class="s">{</span> | 
|  | 366  353     <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">region</span>} !~ <span class="q">/^UseCompleteSequence$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 367  354       <span class="k">die</span> <span class="q">"Error: The value specified, $Options{region}, for option \"--region\" is not valid. Allowed values: \"StartResNum,EndResNum,[StartResNum,EndResNum...]\" or UseCompleteSequence\n"</span><span class="sc">;</span> | 
|  | 368  355     <span class="s">}</span> | 
|  | 369  356   <span class="s">}</span> | 
|  | 370  357   <span class="k">push</span> <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedRegions</span>}}<span class="cm">,</span> <span class="i">@SpecifiedRegions</span><span class="sc">;</span> | 
|  | 371  358 | 
|  | 372  359   <span class="c"># Miscellaneous options...</span> | 
|  | 373  360   <span class="i">$OptionsInfo</span>{<span class="w">Precision</span>} = <span class="i">$Options</span>{<span class="w">precision</span>}<span class="sc">;</span> | 
|  | 374  361   <span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">ignoregaps</span>} =~ <span class="q">/Yes/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 375  362   <span class="i">$OptionsInfo</span>{<span class="w">RegionResiduesMode</span>} = <span class="i">$Options</span>{<span class="w">regionresiduesmode</span>}<span class="sc">;</span> | 
|  | 376  363 | 
|  | 377  364   <span class="i">$OptionsInfo</span>{<span class="w">OverwriteFiles</span>} = <span class="i">$Options</span>{<span class="w">overwrite</span>} ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 378  365   <span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>} = <span class="i">$Options</span>{<span class="w">root</span>} ? <span class="i">$Options</span>{<span class="w">root</span>} <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 379  366 <span class="s">}</span> | 
|  | 380  367 | 
|  | 381  368 <span class="c"># Retrieve information about sequence files...</span> | 
|  | 382 <a name="RetrieveSequenceFilesInfo-"></a> 369 <span class="k">sub </span><span class="m">RetrieveSequenceFilesInfo</span> <span class="s">{</span> | 
|  | 383  370   <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span class="cm">,</span> <span class="i">$SequenceFile</span><span class="cm">,</span> <span class="i">$FileSupported</span><span class="cm">,</span> <span class="i">$FileFormat</span><span class="cm">,</span> <span class="i">$SequenceCount</span><span class="cm">,</span> <span class="i">$RefereceSequence</span><span class="cm">,</span> <span class="i">$RefereceSequenceID</span><span class="cm">,</span> <span class="i">$RefereceSequenceLen</span><span class="cm">,</span> <span class="i">$RefereceSequenceWithNoGaps</span><span class="cm">,</span> <span class="i">$RefereceSequenceWithNoGapsLen</span><span class="cm">,</span> <span class="i">$RefereceSequenceRegionCount</span><span class="cm">,</span> <span class="i">$FileDir</span><span class="cm">,</span> <span class="i">$FileName</span><span class="cm">,</span> <span class="i">$FileExt</span><span class="cm">,</span> <span class="i">$OutFileRoot</span><span class="cm">,</span> <span class="i">$OutFileExt</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="cm">,</span> <span class="i">$SpecifiedRefereceSequence</span><span class="cm">,</span> <span class="i">@SpecifiedRegions</span><span class="cm">,</span> <span class="i">@RefereceSequenceRegions</span><span class="s">)</span><span class="sc">;</span> | 
|  | 384  371 | 
|  | 385  372   <span class="i">%SequenceFilesInfo</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 386  373   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">FilesOkay</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 387  374   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileRoot</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 388  375   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileExt</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 389  376   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">Format</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 390  377   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">SequenceCount</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 391  378   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceID</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 392  379   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequence</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 393  380   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceLen</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 394  381   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceWithNoGaps</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 395  382   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceWithNoGapsLen</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 396  383   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceRegions</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 397  384   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceRegionCount</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 398  385   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">ResidueCodes</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 399  386 | 
|  | 400  387   <span class="j">FILELIST:</span> <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. <span class="i">$#SequenceFilesList</span><span class="s">)</span> <span class="s">{</span> | 
|  | 401  388     <span class="i">$SequenceFile</span> = <span class="i">$SequenceFilesList</span>[<span class="i">$Index</span>]<span class="sc">;</span> | 
|  | 402  389     <span class="i">$SequenceFilesInfo</span>{<span class="w">FilesOkay</span>}[<span class="i">$Index</span>] = <span class="n">0</span><span class="sc">;</span> | 
|  | 403  390     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileRoot</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 404  391     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileExt</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 405  392     <span class="i">$SequenceFilesInfo</span>{<span class="w">Format</span>}[<span class="i">$Index</span>] = <span class="q">'NotSupported'</span><span class="sc">;</span> | 
|  | 406  393     <span class="i">$SequenceFilesInfo</span>{<span class="w">SequenceCount</span>}[<span class="i">$Index</span>] = <span class="n">0</span><span class="sc">;</span> | 
|  | 407  394     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceID</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 408  395     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequence</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 409  396     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceLen</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 410  397     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceWithNoGaps</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 411  398     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceWithNoGapsLen</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 412  399     <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceRegions</span>}[<span class="i">$Index</span>]} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 413  400     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceRegionCount</span>}[<span class="i">$Index</span>] = <span class="n">0</span><span class="sc">;</span> | 
|  | 414  401     <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">ResidueCodes</span>}[<span class="i">$Index</span>]} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 415  402 | 
|  | 416  403     <span class="k">if</span> <span class="s">(</span>! <span class="k">open</span> <span class="w">SEQUENCEFILE</span><span class="cm">,</span> <span class="q">"$SequenceFile"</span><span class="s">)</span> <span class="s">{</span> | 
|  | 417  404       <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: Couldn't open it: $! \n"</span><span class="sc">;</span> | 
|  | 418  405       <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 419  406     <span class="s">}</span> | 
|  | 420  407     <span class="k">close</span> <span class="w">SEQUENCEFILE</span><span class="sc">;</span> | 
|  | 421  408 | 
|  | 422  409     <span class="s">(</span><span class="i">$FileSupported</span><span class="cm">,</span> <span class="i">$FileFormat</span><span class="s">)</span> = <span class="i">IsSupportedSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 423  410     <span class="k">if</span> <span class="s">(</span>!<span class="i">$FileSupported</span><span class="s">)</span> <span class="s">{</span> | 
|  | 424  411       <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: Sequence file format is not supported.\n"</span><span class="sc">;</span> | 
|  | 425  412       <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 426  413     <span class="s">}</span> | 
|  | 427  414 | 
|  | 428  415     <span class="i">$SequenceDataRef</span> = <span class="i">ReadSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 429  416 | 
|  | 430  417     <span class="i">$SequenceCount</span> = <span class="i">$SequenceDataRef</span>->{<span class="w">Count</span>}<span class="sc">;</span> | 
|  | 431  418     <span class="k">if</span> <span class="s">(</span>!<span class="i">$SequenceCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 432  419       <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: Sequence data is missing.\n"</span><span class="sc">;</span> | 
|  | 433  420       <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 434  421     <span class="s">}</span> | 
|  | 435  422 | 
|  | 436  423     <span class="c"># Make sure all sequence lengths are identical...</span> | 
|  | 437  424     <span class="k">if</span> <span class="s">(</span>!<span class="i">AreSequenceLengthsIdentical</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 438  425       <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: Sequence legths are not identical.\n"</span><span class="sc">;</span> | 
|  | 439  426       <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 440  427     <span class="s">}</span> | 
|  | 441  428     <span class="i">$SpecifiedRefereceSequence</span> = <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedRefereceSequence</span>}<span class="sc">;</span> | 
|  | 442  429     <span class="c"># Make sure reference sequence ID is valid...</span> | 
|  | 443  430     <span class="k">if</span> <span class="s">(</span><span class="i">$SpecifiedRefereceSequence</span> =~ <span class="q">/^UseFirstSequenceID$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 444  431       <span class="i">$RefereceSequenceID</span> = <span class="i">$SequenceDataRef</span>->{<span class="w">IDs</span>}[<span class="n">0</span>]<span class="sc">;</span> | 
|  | 445  432     <span class="s">}</span> | 
|  | 446  433     <span class="k">else</span> <span class="s">{</span> | 
|  | 447  434       <span class="k">if</span> <span class="s">(</span>!<span class="k">exists</span><span class="s">(</span><span class="i">$SequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$SpecifiedRefereceSequence</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 448  435         <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: Rreference sequence ID, $SpecifiedRefereceSequence, specified using option \"--referencesequence\" is missing.\n"</span><span class="sc">;</span> | 
|  | 449  436         <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 450  437       <span class="s">}</span> | 
|  | 451  438       <span class="i">$RefereceSequenceID</span> = <span class="i">$SpecifiedRefereceSequence</span><span class="sc">;</span> | 
|  | 452  439     <span class="s">}</span> | 
|  | 453  440 | 
|  | 454  441     <span class="c"># Make sure sequence regions corresponding to reference sequence are valid...</span> | 
|  | 455  442     <span class="i">@RefereceSequenceRegions</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 456  443     <span class="i">$RefereceSequenceRegionCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 457  444     <span class="i">$RefereceSequence</span> = <span class="i">$SequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$RefereceSequenceID</span>}<span class="sc">;</span> | 
|  | 458  445     <span class="i">$RefereceSequenceLen</span> = <span class="k">length</span> <span class="i">$RefereceSequence</span><span class="sc">;</span> | 
|  | 459  446 | 
|  | 460  447     <span class="i">$RefereceSequenceWithNoGaps</span> = <span class="i">RemoveSequenceGaps</span><span class="s">(</span><span class="i">$RefereceSequence</span><span class="s">)</span><span class="sc">;</span> | 
|  | 461  448     <span class="i">$RefereceSequenceWithNoGapsLen</span> = <span class="k">length</span> <span class="i">$RefereceSequenceWithNoGaps</span><span class="sc">;</span> | 
|  | 462  449 | 
|  | 463  450     <span class="i">@SpecifiedRegions</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 464  451     <span class="k">push</span> <span class="i">@SpecifiedRegions</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedRegions</span>}}<span class="sc">;</span> | 
|  | 465  452     <span class="k">if</span> <span class="s">(</span><span class="i">@SpecifiedRegions</span><span class="s">)</span> <span class="s">{</span> | 
|  | 466  453       <span class="c"># Make sure specified regions are valid...</span> | 
|  | 467  454       <span class="k">my</span><span class="s">(</span><span class="i">$StartResNum</span><span class="cm">,</span> <span class="i">$EndResNum</span><span class="cm">,</span> <span class="i">$RegionIndex</span><span class="cm">,</span> <span class="i">$RegionNum</span><span class="s">)</span><span class="sc">;</span> | 
|  | 468  455       <span class="i">$RegionNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 469  456       <span class="k">for</span> <span class="s">(</span><span class="i">$RegionIndex</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$RegionIndex</span> <= <span class="i">$#SpecifiedRegions</span><span class="sc">;</span> <span class="i">$RegionIndex</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 470  457         <span class="i">$StartResNum</span> = <span class="i">$SpecifiedRegions</span>[<span class="i">$RegionIndex</span>]<span class="sc">;</span> | 
|  | 471  458         <span class="i">$EndResNum</span> = <span class="i">$SpecifiedRegions</span>[<span class="i">$RegionIndex</span> + <span class="n">1</span>]<span class="sc">;</span> | 
|  | 472  459         <span class="i">$RegionNum</span>++<span class="sc">;</span> | 
|  | 473  460         <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 474  461           <span class="k">if</span> <span class="s">(</span><span class="i">$StartResNum</span> > <span class="i">$RefereceSequenceWithNoGapsLen</span><span class="s">)</span> <span class="s">{</span> | 
|  | 475  462             <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: The value specified, $Options{region}, for option \"--region\" is not valid: The start residue number, $StartResNum, must be smaller the sequence length, $RefereceSequenceWithNoGapsLen, of reference sequence ID,  $RefereceSequenceID, in region $RegionNum. The reference sequence residue numbers correspond to the sequence with no gaps. Specify \"No\" value for \"-i, --ignoregaps\" option to use residue numbers corresponding to reference sequence including gaps.\n"</span><span class="sc">;</span> | 
|  | 476  463             <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 477  464           <span class="s">}</span> | 
|  | 478  465           <span class="k">if</span> <span class="s">(</span><span class="i">$EndResNum</span> > <span class="i">$RefereceSequenceWithNoGapsLen</span><span class="s">)</span> <span class="s">{</span> | 
|  | 479  466             <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: The value specified, $Options{region}, for option \"--region\" is not valid: The end residue number, $EndResNum, must be smaller the sequence length, $RefereceSequenceWithNoGapsLen, of reference sequence ID,  $RefereceSequenceID, in region $RegionNum. The reference sequence residue numbers correspond to the sequence with no gaps. Specify \"No\" value for \"-i, --ignoregaps\" option to use residue numbers corresponding to reference sequence including gaps.\n"</span><span class="sc">;</span> | 
|  | 480  467             <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 481  468           <span class="s">}</span> | 
|  | 482  469         <span class="s">}</span> | 
|  | 483  470         <span class="k">else</span> <span class="s">{</span> | 
|  | 484  471           <span class="k">if</span> <span class="s">(</span><span class="i">$StartResNum</span> > <span class="i">$RefereceSequenceLen</span><span class="s">)</span> <span class="s">{</span> | 
|  | 485  472             <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: The value specified, $Options{region}, for option \"--region\" is not valid: The start residue number, $StartResNum, must be smaller the sequence length, $RefereceSequenceLen, of reference sequence ID,  $RefereceSequenceID, in region $RegionNum.\n"</span><span class="sc">;</span> | 
|  | 486  473             <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 487  474           <span class="s">}</span> | 
|  | 488  475           <span class="k">if</span> <span class="s">(</span><span class="i">$EndResNum</span> > <span class="i">$RefereceSequenceLen</span><span class="s">)</span> <span class="s">{</span> | 
|  | 489  476             <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: The value specified, $Options{region}, for option \"--region\" is not valid: The end residue number, $EndResNum, must be smaller the sequence length, $RefereceSequenceLen, of reference sequence ID,  $RefereceSequenceID, in region $RegionNum.\n"</span><span class="sc">;</span> | 
|  | 490  477             <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 491  478           <span class="s">}</span> | 
|  | 492  479         <span class="s">}</span> | 
|  | 493  480         <span class="k">push</span> <span class="i">@RefereceSequenceRegions</span><span class="cm">,</span> <span class="s">(</span><span class="i">$StartResNum</span><span class="cm">,</span> <span class="i">$EndResNum</span><span class="s">)</span><span class="sc">;</span> | 
|  | 494  481       <span class="s">}</span> | 
|  | 495  482       <span class="i">$RefereceSequenceRegionCount</span> = <span class="i">$RegionNum</span><span class="sc">;</span> | 
|  | 496  483     <span class="s">}</span> | 
|  | 497  484     <span class="k">else</span> <span class="s">{</span> | 
|  | 498  485       <span class="c"># Use complete sequence corresponding to reference sequence ID...</span> | 
|  | 499  486       <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 500  487         <span class="k">push</span> <span class="i">@RefereceSequenceRegions</span><span class="cm">,</span> <span class="s">(</span><span class="n">1</span><span class="cm">,</span> <span class="i">$RefereceSequenceWithNoGapsLen</span><span class="s">)</span><span class="sc">;</span> | 
|  | 501  488       <span class="s">}</span> | 
|  | 502  489       <span class="k">else</span> <span class="s">{</span> | 
|  | 503  490         <span class="k">push</span> <span class="i">@RefereceSequenceRegions</span><span class="cm">,</span> <span class="s">(</span><span class="n">1</span><span class="cm">,</span> <span class="i">$RefereceSequenceLen</span><span class="s">)</span><span class="sc">;</span> | 
|  | 504  491       <span class="s">}</span> | 
|  | 505  492       <span class="i">$RefereceSequenceRegionCount</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 506  493     <span class="s">}</span> | 
|  | 507  494     <span class="c"># Setup output file names...</span> | 
|  | 508  495     <span class="i">$FileDir</span> = <span class="q">""</span><span class="sc">;</span> <span class="i">$FileName</span> = <span class="q">""</span><span class="sc">;</span> <span class="i">$FileExt</span> = <span class="q">""</span><span class="sc">;</span> | 
|  | 509  496     <span class="s">(</span><span class="i">$FileDir</span><span class="cm">,</span> <span class="i">$FileName</span><span class="cm">,</span> <span class="i">$FileExt</span><span class="s">)</span> = <span class="i">ParseFileName</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 510  497     <span class="i">$FileExt</span> = <span class="q">"csv"</span><span class="sc">;</span> | 
|  | 511  498     <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">outdelim</span>} =~ <span class="q">/^tab$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 512  499       <span class="i">$FileExt</span> = <span class="q">"tsv"</span><span class="sc">;</span> | 
|  | 513  500     <span class="s">}</span> | 
|  | 514  501     <span class="i">$OutFileExt</span> = <span class="i">$FileExt</span><span class="sc">;</span> | 
|  | 515  502     <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>} && <span class="s">(</span><span class="i">@SequenceFilesList</span> == <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 516  503       <span class="k">my</span> <span class="s">(</span><span class="i">$RootFileDir</span><span class="cm">,</span> <span class="i">$RootFileName</span><span class="cm">,</span> <span class="i">$RootFileExt</span><span class="s">)</span> = <span class="i">ParseFileName</span><span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 517  504       <span class="k">if</span> <span class="s">(</span><span class="i">$RootFileName</span> && <span class="i">$RootFileExt</span><span class="s">)</span> <span class="s">{</span> | 
|  | 518  505         <span class="i">$FileName</span> = <span class="i">$RootFileName</span><span class="sc">;</span> | 
|  | 519  506       <span class="s">}</span> | 
|  | 520  507       <span class="k">else</span> <span class="s">{</span> | 
|  | 521  508         <span class="i">$FileName</span> = <span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>}<span class="sc">;</span> | 
|  | 522  509       <span class="s">}</span> | 
|  | 523  510       <span class="i">$OutFileRoot</span> = <span class="i">$FileName</span><span class="sc">;</span> | 
|  | 524  511     <span class="s">}</span> | 
|  | 525  512     <span class="k">else</span> <span class="s">{</span> | 
|  | 526  513       <span class="i">$OutFileRoot</span> = <span class="i">$FileName</span><span class="sc">;</span> | 
|  | 527  514     <span class="s">}</span> | 
|  | 528  515     <span class="k">if</span> <span class="s">(</span>!<span class="i">$OptionsInfo</span>{<span class="w">OverwriteFiles</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 529  516       <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">CalculatePercentIdentityMatrix</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 530  517         <span class="k">if</span> <span class="s">(</span><span class="k">-e</span> <span class="q">"${OutFileRoot}PercentIdentityMatrix.${OutFileExt}"</span><span class="s">)</span> <span class="s">{</span> | 
|  | 531  518           <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: The file ${OutFileRoot}PercentIdentityMatrix.${OutFileExt} already exists\n"</span><span class="sc">;</span> | 
|  | 532  519           <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 533  520         <span class="s">}</span> | 
|  | 534  521       <span class="s">}</span> | 
|  | 535  522       <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">PerformResidueFrequencyAnalysis</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 536  523         <span class="k">my</span><span class="s">(</span><span class="i">$RegionNum</span><span class="s">)</span><span class="sc">;</span> | 
|  | 537  524         <span class="k">for</span> <span class="i">$RegionNum</span> <span class="s">(</span><span class="n">1</span> .. <span class="i">$RefereceSequenceRegionCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 538  525           <span class="k">if</span> <span class="s">(</span><span class="k">-e</span> <span class="q">"${OutFileRoot}ResidueFrequencyAnalysisRegion${RegionNum}.${OutFileExt}"</span><span class="s">)</span> <span class="s">{</span> | 
|  | 539  526             <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: The file ${OutFileRoot}ResidueFrequencyAnalysisRegion${RegionNum}.${OutFileExt} already exists\n"</span><span class="sc">;</span> | 
|  | 540  527             <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 541  528           <span class="s">}</span> | 
|  | 542  529           <span class="k">if</span> <span class="s">(</span><span class="k">-e</span> <span class="q">"${OutFileRoot}PercentResidueFrequencyAnalysisRegion${RegionNum}.${OutFileExt}"</span><span class="s">)</span> <span class="s">{</span> | 
|  | 543  530             <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: The file ${OutFileRoot}PercentResidueFrequencyAnalysisRegion${RegionNum}.${OutFileExt} already exists\n"</span><span class="sc">;</span> | 
|  | 544  531             <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 545  532           <span class="s">}</span> | 
|  | 546  533         <span class="s">}</span> | 
|  | 547  534       <span class="s">}</span> | 
|  | 548  535     <span class="s">}</span> | 
|  | 549  536 | 
|  | 550  537     <span class="i">$SequenceFilesInfo</span>{<span class="w">FilesOkay</span>}[<span class="i">$Index</span>] = <span class="n">1</span><span class="sc">;</span> | 
|  | 551  538     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileRoot</span>}[<span class="i">$Index</span>] = <span class="i">$OutFileRoot</span><span class="sc">;</span> | 
|  | 552  539     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileExt</span>}[<span class="i">$Index</span>] = <span class="i">$OutFileExt</span><span class="sc">;</span> | 
|  | 553  540 | 
|  | 554  541     <span class="i">$SequenceFilesInfo</span>{<span class="w">Format</span>}[<span class="i">$Index</span>] = <span class="i">$FileFormat</span><span class="sc">;</span> | 
|  | 555  542     <span class="i">$SequenceFilesInfo</span>{<span class="w">SequenceCount</span>}[<span class="i">$Index</span>] = <span class="i">$SequenceCount</span><span class="sc">;</span> | 
|  | 556  543     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceID</span>}[<span class="i">$Index</span>] = <span class="i">$RefereceSequenceID</span><span class="sc">;</span> | 
|  | 557  544     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequence</span>}[<span class="i">$Index</span>] = <span class="i">$RefereceSequence</span><span class="sc">;</span> | 
|  | 558  545     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceLen</span>}[<span class="i">$Index</span>] = <span class="i">$RefereceSequenceLen</span><span class="sc">;</span> | 
|  | 559  546     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceWithNoGaps</span>}[<span class="i">$Index</span>] = <span class="i">$RefereceSequenceWithNoGaps</span><span class="sc">;</span> | 
|  | 560  547     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceWithNoGapsLen</span>}[<span class="i">$Index</span>] = <span class="i">$RefereceSequenceWithNoGapsLen</span><span class="sc">;</span> | 
|  | 561  548     <span class="k">push</span> <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceRegions</span>}[<span class="i">$Index</span>]}<span class="cm">,</span> <span class="i">@RefereceSequenceRegions</span><span class="sc">;</span> | 
|  | 562  549     <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceRegionCount</span>}[<span class="i">$Index</span>] = <span class="i">$RefereceSequenceRegionCount</span><span class="sc">;</span> | 
|  | 563  550 | 
|  | 564  551     <span class="c"># Setup residue codes...</span> | 
|  | 565  552     <span class="i">SetupSequenceFileResidueCodes</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="cm">,</span> <span class="i">$Index</span><span class="s">)</span><span class="sc">;</span> | 
|  | 566  553   <span class="s">}</span> | 
|  | 567  554 <span class="s">}</span> | 
|  | 568  555 | 
|  | 569 <a name="SetupSequenceFileResidueCodes-"></a> 556 <span class="k">sub </span><span class="m">SetupSequenceFileResidueCodes</span> <span class="s">{</span> | 
|  | 570  557   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="cm">,</span> <span class="i">$FileIndex</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 571  558   <span class="k">my</span><span class="s">(</span><span class="i">$Residue</span><span class="cm">,</span> <span class="i">@ResidueCodesList</span><span class="cm">,</span> <span class="i">%ResidueCodesMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 572  559 | 
|  | 573  560   <span class="c"># Build the residue codes list for the file at $FileIndex from the sequences in $SequenceDataRef; start by clearing the stored list along with the local list and lookup map used to build it...</span> | 
|  | 574  561   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">ResidueCodes</span>}[<span class="i">$FileIndex</span>]} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 575  562 | 
|  | 576  563   <span class="i">%ResidueCodesMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 577  564   <span class="i">@ResidueCodesList</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 578  565 | 
|  | 579  566   <span class="c"># Seed the list with one letter codes for the selected RegionResiduesMode (AminoAcids or NucleicAcids; no codes are added for any other value), always append 'Gap', and record every seeded code in the lookup map...</span> | 
|  | 580  567   <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">RegionResiduesMode</span>} =~ <span class="q">/^AminoAcids$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 581  568     <span class="i">@ResidueCodesList</span> = <span class="i">AminoAcids::GetAminoAcids</span><span class="s">(</span><span class="q">'OneLetterCode'</span><span class="s">)</span><span class="sc">;</span> | 
|  | 582  569   <span class="s">}</span> | 
|  | 583  570   <span class="k">elsif</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">RegionResiduesMode</span>} =~ <span class="q">/^NucleicAcids$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 584  571     <span class="k">push</span> <span class="i">@ResidueCodesList</span><span class="cm">,</span> <span class="s">(</span><span class="q">'A'</span><span class="cm">,</span> <span class="q">'G'</span><span class="cm">,</span> <span class="q">'T'</span><span class="cm">,</span> <span class="q">'U'</span><span class="cm">,</span> <span class="q">'C'</span><span class="s">)</span><span class="sc">;</span> | 
|  | 585  572   <span class="s">}</span> | 
|  | 586  573   <span class="k">push</span> <span class="i">@ResidueCodesList</span><span class="cm">,</span> <span class="q">'Gap'</span><span class="sc">;</span> | 
|  | 587  574   <span class="k">for</span> <span class="i">$Residue</span> <span class="s">(</span><span class="i">@ResidueCodesList</span><span class="s">)</span> <span class="s">{</span> | 
|  | 588  575     <span class="i">$ResidueCodesMap</span>{<span class="i">$Residue</span>} = <span class="i">$Residue</span><span class="sc">;</span> | 
|  | 589  576   <span class="s">}</span> | 
|  | 590  577 | 
|  | 591  578   <span class="c"># Go over all the residues in all the sequences, skipping those reported as gaps by IsGapResidue, and append any residue not already in the lookup map to the list in order of first appearance; the completed list is then stored for this file index...</span> | 
|  | 592  579   <span class="k">my</span><span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$ResIndex</span><span class="s">)</span><span class="sc">;</span> | 
|  | 593  580   <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 594  581     <span class="i">$Sequence</span> = <span class="i">$SequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 595  582     <span class="j">RES:</span> <span class="k">for</span> <span class="i">$ResIndex</span> <span class="s">(</span><span class="n">0</span> .. <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Sequence</span><span class="s">)</span> - <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 596  583       <span class="i">$Residue</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Sequence</span><span class="cm">,</span> <span class="i">$ResIndex</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 597  584       <span class="k">if</span> <span class="s">(</span><span class="i">IsGapResidue</span><span class="s">(</span><span class="i">$Residue</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 598  585         <span class="k">next</span> <span class="j">RES</span><span class="sc">;</span> | 
|  | 599  586       <span class="s">}</span> | 
|  | 600  587       <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$ResidueCodesMap</span>{<span class="i">$Residue</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 601  588         <span class="k">next</span> <span class="j">RES</span><span class="sc">;</span> | 
|  | 602  589       <span class="s">}</span> | 
|  | 603  590       <span class="k">push</span> <span class="i">@ResidueCodesList</span><span class="cm">,</span> <span class="i">$Residue</span><span class="sc">;</span> | 
|  | 604  591       <span class="i">$ResidueCodesMap</span>{<span class="i">$Residue</span>} = <span class="i">$Residue</span><span class="sc">;</span> | 
|  | 605  592     <span class="s">}</span> | 
|  | 606  593   <span class="s">}</span> | 
|  | 607  594   <span class="k">push</span> <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">ResidueCodes</span>}[<span class="i">$FileIndex</span>]}<span class="cm">,</span> <span class="i">@ResidueCodesList</span><span class="sc">;</span> | 
|  | 608  595 <span class="s">}</span> | 
|  | 609  596 | 
|  | 610  597 <span class="c"># Setup regions data for performing residue frequency analysis...</span> | 
|  | 611 <a name="SetupSequenceRegionsData-"></a> 598 <span class="k">sub </span><span class="m">SetupSequenceRegionsData</span> <span class="s">{</span> | 
|  | 612  599   <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span class="cm">,</span> <span class="i">$RefereceSequence</span><span class="cm">,</span> <span class="i">$RefereceSequenceLen</span><span class="cm">,</span> <span class="i">$RegionID</span><span class="cm">,</span> <span class="i">$StartResNum</span><span class="cm">,</span> <span class="i">$EndResNum</span><span class="cm">,</span> <span class="i">$RegionIndex</span><span class="cm">,</span> <span class="i">$RegionNum</span><span class="cm">,</span> <span class="i">$NoGapResNum</span><span class="cm">,</span> <span class="i">$ResNum</span><span class="cm">,</span> <span class="i">$ResIndex</span><span class="cm">,</span> <span class="i">$Residue</span><span class="cm">,</span> <span class="i">$ResidueCode</span><span class="cm">,</span> <span class="i">@RefereceSequenceRegions</span><span class="s">)</span><span class="sc">;</span> | 
|  | 613  600 | 
|  | 614  601 | 
|  | 615  602   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 616  603   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 617  604 | 
|  | 618  605   <span class="j">FILELIST:</span> <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. <span class="i">$#SequenceFilesList</span><span class="s">)</span> <span class="s">{</span> | 
|  | 619  606     <span class="i">%</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$Index</span>]{<span class="w">IsGap</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 620  607     <span class="i">%</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$Index</span>]{<span class="w">NoGapToGap</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 621  608     <span class="i">%</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$Index</span>]{<span class="w">GapToNoGap</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 622  609     <span class="i">%</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$Index</span>]} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 623  610     <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$Index</span>]{<span class="w">RegionIDs</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 624  611 | 
|  | 625  612     <span class="k">if</span> <span class="s">(</span>!<span class="i">$SequenceFilesInfo</span>{<span class="w">FilesOkay</span>}[<span class="i">$Index</span>]<span class="s">)</span> <span class="s">{</span> | 
|  | 626  613       <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 627  614     <span class="s">}</span> | 
|  | 628  615     <span class="k">if</span> <span class="s">(</span>!<span class="i">$OptionsInfo</span>{<span class="w">PerformResidueFrequencyAnalysis</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 629  616       <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 630  617     <span class="s">}</span> | 
|  | 631  618 | 
|  | 632  619     <span class="i">$RefereceSequence</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequence</span>}[<span class="i">$Index</span>]<span class="sc">;</span> | 
|  | 633  620     <span class="i">$RefereceSequenceLen</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceLen</span>}[<span class="i">$Index</span>]<span class="sc">;</span> | 
|  | 634  621 | 
|  | 635  622     <span class="c"># Setup residue number mapping and gap status for reference sequence...</span> | 
|  | 636  623     <span class="i">$NoGapResNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 637  624     <span class="i">$ResNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 638  625     <span class="k">for</span> <span class="i">$ResIndex</span> <span class="s">(</span><span class="n">0</span> .. <span class="s">(</span><span class="i">$RefereceSequenceLen</span> - <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 639  626       <span class="i">$ResNum</span>++<span class="sc">;</span> | 
|  | 640  627       <span class="i">$Residue</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$RefereceSequence</span><span class="cm">,</span> <span class="i">$ResIndex</span><span class="cm">,</span> <span class="n">1</span><span class="s">)</span><span class="sc">;</span> | 
|  | 641  628       <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$Index</span>]{<span class="w">IsGap</span>}{<span class="i">$ResNum</span>} = <span class="n">1</span><span class="sc">;</span> | 
|  | 642  629       <span class="k">if</span> <span class="s">(</span>!<span class="i">IsGapResidue</span><span class="s">(</span><span class="i">$Residue</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 643  630         <span class="i">$NoGapResNum</span>++<span class="sc">;</span> | 
|  | 644  631         <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$Index</span>]{<span class="w">IsGap</span>}{<span class="i">$ResNum</span>} = <span class="n">0</span><span class="sc">;</span> | 
|  | 645  632         <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$Index</span>]{<span class="w">NoGapToGap</span>}{<span class="i">$NoGapResNum</span>} = <span class="i">$ResNum</span><span class="sc">;</span> | 
|  | 646  633         <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$Index</span>]{<span class="w">GapToNoGap</span>}{<span class="i">$ResNum</span>} = <span class="i">$NoGapResNum</span><span class="sc">;</span> | 
|  | 647  634       <span class="s">}</span> | 
|  | 648  635     <span class="s">}</span> | 
|  | 649  636     <span class="c"># Map residue numbers for specified regions to the reference residue in input sequence/alignment files</span> | 
|  | 650  637     <span class="i">$RegionNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 651  638     <span class="i">@RefereceSequenceRegions</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 652  639     <span class="k">push</span> <span class="i">@RefereceSequenceRegions</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceRegions</span>}[<span class="i">$Index</span>]}<span class="sc">;</span> | 
|  | 653  640     <span class="k">for</span> <span class="s">(</span><span class="i">$RegionIndex</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$RegionIndex</span> <= <span class="i">$#RefereceSequenceRegions</span><span class="sc">;</span> <span class="i">$RegionIndex</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 654  641       <span class="i">$StartResNum</span> = <span class="i">$RefereceSequenceRegions</span>[<span class="i">$RegionIndex</span>]<span class="sc">;</span> | 
|  | 655  642       <span class="i">$EndResNum</span> = <span class="i">$RefereceSequenceRegions</span>[<span class="i">$RegionIndex</span> + <span class="n">1</span>]<span class="sc">;</span> | 
|  | 656  643       <span class="i">$RegionNum</span>++<span class="sc">;</span> | 
|  | 657  644       <span class="i">$RegionID</span> = <span class="q">"Region${RegionNum}"</span><span class="sc">;</span> | 
|  | 658  645       <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 659  646         <span class="c"># Map residue numbers to the reference sequence residue numbers to account for any ignored gaps...</span> | 
|  | 660  647         <span class="i">$StartResNum</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$Index</span>]{<span class="w">NoGapToGap</span>}{<span class="i">$StartResNum</span>}<span class="sc">;</span> | 
|  | 661  648         <span class="i">$EndResNum</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">RefereceSequenceResNums</span>}[<span class="i">$Index</span>]{<span class="w">NoGapToGap</span>}{<span class="i">$EndResNum</span>}<span class="sc">;</span> | 
|  | 662  649       <span class="s">}</span> | 
|  | 663  650       <span class="k">push</span> <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$Index</span>]{<span class="w">RegionIDs</span>}}<span class="cm">,</span> <span class="i">$RegionID</span><span class="sc">;</span> | 
|  | 664  651       <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$Index</span>]{<span class="i">$RegionID</span>}{<span class="w">StartResNum</span>} = <span class="i">$StartResNum</span><span class="sc">;</span> | 
|  | 665  652       <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$Index</span>]{<span class="i">$RegionID</span>}{<span class="w">EndResNum</span>} = <span class="i">$EndResNum</span><span class="sc">;</span> | 
|  | 666  653 | 
|  | 667  654       <span class="c"># Initialize data for residue codes...</span> | 
|  | 668  655       <span class="k">for</span> <span class="i">$ResNum</span> <span class="s">(</span><span class="i">$StartResNum</span> .. <span class="i">$EndResNum</span><span class="s">)</span> <span class="s">{</span> | 
|  | 669  656         <span class="k">for</span> <span class="i">$ResidueCode</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">ResidueCodes</span>}[<span class="i">$Index</span>]}<span class="s">)</span> <span class="s">{</span> | 
|  | 670  657           <span class="i">$SequenceFilesInfo</span>{<span class="w">RegionsData</span>}[<span class="i">$Index</span>]{<span class="i">$RegionID</span>}{<span class="w">Count</span>}{<span class="i">$ResNum</span>}{<span class="i">$ResidueCode</span>} = <span class="n">0</span><span class="sc">;</span> | 
|  | 671  658         <span class="s">}</span> | 
|  | 672  659       <span class="s">}</span> | 
|  | 673  660     <span class="s">}</span> | 
|  | 674  661   <span class="s">}</span> | 
|  | 675  662 <span class="s">}</span> | 
|  | 676  663 | 
|  | 677  664 <span class="c"># Setup script usage: set default option values, retrieve command line options and die on any invalid value...</span> | 
|  | 678 <a name="SetupScriptUsage-"></a> 665 <span class="k">sub </span><span class="m">SetupScriptUsage</span> <span class="s">{</span> | 
|  | 679  666 | 
|  | 680  667   <span class="c"># Initialize default values and retrieve all the options...</span> | 
|  | 681  668   <span class="i">%Options</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 682  669   <span class="i">$Options</span>{<span class="w">ignoregaps</span>} = <span class="q">'yes'</span><span class="sc">;</span> | 
|  | 683  670   <span class="i">$Options</span>{<span class="w">regionresiduesmode</span>} = <span class="q">'None'</span><span class="sc">;</span> | 
|  | 684  671   <span class="i">$Options</span>{<span class="w">mode</span>} = <span class="q">'PercentIdentityMatrix'</span><span class="sc">;</span> | 
|  | 685  672   <span class="i">$Options</span>{<span class="w">outdelim</span>} = <span class="q">'comma'</span><span class="sc">;</span> | 
|  | 686  673   <span class="i">$Options</span>{<span class="w">precision</span>} = <span class="n">2</span><span class="sc">;</span> | 
|  | 687  674   <span class="i">$Options</span>{<span class="w">quote</span>} = <span class="q">'yes'</span><span class="sc">;</span> | 
|  | 688  675   <span class="i">$Options</span>{<span class="w">referencesequence</span>} = <span class="q">'UseFirstSequenceID'</span><span class="sc">;</span> | 
|  | 689  676   <span class="i">$Options</span>{<span class="w">region</span>} = <span class="q">'UseCompleteSequence'</span><span class="sc">;</span> | 
|  | 690  677 | 
|  | 691  678   <span class="k">if</span> <span class="s">(</span>!<span class="i">GetOptions</span><span class="s">(</span>\<span class="i">%Options</span><span class="cm">,</span> <span class="q">"help|h"</span><span class="cm">,</span> <span class="q">"ignoregaps|i=s"</span><span class="cm">,</span> <span class="q">"mode|m=s"</span><span class="cm">,</span> <span class="q">"outdelim=s"</span><span class="cm">,</span> <span class="q">"overwrite|o"</span><span class="cm">,</span> <span class="q">"precision|p=i"</span><span class="cm">,</span> <span class="q">"quote|q=s"</span><span class="cm">,</span> <span class="q">"referencesequence=s"</span><span class="cm">,</span> <span class="q">"region=s"</span><span class="cm">,</span> <span class="q">"regionresiduesmode=s"</span><span class="cm">,</span> <span class="q">"root|r=s"</span><span class="cm">,</span> <span class="q">"workingdir|w=s"</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 692  679     <span class="k">die</span> <span class="q">"\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n"</span><span class="sc">;</span> | 
|  | 693  680   <span class="s">}</span> | 
|  | 694  681   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">workingdir</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 695  682     <span class="k">if</span> <span class="s">(</span>! <span class="k">-d</span> <span class="i">$Options</span>{<span class="w">workingdir</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 696  683       <span class="k">die</span> <span class="q">"Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n"</span><span class="sc">;</span> | 
|  | 697  684     <span class="s">}</span> | 
|  | 698  685     <span class="k">chdir</span> <span class="i">$Options</span>{<span class="w">workingdir</span>} <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Couldn't chdir $Options{workingdir}: $! \n"</span><span class="sc">;</span> | 
|  | 699  686   <span class="s">}</span> | 
|  | 700  687   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">ignoregaps</span>} !~ <span class="q">/^(yes|no)$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 701  688     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{ignoregaps}, for option \"-i --ignoregaps\" is not valid. Allowed values: yes or no\n"</span><span class="sc">;</span> | 
|  | 702  689   <span class="s">}</span> | 
|  | 703  690   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">regionresiduesmode</span>} !~ <span class="q">/^(AminoAcids|NucleicAcids|None)$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 704  691     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{regionresiduesmode}, for option \"--regionresiduesmode\" is not valid. Allowed values: AminoAcids, NucleicAcids or None\n"</span><span class="sc">;</span> | 
|  | 705  692   <span class="s">}</span> | 
|  | 706  693   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} !~ <span class="q">/^(PercentIdentityMatrix|ResidueFrequencyAnalysis|All)$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 707  694     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: PercentIdentityMatrix, ResidueFrequencyAnalysis  or All\n"</span><span class="sc">;</span> | 
|  | 708  695   <span class="s">}</span> | 
|  | 709  696   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">outdelim</span>} !~ <span class="q">/^(comma|semicolon|tab)$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 710  697     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n"</span><span class="sc">;</span> | 
|  | 711  698   <span class="s">}</span> | 
|  | 712  699   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">quote</span>} !~ <span class="q">/^(yes|no)$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 713  700     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n"</span><span class="sc">;</span> | 
|  | 714  701   <span class="s">}</span> | 
|  | 715  702   <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">precision</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 716  703     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{precision}, for option \"-p --precision\" is not valid. Allowed values: > 0 \n"</span><span class="sc">;</span> | 
|  | 717  704   <span class="s">}</span> | 
|  | 718  705 <span class="s">}</span> | 
|  | 719  706 | 
|  | 720 <a name="EOF-"></a></pre> | 
|  | 721 <p> </p> | 
|  | 722 <br /> | 
|  | 723 <center> | 
|  | 724 <img src="../../../images/h2o2.png" alt=""> | 
|  | 725 </center> | 
|  | 726 </body> | 
|  | 727 </html> |