| 0 | 1 <html> | 
|  | 2 <head> | 
|  | 3 <title>MayaChemTools:Code:ExtractFromSequenceFiles.pl</title> | 
|  | 4 <meta http-equiv="content-type" content="text/html;charset=utf-8"> | 
|  | 5 <link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css"> | 
|  | 6 </head> | 
|  | 7 <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10"> | 
|  | 8 <br/> | 
|  | 9 <center> | 
|  | 10 <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a> | 
|  | 11 </center> | 
|  | 12 <br/> | 
|  | 13 <pre> | 
|  | 14    1 #!/usr/bin/perl -w | 
|  | 15    2 <span class="c">#</span> | 
|  | 16    3 <span class="c"># $RCSfile: ExtractFromSequenceFiles.pl,v $</span> | 
|  | 17    4 <span class="c"># $Date: 2015/02/28 20:46:19 $</span> | 
|  | 18    5 <span class="c"># $Revision: 1.23 $</span> | 
|  | 19    6 <span class="c">#</span> | 
|  | 20    7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span> | 
|  | 21    8 <span class="c">#</span> | 
|  | 22    9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span> | 
|  | 23   10 <span class="c">#</span> | 
|  | 24   11 <span class="c"># This file is part of MayaChemTools.</span> | 
|  | 25   12 <span class="c">#</span> | 
|  | 26   13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span> | 
|  | 27   14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span> | 
|  | 28   15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span> | 
|  | 29   16 <span class="c"># later version.</span> | 
|  | 30   17 <span class="c">#</span> | 
|  | 31   18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span> | 
|  | 32   19 <span class="c"># any warranty; without even the implied warranty of merchantability or fitness</span> | 
|  | 33   20 <span class="c"># for a particular purpose.  See the GNU Lesser General Public License for more</span> | 
|  | 34   21 <span class="c"># details.</span> | 
|  | 35   22 <span class="c">#</span> | 
|  | 36   23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span> | 
|  | 37   24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span> | 
|  | 38   25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span> | 
|  | 39   26 <span class="c"># Boston, MA, 02111-1307, USA.</span> | 
|  | 40   27 <span class="c">#</span> | 
|  | 41   28 | 
|  | 42   29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span> | 
|  | 43   30 <span class="k">use</span> <span class="w">FindBin</span><span class="sc">;</span> <span class="k">use</span> <span class="w">lib</span> <span class="q">"$FindBin::Bin/../lib"</span><span class="sc">;</span> | 
|  | 44   31 <span class="k">use</span> <span class="w">Getopt::Long</span><span class="sc">;</span> | 
|  | 45   32 <span class="k">use</span> <span class="w">File::Basename</span><span class="sc">;</span> | 
|  | 46   33 <span class="k">use</span> <span class="w">Text::ParseWords</span><span class="sc">;</span> | 
|  | 47   34 <span class="k">use</span> <span class="w">Benchmark</span><span class="sc">;</span> | 
|  | 48   35 <span class="k">use</span> <span class="w">FileUtil</span><span class="sc">;</span> | 
|  | 49   36 <span class="k">use</span> <span class="w">TextUtil</span><span class="sc">;</span> | 
|  | 50   37 <span class="k">use</span> <span class="w">SequenceFileUtil</span><span class="sc">;</span> | 
|  | 51   38 | 
|  | 52   39 <span class="k">my</span><span class="s">(</span><span class="i">$ScriptName</span><span class="cm">,</span> <span class="i">%Options</span><span class="cm">,</span> <span class="i">$StartTime</span><span class="cm">,</span> <span class="i">$EndTime</span><span class="cm">,</span> <span class="i">$TotalTime</span><span class="s">)</span><span class="sc">;</span> | 
|  | 53   40 | 
|  | 54   41 <span class="c"># Autoflush STDOUT</span> | 
|  | 55   42 <span class="i">$|</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 56   43 | 
|  | 57   44 <span class="c"># Starting message...</span> | 
|  | 58   45 <span class="i">$ScriptName</span> = <span class="i">basename</span><span class="s">(</span><span class="i">$0</span><span class="s">)</span><span class="sc">;</span> | 
|  | 59   46 <span class="k">print</span> <span class="q">"\n$ScriptName: Starting...\n\n"</span><span class="sc">;</span> | 
|  | 60   47 <span class="i">$StartTime</span> = <span class="w">new</span> <span class="w">Benchmark</span><span class="sc">;</span> | 
|  | 61   48 | 
|  | 62   49 <span class="c"># Setup script usage message...</span> | 
|  | 63   50 <span class="i">SetupScriptUsage</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 64   51 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">help</span>} || <span class="i">@ARGV</span> &lt; <span class="n">1</span><span class="s">)</span> <span class="s">{</span> | 
|  | 65   52   <span class="k">die</span> <span class="i">GetUsageFromPod</span><span class="s">(</span><span class="q">"$FindBin::Bin/$ScriptName"</span><span class="s">)</span><span class="sc">;</span> | 
|  | 66   53 <span class="s">}</span> | 
|  | 67   54 | 
|  | 68   55 <span class="c"># Expand wild card file names...</span> | 
|  | 69   56 <span class="k">my</span><span class="s">(</span><span class="i">@SequenceFilesList</span><span class="s">)</span><span class="sc">;</span> | 
|  | 70   57 <span class="i">@SequenceFilesList</span> = <span class="i">ExpandFileNames</span><span class="s">(</span>\<span class="i">@ARGV</span><span class="cm">,</span> <span class="q">"aln msf fasta fta pir"</span><span class="s">)</span><span class="sc">;</span> | 
|  | 71   58 | 
|  | 72   59 <span class="c"># Process options...</span> | 
|  | 73   60 <span class="k">print</span> <span class="q">"Processing options...\n"</span><span class="sc">;</span> | 
|  | 74   61 <span class="k">my</span><span class="s">(</span><span class="i">%OptionsInfo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 75   62 <span class="i">ProcessOptions</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 76   63 | 
|  | 77   64 <span class="c"># Set up information about input files...</span> | 
|  | 78   65 <span class="k">print</span> <span class="q">"Checking input sequence file(s)...\n"</span><span class="sc">;</span> | 
|  | 79   66 <span class="k">my</span><span class="s">(</span><span class="i">%SequenceFilesInfo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 80   67 <span class="i">RetrieveSequenceFilesInfo</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 81   68 | 
|  | 82   69 <span class="c"># Process input files..</span> | 
|  | 83   70 <span class="k">my</span><span class="s">(</span><span class="i">$FileIndex</span><span class="s">)</span><span class="sc">;</span> | 
|  | 84   71 <span class="k">if</span> <span class="s">(</span><span class="i">@SequenceFilesList</span> > <span class="n">1</span><span class="s">)</span> <span class="s">{</span> | 
|  | 85   72   <span class="k">print</span> <span class="q">"\nProcessing sequence files...\n"</span><span class="sc">;</span> | 
|  | 86   73 <span class="s">}</span> | 
|  | 87   74 <span class="k">for</span> <span class="i">$FileIndex</span> <span class="s">(</span><span class="n">0</span> .. <span class="i">$#SequenceFilesList</span><span class="s">)</span> <span class="s">{</span> | 
|  | 88   75   <span class="k">if</span> <span class="s">(</span><span class="i">$SequenceFilesInfo</span>{<span class="w">FilesOkay</span>}[<span class="i">$FileIndex</span>]<span class="s">)</span> <span class="s">{</span> | 
|  | 89   76     <span class="k">print</span> <span class="q">"\nProcessing file $SequenceFilesList[$FileIndex]...\n"</span><span class="sc">;</span> | 
|  | 90   77     <span class="i">ExtractFromSequenceFiles</span><span class="s">(</span><span class="i">$FileIndex</span><span class="s">)</span><span class="sc">;</span> | 
|  | 91   78   <span class="s">}</span> | 
|  | 92   79 <span class="s">}</span> | 
|  | 93   80 <span class="k">print</span> <span class="q">"\n$ScriptName:Done...\n\n"</span><span class="sc">;</span> | 
|  | 94   81 | 
|  | 95   82 <span class="i">$EndTime</span> = <span class="w">new</span> <span class="w">Benchmark</span><span class="sc">;</span> | 
|  | 96   83 <span class="i">$TotalTime</span> = <span class="w">timediff</span> <span class="s">(</span><span class="i">$EndTime</span><span class="cm">,</span> <span class="i">$StartTime</span><span class="s">)</span><span class="sc">;</span> | 
|  | 97   84 <span class="k">print</span> <span class="q">"Total time: "</span><span class="cm">,</span> <span class="i">timestr</span><span class="s">(</span><span class="i">$TotalTime</span><span class="s">)</span><span class="cm">,</span> <span class="q">"\n"</span><span class="sc">;</span> | 
|  | 98   85 | 
|  | 99   86 <span class="c">###############################################################################</span> | 
|  | 100   87 | 
|  | 101   88 <span class="c"># Extract from sequence files...</span> | 
|  | 102 <a name="ExtractFromSequenceFiles-"></a>  89 <span class="k">sub </span><span class="m">ExtractFromSequenceFiles</span> <span class="s">{</span> | 
|  | 103   90   <span class="k">my</span><span class="s">(</span><span class="i">$FileIndex</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 104   91   <span class="k">my</span><span class="s">(</span><span class="i">$OutSequenceFile</span><span class="cm">,</span> <span class="i">$SequenceFile</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="cm">,</span> <span class="i">$SpecifiedSequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 105   92 | 
|  | 106   93   <span class="c"># Read sequence file...</span> | 
|  | 107   94   <span class="i">$SequenceFile</span> = <span class="i">$SequenceFilesList</span>[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 108   95   <span class="k">open</span> <span class="w">SEQUENCEFILE</span><span class="cm">,</span> <span class="q">"$SequenceFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Can't open $SequenceFile: $! \n"</span><span class="sc">;</span> | 
|  | 109   96   <span class="i">$SequenceDataRef</span> = <span class="i">ReadSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 110   97   <span class="k">close</span> <span class="w">SEQUENCEFILE</span><span class="sc">;</span> | 
|  | 111   98 | 
|  | 112   99   <span class="i">$OutSequenceFile</span> = <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFile</span>}[<span class="i">$FileIndex</span>]<span class="sc">;</span> | 
|  | 113  100   <span class="k">print</span> <span class="q">"Generating sequence file $OutSequenceFile...\n"</span><span class="sc">;</span> | 
|  | 114  101 | 
|  | 115  102   <span class="c"># Retrieve sequence data for specified sequences...</span> | 
|  | 116  103   <span class="i">$SpecifiedSequenceDataRef</span> = <span class="i">GetSpecifiedSequenceData</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 117  104 | 
|  | 118  105   <span class="c"># Handle gaps...</span> | 
|  | 119  106   <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 120  107     <span class="k">if</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SpecifiedSequenceDataRef</span>->{<span class="w">IDs</span>}} > <span class="n">1</span><span class="s">)</span> <span class="s">{</span> | 
|  | 121  108       <span class="k">if</span> <span class="s">(</span><span class="i">AreSequenceLengthsIdentical</span><span class="s">(</span><span class="i">$SpecifiedSequenceDataRef</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 122  109         <span class="i">$SpecifiedSequenceDataRef</span> = <span class="i">RemoveSequenceAlignmentGapColumns</span><span class="s">(</span><span class="i">$SpecifiedSequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 123  110       <span class="s">}</span> | 
|  | 124  111     <span class="s">}</span> | 
|  | 125  112     <span class="k">else</span> <span class="s">{</span> | 
|  | 126  113       <span class="c"># Remove the gaps from the sequence...</span> | 
|  | 127  114       <span class="k">my</span><span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$Sequence</span><span class="s">)</span><span class="sc">;</span> | 
|  | 128  115       <span class="i">$ID</span> = <span class="i">$SpecifiedSequenceDataRef</span>->{<span class="w">IDs</span>}[<span class="n">0</span>]<span class="sc">;</span> | 
|  | 129  116       <span class="i">$Sequence</span> = <span class="i">$SpecifiedSequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 130  117       <span class="i">$SpecifiedSequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>} = <span class="i">RemoveSequenceGaps</span><span class="s">(</span><span class="i">$Sequence</span><span class="s">)</span><span class="sc">;</span> | 
|  | 131  118     <span class="s">}</span> | 
|  | 132  119   <span class="s">}</span> | 
|  | 133  120 | 
|  | 134  121   <span class="c"># Write out the file...</span> | 
|  | 135  122   <span class="i">WritePearsonFastaSequenceFile</span><span class="s">(</span><span class="i">$OutSequenceFile</span><span class="cm">,</span> <span class="i">$SpecifiedSequenceDataRef</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">MaxSequenceLength</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 136  123 <span class="s">}</span> | 
|  | 137  124 | 
|  | 138  125 <span class="c"># Get specified sequence data...</span> | 
|  | 139 <a name="GetSpecifiedSequenceData-"></a> 126 <span class="k">sub </span><span class="m">GetSpecifiedSequenceData</span> <span class="s">{</span> | 
|  | 140  127   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 141  128 | 
|  | 142  129   <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">Mode</span>} =~ <span class="q">/^SequenceID$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 143  130     <span class="k">return</span> <span class="i">GetDataBySequenceIDs</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 144  131   <span class="s">}</span> | 
|  | 145  132   <span class="k">elsif</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} =~ <span class="q">/^SequenceNum$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 146  133     <span class="k">return</span> <span class="i">GetDataBySequenceNums</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 147  134   <span class="s">}</span> | 
|  | 148  135   <span class="k">elsif</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} =~ <span class="q">/^SequenceNumRange$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 149  136     <span class="k">return</span> <span class="i">GetDataBySequenceNumRange</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 150  137   <span class="s">}</span> | 
|  | 151  138   <span class="k">else</span> <span class="s">{</span> | 
|  | 152  139     <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span> | 
|  | 153  140   <span class="s">}</span> | 
|  | 154  141 <span class="s">}</span> | 
|  | 155  142 | 
|  | 156  143 <span class="c"># Get sequence data for sequences matching specified sequence IDs...</span> | 
|  | 157 <a name="GetDataBySequenceIDs-"></a> 144 <span class="k">sub </span><span class="m">GetDataBySequenceIDs</span> <span class="s">{</span> | 
|  | 158  145   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 159  146   <span class="k">my</span><span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$SequenceCount</span><span class="cm">,</span> <span class="i">$IDMatched</span><span class="cm">,</span> <span class="i">$SpecifiedID</span><span class="cm">,</span> <span class="i">%SpecifiedSequenceDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 160  147 | 
|  | 161  148   <span class="c"># Go over sequences and collect sequences for writing out a new sequence file...</span> | 
|  | 162  149   <span class="i">%SpecifiedSequenceDataMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 163  150   <span class="i">@</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">IDs</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 164  151   <span class="i">%</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Description</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 165  152   <span class="i">%</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Sequence</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 166  153 | 
|  | 167  154   <span class="i">$SequenceCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 168  155   <span class="j">ID:</span> <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 169  156     <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">MatchExactSequenceIDs</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 170  157       <span class="k">if</span> <span class="s">(</span>!<span class="k">exists</span> <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDsMap</span>}{<span class="k">lc</span><span class="s">(</span><span class="i">$ID</span><span class="s">)</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 171  158         <span class="k">next</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 172  159       <span class="s">}</span> | 
|  | 173  160       <span class="k">if</span> <span class="s">(</span><span class="i">$SequenceCount</span> >= <span class="k">scalar</span> <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 174  161         <span class="k">last</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 175  162       <span class="s">}</span> | 
|  | 176  163       <span class="i">$SequenceCount</span>++<span class="sc">;</span> | 
|  | 177  164     <span class="s">}</span> | 
|  | 178  165     <span class="k">else</span> <span class="s">{</span> | 
|  | 179  166       <span class="c"># Does this ID contain a specified ID as a substring...</span> | 
|  | 180  167       <span class="i">$IDMatched</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 181  168       <span class="j">SPECIFIEDID:</span> <span class="k">for</span> <span class="i">$SpecifiedID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 182  169         <span class="k">if</span> <span class="s">(</span><span class="i">$ID</span> =~ <span class="q">/$SpecifiedID/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 183  170           <span class="i">$IDMatched</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 184  171           <span class="k">last</span> <span class="j">SPECIFIEDID</span><span class="sc">;</span> | 
|  | 185  172         <span class="s">}</span> | 
|  | 186  173       <span class="s">}</span> | 
|  | 187  174       <span class="k">if</span> <span class="s">(</span>!<span class="i">$IDMatched</span><span class="s">)</span> <span class="s">{</span> | 
|  | 188  175         <span class="k">next</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 189  176       <span class="s">}</span> | 
|  | 190  177       <span class="i">$SequenceCount</span>++<span class="sc">;</span> | 
|  | 191  178     <span class="s">}</span> | 
|  | 192  179     <span class="c"># Collect sequence data...</span> | 
|  | 193  180     <span class="k">push</span> <span class="i">@</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">IDs</span>}}<span class="cm">,</span> <span class="i">$ID</span><span class="sc">;</span> | 
|  | 194  181     <span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Description</span>}{<span class="i">$ID</span>} = <span class="i">$SequenceDataRef</span>->{<span class="w">Description</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 195  182     <span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} = <span class="i">$SequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 196  183   <span class="s">}</span> | 
|  | 197  184 | 
|  | 198  185   <span class="k">return</span> \<span class="i">%SpecifiedSequenceDataMap</span><span class="sc">;</span> | 
|  | 199  186 <span class="s">}</span> | 
|  | 200  187 | 
|  | 201  188 <span class="c"># Get sequence data for specified sequence numbers...</span> | 
|  | 202 <a name="GetDataBySequenceNums-"></a> 189 <span class="k">sub </span><span class="m">GetDataBySequenceNums</span> <span class="s">{</span> | 
|  | 203  190   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 204  191   <span class="k">my</span><span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$SequenceNum</span><span class="cm">,</span> <span class="i">$SequenceCount</span><span class="cm">,</span> <span class="i">%SpecifiedSequenceDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 205  192 | 
|  | 206  193   <span class="c"># Go over sequences and collect sequences for writing out a new sequence file...</span> | 
|  | 207  194   <span class="i">%SpecifiedSequenceDataMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 208  195   <span class="i">@</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">IDs</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 209  196   <span class="i">%</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Description</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 210  197   <span class="i">%</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Sequence</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 211  198 | 
|  | 212  199   <span class="i">$SequenceNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 213  200   <span class="i">$SequenceCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 214  201   <span class="j">ID:</span> <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 215  202     <span class="i">$SequenceNum</span>++<span class="sc">;</span> | 
|  | 216  203     <span class="k">if</span> <span class="s">(</span>!<span class="k">exists</span> <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDsMap</span>}{<span class="i">$SequenceNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 217  204       <span class="k">next</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 218  205     <span class="s">}</span> | 
|  | 219  206     <span class="k">if</span> <span class="s">(</span><span class="i">$SequenceCount</span> >= <span class="k">scalar</span> <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 220  207       <span class="k">last</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 221  208     <span class="s">}</span> | 
|  | 222  209     <span class="i">$SequenceCount</span>++<span class="sc">;</span> | 
|  | 223  210 | 
|  | 224  211     <span class="c"># Collect sequence data...</span> | 
|  | 225  212     <span class="k">push</span> <span class="i">@</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">IDs</span>}}<span class="cm">,</span> <span class="i">$ID</span><span class="sc">;</span> | 
|  | 226  213     <span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Description</span>}{<span class="i">$ID</span>} = <span class="i">$SequenceDataRef</span>->{<span class="w">Description</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 227  214     <span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} = <span class="i">$SequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 228  215   <span class="s">}</span> | 
|  | 229  216 | 
|  | 230  217   <span class="k">return</span> \<span class="i">%SpecifiedSequenceDataMap</span><span class="sc">;</span> | 
|  | 231  218 <span class="s">}</span> | 
|  | 232  219 | 
|  | 233  220 <span class="c"># Get sequence data for specified sequence number range...</span> | 
|  | 234 <a name="GetDataBySequenceNumRange-"></a> 221 <span class="k">sub </span><span class="m">GetDataBySequenceNumRange</span> <span class="s">{</span> | 
|  | 235  222   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceDataRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 236  223   <span class="k">my</span><span class="s">(</span><span class="i">$ID</span><span class="cm">,</span> <span class="i">$SequenceNum</span><span class="cm">,</span> <span class="i">$SequenceCount</span><span class="cm">,</span> <span class="i">%SpecifiedSequenceDataMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 237  224 | 
|  | 238  225   <span class="c"># Go over sequences and collect sequences for writing out a new sequence file...</span> | 
|  | 239  226   <span class="i">%SpecifiedSequenceDataMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 240  227   <span class="i">@</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">IDs</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 241  228   <span class="i">%</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Description</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 242  229   <span class="i">%</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Sequence</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 243  230 | 
|  | 244  231   <span class="i">$SequenceNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 245  232   <span class="i">$SequenceCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 246  233   <span class="j">ID:</span> <span class="k">for</span> <span class="i">$ID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SequenceDataRef</span>->{<span class="w">IDs</span>}}<span class="s">)</span> <span class="s">{</span> | 
|  | 247  234     <span class="i">$SequenceNum</span>++<span class="sc">;</span> | 
|  | 248  235 | 
|  | 249  236     <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="i">$SequenceNum</span> >= <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDs</span>}[<span class="n">0</span>] && <span class="i">$SequenceNum</span> &lt;= <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDs</span>}[<span class="n">1</span>]<span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 250  237       <span class="k">next</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 251  238     <span class="s">}</span> | 
|  | 252  239     <span class="k">if</span> <span class="s">(</span><span class="i">$SequenceNum</span> > <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDs</span>}[<span class="n">1</span>]<span class="s">)</span> <span class="s">{</span> | 
|  | 253  240       <span class="k">last</span> <span class="j">ID</span><span class="sc">;</span> | 
|  | 254  241     <span class="s">}</span> | 
|  | 255  242     <span class="i">$SequenceCount</span>++<span class="sc">;</span> | 
|  | 256  243     <span class="c"># Collect sequence data...</span> | 
|  | 257  244     <span class="k">push</span> <span class="i">@</span>{<span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">IDs</span>}}<span class="cm">,</span> <span class="i">$ID</span><span class="sc">;</span> | 
|  | 258  245     <span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Description</span>}{<span class="i">$ID</span>} = <span class="i">$SequenceDataRef</span>->{<span class="w">Description</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 259  246     <span class="i">$SpecifiedSequenceDataMap</span>{<span class="w">Sequence</span>}{<span class="i">$ID</span>} = <span class="i">$SequenceDataRef</span>->{<span class="w">Sequence</span>}{<span class="i">$ID</span>}<span class="sc">;</span> | 
|  | 260  247   <span class="s">}</span> | 
|  | 261  248 | 
|  | 262  249   <span class="k">return</span> \<span class="i">%SpecifiedSequenceDataMap</span><span class="sc">;</span> | 
|  | 263  250 <span class="s">}</span> | 
|  | 264  251 | 
|  | 265  252 | 
|  | 266  253 <span class="c"># Validate the parsed command line values in %Options and cache the processed settings in %OptionsInfo...</span> | 
|  | 267 <a name="ProcessOptions-"></a> 254 <span class="k">sub </span><span class="m">ProcessOptions</span> <span class="s">{</span> | 
|  | 268  255   <span class="i">%OptionsInfo</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 269  256 | 
|  | 270  257   <span class="c"># Miscellaneous options...</span> | 
|  | 271  258   <span class="i">$OptionsInfo</span>{<span class="w">IgnoreGaps</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">ignoregaps</span>} =~ <span class="q">/Yes/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 272  259   <span class="c"># Keep the mode as specified and flag whether sequence IDs must match exactly...</span> | 
|  | 273  260   <span class="i">$OptionsInfo</span>{<span class="w">Mode</span>} = <span class="i">$Options</span>{<span class="w">mode</span>}<span class="sc">;</span> | 
|  | 274  261   <span class="i">$OptionsInfo</span>{<span class="w">MatchExactSequenceIDs</span>} = <span class="i">$Options</span>{<span class="w">sequenceidmatch</span>} =~ <span class="q">/Exact/i</span> ? <span class="n">1</span> <span class="co">:</span><span class="n">0</span><span class="sc">;</span> | 
|  | 275  262 | 
|  | 276  263   <span class="c"># Check specified sequences value...</span> | 
|  | 277  264   <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequences</span>} = <span class="i">$Options</span>{<span class="w">sequences</span>}<span class="sc">;</span> | 
|  | 278  265   <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDs</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 279  266   <span class="i">%</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDsMap</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 280  267   <span class="c"># Build the list for the selected mode: SequenceID requires a value, SequenceNum defaults to "1", and SequenceNumRange requires exactly two numbers...</span> | 
|  | 281  268   <span class="k">my</span><span class="s">(</span><span class="i">@SpecifiedSequenceIDs</span><span class="s">)</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 282  269   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} =~ <span class="q">/^SequenceID$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 283  270     <span class="k">if</span> <span class="s">(</span>!<span class="i">$Options</span>{<span class="w">sequences</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 284  271       <span class="k">die</span> <span class="q">"Error: No value specified for option \"-s, --Sequences\" during \"SequenceID\" of \"-m, --mode\" option\n"</span><span class="sc">;</span> | 
|  | 285  272     <span class="s">}</span> | 
|  | 286  273     <span class="i">@SpecifiedSequenceIDs</span> = <span class="k">split</span> <span class="q">/\,/</span><span class="cm">,</span> <span class="i">$Options</span>{<span class="w">sequences</span>}<span class="sc">;</span> | 
|  | 287  274   <span class="s">}</span> | 
|  | 288  275   <span class="k">elsif</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} =~ <span class="q">/^SequenceNum$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 289  276     <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">sequences</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 290  277       <span class="i">@SpecifiedSequenceIDs</span> = <span class="k">split</span> <span class="q">/\,/</span><span class="cm">,</span> <span class="i">$Options</span>{<span class="w">sequences</span>}<span class="sc">;</span> | 
|  | 291  278       <span class="k">my</span><span class="s">(</span><span class="i">$SequenceNum</span><span class="s">)</span><span class="sc">;</span> | 
|  | 292  279       <span class="k">for</span> <span class="i">$SequenceNum</span> <span class="s">(</span><span class="i">@SpecifiedSequenceIDs</span><span class="s">)</span> <span class="s">{</span> | 
|  | 293  280         <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$SequenceNum</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 294  281           <span class="k">die</span> <span class="q">"Error: The value specified, $SequenceNum, in \"$Options{sequences}\" for option \"-s, --Sequences\" is not valid: Valid values: > 0\n"</span><span class="sc">;</span> | 
|  | 295  282         <span class="s">}</span> | 
|  | 296  283       <span class="s">}</span> | 
|  | 297  284     <span class="s">}</span> | 
|  | 298  285     <span class="k">else</span> <span class="s">{</span> | 
|  | 299  286       <span class="k">push</span> <span class="i">@SpecifiedSequenceIDs</span><span class="cm">,</span> <span class="q">"1"</span><span class="sc">;</span> | 
|  | 300  287     <span class="s">}</span> | 
|  | 301  288   <span class="s">}</span> | 
|  | 302  289   <span class="k">elsif</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} =~ <span class="q">/^SequenceNumRange$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 303  290     <span class="k">if</span> <span class="s">(</span>!<span class="i">$Options</span>{<span class="w">sequences</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 304  291       <span class="k">die</span> <span class="q">"Error: No value specified for option \"-s, --Sequences\" during \"SequenceNumRange\" of \"-m, --mode\" option\n"</span><span class="sc">;</span> | 
|  | 305  292     <span class="s">}</span> | 
|  | 306  293     <span class="i">@SpecifiedSequenceIDs</span> = <span class="k">split</span> <span class="q">/\,/</span><span class="cm">,</span> <span class="i">$Options</span>{<span class="w">sequences</span>}<span class="sc">;</span> | 
|  | 307  294     <span class="k">if</span> <span class="s">(</span><span class="i">@SpecifiedSequenceIDs</span> != <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 308  295       <span class="k">die</span> <span class="q">"Error: The number of values, "</span><span class="cm">,</span> <span class="k">scalar</span> <span class="i">@SpecifiedSequenceIDs</span><span class="cm">,</span> <span class="q">", specified in \"$Options{sequences}\" for option \"-s, --Sequences\" is not valid. Number of values must be 2 to indicate starting and ending sequence number.\n"</span><span class="sc">;</span> | 
|  | 309  296     <span class="s">}</span> | 
|  | 310  297     <span class="k">my</span><span class="s">(</span><span class="i">$SequenceNum</span><span class="s">)</span><span class="sc">;</span> | 
|  | 311  298     <span class="k">for</span> <span class="i">$SequenceNum</span> <span class="s">(</span><span class="i">@SpecifiedSequenceIDs</span><span class="s">)</span> <span class="s">{</span> | 
|  | 312  299       <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$SequenceNum</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 313  300         <span class="k">die</span> <span class="q">"Error: The value specified, $SequenceNum, in \"$Options{sequences}\" for option \"-s, --Sequences\" is not valid: Valid values: > 0\n"</span><span class="sc">;</span> | 
|  | 314  301       <span class="s">}</span> | 
|  | 315  302     <span class="s">}</span> | 
|  | 316  303     <span class="k">if</span> <span class="s">(</span><span class="i">$SpecifiedSequenceIDs</span>[<span class="n">0</span>] > <span class="i">$SpecifiedSequenceIDs</span>[<span class="n">1</span>]<span class="s">)</span> <span class="s">{</span> | 
|  | 317  304       <span class="k">die</span> <span class="q">"Error: The value specified \"$Options{sequences}\" for option \"-s, --Sequences\" are not valid: Starting sequence number $SpecifiedSequenceIDs[0] must be smaller than ending sequence number $SpecifiedSequenceIDs[1]\n"</span><span class="sc">;</span> | 
|  | 318  305     <span class="s">}</span> | 
|  | 319  306   <span class="s">}</span> | 
|  | 320  307   <span class="k">push</span> <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDs</span>}}<span class="cm">,</span> <span class="i">@SpecifiedSequenceIDs</span><span class="sc">;</span> | 
|  | 321  308   <span class="k">my</span><span class="s">(</span><span class="i">$SequenceID</span><span class="s">)</span><span class="sc">;</span> | 
|  | 322  309   <span class="k">for</span> <span class="i">$SequenceID</span> <span class="s">(</span><span class="i">@SpecifiedSequenceIDs</span><span class="s">)</span> <span class="s">{</span> | 
|  | 323  310     <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} =~ <span class="q">/^SequenceID$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 324  311       <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDsMap</span>}{<span class="k">lc</span><span class="s">(</span><span class="i">$SequenceID</span><span class="s">)</span>} = <span class="i">$SequenceID</span><span class="sc">;</span> | 
|  | 325  312     <span class="s">}</span> | 
|  | 326  313     <span class="k">else</span> <span class="s">{</span> | 
|  | 327  314       <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedSequenceIDsMap</span>}{<span class="i">$SequenceID</span>} = <span class="i">$SequenceID</span><span class="sc">;</span> | 
|  | 328  315     <span class="s">}</span> | 
|  | 329  316   <span class="s">}</span> | 
|  | 330  317   <span class="c"># Output settings; OutFileRoot is 0 when no root was specified...</span> | 
|  | 331  318   <span class="i">$OptionsInfo</span>{<span class="w">MaxSequenceLength</span>} = <span class="i">$Options</span>{<span class="w">sequencelength</span>}<span class="sc">;</span> | 
|  | 332  319   <span class="i">$OptionsInfo</span>{<span class="w">OverwriteFiles</span>} = <span class="i">$Options</span>{<span class="w">overwrite</span>} ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 333  320   <span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>} = <span class="i">$Options</span>{<span class="w">root</span>} ? <span class="i">$Options</span>{<span class="w">root</span>} <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 334  321 <span class="s">}</span> | 
|  | 335  322 | 
|  | 336  323 <span class="c"># Check each file in @SequenceFilesList and record its status, format, sequence count and output file name in %SequenceFilesInfo; unusable files are skipped with a warning...</span> | 
|  | 337 <a name="RetrieveSequenceFilesInfo-"></a> 324 <span class="k">sub </span><span class="m">RetrieveSequenceFilesInfo</span> <span class="s">{</span> | 
|  | 338  325   <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span class="cm">,</span> <span class="i">$SequenceFile</span><span class="cm">,</span> <span class="i">$FileSupported</span><span class="cm">,</span> <span class="i">$FileFormat</span><span class="cm">,</span> <span class="i">$SequenceCount</span><span class="cm">,</span> <span class="i">$FileDir</span><span class="cm">,</span> <span class="i">$FileName</span><span class="cm">,</span> <span class="i">$FileExt</span><span class="cm">,</span> <span class="i">$OutFileRoot</span><span class="cm">,</span> <span class="i">$OutFileExt</span><span class="cm">,</span> <span class="i">$OutFileMode</span><span class="cm">,</span> <span class="i">$SequenceDataRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 339  326 | 
|  | 340  327   <span class="i">%SequenceFilesInfo</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 341  328   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">FilesOkay</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 342  329   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileRoot</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 343  330   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileExt</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 344  331   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">OutFile</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 345  332   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">Format</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 346  333   <span class="i">@</span>{<span class="i">$SequenceFilesInfo</span>{<span class="w">SequenceCount</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 347  334   <span class="c"># Each entry starts out as not okay and is filled in only after every check below passes...</span> | 
|  | 348  335   <span class="j">FILELIST:</span> <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. <span class="i">$#SequenceFilesList</span><span class="s">)</span> <span class="s">{</span> | 
|  | 349  336     <span class="i">$SequenceFile</span> = <span class="i">$SequenceFilesList</span>[<span class="i">$Index</span>]<span class="sc">;</span> | 
|  | 350  337     <span class="i">$SequenceFilesInfo</span>{<span class="w">FilesOkay</span>}[<span class="i">$Index</span>] = <span class="n">0</span><span class="sc">;</span> | 
|  | 351  338     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileRoot</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 352  339     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileExt</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 353  340     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFile</span>}[<span class="i">$Index</span>] = <span class="q">''</span><span class="sc">;</span> | 
|  | 354  341     <span class="i">$SequenceFilesInfo</span>{<span class="w">Format</span>}[<span class="i">$Index</span>] = <span class="q">'NotSupported'</span><span class="sc">;</span> | 
|  | 355  342     <span class="i">$SequenceFilesInfo</span>{<span class="w">SequenceCount</span>}[<span class="i">$Index</span>] = <span class="n">0</span><span class="sc">;</span> | 
|  | 356  343     <span class="c"># Make sure the file can be opened for reading; the three-argument open keeps special characters in the name from being treated as a mode or a pipe...</span> | 
|  | 357  344     <span class="k">if</span> <span class="s">(</span>! <span class="k">open</span> <span class="w">SEQUENCEFILE</span><span class="cm">,</span> <span class="q">"&lt;"</span><span class="cm">,</span> <span class="i">$SequenceFile</span><span class="s">)</span> <span class="s">{</span> | 
|  | 358  345       <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: Couldn't open it: $! \n"</span><span class="sc">;</span> | 
|  | 359  346       <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 360  347     <span class="s">}</span> | 
|  | 361  348     <span class="k">close</span> <span class="w">SEQUENCEFILE</span><span class="sc">;</span> | 
|  | 362  349 | 
|  | 363  350     <span class="s">(</span><span class="i">$FileSupported</span><span class="cm">,</span> <span class="i">$FileFormat</span><span class="s">)</span> = <span class="i">IsSupportedSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 364  351     <span class="k">if</span> <span class="s">(</span>!<span class="i">$FileSupported</span><span class="s">)</span> <span class="s">{</span> | 
|  | 365  352       <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: Sequence file format is not supported.\n"</span><span class="sc">;</span> | 
|  | 366  353       <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 367  354     <span class="s">}</span> | 
|  | 368  355     <span class="i">$SequenceDataRef</span> = <span class="i">ReadSequenceFile</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 369  356 | 
|  | 370  357     <span class="i">$SequenceCount</span> = <span class="i">$SequenceDataRef</span>->{<span class="w">Count</span>}<span class="sc">;</span> | 
|  | 371  358     <span class="k">if</span> <span class="s">(</span>!<span class="i">$SequenceCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 372  359       <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: Sequence data is missing.\n"</span><span class="sc">;</span> | 
|  | 373  360       <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 374  361     <span class="s">}</span> | 
|  | 375  362 | 
|  | 376  363     <span class="c"># Setup output file names...</span> | 
|  | 377  364     <span class="i">$FileDir</span> = <span class="q">""</span><span class="sc">;</span> <span class="i">$FileName</span> = <span class="q">""</span><span class="sc">;</span> <span class="i">$FileExt</span> = <span class="q">""</span><span class="sc">;</span> | 
|  | 378  365     <span class="s">(</span><span class="i">$FileDir</span><span class="cm">,</span> <span class="i">$FileName</span><span class="cm">,</span> <span class="i">$FileExt</span><span class="s">)</span> = <span class="i">ParseFileName</span><span class="s">(</span><span class="i">$SequenceFile</span><span class="s">)</span><span class="sc">;</span> | 
|  | 379  366     <span class="i">$OutFileExt</span> = <span class="q">'fasta'</span><span class="sc">;</span> | 
|  | 380  367     <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>} && <span class="s">(</span><span class="i">@SequenceFilesList</span> == <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 381  368       <span class="k">my</span> <span class="s">(</span><span class="i">$RootFileDir</span><span class="cm">,</span> <span class="i">$RootFileName</span><span class="cm">,</span> <span class="i">$RootFileExt</span><span class="s">)</span> = <span class="i">ParseFileName</span><span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 382  369       <span class="k">if</span> <span class="s">(</span><span class="i">$RootFileName</span> && <span class="i">$RootFileExt</span><span class="s">)</span> <span class="s">{</span> | 
|  | 383  370         <span class="i">$FileName</span> = <span class="i">$RootFileName</span><span class="sc">;</span> | 
|  | 384  371       <span class="s">}</span> | 
|  | 385  372       <span class="k">else</span> <span class="s">{</span> | 
|  | 386  373         <span class="i">$FileName</span> = <span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>}<span class="sc">;</span> | 
|  | 387  374       <span class="s">}</span> | 
|  | 388  375       <span class="i">$OutFileRoot</span> = <span class="i">$FileName</span><span class="sc">;</span> | 
|  | 389  376     <span class="s">}</span> | 
|  | 390  377     <span class="k">else</span> <span class="s">{</span> | 
|  | 391  378       <span class="i">$OutFileRoot</span> = <span class="i">$FileName</span><span class="sc">;</span> | 
|  | 392  379     <span class="s">}</span> | 
|  | 393  380     <span class="j">MODE:</span> <span class="s">{</span> | 
|  | 394  381         <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">Mode</span>} =~ <span class="q">/^SequenceID$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$OutFileMode</span> = <span class="q">'SequenceID'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">MODE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 395  382         <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">Mode</span>} =~ <span class="q">/^SequenceNum$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$OutFileMode</span> = <span class="q">'SequenceNum'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">MODE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 396  383         <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">Mode</span>} =~ <span class="q">/^SequenceNumRange$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$OutFileMode</span> = <span class="q">'SequenceNumRange'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">MODE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 397  384         <span class="i">$OutFileMode</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 398  385     <span class="s">}</span> | 
|  | 399  386     <span class="k">if</span> <span class="s">(</span>!<span class="i">$OptionsInfo</span>{<span class="w">OverwriteFiles</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 400  387       <span class="k">if</span> <span class="s">(</span><span class="k">-e</span> <span class="q">"${OutFileRoot}${OutFileMode}.${OutFileExt}"</span><span class="s">)</span> <span class="s">{</span> | 
|  | 401  388         <span class="k">warn</span> <span class="q">"Warning: Ignoring file $SequenceFile: The file ${OutFileRoot}${OutFileMode}.${OutFileExt} already exists\n"</span><span class="sc">;</span> | 
|  | 402  389         <span class="k">next</span> <span class="j">FILELIST</span><span class="sc">;</span> | 
|  | 403  390       <span class="s">}</span> | 
|  | 404  391     <span class="s">}</span> | 
|  | 405  392     <span class="c"># All checks passed: record the results for this file...</span> | 
|  | 406  393     <span class="i">$SequenceFilesInfo</span>{<span class="w">FilesOkay</span>}[<span class="i">$Index</span>] = <span class="n">1</span><span class="sc">;</span> | 
|  | 407  394     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileRoot</span>}[<span class="i">$Index</span>] = <span class="i">$OutFileRoot</span><span class="sc">;</span> | 
|  | 408  395     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFileExt</span>}[<span class="i">$Index</span>] = <span class="i">$OutFileExt</span><span class="sc">;</span> | 
|  | 409  396     <span class="i">$SequenceFilesInfo</span>{<span class="w">OutFile</span>}[<span class="i">$Index</span>] = <span class="q">"${OutFileRoot}${OutFileMode}.${OutFileExt}"</span><span class="sc">;</span> | 
|  | 410  397 | 
|  | 411  398     <span class="i">$SequenceFilesInfo</span>{<span class="w">Format</span>}[<span class="i">$Index</span>] = <span class="i">$FileFormat</span><span class="sc">;</span> | 
|  | 412  399     <span class="i">$SequenceFilesInfo</span>{<span class="w">SequenceCount</span>}[<span class="i">$Index</span>] = <span class="i">$SequenceCount</span><span class="sc">;</span> | 
|  | 413  400   <span class="s">}</span> | 
|  | 414  401 <span class="s">}</span> | 
|  | 415  402 | 
|  | 416  403 <span class="c"># Set default option values, retrieve command line arguments specified using various options and validate them...</span> | 
|  | 417 <a name="SetupScriptUsage-"></a> 404 <span class="k">sub </span><span class="m">SetupScriptUsage</span> <span class="s">{</span> | 
|  | 418  405 | 
|  | 419  406   <span class="c"># Retrieve all the options...</span> | 
|  | 420  407   <span class="i">%Options</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 421  408   <span class="i">$Options</span>{<span class="w">ignoregaps</span>} = <span class="q">'Yes'</span><span class="sc">;</span> | 
|  | 422  409   <span class="i">$Options</span>{<span class="w">mode</span>} = <span class="q">'SequenceNum'</span><span class="sc">;</span> | 
|  | 423  410   <span class="i">$Options</span>{<span class="w">sequenceidmatch</span>} = <span class="q">'Relaxed'</span><span class="sc">;</span> | 
|  | 424  411   <span class="i">$Options</span>{<span class="w">sequencelength</span>} = <span class="n">80</span><span class="sc">;</span> | 
|  | 425  412 | 
|  | 426  413   <span class="k">if</span> <span class="s">(</span>!<span class="i">GetOptions</span><span class="s">(</span>\<span class="i">%Options</span><span class="cm">,</span> <span class="q">"help|h"</span><span class="cm">,</span> <span class="q">"ignoregaps|i=s"</span><span class="cm">,</span> <span class="q">"mode|m=s"</span><span class="cm">,</span> <span class="q">"overwrite|o"</span><span class="cm">,</span> <span class="q">"root|r=s"</span><span class="cm">,</span> <span class="q">"sequences|s=s"</span><span class="cm">,</span> <span class="q">"sequenceidmatch=s"</span><span class="cm">,</span> <span class="q">"sequencelength=i"</span><span class="cm">,</span> <span class="q">"workingdir|w=s"</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 427  414     <span class="k">die</span> <span class="q">"\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n"</span><span class="sc">;</span> | 
|  | 428  415   <span class="s">}</span> | 
|  | 429  416   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">workingdir</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 430  417     <span class="k">if</span> <span class="s">(</span>! <span class="k">-d</span> <span class="i">$Options</span>{<span class="w">workingdir</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 431  418       <span class="k">die</span> <span class="q">"Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n"</span><span class="sc">;</span> | 
|  | 432  419     <span class="s">}</span> | 
|  | 433  420     <span class="k">chdir</span> <span class="i">$Options</span>{<span class="w">workingdir</span>} <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Couldn't chdir $Options{workingdir}: $! \n"</span><span class="sc">;</span> | 
|  | 434  421   <span class="s">}</span> | 
|  | 435  422   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">ignoregaps</span>} !~ <span class="q">/^(yes|no)$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 436  423     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{ignoregaps}, for option \"-i --ignoregaps\" is not valid. Allowed values: yes or no\n"</span><span class="sc">;</span> | 
|  | 437  424   <span class="s">}</span> | 
|  | 438  425   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} !~ <span class="q">/^(SequenceID|SequenceNum|SequenceNumRange)$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 439  426     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: SequenceID, SequenceNum, or SequenceNumRange\n"</span><span class="sc">;</span> | 
|  | 440  427   <span class="s">}</span> | 
|  | 441  428   <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">sequenceidmatch</span>} !~ <span class="q">/^(Exact|Relaxed)$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 442  429     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{sequenceidmatch}, for option \"--SequenceIDMatch\" is not valid. Allowed values: Exact or Relaxed\n"</span><span class="sc">;</span> | 
|  | 443  430   <span class="s">}</span> | 
|  | 444  431   <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">sequencelength</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 445  432     <span class="k">die</span> <span class="q">"Error: The value specified, $Options{sequencelength}, for option \"--SequenceLength\" is not valid. Allowed values: >0\n"</span><span class="sc">;</span> | 
|  | 446  433   <span class="s">}</span> | 
|  | 447  434 <span class="s">}</span> | 
|  | 448  435 | 
|  | 449 <a name="EOF-"></a></pre> | 
|  | 450 <p> </p> | 
|  | 451 <br /> | 
|  | 452 <center> | 
|  | 453 <img src="../../../images/h2o2.png" alt=""> | 
|  | 454 </center> | 
|  | 455 </body> | 
|  | 456 </html> |