Mercurial > repos > deepakjadmin > mayatool3_test2
comparison bin/InfoSequenceFiles.pl @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 # | |
| 3 # $RCSfile: InfoSequenceFiles.pl,v $ | |
| 4 # $Date: 2015/02/28 20:46:20 $ | |
| 5 # $Revision: 1.29 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability of fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use FindBin; use lib "$FindBin::Bin/../lib"; | |
| 31 use Getopt::Long; | |
| 32 use File::Basename; | |
| 33 use Text::ParseWords; | |
| 34 use Benchmark; | |
| 35 use FileUtil; | |
| 36 use TextUtil; | |
| 37 use SequenceFileUtil; | |
| 38 use StatisticsUtil; | |
| 39 | |
| 40 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime); | |
| 41 | |
| 42 # Autoflush STDOUT | |
| 43 $| = 1; | |
| 44 | |
| 45 # Starting message... | |
| 46 $ScriptName = basename($0); | |
| 47 print "\n$ScriptName: Starting...\n\n"; | |
| 48 $StartTime = new Benchmark; | |
| 49 | |
| 50 # Get the options and setup script... | |
| 51 SetupScriptUsage(); | |
| 52 if ($Options{help} || @ARGV < 1) { | |
| 53 die GetUsageFromPod("$FindBin::Bin/$ScriptName"); | |
| 54 } | |
| 55 | |
| 56 my(@SequenceFilesList); | |
| 57 @SequenceFilesList = ExpandFileNames(\@ARGV, "aln msf fasta fta pir"); | |
| 58 | |
| 59 print "Processing options...\n"; | |
| 60 my(%OptionsInfo); | |
| 61 ProcessOptions(); | |
| 62 | |
| 63 print "Checking input sequence file(s)...\n"; | |
| 64 my(%SequenceFilesInfo); | |
| 65 RetrieveSequenceFilesInfo(); | |
| 66 | |
| 67 my($FileIndex); | |
| 68 if (@SequenceFilesList > 1) { | |
| 69 print "\nProcessing sequence files...\n"; | |
| 70 } | |
| 71 for $FileIndex (0 .. $#SequenceFilesList) { | |
| 72 if ($SequenceFilesInfo{FileOkay}[$FileIndex]) { | |
| 73 print "\nProcessing file $SequenceFilesList[$FileIndex]...\n"; | |
| 74 ListSequenceFileInfo($FileIndex); | |
| 75 } | |
| 76 } | |
| 77 ListTotalSizeOfFiles(); | |
| 78 | |
| 79 print "\n$ScriptName:Done...\n\n"; | |
| 80 | |
| 81 $EndTime = new Benchmark; | |
| 82 $TotalTime = timediff ($EndTime, $StartTime); | |
| 83 print "Total time: ", timestr($TotalTime), "\n"; | |
| 84 | |
| 85 ############################################################################### | |
| 86 | |
| 87 # List appropriate information... | |
| 88 sub ListSequenceFileInfo { | |
| 89 my($Index) = @_; | |
| 90 my($SequenceFile, $SequenceDataRef); | |
| 91 | |
| 92 $SequenceFile = $SequenceFilesList[$Index]; | |
| 93 | |
| 94 $SequenceDataRef = ReadSequenceFile($SequenceFile); | |
| 95 | |
| 96 my($SequencesCount) = $SequenceDataRef->{Count}; | |
| 97 print "\nNumber of sequences: $SequencesCount\n"; | |
| 98 | |
| 99 if ($OptionsInfo{ListShortestSequence} && ($SequencesCount > 1)) { | |
| 100 my($ShortestSeqID, $ShortestSeq, $ShortestSeqLen, $Description) = GetShortestSequence($SequenceDataRef, $OptionsInfo{IgnoreGaps}); | |
| 101 print "\nShortest sequence information:\nID: $ShortestSeqID; Length:$ShortestSeqLen\n"; | |
| 102 if ($OptionsInfo{DetailLevel} >= 2) { | |
| 103 print "Description: $Description\n"; | |
| 104 } | |
| 105 if ($OptionsInfo{DetailLevel} >= 3) { | |
| 106 print "Sequence: $ShortestSeq\n"; | |
| 107 } | |
| 108 } | |
| 109 if ($OptionsInfo{ListLongestSequence} && ($SequencesCount > 1)) { | |
| 110 my($LongestSeqID, $LongestSeq, $LongestSeqLen, $Description) = GetLongestSequence($SequenceDataRef, $OptionsInfo{IgnoreGaps}); | |
| 111 print "\nLongest sequence information:\nID: $LongestSeqID; Length: $LongestSeqLen\n"; | |
| 112 if ($OptionsInfo{DetailLevel} >= 2) { | |
| 113 print "Description: $Description\n"; | |
| 114 } | |
| 115 if ($OptionsInfo{DetailLevel} >= 3) { | |
| 116 print "Sequence: $LongestSeq\n"; | |
| 117 } | |
| 118 } | |
| 119 if ($OptionsInfo{FrequencyAnalysis} && ($SequencesCount > 1)) { | |
| 120 PerformLengthFrequencyAnalysis($SequenceDataRef); | |
| 121 } | |
| 122 if ($OptionsInfo{ListSequenceLengths}) { | |
| 123 ListSequenceLengths($SequenceDataRef); | |
| 124 } | |
| 125 | |
| 126 # File size and modification information... | |
| 127 print "\nFile size: ", FormatFileSize($SequenceFilesInfo{FileSize}[$Index]), " \n"; | |
| 128 print "Last modified: ", $SequenceFilesInfo{FileLastModified}[$Index], " \n"; | |
| 129 } | |
| 130 | |
| 131 # List information about sequence lengths... | |
| 132 sub ListSequenceLengths { | |
| 133 my($SequenceDataRef) = @_; | |
| 134 my($ID, $SeqLen, $Sequence, $Description); | |
| 135 | |
| 136 print "\nSequence lengths information:\n"; | |
| 137 for $ID (@{$SequenceDataRef->{IDs}}) { | |
| 138 $Sequence = $SequenceDataRef->{Sequence}{$ID}; | |
| 139 $Description = $SequenceDataRef->{Description}{$ID}; | |
| 140 $SeqLen = GetSequenceLength($Sequence, $OptionsInfo{IgnoreGaps}); | |
| 141 if ($OptionsInfo{IgnoreGaps}) { | |
| 142 $Sequence = RemoveSequenceGaps($Sequence); | |
| 143 } | |
| 144 print "ID: $ID; Length:$SeqLen\n"; | |
| 145 if ($OptionsInfo{DetailLevel} >= 2) { | |
| 146 print "Description: $Description\n"; | |
| 147 } | |
| 148 if ($OptionsInfo{DetailLevel} >= 3) { | |
| 149 print "Sequence: $Sequence\n"; | |
| 150 } | |
| 151 if ($OptionsInfo{DetailLevel} >= 2) { | |
| 152 print "\n"; | |
| 153 } | |
| 154 } | |
| 155 } | |
| 156 | |
| 157 # Total size of all the fiels... | |
| 158 sub ListTotalSizeOfFiles { | |
| 159 my($FileOkayCount, $TotalSize, $Index); | |
| 160 | |
| 161 $FileOkayCount = 0; | |
| 162 $TotalSize = 0; | |
| 163 | |
| 164 for $Index (0 .. $#SequenceFilesList) { | |
| 165 if ($SequenceFilesInfo{FileOkay}[$Index]) { | |
| 166 $FileOkayCount++; | |
| 167 $TotalSize += $SequenceFilesInfo{FileSize}[$Index]; | |
| 168 } | |
| 169 } | |
| 170 if ($FileOkayCount > 1) { | |
| 171 print "\nTotal size of $FileOkayCount files: ", FormatFileSize($TotalSize), "\n"; | |
| 172 } | |
| 173 } | |
| 174 | |
| 175 | |
| 176 # Perform frequency analysis of sequence lengths | |
| 177 sub PerformLengthFrequencyAnalysis { | |
| 178 my($SequenceDataRef, $SequenceLengthsRef) = @_; | |
| 179 my ($ID, $SeqLen, $Sequence, $SequenceLenBin, $LenBin, $SequenceLenCount, @SequenceLengths, %SequenceLenFrequency); | |
| 180 | |
| 181 @SequenceLengths = (); | |
| 182 %SequenceLenFrequency = (); | |
| 183 for $ID (@{$SequenceDataRef->{IDs}}) { | |
| 184 $Sequence = $SequenceDataRef->{Sequence}{$ID}; | |
| 185 $SeqLen = GetSequenceLength($Sequence, $OptionsInfo{IgnoreGaps}); | |
| 186 push @SequenceLengths, $SeqLen; | |
| 187 } | |
| 188 if (@{$OptionsInfo{BinRange}}) { | |
| 189 %SequenceLenFrequency = Frequency(\@SequenceLengths, \@{$OptionsInfo{BinRange}}); | |
| 190 } | |
| 191 else { | |
| 192 %SequenceLenFrequency = Frequency(\@SequenceLengths, $OptionsInfo{NumOfBins}); | |
| 193 } | |
| 194 print "\nDistribution of sequence lengths (LengthBin => Count):\n"; | |
| 195 for $SequenceLenBin (sort { $a <=> $b} keys %SequenceLenFrequency) { | |
| 196 $SequenceLenCount = $SequenceLenFrequency{$SequenceLenBin}; | |
| 197 $LenBin = sprintf("%.1f", $SequenceLenBin) + 0; | |
| 198 print "$LenBin => $SequenceLenCount; "; | |
| 199 } | |
| 200 print "\n"; | |
| 201 } | |
| 202 | |
| 203 # Retrieve information about sequence files... | |
| 204 sub RetrieveSequenceFilesInfo { | |
| 205 my($Index, $SequenceFile, $FileSupported, $FileFormat, $ModifiedTimeString, $ModifiedDateString); | |
| 206 | |
| 207 %SequenceFilesInfo = (); | |
| 208 @{$SequenceFilesInfo{FileOkay}} = (); | |
| 209 @{$SequenceFilesInfo{FileFormat}} = (); | |
| 210 @{$SequenceFilesInfo{FileSize}} = (); | |
| 211 @{$SequenceFilesInfo{FileLastModified}} = (); | |
| 212 | |
| 213 FILELIST: for $Index (0 .. $#SequenceFilesList) { | |
| 214 $SequenceFile = $SequenceFilesList[$Index]; | |
| 215 | |
| 216 if (! open SEQUENCEFILE, "$SequenceFile") { | |
| 217 warn "Warning: Ignoring file $SequenceFile: Couldn't open it: $! \n"; | |
| 218 next FILELIST; | |
| 219 } | |
| 220 close SEQUENCEFILE; | |
| 221 | |
| 222 $SequenceFilesInfo{FileOkay}[$Index] = 0; | |
| 223 $SequenceFilesInfo{FileFormat}[$Index] = 'NotSupported'; | |
| 224 $SequenceFilesInfo{FileSize}[$Index] = 0; | |
| 225 $SequenceFilesInfo{FileLastModified}[$Index] = ''; | |
| 226 | |
| 227 ($FileSupported, $FileFormat) = IsSupportedSequenceFile($SequenceFile); | |
| 228 if (!$FileSupported) { | |
| 229 warn "Warning: Ignoring file $SequenceFile: Sequence file format is not supported.\n"; | |
| 230 next FILELIST; | |
| 231 } | |
| 232 | |
| 233 $SequenceFilesInfo{FileOkay}[$Index] = 1; | |
| 234 $SequenceFilesInfo{FileFormat}[$Index] = $FileFormat; | |
| 235 $SequenceFilesInfo{FileSize}[$Index] = FileSize($SequenceFile); | |
| 236 | |
| 237 ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($SequenceFile); | |
| 238 $SequenceFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString"; | |
| 239 } | |
| 240 } | |
| 241 | |
| 242 # Process option values... | |
| 243 sub ProcessOptions { | |
| 244 | |
| 245 $OptionsInfo{All} = defined $Options{all} ? $Options{all} : undef; | |
| 246 | |
| 247 $OptionsInfo{Count} = defined $Options{count} ? $Options{count} : undef; | |
| 248 $OptionsInfo{DetailLevel} = $Options{detail}; | |
| 249 $OptionsInfo{Frequency} = defined $Options{frequency} ? $Options{frequency} : undef; | |
| 250 $OptionsInfo{FrequencyBins} = defined $Options{frequencybins} ? $Options{frequencybins} : undef; | |
| 251 $OptionsInfo{IgnoreGaps} = defined $Options{ignoregaps} ? $Options{ignoregaps} : undef; | |
| 252 $OptionsInfo{Longest} = defined $Options{longest} ? $Options{longest} : undef; | |
| 253 $OptionsInfo{Shortest} = defined $Options{shortest} ? $Options{shortest} : undef; | |
| 254 $OptionsInfo{SequenceLengths} = defined $Options{sequencelengths} ? $Options{sequencelengths} : undef; | |
| 255 | |
| 256 $OptionsInfo{FrequencyAnalysis} = ($Options{all} || $Options{frequency}) ? 1 : 0; | |
| 257 $OptionsInfo{ListLongestSequence} = ($Options{all} || $Options{longest}) ? 1 : 0; | |
| 258 $OptionsInfo{ListShortestSequence} = ($Options{all} || $Options{shortest}) ? 1 : 0; | |
| 259 $OptionsInfo{ListSequenceLengths} = ($Options{all} || $Options{sequencelengths}) ? 1 : 0; | |
| 260 $OptionsInfo{IgnoreGaps} = ($Options{ignoregaps} =~ /Yes/i) ? 1 : 0; | |
| 261 | |
| 262 # Setup frequency bin values... | |
| 263 $OptionsInfo{NumOfBins} = 4; | |
| 264 @{$OptionsInfo{BinRange}} = (); | |
| 265 | |
| 266 if ($Options{frequencybins} =~ /\,/) { | |
| 267 my($BinValue, @SpecifiedBinRange); | |
| 268 @SpecifiedBinRange = split /\,/, $Options{frequencybins}; | |
| 269 if (@SpecifiedBinRange < 2) { | |
| 270 die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Must contain at least two values. \n"; | |
| 271 } | |
| 272 for $BinValue (@SpecifiedBinRange) { | |
| 273 if (!IsNumerical($BinValue)) { | |
| 274 die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Contains non numeric values. \n"; | |
| 275 } | |
| 276 } | |
| 277 my($Index1, $Index2); | |
| 278 for $Index1 (0 .. $#SpecifiedBinRange) { | |
| 279 for $Index2 (($Index1 + 1) .. $#SpecifiedBinRange) { | |
| 280 if ($SpecifiedBinRange[$Index1] >= $SpecifiedBinRange[$Index2]) { | |
| 281 die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Must contain values in ascending order. \n"; | |
| 282 } | |
| 283 } | |
| 284 } | |
| 285 push @{$OptionsInfo{BinRange}}, @SpecifiedBinRange; | |
| 286 } | |
| 287 else { | |
| 288 $OptionsInfo{NumOfBins} = $Options{frequencybins}; | |
| 289 if (!IsPositiveInteger($OptionsInfo{NumOfBins})) { | |
| 290 die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid. Allowed values: positive integer or \"number,number,[number]...\". \n"; | |
| 291 } | |
| 292 } | |
| 293 } | |
| 294 | |
| 295 # Setup script usage and retrieve command line arguments specified using various options... | |
| 296 sub SetupScriptUsage { | |
| 297 | |
| 298 # Retrieve all the options... | |
| 299 %Options = (); | |
| 300 $Options{detail} = 1; | |
| 301 $Options{ignoregaps} = 'no'; | |
| 302 $Options{frequencybins} = 10; | |
| 303 | |
| 304 if (!GetOptions(\%Options, "all|a", "count|c", "detail|d=i", "frequency|f", "frequencybins=s", "help|h", "ignoregaps|i=s", "longest|l", "shortest|s", "sequencelengths", "workingdir|w=s")) { | |
| 305 die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n"; | |
| 306 } | |
| 307 if ($Options{workingdir}) { | |
| 308 if (! -d $Options{workingdir}) { | |
| 309 die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n"; | |
| 310 } | |
| 311 chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n"; | |
| 312 } | |
| 313 if (!IsPositiveInteger($Options{detail})) { | |
| 314 die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Allowed values: > 0\n"; | |
| 315 } | |
| 316 if ($Options{ignoregaps} !~ /^(yes|no)$/i) { | |
| 317 die "Error: The value specified, $Options{ignoregaps}, for option \"-i --IgnoreGaps\" is not valid. Allowed values: yes or no\n"; | |
| 318 } | |
| 319 } | |
| 320 | |
| 321 __END__ | |
| 322 | |
| 323 =head1 NAME | |
| 324 | |
| 325 InfoSequenceFiles.pl - List information about sequence and alignment files | |
| 326 | |
| 327 =head1 SYNOPSIS | |
| 328 | |
| 329 InfoSequenceFiles.pl SequenceFile(s) AlignmentFile(s)... | |
| 330 | |
| 331 InfoSequenceFiles.pl [B<-a, --all>] [B<-c, --count>] [B<-d, --detail> infolevel] | |
| 332 [B<-f, --frequency>] [B<--FrequencyBins> number | "number, number, [number,...]"] | |
| 333 [B<-h, --help>] [B<-i, --IgnoreGaps> yes | no] [B<-l, --longest>] [B<-s, --shortest>] | |
| 334 [B<--SequenceLengths>] [B<-w, --workingdir> dirname] SequenceFile(s)... | |
| 335 | |
| 336 =head1 DESCRIPTION | |
| 337 | |
| 338 List information about contents of I<SequenceFile(s) and AlignmentFile(s)>: number of sequences, | |
| 339 shortest and longest sequences, distribution of sequence lengths and so on. The file names are | |
| 340 separated by spaces. All the sequence files in a current directory can be specified by I<*.aln>, | |
| 341 I<*.msf>, I<*.fasta>, I<*.fta>, I<*.pir> or any other supported formats; additionally, I<DirName> | |
| 342 corresponds to all the sequence files in the current directory with any of the supported file | |
| 343 extension: I<.aln, .msf, .fasta, .fta, and .pir>. | |
| 344 | |
| 345 Supported sequence formats are: I<ALN/CLustalW>, I<GCG/MSF>, I<PILEUP/MSF>, I<Pearson/FASTA>, | |
| 346 and I<NBRF/PIR>. Instead of using file extensions, file formats are detected by parsing the contents | |
| 347 of I<SequenceFile(s) and AlignmentFile(s)>. | |
| 348 | |
| 349 =head1 OPTIONS | |
| 350 | |
| 351 =over 4 | |
| 352 | |
| 353 =item B<-a, --all> | |
| 354 | |
| 355 List all the available information. | |
| 356 | |
| 357 =item B<-c, --count> | |
| 358 | |
| 359 List number of of sequences. This is B<default behavior>. | |
| 360 | |
| 361 =item B<-d, --detail> I<InfoLevel> | |
| 362 | |
| 363 Level of information to print about sequences during various options. Default: I<1>. | |
| 364 Possible values: I<1, 2 or 3>. | |
| 365 | |
| 366 =item B<-f, --frequency> | |
| 367 | |
| 368 List distribution of sequence lengths using the specified number of bins or bin range specified | |
| 369 using B<FrequencyBins> option. | |
| 370 | |
| 371 This option is ignored for input files containing only single sequence. | |
| 372 | |
| 373 =item B<--FrequencyBins> I<number | "number,number,[number,...]"> | |
| 374 | |
| 375 This value is used with B<-f, --frequency> option to list distribution of sequence lengths using | |
| 376 the specified number of bins or bin range. Default value: I<10>. | |
| 377 | |
| 378 The bin range list is used to group sequence lengths into different groups; It must contain | |
| 379 values in ascending order. Examples: | |
| 380 | |
| 381 100,200,300,400,500,600 | |
| 382 200,400,600,800,1000 | |
| 383 | |
| 384 The frequency value calculated for a specific bin corresponds to all the sequence lengths | |
| 385 which are greater than the previous bin value and less than or equal to the current bin value. | |
| 386 | |
| 387 =item B<-h, --help> | |
| 388 | |
| 389 Print this help message. | |
| 390 | |
| 391 =item B<-i, --IgnoreGaps> I<yes | no> | |
| 392 | |
| 393 Ignore gaps during calculation of sequence lengths. Possible values: I<yes or | |
| 394 no>. Default value: I<no>. | |
| 395 | |
| 396 =item B<-l, --longest> | |
| 397 | |
| 398 List information about longest sequence: ID, sequence and sequence length. This option | |
| 399 is ignored for input files containing only single sequence. | |
| 400 | |
| 401 =item B<-s, --shortest> | |
| 402 | |
| 403 List information about shortest sequence: ID, sequence and sequence length. This option | |
| 404 is ignored for input files containing only single sequence. | |
| 405 | |
| 406 =item B<--SequenceLengths> | |
| 407 | |
| 408 List information about sequence lengths. | |
| 409 | |
| 410 =item B<-w, --WorkingDir> I<dirname> | |
| 411 | |
| 412 Location of working directory. Default: current directory. | |
| 413 | |
| 414 =back | |
| 415 | |
| 416 =head1 EXAMPLES | |
| 417 | |
| 418 To count number of sequences in sequence files, type: | |
| 419 | |
| 420 % InfoSequenceFiles.pl Sample1.fasta | |
| 421 % InfoSequenceFiles.pl Sample1.msf Sample1.aln Sample1.pir | |
| 422 % InfoSequenceFiles.pl *.fasta *.fta *.msf *.pir *.aln | |
| 423 | |
| 424 To list all available information with maximum level of available detail for a sequence | |
| 425 alignment file Sample1.msf, type: | |
| 426 | |
| 427 % InfoSequenceFiles.pl -a -d 3 Sample1.msf | |
| 428 | |
| 429 To list sequence length information after ignoring sequence gaps in Sample1.aln file, type: | |
| 430 | |
| 431 % InfoSequenceFiles.pl --SequenceLengths --IgnoreGaps Yes | |
| 432 Sample1.aln | |
| 433 | |
| 434 To list shortest and longest sequence length information after ignoring sequence | |
| 435 gaps in Sample1.aln file, type: | |
| 436 | |
| 437 % InfoSequenceFiles.pl --longest --shortest --IgnoreGaps Yes | |
| 438 Sample1.aln | |
| 439 | |
| 440 To list distribution of sequence lengths after ignoring sequence gaps in Sample1.aln file and | |
| 441 report the frequency distribution into 10 bins, type: | |
| 442 | |
| 443 % InfoSequenceFiles.pl --frequency --FrequencyBins 10 | |
| 444 --IgnoreGaps Yes Sample1.aln | |
| 445 | |
| 446 To list distribution of sequence lengths after ignoring sequence gaps in Sample1.aln file and | |
| 447 report the frequency distribution into specified bin range, type: | |
| 448 | |
| 449 % InfoSequenceFiles.pl --frequency --FrequencyBins | |
| 450 "150,200,250,300,350" --IgnoreGaps Yes Sample1.aln | |
| 451 | |
| 452 =head1 AUTHOR | |
| 453 | |
| 454 Manish Sud <msud@san.rr.com> | |
| 455 | |
| 456 =head1 SEE ALSO | |
| 457 | |
| 458 AnalyzeSequenceFilesData.pl, ExtractFromSequenceFiles.pl, InfoAminoAcids.pl, InfoNucleicAcids.pl | |
| 459 | |
| 460 =head1 COPYRIGHT | |
| 461 | |
| 462 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 463 | |
| 464 This file is part of MayaChemTools. | |
| 465 | |
| 466 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 467 the terms of the GNU Lesser General Public License as published by the Free | |
| 468 Software Foundation; either version 3 of the License, or (at your option) | |
| 469 any later version. | |
| 470 | |
| 471 =cut |
