comparison docs/scripts/man1/InfoSequenceFiles.1 @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
2 .\"
3 .\" Standard preamble:
4 .\" ========================================================================
5 .de Sp \" Vertical space (when we can't use .PP)
6 .if t .sp .5v
7 .if n .sp
8 ..
9 .de Vb \" Begin verbatim text
10 .ft CW
11 .nf
12 .ne \\$1
13 ..
14 .de Ve \" End verbatim text
15 .ft R
16 .fi
17 ..
18 .\" Set up some character translations and predefined strings. \*(-- will
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23 .\" nothing in troff, for use with C<>.
24 .tr \(*W-
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26 .ie n \{\
27 . ds -- \(*W-
28 . ds PI pi
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31 . ds L" ""
32 . ds R" ""
33 . ds C` ""
34 . ds C' ""
35 'br\}
36 .el\{\
37 . ds -- \|\(em\|
38 . ds PI \(*p
39 . ds L" ``
40 . ds R" ''
41 'br\}
42 .\"
43 .\" Escape single quotes in literal strings from groff's Unicode transform.
44 .ie \n(.g .ds Aq \(aq
45 .el .ds Aq '
46 .\"
47 .\" If the F register is turned on, we'll generate index entries on stderr for
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
49 .\" entries marked with X<> in POD. Of course, you'll have to process the
50 .\" output yourself in some meaningful fashion.
51 .ie \nF \{\
52 . de IX
53 . tm Index:\\$1\t\\n%\t"\\$2"
54 ..
55 . nr % 0
56 . rr F
57 .\}
58 .el \{\
59 . de IX
60 ..
61 .\}
62 .\"
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
64 .\" Fear. Run. Save yourself. No user-serviceable parts.
65 . \" fudge factors for nroff and troff
66 .if n \{\
67 . ds #H 0
68 . ds #V .8m
69 . ds #F .3m
70 . ds #[ \f1
71 . ds #] \fP
72 .\}
73 .if t \{\
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
75 . ds #V .6m
76 . ds #F 0
77 . ds #[ \&
78 . ds #] \&
79 .\}
80 . \" simple accents for nroff and troff
81 .if n \{\
82 . ds ' \&
83 . ds ` \&
84 . ds ^ \&
85 . ds , \&
86 . ds ~ ~
87 . ds /
88 .\}
89 .if t \{\
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
96 .\}
97 . \" troff and (daisy-wheel) nroff accents
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
105 .ds ae a\h'-(\w'a'u*4/10)'e
106 .ds Ae A\h'-(\w'A'u*4/10)'E
107 . \" corrections for vroff
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
110 . \" for low resolution devices (crt and lpr)
111 .if \n(.H>23 .if \n(.V>19 \
112 \{\
113 . ds : e
114 . ds 8 ss
115 . ds o a
116 . ds d- d\h'-1'\(ga
117 . ds D- D\h'-1'\(hy
118 . ds th \o'bp'
119 . ds Th \o'LP'
120 . ds ae ae
121 . ds Ae AE
122 .\}
123 .rm #[ #] #H #V #F C
124 .\" ========================================================================
125 .\"
126 .IX Title "INFOSEQUENCEFILES 1"
127 .TH INFOSEQUENCEFILES 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
129 .\" way too many mistakes in technical documents.
130 .if n .ad l
131 .nh
132 .SH "NAME"
133 InfoSequenceFiles.pl \- List information about sequence and alignment files
134 .SH "SYNOPSIS"
135 .IX Header "SYNOPSIS"
136 InfoSequenceFiles.pl SequenceFile(s) AlignmentFile(s)...
137 .PP
138 InfoSequenceFiles.pl [\fB\-a, \-\-all\fR] [\fB\-c, \-\-count\fR] [\fB\-d, \-\-detail\fR infolevel]
139 [\fB\-f, \-\-frequency\fR] [\fB\-\-FrequencyBins\fR number | \*(L"number, number, [number,...]\*(R"]
140 [\fB\-h, \-\-help\fR] [\fB\-i, \-\-IgnoreGaps\fR yes | no] [\fB\-l, \-\-longest\fR] [\fB\-s, \-\-shortest\fR]
141 [\fB\-\-SequenceLengths\fR] [\fB\-w, \-\-WorkingDir\fR dirname] SequenceFile(s) AlignmentFile(s)...
142 .SH "DESCRIPTION"
143 .IX Header "DESCRIPTION"
144 List information about contents of \fISequenceFile(s) and AlignmentFile(s)\fR: number of sequences,
145 shortest and longest sequences, distribution of sequence lengths and so on. The file names are
146 separated by spaces. All the sequence files in the current directory can be specified by \fI*.aln\fR,
147 \&\fI*.msf\fR, \fI*.fasta\fR, \fI*.fta\fR, \fI*.pir\fR or any other supported formats; additionally, \fIDirName\fR
148 corresponds to all the sequence files in the current directory with any of the supported file
149 extensions: \fI.aln, .msf, .fasta, .fta, and .pir\fR.
150 .PP
151 Supported sequence formats are: \fIALN/ClustalW\fR, \fI\s-1GCG/MSF\s0\fR, \fI\s-1PILEUP/MSF\s0\fR, \fIPearson/FASTA\fR,
152 and \fI\s-1NBRF/PIR\s0\fR. Instead of using file extensions, file formats are detected by parsing the contents
153 of \fISequenceFile(s) and AlignmentFile(s)\fR.
154 .SH "OPTIONS"
155 .IX Header "OPTIONS"
156 .IP "\fB\-a, \-\-all\fR" 4
157 .IX Item "-a, --all"
158 List all the available information.
159 .IP "\fB\-c, \-\-count\fR" 4
160 .IX Item "-c, --count"
161 List number of sequences. This is \fBdefault behavior\fR.
162 .IP "\fB\-d, \-\-detail\fR \fIInfoLevel\fR" 4
163 .IX Item "-d, --detail InfoLevel"
164 Level of information to print about sequences during various options. Default: \fI1\fR.
165 Possible values: \fI1, 2 or 3\fR.
166 .IP "\fB\-f, \-\-frequency\fR" 4
167 .IX Item "-f, --frequency"
168 List distribution of sequence lengths using the specified number of bins or bin range specified
169 using the \fB\-\-FrequencyBins\fR option.
170 .Sp
171 This option is ignored for input files containing only a single sequence.
172 .ie n .IP "\fB\-\-FrequencyBins\fR \fInumber | ""number,number,[number,...]""\fR" 4
173 .el .IP "\fB\-\-FrequencyBins\fR \fInumber | ``number,number,[number,...]''\fR" 4
174 .IX Item "--FrequencyBins number | number,number,[number,...]"
175 This value is used with \fB\-f, \-\-frequency\fR option to list distribution of sequence lengths using
176 the specified number of bins or bin range. Default value: \fI10\fR.
177 .Sp
178 The bin range list is used to group sequence lengths into different groups; it must contain
179 values in ascending order. Examples:
180 .Sp
181 .Vb 2
182 \& 100,200,300,400,500,600
183 \& 200,400,600,800,1000
184 .Ve
185 .Sp
186 The frequency value calculated for a specific bin corresponds to all the sequence lengths
187 which are greater than the previous bin value and less than or equal to the current bin value.
188 .IP "\fB\-h, \-\-help\fR" 4
189 .IX Item "-h, --help"
190 Print this help message.
191 .IP "\fB\-i, \-\-IgnoreGaps\fR \fIyes | no\fR" 4
192 .IX Item "-i, --IgnoreGaps yes | no"
193 Ignore gaps during calculation of sequence lengths. Possible values: \fIyes or
194 no\fR. Default value: \fIno\fR.
195 .IP "\fB\-l, \-\-longest\fR" 4
196 .IX Item "-l, --longest"
197 List information about longest sequence: \s-1ID\s0, sequence and sequence length. This option
198 is ignored for input files containing only a single sequence.
199 .IP "\fB\-s, \-\-shortest\fR" 4
200 .IX Item "-s, --shortest"
201 List information about shortest sequence: \s-1ID\s0, sequence and sequence length. This option
202 is ignored for input files containing only a single sequence.
203 .IP "\fB\-\-SequenceLengths\fR" 4
204 .IX Item "--SequenceLengths"
205 List information about sequence lengths.
206 .IP "\fB\-w, \-\-WorkingDir\fR \fIdirname\fR" 4
207 .IX Item "-w, --WorkingDir dirname"
208 Location of working directory. Default: current directory.
209 .SH "EXAMPLES"
210 .IX Header "EXAMPLES"
211 To count number of sequences in sequence files, type:
212 .PP
213 .Vb 3
214 \& % InfoSequenceFiles.pl Sample1.fasta
215 \& % InfoSequenceFiles.pl Sample1.msf Sample1.aln Sample1.pir
216 \& % InfoSequenceFiles.pl *.fasta *.fta *.msf *.pir *.aln
217 .Ve
218 .PP
219 To list all available information with maximum level of available detail for a sequence
220 alignment file Sample1.msf, type:
221 .PP
222 .Vb 1
223 \& % InfoSequenceFiles.pl \-a \-d 3 Sample1.msf
224 .Ve
225 .PP
226 To list sequence length information after ignoring sequence gaps in Sample1.aln file, type:
227 .PP
228 .Vb 2
229 \& % InfoSequenceFiles.pl \-\-SequenceLengths \-\-IgnoreGaps Yes
230 \& Sample1.aln
231 .Ve
232 .PP
233 To list shortest and longest sequence length information after ignoring sequence
234 gaps in Sample1.aln file, type:
235 .PP
236 .Vb 2
237 \& % InfoSequenceFiles.pl \-\-longest \-\-shortest \-\-IgnoreGaps Yes
238 \& Sample1.aln
239 .Ve
240 .PP
241 To list distribution of sequence lengths after ignoring sequence gaps in Sample1.aln file and
242 report the frequency distribution into 10 bins, type:
243 .PP
244 .Vb 2
245 \& % InfoSequenceFiles.pl \-\-frequency \-\-FrequencyBins 10
246 \& \-\-IgnoreGaps Yes Sample1.aln
247 .Ve
248 .PP
249 To list distribution of sequence lengths after ignoring sequence gaps in Sample1.aln file and
250 report the frequency distribution into specified bin range, type:
251 .PP
252 .Vb 2
253 \& % InfoSequenceFiles.pl \-\-frequency \-\-FrequencyBins
254 \& "150,200,250,300,350" \-\-IgnoreGaps Yes Sample1.aln
255 .Ve
256 .SH "AUTHOR"
257 .IX Header "AUTHOR"
258 Manish Sud <msud@san.rr.com>
259 .SH "SEE ALSO"
260 .IX Header "SEE ALSO"
261 AnalyzeSequenceFilesData.pl, ExtractFromSequenceFiles.pl, InfoAminoAcids.pl, InfoNucleicAcids.pl
262 .SH "COPYRIGHT"
263 .IX Header "COPYRIGHT"
264 Copyright (C) 2015 Manish Sud. All rights reserved.
265 .PP
266 This file is part of MayaChemTools.
267 .PP
268 MayaChemTools is free software; you can redistribute it and/or modify it under
269 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
270 Software Foundation; either version 3 of the License, or (at your option)
271 any later version.