comparison docs/scripts/man1/ExtractFromSequenceFiles.1 @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
2 .\"
3 .\" Standard preamble:
4 .\" ========================================================================
5 .de Sp \" Vertical space (when we can't use .PP)
6 .if t .sp .5v
7 .if n .sp
8 ..
9 .de Vb \" Begin verbatim text
10 .ft CW
11 .nf
12 .ne \\$1
13 ..
14 .de Ve \" End verbatim text
15 .ft R
16 .fi
17 ..
18 .\" Set up some character translations and predefined strings. \*(-- will
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23 .\" nothing in troff, for use with C<>.
24 .tr \(*W-
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26 .ie n \{\
27 . ds -- \(*W-
28 . ds PI pi
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31 . ds L" ""
32 . ds R" ""
33 . ds C` ""
34 . ds C' ""
35 'br\}
36 .el\{\
37 . ds -- \|\(em\|
38 . ds PI \(*p
39 . ds L" ``
40 . ds R" ''
41 'br\}
42 .\"
43 .\" Escape single quotes in literal strings from groff's Unicode transform.
44 .ie \n(.g .ds Aq \(aq
45 .el .ds Aq '
46 .\"
47 .\" If the F register is turned on, we'll generate index entries on stderr for
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
49 .\" entries marked with X<> in POD. Of course, you'll have to process the
50 .\" output yourself in some meaningful fashion.
51 .ie \nF \{\
52 . de IX
53 . tm Index:\\$1\t\\n%\t"\\$2"
54 ..
55 . nr % 0
56 . rr F
57 .\}
58 .el \{\
59 . de IX
60 ..
61 .\}
62 .\"
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
64 .\" Fear. Run. Save yourself. No user-serviceable parts.
65 . \" fudge factors for nroff and troff
66 .if n \{\
67 . ds #H 0
68 . ds #V .8m
69 . ds #F .3m
70 . ds #[ \f1
71 . ds #] \fP
72 .\}
73 .if t \{\
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
75 . ds #V .6m
76 . ds #F 0
77 . ds #[ \&
78 . ds #] \&
79 .\}
80 . \" simple accents for nroff and troff
81 .if n \{\
82 . ds ' \&
83 . ds ` \&
84 . ds ^ \&
85 . ds , \&
86 . ds ~ ~
87 . ds /
88 .\}
89 .if t \{\
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
96 .\}
97 . \" troff and (daisy-wheel) nroff accents
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
105 .ds ae a\h'-(\w'a'u*4/10)'e
106 .ds Ae A\h'-(\w'A'u*4/10)'E
107 . \" corrections for vroff
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
110 . \" for low resolution devices (crt and lpr)
111 .if \n(.H>23 .if \n(.V>19 \
112 \{\
113 . ds : e
114 . ds 8 ss
115 . ds o a
116 . ds d- d\h'-1'\(ga
117 . ds D- D\h'-1'\(hy
118 . ds th \o'bp'
119 . ds Th \o'LP'
120 . ds ae ae
121 . ds Ae AE
122 .\}
123 .rm #[ #] #H #V #F C
124 .\" ========================================================================
125 .\"
126 .IX Title "EXTRACTFROMSEQUENCEFILES 1"
127 .TH EXTRACTFROMSEQUENCEFILES 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
129 .\" way too many mistakes in technical documents.
130 .if n .ad l
131 .nh
132 .SH "NAME"
133 ExtractFromSequenceFiles.pl \- Extract data from sequence and alignment files
134 .SH "SYNOPSIS"
135 .IX Header "SYNOPSIS"
136 ExtractFromSequenceFiles.pl SequenceFile(s) AlignmentFile(s)...
137 .PP
138 ExtractFromSequenceFiles.pl [\fB\-h, \-\-help\fR] [\fB\-i, \-\-IgnoreGaps\fR yes | no]
139 [\fB\-m, \-\-mode\fR SequenceID | SequenceNum | SequenceNumRange] [\fB\-o, \-\-overwrite\fR]
140 [\fB\-r, \-\-root\fR rootname] [\fB\-s, \-\-Sequences\fR \*(L"SequenceID, [SequenceID,...]\*(R" | \*(L"SequenceNum, [SequenceNum,...]\*(R" |
141 \&\*(L"StartingSeqNum, EndingSeqNum\*(R"] [\fB\-\-SequenceIDMatch\fR Exact | Relaxed]
142 [\fB\-w, \-\-WorkingDir\fR dirname] SequenceFile(s) AlignmentFile(s)...
143 .SH "DESCRIPTION"
144 .IX Header "DESCRIPTION"
145 Extract specific data from \fISequenceFile(s) and AlignmentFile(s)\fR and generate
146 \&\s-1FASTA\s0 files. You can extract sequences using sequence IDs or sequence numbers.
147 .PP
148 The file names are separated by spaces. All the sequence files in the current directory can
149 be specified by \fI*.aln\fR, \fI*.msf\fR, \fI*.fasta\fR, \fI*.fta\fR, \fI*.pir\fR or any other supported
150 formats; additionally, \fIDirName\fR corresponds to all the sequence files in the current directory
151 with any of the supported file extensions: \fI.aln, .msf, .fasta, .fta, and .pir\fR.
152 .PP
153 Supported sequence formats are: \fIALN/ClustalW\fR, \fI\s-1GCG/MSF\s0\fR, \fI\s-1PILEUP/MSF\s0\fR, \fIPearson/FASTA\fR,
154 and \fI\s-1NBRF/PIR\s0\fR. Instead of using file extensions, file formats are detected by parsing the contents
155 of \fISequenceFile(s) and AlignmentFile(s)\fR.
156 .SH "OPTIONS"
157 .IX Header "OPTIONS"
158 .IP "\fB\-h, \-\-help\fR" 4
159 .IX Item "-h, --help"
160 Print this help message.
161 .IP "\fB\-i, \-\-IgnoreGaps\fR \fIyes | no\fR" 4
162 .IX Item "-i, --IgnoreGaps yes | no"
163 Ignore gaps or gap columns during generation of new sequence or alignment file(s).
164 Possible values: \fIyes or no\fR. Default value: \fIyes\fR.
165 .Sp
166 In order to remove gap columns, the length of all the sequences must be the same; otherwise,
167 this option is ignored.
168 .IP "\fB\-m, \-\-mode\fR \fISequenceID | SequenceNum | SequenceNumRange\fR" 4
169 .IX Item "-m, --mode SequenceID | SequenceNum | SequenceNumRange"
170 Specify how to extract data from sequence files: extract sequences using sequence
171 IDs or sequence numbers. Possible values: \fISequenceID | SequenceNum
172 | SequenceNumRange\fR. Default: \fISequenceNum\fR with value of 1.
173 .Sp
174 The sequence numbers correspond to position of sequences starting from 1 for first sequence
175 in \fISequenceFile(s) and AlignmentFile(s)\fR.
176 .IP "\fB\-o, \-\-overwrite\fR" 4
177 .IX Item "-o, --overwrite"
178 Overwrite existing files.
179 .IP "\fB\-r, \-\-root\fR \fIrootname\fR" 4
180 .IX Item "-r, --root rootname"
181 New sequence file name is generated using the root: <Root><Mode>.<Ext>. Default new file:
182 <SequenceFileName><Mode>.<Ext>. This option is ignored for multiple input files.
183 .ie n .IP "\fB\-s, \-\-Sequences\fR \fI""SequenceID,[SequenceID,...]"" | ""SequenceNum,[SequenceNum,...]"" | ""StartingSeqNum,EndingSeqNum""\fR" 4
184 .el .IP "\fB\-s, \-\-Sequences\fR \fI``SequenceID,[SequenceID,...]'' | ``SequenceNum,[SequenceNum,...]'' | ``StartingSeqNum,EndingSeqNum''\fR" 4
185 .IX Item "-s, --Sequences SequenceID,[SequenceID,...] | SequenceNum,[SequenceNum,...] | StartingSeqNum,EndingSeqNum"
186 This value is \fB\-m, \-\-mode\fR specific. In general, it's a comma delimited list of sequence IDs or sequence
187 numbers.
188 .Sp
189 For \fISequenceID\fR value of \fB\-m, \-\-mode\fR option, input value format is: \fISequenceID,...\fR. Examples:
190 .Sp
191 .Vb 2
192 \& ACHE_BOVIN
193 \& ACHE_BOVIN,ACHE_HUMAN
194 .Ve
195 .Sp
196 For \fISequenceNum\fR value of \fB\-m, \-\-mode\fR option, input value format is: \fISequenceNum,...\fR. Examples:
197 .Sp
198 .Vb 2
199 \& 2
200 \& 1,5
201 .Ve
202 .Sp
203 For \fISequenceNumRange\fR value of \fB\-m, \-\-mode\fR option, input value format is: \fIStartingSeqNum,EndingSeqNum\fR. Examples:
204 .Sp
205 .Vb 1
206 \& 2,4
207 .Ve
208 .IP "\fB\-\-SequenceIDMatch\fR \fIExact | Relaxed\fR" 4
209 .IX Item "--SequenceIDMatch Exact | Relaxed"
210 Sequence IDs matching criterion during \fISequenceID\fR value of \fB\-m, \-\-mode\fR option: match
211 specified sequence \s-1ID\s0 exactly or as a substring against sequence IDs in the files. Possible
212 values: \fIExact | Relaxed\fR. Default: \fIRelaxed\fR. Sequence \s-1ID\s0 match is case insensitive
213 during both options.
214 .IP "\fB\-\-SequenceLength\fR \fInumber\fR" 4
215 .IX Item "--SequenceLength number"
216 Maximum sequence length per line in sequence file(s). Default: \fI80\fR.
217 .IP "\fB\-w, \-\-WorkingDir\fR \fItext\fR" 4
218 .IX Item "-w, --WorkingDir text"
219 Location of working directory. Default: current directory.
220 .SH "EXAMPLES"
221 .IX Header "EXAMPLES"
222 To extract first sequence from Sample1.fasta sequence file and generate Sample1SequenceNum.fasta
223 sequence file, type:
224 .PP
225 .Vb 1
226 \& % ExtractFromSequenceFiles.pl \-o Sample1.fasta
227 .Ve
228 .PP
229 To extract first sequence from Sample1.aln alignment file and generate Sample1SequenceNum.fasta
230 sequence file without any column gaps, type:
231 .PP
232 .Vb 1
233 \& % ExtractFromSequenceFiles.pl \-o Sample1.aln
234 .Ve
235 .PP
236 To extract first sequence from Sample1.aln alignment file and generate Sample1SequenceNum.fasta
237 sequence file with column gaps, type:
238 .PP
239 .Vb 1
240 \& % ExtractFromSequenceFiles.pl \-\-IgnoreGaps No \-o Sample1.aln
241 .Ve
242 .PP
243 To extract sequence number 1 and 4 from Sample1.fasta sequence file and generate
244 Sample1SequenceNum.fasta sequence file, type:
245 .PP
246 .Vb 2
247 \& % ExtractFromSequenceFiles.pl \-o \-m SequenceNum \-\-Sequences 1,4
248 \& \-o Sample1.fasta
249 .Ve
250 .PP
251 To extract sequences from sequence number 1 to 4 from Sample1.fasta sequence file and generate
252 Sample1SequenceNumRange.fasta sequence file, type:
253 .PP
254 .Vb 2
255 \& % ExtractFromSequenceFiles.pl \-o \-m SequenceNumRange \-\-Sequences
256 \& 1,4 \-o Sample1.fasta
257 .Ve
258 .PP
259 To extract sequence \s-1ID\s0 \*(L"Q9P993/104\-387\*(R" from Sample1.fasta sequence file and generate
260 Sample1SequenceID.fasta sequence file, type:
261 .PP
262 .Vb 2
263 \& % ExtractFromSequenceFiles.pl \-o \-m SequenceID \-\-Sequences
264 \& "Q9P993/104\-387" \-\-SequenceIDMatch Exact \-o Sample1.fasta
265 .Ve
266 .SH "AUTHOR"
267 .IX Header "AUTHOR"
268 Manish Sud <msud@san.rr.com>
269 .SH "SEE ALSO"
270 .IX Header "SEE ALSO"
271 AnalyzeSequenceFilesData.pl, InfoSequenceFiles.pl
272 .SH "COPYRIGHT"
273 .IX Header "COPYRIGHT"
274 Copyright (C) 2015 Manish Sud. All rights reserved.
275 .PP
276 This file is part of MayaChemTools.
277 .PP
278 MayaChemTools is free software; you can redistribute it and/or modify it under
279 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
280 Software Foundation; either version 3 of the License, or (at your option)
281 any later version.