comparison docs/scripts/man1/AnalyzeSequenceFilesData.1 @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
2 .\"
3 .\" Standard preamble:
4 .\" ========================================================================
5 .de Sp \" Vertical space (when we can't use .PP)
6 .if t .sp .5v
7 .if n .sp
8 ..
9 .de Vb \" Begin verbatim text
10 .ft CW
11 .nf
12 .ne \\$1
13 ..
14 .de Ve \" End verbatim text
15 .ft R
16 .fi
17 ..
18 .\" Set up some character translations and predefined strings. \*(-- will
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23 .\" nothing in troff, for use with C<>.
24 .tr \(*W-
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26 .ie n \{\
27 . ds -- \(*W-
28 . ds PI pi
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31 . ds L" ""
32 . ds R" ""
33 . ds C` ""
34 . ds C' ""
35 'br\}
36 .el\{\
37 . ds -- \|\(em\|
38 . ds PI \(*p
39 . ds L" ``
40 . ds R" ''
41 'br\}
42 .\"
43 .\" Escape single quotes in literal strings from groff's Unicode transform.
44 .ie \n(.g .ds Aq \(aq
45 .el .ds Aq '
46 .\"
47 .\" If the F register is turned on, we'll generate index entries on stderr for
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
49 .\" entries marked with X<> in POD. Of course, you'll have to process the
50 .\" output yourself in some meaningful fashion.
51 .ie \nF \{\
52 . de IX
53 . tm Index:\\$1\t\\n%\t"\\$2"
54 ..
55 . nr % 0
56 . rr F
57 .\}
58 .el \{\
59 . de IX
60 ..
61 .\}
62 .\"
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
64 .\" Fear. Run. Save yourself. No user-serviceable parts.
65 . \" fudge factors for nroff and troff
66 .if n \{\
67 . ds #H 0
68 . ds #V .8m
69 . ds #F .3m
70 . ds #[ \f1
71 . ds #] \fP
72 .\}
73 .if t \{\
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
75 . ds #V .6m
76 . ds #F 0
77 . ds #[ \&
78 . ds #] \&
79 .\}
80 . \" simple accents for nroff and troff
81 .if n \{\
82 . ds ' \&
83 . ds ` \&
84 . ds ^ \&
85 . ds , \&
86 . ds ~ ~
87 . ds /
88 .\}
89 .if t \{\
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
96 .\}
97 . \" troff and (daisy-wheel) nroff accents
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
105 .ds ae a\h'-(\w'a'u*4/10)'e
106 .ds Ae A\h'-(\w'A'u*4/10)'E
107 . \" corrections for vroff
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
110 . \" for low resolution devices (crt and lpr)
111 .if \n(.H>23 .if \n(.V>19 \
112 \{\
113 . ds : e
114 . ds 8 ss
115 . ds o a
116 . ds d- d\h'-1'\(ga
117 . ds D- D\h'-1'\(hy
118 . ds th \o'bp'
119 . ds Th \o'LP'
120 . ds ae ae
121 . ds Ae AE
122 .\}
123 .rm #[ #] #H #V #F C
124 .\" ========================================================================
125 .\"
126 .IX Title "ANALYZESEQUENCEFILESDATA 1"
127 .TH ANALYZESEQUENCEFILESDATA 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
129 .\" way too many mistakes in technical documents.
130 .if n .ad l
131 .nh
132 .SH "NAME"
133 AnalyzeSequenceFilesData.pl \- Analyze sequence and alignment files
134 .SH "SYNOPSIS"
135 .IX Header "SYNOPSIS"
136 AnalyzeSequenceFilesData.pl SequenceFile(s) AlignmentFile(s)...
137 .PP
138 AnalyzeSequenceFilesData.pl [\fB\-h, \-\-help\fR] [\fB\-i, \-\-IgnoreGaps\fR yes | no]
139 [\fB\-m, \-\-mode\fR PercentIdentityMatrix | ResidueFrequencyAnalysis | All]
140 [\fB\-\-outdelim\fR comma | tab | semicolon] [\fB\-o, \-\-overwrite\fR] [\fB\-p, \-\-precision\fR number] [\fB\-q, \-\-quote\fR yes | no]
141 [\fB\-\-ReferenceSequence\fR SequenceID | UseFirstSequenceID]
142 [\fB\-\-region\fR \*(L"StartResNum, EndResNum, [StartResNum, EndResNum...]\*(R" | UseCompleteSequence]
143 [\fB\-\-RegionResiduesMode\fR AminoAcids | NucleicAcids | None]
144 [\fB\-r, \-\-root\fR rootname] [\fB\-w, \-\-WorkingDir\fR dirname] SequenceFile(s) AlignmentFile(s)...
145 .SH "DESCRIPTION"
146 .IX Header "DESCRIPTION"
147 Analyze \fISequenceFile(s) and AlignmentFile(s)\fR data: calculate pairwise percent identity matrix or
148 calculate percent occurrence of various residues in specified sequence regions. All the sequences
149 in the input file must have the same sequence lengths; otherwise, the sequence file is ignored.
150 .PP
151 The file names are separated by spaces. All the sequence files in the current directory can
152 be specified by \fI*.aln\fR, \fI*.msf\fR, \fI*.fasta\fR, \fI*.fta\fR, \fI*.pir\fR or any other supported
153 format; additionally, \fIDirName\fR corresponds to all the sequence files in the current directory
154 with any of the supported file extensions: \fI.aln, .msf, .fasta, .fta, and .pir\fR.
155 .PP
156 Supported sequence formats are: \fIALN/ClustalW\fR, \fI\s-1GCG/MSF\s0\fR, \fI\s-1PILEUP/MSF\s0\fR, \fIPearson/FASTA\fR,
157 and \fI\s-1NBRF/PIR\s0\fR. Instead of using file extensions, file formats are detected by parsing the contents
158 of \fISequenceFile(s) and AlignmentFile(s)\fR.
159 .SH "OPTIONS"
160 .IX Header "OPTIONS"
161 .IP "\fB\-h, \-\-help\fR" 4
162 .IX Item "-h, --help"
163 Print this help message.
164 .IP "\fB\-i, \-\-IgnoreGaps\fR \fIyes | no\fR" 4
165 .IX Item "-i, --IgnoreGaps yes | no"
166 Ignore gaps during calculation of sequence lengths and specification of regions during residue
167 frequency analysis. Possible values: \fIyes or no\fR. Default value: \fIyes\fR.
168 .IP "\fB\-m, \-\-mode\fR \fIPercentIdentityMatrix | ResidueFrequencyAnalysis | All\fR" 4
169 .IX Item "-m, --mode PercentIdentityMatrix | ResidueFrequencyAnalysis | All"
170 Specify how to analyze data in sequence files: calculate percent identity matrix or calculate
171 frequency of occurrence of residues in specific regions. During \fIResidueFrequencyAnalysis\fR value
172 of \fB\-m, \-\-mode\fR option, output files are generated for both the residue count and percent residue
173 count. Possible values: \fIPercentIdentityMatrix, ResidueFrequencyAnalysis, or All\fR. Default value:
174 \&\fIPercentIdentityMatrix\fR.
175 .IP "\fB\-\-outdelim\fR \fIcomma | tab | semicolon\fR" 4
176 .IX Item "--outdelim comma | tab | semicolon"
177 Output text file delimiter. Possible values: \fIcomma, tab, or semicolon\fR.
178 Default value: \fIcomma\fR.
179 .IP "\fB\-o, \-\-overwrite\fR" 4
180 .IX Item "-o, --overwrite"
181 Overwrite existing files.
182 .IP "\fB\-p, \-\-precision\fR \fInumber\fR" 4
183 .IX Item "-p, --precision number"
184 Precision of calculated values in the output file. Default: up to \fI2\fR decimal places.
185 Valid values: positive integers.
186 .IP "\fB\-q, \-\-quote\fR \fIyes | no\fR" 4
187 .IX Item "-q, --quote yes | no"
188 Put quotes around column values in output text file. Possible values: \fIyes or
189 no\fR. Default value: \fIyes\fR.
190 .IP "\fB\-\-ReferenceSequence\fR \fISequenceID | UseFirstSequenceID\fR" 4
191 .IX Item "--ReferenceSequence SequenceID | UseFirstSequenceID"
192 Specify reference sequence \s-1ID\s0 to identify regions for performing \fIResidueFrequencyAnalysis\fR specified
193 using \fB\-m, \-\-mode\fR option. Default: \fIUseFirstSequenceID\fR.
194 .IP "\fB\-\-region\fR \fIStartResNum,EndResNum,[StartResNum,EndResNum...] | UseCompleteSequence\fR" 4
195 .IX Item "--region StartResNum,EndResNum,[StartResNum,EndResNum...] | UseCompleteSequence"
196 Specify how to perform frequency of occurrence analysis for residues: use specific regions
197 indicated by starting and ending residue numbers in reference sequence or use the whole reference
198 sequence as one region. Default: \fIUseCompleteSequence\fR.
199 .Sp
200 Based on the value of \fB\-i, \-\-IgnoreGaps\fR option, specified residue numbers \fIStartResNum,EndResNum\fR
201 correspond to the positions in the reference sequence without gaps or with gaps.
202 .Sp
203 For residue numbers corresponding to the reference sequence including gaps, percent occurrence
204 of various residues corresponding to gap positions in the reference sequence is also calculated.
205 .IP "\fB\-\-RegionResiduesMode\fR \fIAminoAcids | NucleicAcids | None\fR" 4
206 .IX Item "--RegionResiduesMode AminoAcids | NucleicAcids | None"
207 Specify how to process residues in the regions specified using \fB\-\-region\fR option during
208 \&\fIResidueFrequencyAnalysis\fR calculation: categorize residues as amino acids, nucleic acids, or simply
209 ignore residue category during the calculation. Possible values: \fIAminoAcids, NucleicAcids or None\fR.
210 Default value: \fINone\fR.
211 .Sp
212 For \fIAminoAcids\fR or \fINucleicAcids\fR values of \fB\-\-RegionResiduesMode\fR option, all the standard amino
213 acids or nucleic acids are listed in the output file for each region; any gaps and other non-standard residues
214 are added to the list as encountered.
215 .Sp
216 For \fINone\fR value of \fB\-\-RegionResiduesMode\fR option, no assumption is made about the type of residues.
217 Residues and gaps are added to the list as encountered.
218 .IP "\fB\-r, \-\-root\fR \fIrootname\fR" 4
219 .IX Item "-r, --root rootname"
220 New sequence file name is generated using the root: <Root><Mode>.<Ext> and
221 <Root><Mode><RegionNum>.<Ext>. Default new file
222 name: <SequenceFileName><Mode>.<Ext> for \fIPercentIdentityMatrix\fR value of \fB\-m, \-\-mode\fR option
223 and <SequenceFileName><Mode><RegionNum>.<Ext> for \fIResidueFrequencyAnalysis\fR.
224 The csv and tsv <Ext> values are used for comma/semicolon and tab delimited text
225 files, respectively. This option is ignored for multiple input files.
226 .IP "\fB\-w, \-\-WorkingDir\fR \fIdirname\fR" 4
227 .IX Item "-w, --WorkingDir dirname"
228 Location of working directory. Default: current directory.
229 .SH "EXAMPLES"
230 .IX Header "EXAMPLES"
231 To calculate percent identity matrix for all sequences in Sample1.msf file and generate
232 Sample1PercentIdentityMatrix.csv, type:
233 .PP
234 .Vb 1
235 \& % AnalyzeSequenceFilesData.pl Sample1.msf
236 .Ve
237 .PP
238 To perform residue frequency analysis for all sequences in Sample1.aln file corresponding to
239 non-gap positions in the first sequence and generate Sample1ResidueFrequencyAnalysisRegion1.csv
240 and Sample1PercentResidueFrequencyAnalysisRegion1.csv files, type:
241 .PP
242 .Vb 2
243 \& % AnalyzeSequenceFilesData.pl \-m ResidueFrequencyAnalysis \-o
244 \& Sample1.aln
245 .Ve
246 .PP
247 To perform residue frequency analysis for all sequences in Sample1.aln file corresponding to
248 all positions in the first sequence and generate TestResidueFrequencyAnalysisRegion1.csv
249 and TestPercentResidueFrequencyAnalysisRegion1.csv files, type:
250 .PP
251 .Vb 2
252 \& % AnalyzeSequenceFilesData.pl \-m ResidueFrequencyAnalysis \-\-IgnoreGaps
253 \& No \-o \-r Test Sample1.aln
254 .Ve
255 .PP
256 To perform residue frequency analysis for all sequences in Sample1.msf file corresponding to
257 non-gap residue positions 5 to 15, and 30 to 40 in sequence \s-1ACHE_BOVIN\s0 and generate
258 Sample1ResidueFrequencyAnalysisRegion1.csv, Sample1ResidueFrequencyAnalysisRegion2.csv,
259 Sample1PercentResidueFrequencyAnalysisRegion1.csv, and
260 Sample1PercentResidueFrequencyAnalysisRegion2.csv files, type:
261 .PP
262 .Vb 2
263 \& % AnalyzeSequenceFilesData.pl \-m ResidueFrequencyAnalysis
264 \& \-\-ReferenceSequence ACHE_BOVIN \-\-region "5,15,30,40" \-o Sample1.msf
265 .Ve
266 .SH "AUTHOR"
267 .IX Header "AUTHOR"
268 Manish Sud <msud@san.rr.com>
269 .SH "SEE ALSO"
270 .IX Header "SEE ALSO"
271 ExtractFromSequenceFiles.pl, InfoSequenceFiles.pl
272 .SH "COPYRIGHT"
273 .IX Header "COPYRIGHT"
274 Copyright (C) 2015 Manish Sud. All rights reserved.
275 .PP
276 This file is part of MayaChemTools.
277 .PP
278 MayaChemTools is free software; you can redistribute it and/or modify it under
279 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
280 Software Foundation; either version 3 of the License, or (at your option)
281 any later version.