comparison mayachemtools/docs/modules/man3/SequenceFileUtil.3 @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
2 .\"
3 .\" Standard preamble:
4 .\" ========================================================================
5 .de Sp \" Vertical space (when we can't use .PP)
6 .if t .sp .5v
7 .if n .sp
8 ..
9 .de Vb \" Begin verbatim text
10 .ft CW
11 .nf
12 .ne \\$1
13 ..
14 .de Ve \" End verbatim text
15 .ft R
16 .fi
17 ..
18 .\" Set up some character translations and predefined strings. \*(-- will
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23 .\" nothing in troff, for use with C<>.
24 .tr \(*W-
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26 .ie n \{\
27 . ds -- \(*W-
28 . ds PI pi
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31 . ds L" ""
32 . ds R" ""
33 . ds C` ""
34 . ds C' ""
35 'br\}
36 .el\{\
37 . ds -- \|\(em\|
38 . ds PI \(*p
39 . ds L" ``
40 . ds R" ''
41 'br\}
42 .\"
43 .\" Escape single quotes in literal strings from groff's Unicode transform.
44 .ie \n(.g .ds Aq \(aq
45 .el .ds Aq '
46 .\"
47 .\" If the F register is turned on, we'll generate index entries on stderr for
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
49 .\" entries marked with X<> in POD. Of course, you'll have to process the
50 .\" output yourself in some meaningful fashion.
51 .ie \nF \{\
52 . de IX
53 . tm Index:\\$1\t\\n%\t"\\$2"
54 ..
55 . nr % 0
56 . rr F
57 .\}
58 .el \{\
59 . de IX
60 ..
61 .\}
62 .\"
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
64 .\" Fear. Run. Save yourself. No user-serviceable parts.
65 . \" fudge factors for nroff and troff
66 .if n \{\
67 . ds #H 0
68 . ds #V .8m
69 . ds #F .3m
70 . ds #[ \f1
71 . ds #] \fP
72 .\}
73 .if t \{\
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
75 . ds #V .6m
76 . ds #F 0
77 . ds #[ \&
78 . ds #] \&
79 .\}
80 . \" simple accents for nroff and troff
81 .if n \{\
82 . ds ' \&
83 . ds ` \&
84 . ds ^ \&
85 . ds , \&
86 . ds ~ ~
87 . ds /
88 .\}
89 .if t \{\
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
96 .\}
97 . \" troff and (daisy-wheel) nroff accents
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
105 .ds ae a\h'-(\w'a'u*4/10)'e
106 .ds Ae A\h'-(\w'A'u*4/10)'E
107 . \" corrections for vroff
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
110 . \" for low resolution devices (crt and lpr)
111 .if \n(.H>23 .if \n(.V>19 \
112 \{\
113 . ds : e
114 . ds 8 ss
115 . ds o a
116 . ds d- d\h'-1'\(ga
117 . ds D- D\h'-1'\(hy
118 . ds th \o'bp'
119 . ds Th \o'LP'
120 . ds ae ae
121 . ds Ae AE
122 .\}
123 .rm #[ #] #H #V #F C
124 .\" ========================================================================
125 .\"
126 .IX Title "SEQUENCEFILEUTIL 3"
127 .TH SEQUENCEFILEUTIL 3 "2015-03-29" "perl v5.14.2" "MayaChemTools"
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
129 .\" way too many mistakes in technical documents.
130 .if n .ad l
131 .nh
132 .SH "NAME"
133 SequenceFileUtil
134 .SH "SYNOPSIS"
135 .IX Header "SYNOPSIS"
136 use SequenceFileUtil ;
137 .PP
138 use SequenceFileUtil qw(:all);
139 .SH "DESCRIPTION"
140 .IX Header "DESCRIPTION"
141 \&\fBSequenceFileUtil\fR module provides the following functions:
142 .PP
143 AreSequenceLengthsIdentical, CalcuatePercentSequenceIdentity,
144 CalculatePercentSequenceIdentityMatrix, GetLongestSequence, GetSequenceLength,
145 GetShortestSequence, IsClustalWSequenceFile, IsGapResidue, IsMSFSequenceFile,
146 IsPIRFastaSequenceFile, IsPearsonFastaSequenceFile, IsSupportedSequenceFile,
147 ReadClustalWSequenceFile, ReadMSFSequenceFile, ReadPIRFastaSequenceFile,
148 ReadPearsonFastaSequenceFile, ReadSequenceFile, RemoveSequenceAlignmentGapColumns,
149 RemoveSequenceGaps, WritePearsonFastaSequenceFile
150 SequenceFileUtil module provides various methods to process sequence
151 files and retrieve appropriate information.
152 .SH "FUNCTIONS"
153 .IX Header "FUNCTIONS"
154 .IP "\fBAreSequenceLengthsIdentical\fR" 4
155 .IX Item "AreSequenceLengthsIdentical"
156 .Vb 1
157 \& $Status = AreSequenceLengthsIdentical($SequencesDataRef);
158 .Ve
159 .Sp
160 Checks the lengths of all the sequences available in \fISequencesDataRef\fR and returns 1
161 or 0 based on whether the lengths of all the sequences are the same.
162 .IP "\fBCalcuatePercentSequenceIdentity\fR" 4
163 .IX Item "CalcuatePercentSequenceIdentity"
164 .Vb 3
165 \& $PercentIdentity =
166 \& CalcuatePercentSequenceIdentity(
167 \& $Sequence1, $Sequence2, [$IgnoreGaps, $Precision]);
168 .Ve
169 .Sp
170 Returns percent identity between \fISequence1\fR and \fISequence2\fR. Optional arguments
171 \&\fIIgnoreGaps\fR and \fIPrecision\fR control handling of gaps in sequences and precision of the
172 returned value. By default, gaps are ignored and precision is set up to 1 decimal.
173 .IP "\fBCalculatePercentSequenceIdentityMatrix\fR" 4
174 .IX Item "CalculatePercentSequenceIdentityMatrix"
175 .Vb 3
176 \& $IdentityMatrixDataRef = CalculatePercentSequenceIdentityMatrix(
177 \& $SequencesDataRef, [$IgnoreGaps,
178 \& $Precision]);
179 .Ve
180 .Sp
181 Calculate pairwise percent identity between all the sequences available in \fISequencesDataRef\fR
182 and returns a reference to identity matrix hash. Optional arguments \fIIgnoreGaps\fR and
183 \&\fIPrecision\fR control handling of gaps in sequences and precision of the returned value. By default, gaps
184 are ignored and precision is set up to 1 decimal.
185 .IP "\fBGetSequenceLength\fR" 4
186 .IX Item "GetSequenceLength"
187 .Vb 1
188 \& $SequenceLength = GetSequenceLength($Sequence, [$IgnoreGaps]);
189 .Ve
190 .Sp
191 Returns length of the specified sequence. Optional argument \fIIgnoreGaps\fR controls handling
192 of gaps. By default, gaps are ignored.
193 .IP "\fBGetShortestSequence\fR" 4
194 .IX Item "GetShortestSequence"
195 .Vb 2
196 \& ($ID, $Sequence, $SeqLen, $Description) = GetShortestSequence(
197 \& $SequencesDataRef, [$IgnoreGaps]);
198 .Ve
199 .Sp
200 Checks the lengths of all the sequences available in \f(CW$SequencesDataRef\fR and returns \f(CW$ID\fR,
201 \&\f(CW$Sequence\fR, \f(CW$SeqLen\fR, and \f(CW$Description\fR values for the shortest sequence. Optional argument \f(CW$IgnoreGaps\fR
202 controls handling of gaps in sequences. By default, gaps are ignored.
203 .IP "\fBGetLongestSequence\fR" 4
204 .IX Item "GetLongestSequence"
205 .Vb 2
206 \& ($ID, $Sequence, $SeqLen, $Description) = GetLongestSequence(
207 \& $SequencesDataRef, [$IgnoreGaps]);
208 .Ve
209 .Sp
210 Checks the lengths of all the sequences available in \fISequencesDataRef\fR and returns \fB\s-1ID\s0\fR,
211 \&\fBSequence\fR, \fBSeqLen\fR, and \fBDescription\fR values for the longest sequence. Optional argument
212 \&\fIIgnoreGaps\fR controls handling of gaps in sequences. By default, gaps are ignored.
213 .IP "\fBIsGapResidue\fR" 4
214 .IX Item "IsGapResidue"
215 .Vb 1
216 \& $Status = IsGapResidue($Residue);
217 .Ve
218 .Sp
219 Returns 1 or 0 based on whether \fIResidue\fR corresponds to a gap. Any character other than A to Z is
220 considered a gap residue.
221 .IP "\fBIsSupportedSequenceFile\fR" 4
222 .IX Item "IsSupportedSequenceFile"
223 .Vb 1
224 \& $Status = IsSupportedSequenceFile($SequenceFile);
225 .Ve
226 .Sp
227 Returns 1 or 0 based on whether \fISequenceFile\fR corresponds to a supported sequence
228 format.
229 .IP "\fBIsClustalWSequenceFile\fR" 4
230 .IX Item "IsClustalWSequenceFile"
231 .Vb 1
232 \& $Status = IsClustalWSequenceFile($SequenceFile);
233 .Ve
234 .Sp
235 Returns 1 or 0 based on whether \fISequenceFile\fR corresponds to Clustal sequence alignment
236 format.
237 .IP "\fBIsPearsonFastaSequenceFile\fR" 4
238 .IX Item "IsPearsonFastaSequenceFile"
239 .Vb 1
240 \& $Status = IsPearsonFastaSequenceFile($SequenceFile);
241 .Ve
242 .Sp
243 Returns 1 or 0 based on whether \fISequenceFile\fR corresponds to Pearson \s-1FASTA\s0 sequence
244 format.
245 .IP "\fBIsPIRFastaSequenceFile\fR" 4
246 .IX Item "IsPIRFastaSequenceFile"
247 .Vb 1
248 \& $Status = IsPIRFastaSequenceFile($SequenceFile);
249 .Ve
250 .Sp
251 Returns 1 or 0 based on whether \fISequenceFile\fR corresponds to \s-1PIR\s0 \s-1FASTA\s0 sequence
252 format.
253 .IP "\fBIsMSFSequenceFile\fR" 4
254 .IX Item "IsMSFSequenceFile"
255 .Vb 1
256 \& $Status = IsMSFSequenceFile($SequenceFile);
257 .Ve
258 .Sp
259 Returns 1 or 0 based on whether \fISequenceFile\fR corresponds to \s-1MSF\s0 sequence alignment
260 format.
261 .IP "\fBReadSequenceFile\fR" 4
262 .IX Item "ReadSequenceFile"
263 .Vb 1
264 \& $SequenceDataMapRef = ReadSequenceFile($SequenceFile);
265 .Ve
266 .Sp
267 Reads \fISequenceFile\fR and returns reference to a hash containing following key/value
268 pairs:
269 .Sp
270 .Vb 5
271 \& $SequenceDataMapRef\->{IDs} \- Array of sequence IDs
272 \& $SequenceDataMapRef\->{Count} \- Number of sequences
273 \& $SequenceDataMapRef\->{Description}{$ID} \- Sequence description
274 \& $SequenceDataMapRef\->{Sequence}{$ID} \- Sequence for a specific ID
275 \& $SequenceDataMapRef\->{Sequence}{InputFileType} \- File format
276 .Ve
277 .IP "\fBReadClustalWSequenceFile\fR" 4
278 .IX Item "ReadClustalWSequenceFile"
279 .Vb 1
280 \& $SequenceDataMapRef = ReadClustalWSequenceFile($SequenceFile);
281 .Ve
282 .Sp
283 Reads ClustalW \fISequenceFile\fR and returns reference to a hash containing following key/value
284 pairs as described in \fBReadSequenceFile\fR method.
285 .IP "\fBReadMSFSequenceFile\fR" 4
286 .IX Item "ReadMSFSequenceFile"
287 .Vb 1
288 \& $SequenceDataMapRef = ReadMSFSequenceFile($SequenceFile);
289 .Ve
290 .Sp
291 Reads \s-1MSF\s0 \fISequenceFile\fR and returns reference to a hash containing following key/value
292 pairs as described in \fBReadSequenceFile\fR method.
293 .IP "\fBReadPIRFastaSequenceFile\fR" 4
294 .IX Item "ReadPIRFastaSequenceFile"
295 .Vb 1
296 \& $SequenceDataMapRef = ReadPIRFastaSequenceFile($SequenceFile);
297 .Ve
298 .Sp
299 Reads \s-1PIR\s0 \s-1FASTA\s0 \fISequenceFile\fR and returns reference to a hash containing following key/value
300 pairs as described in \fBReadSequenceFile\fR method.
301 .IP "\fBReadPearsonFastaSequenceFile\fR" 4
302 .IX Item "ReadPearsonFastaSequenceFile"
303 .Vb 1
304 \& $SequenceDataMapRef = ReadPearsonFastaSequenceFile($SequenceFile);
305 .Ve
306 .Sp
307 Reads Pearson \s-1FASTA\s0 \fISequenceFile\fR and returns reference to a hash containing following key/value
308 pairs as described in \fBReadSequenceFile\fR method.
309 .IP "\fBRemoveSequenceGaps\fR" 4
310 .IX Item "RemoveSequenceGaps"
311 .Vb 1
312 \& $SeqWithoutGaps = RemoveSequenceGaps($Sequence);
313 .Ve
314 .Sp
315 Removes gaps from \fISequence\fR and returns a sequence without any gaps.
316 .IP "\fBRemoveSequenceAlignmentGapColumns\fR" 4
317 .IX Item "RemoveSequenceAlignmentGapColumns"
318 .Vb 2
319 \& $NewAlignmentDataMapRef = RemoveSequenceAlignmentGapColumns(
320 \& $AlignmentDataMapRef);
321 .Ve
322 .Sp
323 Using input alignment data map ref containing following keys, generate a new hash with
324 same set of keys after residue columns containing only gaps have been removed:
325 .Sp
326 .Vb 4
327 \& {IDs} : Array of IDs in order as they appear in file
328 \& {Count}: ID count
329 \& {Description}{$ID} : Description data
330 \& {Sequence}{$ID} : Sequence data
331 .Ve
332 .IP "\fBWritePearsonFastaSequenceFile\fR" 4
333 .IX Item "WritePearsonFastaSequenceFile"
334 .Vb 2
335 \& WritePearsonFastaSequenceFile($SequenceFileName, $SequenceDataRef,
336 \& [$MaxLength]);
337 .Ve
338 .Sp
339 Using sequence data specified via \fISequenceDataRef\fR, write out a Pearson \s-1FASTA\s0 sequence
340 file. Optional argument \fIMaxLength\fR controls the maximum sequence length in each line; default is
341 80.
342 .SH "AUTHOR"
343 .IX Header "AUTHOR"
344 Manish Sud <msud@san.rr.com>
345 .SH "SEE ALSO"
346 .IX Header "SEE ALSO"
347 PDBFileUtil.pm
348 .SH "COPYRIGHT"
349 .IX Header "COPYRIGHT"
350 Copyright (C) 2015 Manish Sud. All rights reserved.
351 .PP
352 This file is part of MayaChemTools.
353 .PP
354 MayaChemTools is free software; you can redistribute it and/or modify it under
355 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
356 Software Foundation; either version 3 of the License, or (at your option)
357 any later version.