0
|
1 NAME
|
|
2 SequenceFileUtil
|
|
3
|
|
4 SYNOPSIS
|
|
5 use SequenceFileUtil ;
|
|
6
|
|
7 use SequenceFileUtil qw(:all);
|
|
8
|
|
9 DESCRIPTION
|
|
10 SequenceFileUtil module provides the following functions:
|
|
11
|
|
12 AreSequenceLengthsIdentical, CalcuatePercentSequenceIdentity,
|
|
13 CalculatePercentSequenceIdentityMatrix, GetLongestSequence,
|
|
14 GetSequenceLength, GetShortestSequence, IsClustalWSequenceFile,
|
|
15 IsGapResidue, IsMSFSequenceFile, IsPIRFastaSequenceFile,
|
|
16 IsPearsonFastaSequenceFile, IsSupportedSequenceFile,
|
|
17 ReadClustalWSequenceFile, ReadMSFSequenceFile, ReadPIRFastaSequenceFile,
|
|
18 ReadPearsonFastaSequenceFile, ReadSequenceFile,
|
|
19 RemoveSequenceAlignmentGapColumns, RemoveSequenceGaps,
|
|
20 WritePearsonFastaSequenceFile. SequenceFileUtil module provides various
|
|
21 methods to process sequence files and retrieve appropriate information.
|
|
22
|
|
23 FUNCTIONS
|
|
24 AreSequenceLengthsIdentical
|
|
25 $Status = AreSequenceLengthsIdentical($SequencesDataRef);
|
|
26
|
|
27 Checks the lengths of all the sequences available in
|
|
28 *SequencesDataRef* and returns 1 or 0 based on whether the lengths of
|
|
29 all the sequences are the same.
|
|
30
|
|
31 CalcuatePercentSequenceIdentity
|
|
32 $PercentIdentity =
|
|
33 CalcuatePercentSequenceIdentity(
|
|
34 $Sequence1, $Sequence2, [$IgnoreGaps, $Precision]);
|
|
35
|
|
36 Returns percent identity between *Sequence1* and *Sequence2*.
|
|
37 Optional arguments *IgnoreGaps* and *Precision* control handling of
|
|
38 gaps in sequences and precision of the returned value. By default,
|
|
39 gaps are ignored and precision is set up to 1 decimal.
|
|
40
|
|
41 CalculatePercentSequenceIdentityMatrix
|
|
42 $IdentityMatrixDataRef = CalculatePercentSequenceIdentityMatrix(
|
|
43 $SequencesDataRef, [$IgnoreGaps,
|
|
44 $Precision]);
|
|
45
|
|
46 Calculates pairwise percent identity between all the sequences
|
|
47 available in *SequencesDataRef* and returns a reference to identity
|
|
48 matrix hash. Optional arguments *IgnoreGaps* and *Precision* control
|
|
49 handling of gaps in sequences and precision of the returned value.
|
|
50 By default, gaps are ignored and precision is set up to 1 decimal.
|
|
51
|
|
52 GetSequenceLength
|
|
53 $SequenceLength = GetSequenceLength($Sequence, [$IgnoreGaps]);
|
|
54
|
|
55 Returns length of the specified sequence. Optional argument
|
|
56 *IgnoreGaps* controls handling of gaps. By default, gaps are
|
|
57 ignored.
|
|
58
|
|
59 GetShortestSequence
|
|
60 ($ID, $Sequence, $SeqLen, $Description) = GetShortestSequence(
|
|
61 $SequencesDataRef, [$IgnoreGaps]);
|
|
62
|
|
63 Checks the lengths of all the sequences available in
|
|
64 $SequencesDataRef and returns $ID, $Sequence, $SeqLen, and
|
|
65 $Description values for the shortest sequence. Optional argument
|
|
66 $IgnoreGaps controls handling of gaps in sequences. By default, gaps
|
|
67 are ignored.
|
|
68
|
|
69 GetLongestSequence
|
|
70 ($ID, $Sequence, $SeqLen, $Description) = GetLongestSequence(
|
|
71 $SequencesDataRef, [$IgnoreGaps]);
|
|
72
|
|
73 Checks the lengths of all the sequences available in
|
|
74 *SequencesDataRef* and returns ID, Sequence, SeqLen, and Description
|
|
75 values for the longest sequence. Optional argument *IgnoreGaps*
|
|
76 controls handling of gaps in sequences. By default, gaps are
|
|
77 ignored.
|
|
78
|
|
79 IsGapResidue
|
|
80 $Status = IsGapResidue($Residue);
|
|
81
|
|
82 Returns 1 or 0 based on whether *Residue* corresponds to a gap. Any
|
|
83 character other than A to Z is considered a gap residue.
|
|
84
|
|
85 IsSupportedSequenceFile
|
|
86 $Status = IsSupportedSequenceFile($SequenceFile);
|
|
87
|
|
88 Returns 1 or 0 based on whether *SequenceFile* corresponds to a
|
|
89 supported sequence format.
|
|
90
|
|
91 IsClustalWSequenceFile
|
|
92 $Status = IsClustalWSequenceFile($SequenceFile);
|
|
93
|
|
94 Returns 1 or 0 based on whether *SequenceFile* corresponds to
|
|
95 Clustal sequence alignment format.
|
|
96
|
|
97 IsPearsonFastaSequenceFile
|
|
98 $Status = IsPearsonFastaSequenceFile($SequenceFile);
|
|
99
|
|
100 Returns 1 or 0 based on whether *SequenceFile* corresponds to
|
|
101 Pearson FASTA sequence format.
|
|
102
|
|
103 IsPIRFastaSequenceFile
|
|
104 $Status = IsPIRFastaSequenceFile($SequenceFile);
|
|
105
|
|
106 Returns 1 or 0 based on whether *SequenceFile* corresponds to PIR
|
|
107 FASTA sequence format.
|
|
108
|
|
109 IsMSFSequenceFile
|
|
110 $Status = IsMSFSequenceFile($SequenceFile);
|
|
111
|
|
112 Returns 1 or 0 based on whether *SequenceFile* corresponds to MSF
|
|
113 sequence alignment format.
|
|
114
|
|
115 ReadSequenceFile
|
|
116 $SequenceDataMapRef = ReadSequenceFile($SequenceFile);
|
|
117
|
|
118 Reads *SequenceFile* and returns reference to a hash containing
|
|
119 following key/value pairs:
|
|
120
|
|
121 $SequenceDataMapRef->{IDs} - Array of sequence IDs
|
|
122 $SequenceDataMapRef->{Count} - Number of sequences
|
|
123 $SequenceDataMapRef->{Description}{$ID} - Sequence description
|
|
124 $SequenceDataMapRef->{Sequence}{$ID} - Sequence for a specific ID
|
|
125 $SequenceDataMapRef->{Sequence}{InputFileType} - File format
|
|
126
|
|
127 ReadClustalWSequenceFile
|
|
128 $SequenceDataMapRef = ReadClustalWSequenceFile($SequenceFile);
|
|
129
|
|
130 Reads ClustalW *SequenceFile* and returns reference to a hash
|
|
131 containing following key/value pairs as described in
|
|
132 ReadSequenceFile method.
|
|
133
|
|
134 ReadMSFSequenceFile
|
|
135 $SequenceDataMapRef = ReadMSFSequenceFile($SequenceFile);
|
|
136
|
|
137 Reads MSF *SequenceFile* and returns reference to a hash containing
|
|
138 following key/value pairs as described in ReadSequenceFile method.
|
|
139
|
|
140 ReadPIRFastaSequenceFile
|
|
141 $SequenceDataMapRef = ReadPIRFastaSequenceFile($SequenceFile);
|
|
142
|
|
143 Reads PIR FASTA *SequenceFile* and returns reference to a hash
|
|
144 containing following key/value pairs as described in
|
|
145 ReadSequenceFile method.
|
|
146
|
|
147 ReadPearsonFastaSequenceFile
|
|
148 $SequenceDataMapRef = ReadPearsonFastaSequenceFile($SequenceFile);
|
|
149
|
|
150 Reads Pearson FASTA *SequenceFile* and returns reference to a hash
|
|
151 containing following key/value pairs as described in
|
|
152 ReadSequenceFile method.
|
|
153
|
|
154 RemoveSequenceGaps
|
|
155 $SeqWithoutGaps = RemoveSequenceGaps($Sequence);
|
|
156
|
|
157 Removes gaps from *Sequence* and returns a sequence without any gaps.
|
|
158
|
|
159 RemoveSequenceAlignmentGapColumns
|
|
160 $NewAlignmentDataMapRef = RemoveSequenceAlignmentGapColumns(
|
|
161 $AlignmentDataMapRef);
|
|
162
|
|
163 Using input alignment data map ref containing following keys,
|
|
164 generate a new hash with same set of keys after residue columns
|
|
165 containing only gaps have been removed:
|
|
166
|
|
167 {IDs} : Array of IDs in order as they appear in file
|
|
168 {Count}: ID count
|
|
169 {Description}{$ID} : Description data
|
|
170 {Sequence}{$ID} : Sequence data
|
|
171
|
|
172 WritePearsonFastaSequenceFile
|
|
173 WritePearsonFastaSequenceFile($SequenceFileName, $SequenceDataRef,
|
|
174 [$MaxLength]);
|
|
175
|
|
176 Using sequence data specified via *SequenceDataRef*, write out a
|
|
177 Pearson FASTA sequence file. Optional argument *MaxLength* controls
|
|
178 maximum sequence length in each line; default is 80.
|
|
179
|
|
180 AUTHOR
|
|
181 Manish Sud <msud@san.rr.com>
|
|
182
|
|
183 SEE ALSO
|
|
184 PDBFileUtil.pm
|
|
185
|
|
186 COPYRIGHT
|
|
187 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
188
|
|
189 This file is part of MayaChemTools.
|
|
190
|
|
191 MayaChemTools is free software; you can redistribute it and/or modify it
|
|
192 under the terms of the GNU Lesser General Public License as published by
|
|
193 the Free Software Foundation; either version 3 of the License, or (at
|
|
194 your option) any later version.
|
|
195
|