annotate variant_effect_predictor/Bio/Assembly/Contig.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: Contig.pm,v 1.1 2002/11/04 11:50:11 heikki Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::Assembly::Contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 # Mostly based on Bio::SimpleAlign by Ewan Birney
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 # Cared for by Robson Francisco de Souza <rfsouza@citri.iq.usp.br>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 # Copyright Robson Francisco de Souza
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Bio::Assembly::Contig - Perl module to hold and manipulate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 sequence assembly contigs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 # Module loading
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 use Bio::Assembly::IO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 # Assembly loading methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 $aio = new Bio::Assembly::IO(-file=>"test.ace.1",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 -format=>'phrap');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 $assembly = $aio->next_assembly;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 foreach $contig ($assembly->all_contigs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 # do something
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 # OR, if you want to build the contig yourself,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 use Bio::Assembly::Contig;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 $c = Bio::Assembly::Contig->new(-id=>"1");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 $ls = Bio::LocatableSeq->new(-seq=>"ACCG-T",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 -id=>"r1",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 -alphabet=>'dna');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 $ls2 = Bio::LocatableSeq->new(-seq=>"ACA-CG-T",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 -id=>"r2",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 -alphabet=>'dna');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 $ls_coord = Bio::SeqFeature::Generic->new(-start=>3,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 -end=>8,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 -strand=>1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 $ls2_coord = Bio::SeqFeature::Generic->new(-start=>1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 -end=>8,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 -strand=>1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 $c->add_seq($ls);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 $c->add_seq($ls2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 $c->set_seq_coord($ls_coord,$ls);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 $c->set_seq_coord($ls2_coord,$ls2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 $con = Bio::LocatableSeq->new(-seq=>"ACACCG-T",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 -alphabet=>'dna');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 $c->set_consensus_sequence($con);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 $l = $c->change_coord('unaligned r2','ungapped consensus',6);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 print "6 in unaligned r2 => $l in ungapped consensus\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 A contig is a set of sequences, locally aligned to each other, so
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 that every sequence has overlapping regions with at least one sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 in the contig, such that a continuous series of overlapping sequences is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 formed, allowing the deduction of a consensus sequence which may be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 longer than any of the sequences from which it was deduced.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 In this documentation we refer to the overlapping sequences used to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 build the contig as "aligned sequences" and to the sequence deduced
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 from the overlap of aligned sequences as the "consensus". Methods to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 deduce the consensus sequence from aligned sequences were not yet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 implemented in this module, but it's possible to add a consensus
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 sequence deduced by other means, e.g, by the assembly program used to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 build the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 All aligned sequences in a Bio::Assembly::Contig must be Bio::Assembly::Locatable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 objects and have a unique ID. The unique ID restriction is due to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 nature of the module's internal data structures and is also a request
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 of some assembly programs. If two sequences with the same ID are added
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 to a contig, the first sequence added is replaced by the second one.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 =head2 Coordinate_systems
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 There are four base coordinate systems in Bio::Assembly::Contig. When
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 you need to access contig elements or data that exists on a certain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 range or location, you may be specifying coordinates in relation to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 different sequences, which may be either the contig consensus or one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 of the aligned sequences that were used to do the assembly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 =========================================================
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 Name | Referenced sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 ---------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 "gapped consensus" | Contig (with gaps)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 "ungapped consensus" | Contig (without gaps)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 "aligned $seqID" | sequence $seqID (with gaps)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 "unaligned $seqID" | sequence $seqID (without gaps)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 =========================================================
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 "gapped consensus" refers to positions in the aligned consensus
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 sequence, which is the consensus sequence including the gaps inserted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 to align it against the aligned sequences that were used to assemble
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 the contig. So, its limits are [ 1, (consensus length + number of gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 in consensus) ]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 "ungapped consensus" is a coordinate system based on the consensus
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 sequence, but excluding consensus gaps. This is just the coordinate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 system that you have when considering the consensus sequence alone,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 instead of aligned to other sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 "aligned $seqID" refers to locations in the sequence $seqID after
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 alignment of $seqID against the consensus sequence (reverse
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 complementing the original sequence, if needed). Coordinate 1 in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 "aligned $seqID" is equivalent to the start location (first base) of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 $seqID in the consensus sequence, just like if the aligned sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 $seqID was a feature of the consensus sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 "unaligned $seqID" is equivalent to a location in the isolated
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 sequence, just like you would have when considering the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 alone, out of an alignment. When changing coordinates from "aligned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 $seq2" to "unaligned $seq2", if $seq2 was reverse complemented when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 included in the alignment, the output coordinates will be reversed to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 fit that fact, i.e. 1 will be changed to length($seq2), 2 will be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 length($seq2)-1 and so on.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 An important note: when you change gap coordinates from a gapped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 system ("gapped consensus" or "aligned $seqID") to a system that does
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 not include gaps ("ungapped consensus" or "unaligned $seqID"), the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 position returned will be the first location before all gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 neighboring the input location.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 =head2 Feature_collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 Bio::Assembly::Contig stores much information about a contig in a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 Bio::SeqFeature::Collection object. Relevant information on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 alignment is accessed by selecting features based on their primary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 tags (e.g. all features which have a primary tag of the form
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 '_aligned_coord:$seqID', where $seqID is an aligned sequence ID, are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 coordinates for sequences in the contig alignment) and, by using
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 methods from Bio::SeqFeature::Collection, it's possible to select
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 features by overlap with other features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 We suggest that you use the primary tags of features as identifiers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 for feature classes. By convention, features with primary tags
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 starting with a '_' are generated by modules that populate the contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 data structure and return the contig object, maybe as part of an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 assembly object, e.g. drivers from the Bio::Assembly::IO set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 Features in the features collection may be associated with particular
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 aligned sequences. To obtain this, you must attach the sequence to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 feature, using attach_seq() from Bio::SeqFeatureI, before you add the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 feature to the feature collection. We also suggest to add the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 id to the primary tag, so that it is easy to select features for a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 particular sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 There is only one feature class that some methods in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 Bio::Assembly::Contig expect to find in the feature collection: features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 with primary tags of the form '_aligned_coord:$seqID', where $seqID is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 the aligned sequence id (like returned by $seq-E<gt>id()). These features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 describe the position (in "gapped consensus" coordinates) of aligned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 sequences, and the method set_seq_coord() automatically changes a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 feature's primary tag to this form whenever the feature is added to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 the collection by this method. Only two methods in Bio::Assembly::Contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 will not work unless there are features from this class:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 change_coord() and get_seq_coord().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 Other feature classes will be automatically available only when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 Bio::Assembly::Contig objects are created by a specific module. Such
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 feature classes are (or should be) documented in the documentation of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 the module which creates them, to which the user should refer.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 Bioperl modules. Send your comments and suggestions preferably to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 of the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 =head1 AUTHOR - Robson Francisco de Souza
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 rfsouza@citri.iq.usp.br
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 methods. Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 package Bio::Assembly::Contig;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 use vars qw(@ISA $VERSION);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 use Bio::Root::Root;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 use Bio::Align::AlignI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 use Bio::SeqFeature::Collection;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 use Bio::Seq::PrimaryQual;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 @ISA = qw(Bio::Root::Root Bio::Align::AlignI);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 =head1 Object creator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 Title : new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 Usage : my $contig = new Bio::Assembly::Contig();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 Function : Creates a new contig object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 Returns : Bio::Assembly::Contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 Args : -source => string representing the source
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 program where this contig came
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 -id => contig unique ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub new {
#-----------
    # Constructor for Bio::Assembly::Contig.
    #
    # Args    : -source => string naming the program that built the contig
    #           -id     => contig unique ID
    # Returns : the newly initialised contig object
    #
    # The object itself is created by the parent class constructor; this
    # method then fills in the contig-specific slots with their defaults.
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    my ($src, $id) = $self->_rearrange([qw(SOURCE ID)], @args);

    # Use the caller's values when given, otherwise fall back to defaults.
    # The source default must be guarded by $src, not $id: guarding it with
    # $id discarded an explicit -source whenever -id was missing and left
    # '_source' undefined whenever only -id was supplied.
    $self->{'_source'} = 'Unknown' unless ($src && $self->source($src)); # Program used to build the contig
    $self->{'_id'}     = 'NoName'  unless ($id  && $self->id($id));      # Alignment (contig) name

    # Bio::SimpleAlign derived fields (check which ones are needed for AlignI compatibility)
    $self->{'_elem'} = {};    # contig elements: aligned sequence objects (keyed by ID)
    $self->{'_order'} = {};   # store sequence order
    # $self->{'start_end_lists'} = {}; # References to entries in {'_seq'}. Keyed by seq ids.
    # $self->{'_dis_name'} = {}; # Display names for each sequence
    $self->{'_symbols'} = {}; # List of symbols

    # Contig specific slots
    $self->{'_consensus_sequence'} = undef;
    $self->{'_consensus_quality'} = undef;
    $self->{'_nof_residues'} = 0;
    $self->{'_nof_seqs'} = 0;
    # $self->{'_nof_segments'} = 0; # Let's not make it heavier than needed by now...
    $self->{'_sfc'} = Bio::SeqFeature::Collection->new();

    # Assembly specifics
    $self->{'_assembly'} = undef;       # Reference to a Bio::Assembly::Scaffold object, if contig belongs to one.
    $self->{'_strand'} = 0;             # Reverse (-1) or forward (1), if contig is in a scaffold. 0 otherwise
    $self->{'_neighbor_start'} = undef; # Will hold a reference to another contig
    $self->{'_neighbor_end'} = undef;   # Will hold a reference to another contig

    return $self; # success - we hope!
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 =head1 Assembly related methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 These methods exist to enable adding information about possible
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 relations among contigs, e.g. when you already have a scaffold for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 your assembly, describing the ordering of contigs in the final
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 assembly, but no sequences covering the gaps between neighboring
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 contigs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 =head2 source
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 Title : source
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 Usage : $contig->source($program);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 Function : Get/Set program used to build this contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 Returns : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 Argument : [optional] string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub source {
    # Get/Set the name of the program used to build this contig.
    # Argument : [optional] string; stored only when defined.
    # Returns  : the value currently held in the '_source' slot.
    my ($self, $source) = @_;

    if (defined $source) {
        $self->{'_source'} = $source;
    }

    return $self->{'_source'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 =head2 assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 Title : assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 Usage : $contig->assembly($assembly);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 Function : Get/Set assembly object for this contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 Returns : a Bio::Assembly::Scaffold object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 Argument : a Bio::Assembly::Scaffold object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub assembly {
    # Get/Set the assembly (scaffold) object this contig belongs to.
    # Argument : [optional] a Bio::Assembly::Scaffold object
    # Returns  : the value currently held in the '_assembly' slot
    # Calls $self->throw() when a defined argument is not a
    # Bio::Assembly::Scaffold.
    my ($self, $assembly) = @_;

    if (defined $assembly) {
        # Reject anything that is not a scaffold before storing it.
        $assembly->isa("Bio::Assembly::Scaffold")
            or $self->throw("Using non Bio::Assembly::Scaffold object when assign contig to assembly");

        $self->{'_assembly'} = $assembly;
    }

    return $self->{'_assembly'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 =head2 strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 Title : strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 Usage : $contig->strand($num);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 Function : Get/Set contig orientation in a scaffold/assembly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 It's equivalent to the strand property of sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 objects and sets whether the contig consensus should
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 be reversed and complemented before being added to a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 scaffold or assembly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 Argument : 1 if orientation is forward, -1 if reverse and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 0 if none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub strand {
    # Get/Set contig orientation in a scaffold/assembly.
    # Argument : [optional] 1 (forward), -1 (reverse) or 0 (none)
    # Returns  : the value currently held in the '_strand' slot
    # Calls $self->throw() when a defined argument is not 1, 0 or -1.
    my $self = shift;
    my $ori  = shift;

    # Validate and store only when a value was passed, so that a plain
    # $contig->strand() call works as a getter instead of throwing.
    if (defined $ori) {
        $self->throw("Contig strand must be either 1, -1 or 0")
            unless ($ori == 1 || $ori == 0 || $ori == -1);

        $self->{'_strand'} = $ori;
    }

    return $self->{'_strand'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 =head2 upstream_neighbor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 Title : upstream_neighbor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 Usage : $contig->upstream_neighbor($contig);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 Function : Get/Set a contig neighbor for the current contig when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 building a scaffold. The upstream neighbor is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 located before $contig first base
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 Returns : the upstream neighbor (Bio::Assembly::Contig), or undef if none was set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 Argument : Bio::Assembly::Contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# upstream_neighbor - get or set the contig stored as this contig's
# upstream neighbor ($self->{'_neighbor_start'}).
#
# With a defined argument, the argument must be an object that isa
# Bio::Assembly::Contig; otherwise an exception is raised through
# $self->throw(). Plain strings and unblessed references are rejected
# with that same exception instead of crashing on the isa() call.
# Without an argument the stored neighbor is left untouched.
#
# Returns the stored neighbor (undef if none was ever set).
sub upstream_neighbor {
    my ($self, $ref) = @_;

    if (defined $ref) {
        # eval guards the method call: isa() on an unblessed reference
        # would otherwise die with a raw Perl error.
        my $is_contig = ref($ref) && do {
            local $@;
            eval { $ref->isa("Bio::Assembly::Contig") };
        };
        $self->throw("Trying to assign a non Bio::Assembly::Contig object to upstream contig")
            unless ($is_contig);
        $self->{'_neighbor_start'} = $ref;
    }

    return $self->{'_neighbor_start'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 =head2 downstream_neighbor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 Title : downstream_neighbor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 Usage : $contig->downstream_neighbor($contig);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 Function : Get/Set a contig neighbor for the current contig when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 building a scaffold. The downstream neighbor is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 located after $contig last base
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 Returns : the downstream neighbor (Bio::Assembly::Contig), or undef if none was set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 Argument : Bio::Assembly::Contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# downstream_neighbor - get or set the contig stored as this contig's
# downstream neighbor ($self->{'_neighbor_end'}).
#
# With a defined argument, the argument must be an object that isa
# Bio::Assembly::Contig; otherwise an exception is raised through
# $self->throw(). Plain strings and unblessed references are rejected
# with that same exception instead of crashing on the isa() call.
# Without an argument the stored neighbor is left untouched.
#
# Returns the stored neighbor (undef if none was ever set).
sub downstream_neighbor {
    my ($self, $ref) = @_;

    if (defined $ref) {
        # eval guards the method call: isa() on an unblessed reference
        # would otherwise die with a raw Perl error.
        my $is_contig = ref($ref) && do {
            local $@;
            eval { $ref->isa("Bio::Assembly::Contig") };
        };
        $self->throw("Trying to assign a non Bio::Assembly::Contig object to downstream contig")
            unless ($is_contig);
        $self->{'_neighbor_end'} = $ref;
    }

    return $self->{'_neighbor_end'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 =head1 Contig feature collection methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 =head2 add_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 Title : add_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 Usage : $contig->add_features($feat,$flag)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 Add an array of features to the contig feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 collection. The consensus sequence may be attached to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 added feature, if $flag is set to 1. If $flag is 0 and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 the feature is attached to one of the contig aligned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 sequences, the feature is registered as an aligned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 sequence feature. If $flag is 0 and the feature is not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 attached to any sequence in the contig, the feature is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 simply added to the feature collection and no attachment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 or registration is made.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 Note: You must attach aligned sequences to their features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 prior to calling add_features, otherwise you won't be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 able to access the feature through get_seq_feat_by_tag()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 Returns : number of features added.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 $feat : A reference to an array of Bio::SeqFeatureI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 $flag : boolean - true if consensus sequence object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 should be attached to this feature, false if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 no consensus attachment should be made.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 Default: false.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# add_features - add the features in the array referenced by $args to
# the contig's feature collection ($self->{'_sfc'}).
#
# $flag false (the default): every feature whose entire_seq() is true
#   is also recorded under $self->{'_elem'}{<seq id>}{'_feat'}{<primary
#   tag>}; a warning is issued when <seq id> is not yet a key of
#   $self->{'_elem'}, but the feature is recorded anyway.
# $flag true: when a consensus sequence is stored on the contig, it is
#   attached to every feature that has no sequence of its own.
#
# Returns whatever the collection's add_features() returns, evaluated
# in scalar context.
sub add_features {
    my ($self, $args, $flag) = @_;

    $flag = 0 unless (defined $flag);

    if (!$flag) {
        # Register shortcuts for features that belong to aligned sequences
        for my $feature (@$args) {
            my $owner = $feature->entire_seq() or next;
            my $owner_id = $owner->id() || $owner->display_id || $owner->primary_id;

            unless (exists $self->{'_elem'}{$owner_id}) {
                $self->warn("Adding contig feature attached to unknown sequence $owner_id!");
            }

            my $primary_tag = $feature->primary_tag;
            $self->{'_elem'}{$owner_id}{'_feat'}{$primary_tag} = $feature;
        }
    }
    elsif (defined $self->{'_consensus_sequence'}) {
        # Attach the consensus to features that carry no sequence yet
        for my $feature (@$args) {
            $feature->attach_seq($self->{'_consensus_sequence'})
                unless (defined $feature->seq);
        }
    }

    # Hand the whole batch over to the feature collection
    return scalar $self->{'_sfc'}->add_features($args);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 =head2 remove_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 Title : remove_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 Usage : $contig->remove_features(@feat)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 Function : Remove an array of contig features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 Returns : number of features removed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 Argument : An array of Bio::SeqFeatureI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# remove_features - remove the given features from the contig's feature
# collection ($self->{'_sfc'}) and drop the per-sequence shortcuts that
# point at them.
#
# For every feature whose entire_seq() is true, the shortcut table
# $self->{'_elem'}{<seq id>}{'_feat'} is consulted under two keys: the
# feature's full primary tag (the key add_features() registers under)
# and, for backward compatibility, the tag with a trailing ":<seq id>"
# removed. An entry is deleted only when it holds this very feature.
# Sequences that are not present in $self->{'_elem'} are skipped, so
# no entry is created for them as a side effect.
#
# Returns whatever the collection's remove_features() returns.
sub remove_features {
    my ($self, @args) = @_;

    # Removing shortcuts for aligned sequence features
    foreach my $feat (@args) {
        my $seq = $feat->entire_seq() or next;
        my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;

        # Do not autovivify an element for a sequence we do not hold
        next unless (exists $self->{'_elem'}{$seqID});
        my $shortcuts = $self->{'_elem'}{$seqID}{'_feat'};
        next unless (ref($shortcuts) eq 'HASH');

        my $tag = $feat->primary_tag;
        # \Q..\E: sequence IDs may contain regex metacharacters (".", "|", "+")
        (my $short_tag = $tag) =~ s/:\Q$seqID\E$//;

        foreach my $key ($tag, $short_tag) {
            delete $shortcuts->{$key}
                if (exists $shortcuts->{$key} && $shortcuts->{$key} eq $feat);
        }
    }

    return $self->{'_sfc'}->remove_features(\@args);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 =head2 get_features_collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 Title : get_features_collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 Usage : $contig->get_features_collection()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 Function : Get the collection of all contig features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 Returns : Bio::SeqFeature::Collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 Argument : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# get_features_collection - accessor for the contig's feature
# collection. Takes no arguments and returns the object stored in
# $self->{'_sfc'}.
sub get_features_collection {
    my ($self) = @_;

    my $collection = $self->{'_sfc'};
    return $collection;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 =head1 Coordinate system's related methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 See L<Coordinate_Systems> above.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 =head2 change_coord
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 Title : change_coord
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 Usage : $contig->change_coord($in,$out,$query)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 Change coordinate system for $query. This method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 transforms locations between coordinate systems described
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 in section "Coordinate Systems" of this document.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 Note: this method will throw an exception when changing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 coordinates between "ungapped consensus" and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 systems if consensus sequence was not set. It will also
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 throw exceptions when changing coordinates among aligned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 sequence, either with or without gaps, and other systems
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 if sequence locations were not set with set_seq_coord().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 $in : [string] input coordinate system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 $out : [string] output coordinate system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 $query : [integer] a position in a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# change_coord - convert position $query from coordinate system
# $type_in to coordinate system $type_out.
#
# Recognised system names, as matched by the code below:
#   'gapped consensus', 'ungapped consensus',
#   'aligned <seqID>',  'unaligned <seqID>'
#
# Arguments : $type_in  - [string] input coordinate system
#             $type_out - [string] output coordinate system
#             $query    - [integer] position in the input system
# Returns   : the position expressed in the output system.
# Throws    : via $self->throw() when
#             - either system name is undefined or not one of the forms
#               above (checked up front, before any sequence lookup);
#             - the location of a named read is not available;
#             - 'ungapped consensus' is converted directly to or from
#               'gapped consensus' while no consensus sequence is stored;
#             - the pair of systems matches no conversion rule.
#
# Composite conversions are carried out by recursive calls that go
# through the 'gapped consensus' (or the read's 'aligned') system.
sub change_coord {
    my ($self, $type_in, $type_out, $query) = @_;

    # Extract the second word of a system name ("consensus" or a read
    # ID). Returns undef when the name is not a recognised system.
    my $second_word_of = sub {
        my $type = shift;
        return undef unless (defined $type);
        my $word = ( split(' ', $type) )[1];
        return $word if ($type eq 'gapped consensus' || $type eq 'ungapped consensus');
        return $word if ($type =~ /^(?:un)?aligned / && defined $word);
        return undef;
    };

    # Parsing arguments
    my $in_ID  = $second_word_of->($type_in);
    my $out_ID = $second_word_of->($type_out);
    unless (defined $in_ID && defined $out_ID) {
        my ($shown_in, $shown_out) =
            map { defined $_ ? $_ : '(undef)' } ($type_in, $type_out);
        $self->throw("Unknow coordinate system. Args: $shown_in, $shown_out.");
    }

    # Loading read locations (an exception is thrown if the location of
    # $in_ID or $out_ID is not found)
    my ($read_in, $read_out) = (undef, undef);
    if ($in_ID ne 'consensus') {
        $read_in = $self->get_seq_coord( $self->get_seq_by_name($in_ID) );
        $self->throw("Can't change coordinates without sequence location for $in_ID")
            unless (defined $read_in);
    }
    if ($out_ID ne 'consensus') {
        $read_out = $self->get_seq_coord( $self->get_seq_by_name($out_ID) );
        $self->throw("Can't change coordinates without sequence location for $out_ID")
            unless (defined $read_out);
    }

    # Classify both ends once; the read-based kinds additionally
    # require that the read location was loaded above.
    my $in_gapped     = ($type_in  eq 'gapped consensus');
    my $in_ungapped   = ($type_in  eq 'ungapped consensus');
    my $in_aligned    = (defined($read_in)  && $type_in  =~ /^aligned /);
    my $in_unaligned  = (defined($read_in)  && $type_in  =~ /^unaligned /);
    my $out_gapped    = ($type_out eq 'gapped consensus');
    my $out_ungapped  = ($type_out eq 'ungapped consensus');
    my $out_aligned   = (defined($read_out) && $type_out =~ /^aligned /);
    my $out_unaligned = (defined($read_out) && $type_out =~ /^unaligned /);

    # Performing transformation between coordinates

    # Transformations between contig padded and contig unpadded
    if ($in_gapped && $out_ungapped) {
        $self->throw("Can't use ungapped consensus coordinates without a consensus sequence")
            unless (defined $self->{'_consensus_sequence'});
        $query = _padded_unpadded($self->{'_consensus_gaps'}, $query);
    }
    elsif ($in_ungapped && $out_gapped) {
        $self->throw("Can't use ungapped consensus coordinates without a consensus sequence")
            unless (defined $self->{'_consensus_sequence'});
        $query = _unpadded_padded($self->{'_consensus_gaps'}, $query);
    }

    # Transformations between contig (padded) and read (padded):
    # a plain shift by the read's start position
    elsif ($in_gapped && $out_aligned) {
        $query = $query - $read_out->start() + 1;
    }
    elsif ($in_aligned && $out_gapped) {
        $query = $query + $read_in->start() - 1;
    }

    # Transformations between contig (unpadded) and read (padded)
    elsif ($in_ungapped && $out_aligned) {
        $query = $self->change_coord('ungapped consensus','gapped consensus',$query);
        $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
    }
    elsif ($in_aligned && $out_ungapped) {
        $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
        $query = $self->change_coord('gapped consensus','ungapped consensus',$query);
    }

    # Transformations between seq $read_in padded and seq $read_out padded
    elsif ($in_aligned && $out_aligned) {
        $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
        $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
    }

    # Transformations between seq $read_in padded and seq $read_out unpadded
    elsif ($in_aligned && $out_unaligned) {
        if ($read_in ne $read_out) {
            $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
            $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
        }
        my $list_out = $self->{'_elem'}{$out_ID}{'_gaps'};
        $query = _padded_unpadded($list_out, $query);
        # Changing read orientation if read was reverse complemented when aligned
        if ($read_out->strand == -1) {
            my ($length) = $read_out->length();
            $length = $length - _nof_gaps($list_out, $length);
            $query = $length - $query + 1;
        }
    }
    elsif ($in_unaligned && $out_aligned) {
        my $list_in = $self->{'_elem'}{$in_ID}{'_gaps'};
        # Changing read orientation if read was reverse complemented when aligned
        if ($read_in->strand == -1) {
            my ($length) = $read_in->length();
            $length = $length - _nof_gaps($list_in, $length);
            $query = $length - $query + 1;
        }
        $query = _unpadded_padded($list_in, $query);
        if ($read_in ne $read_out) {
            $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
            $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
        }
    }

    # Transformations between seq $read_in unpadded and seq $read_out unpadded
    elsif ($in_unaligned && $out_unaligned) {
        $query = $self->change_coord("unaligned $in_ID","aligned $out_ID",$query);
        $query = $self->change_coord("aligned $out_ID","unaligned $out_ID",$query);
    }

    # Transformations between contig (padded) and read (unpadded)
    elsif ($in_gapped && $out_unaligned) {
        $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
        $query = $self->change_coord("aligned $out_ID","unaligned $out_ID",$query);
    }
    elsif ($in_unaligned && $out_gapped) {
        $query = $self->change_coord("unaligned $in_ID","aligned $in_ID",$query);
        $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
    }

    # Transformations between contig (unpadded) and read (unpadded)
    elsif ($in_ungapped && $out_unaligned) {
        $query = $self->change_coord('ungapped consensus','gapped consensus',$query);
        $query = $self->change_coord('gapped consensus',"unaligned $out_ID",$query);
    }
    elsif ($in_unaligned && $out_ungapped) {
        $query = $self->change_coord("unaligned $in_ID",'gapped consensus',$query);
        $query = $self->change_coord('gapped consensus','ungapped consensus',$query);
    }

    # No rule matched (e.g. identical systems, or 'aligned consensus')
    else {
        $self->throw("Unknow coordinate system. Args: $type_in, $type_out.");
        # NOTE(review): presumably never reached because throw() dies;
        # kept so the result stays undef should throw() ever return.
        $query = undef;
    }

    return $query;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675 =head2 get_seq_coord
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677 Title : get_seq_coord
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 Usage : $contig->get_seq_coord($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 Function : Get "gapped consensus" location for aligned sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 Returns : Bio::SeqFeature::Generic for coordinates or undef.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 A warning is printed if sequence coordinates were not set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 Argument : Bio::LocatableSeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# get_seq_coord - look up the location feature registered for an
# aligned sequence.
#
# Argument : $seq, which must be a reference that isa Bio::LocatableSeq;
#            anything else raises an exception through $self->throw().
# Returns  : the value stored under
#            $self->{'_elem'}{<seq id>}{'_feat'}{"_aligned_coord:<seq id>"},
#            or undef (after a warning through $self->warn()) when the
#            sequence is not in the contig or has no such entry.
sub get_seq_coord {
    my ($self,$seq) = @_;

    $self->throw("$seq is not a Bio::LocatableSeq")
        unless (ref $seq && $seq->isa('Bio::LocatableSeq'));

    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
    my $coord_tag = "_aligned_coord:$seqID";

    # The sequence must already be an element of this contig
    if (!exists( $self->{'_elem'}{$seqID} )) {
        $self->warn("No such sequence ($seqID) in contig ".$self->id);
        return undef;
    }

    # ... and its location must have been registered
    if (!exists( $self->{'_elem'}{$seqID}{'_feat'}{$coord_tag} )) {
        $self->warn("Location not set for sequence ($seqID) in contig ".$self->id);
        return undef;
    }

    return $self->{'_elem'}{$seqID}{'_feat'}{$coord_tag};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 =head2 set_seq_coord
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 Title : set_seq_coord
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 Usage : $contig->set_seq_coord($feat,$seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 Set "gapped consensus" location for an aligned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 sequence. If the sequence was previously added using
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 add_seq, its coordinates are changed/set. Otherwise,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715 add_seq is called and the sequence is added to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 contig.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718 Returns : Bio::SeqFeature::Generic for old coordinates or undef.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720 $feat : a Bio::SeqFeature::Generic object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 representing a location for the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 aligned sequence, in "gapped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 consensus" coordinates.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725 Note: the original feature primary tag will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 be lost.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 $seq : a Bio::LocatableSeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Set (or replace) the "gapped consensus" location of an aligned read.
#   $feat : Bio::SeqFeature::Generic with defined start and end; its primary
#           tag is overwritten with "_aligned_coord:<read id>".
#   $seq  : Bio::LocatableSeq the coordinates belong to.
# A different sequence object already stored under the same id is removed
# first; the read is then (re)added with add_seq and the feature is stored
# both in the per-read table and via add_features.
# Returns whatever $self->add_features returns.
# Invalid arguments are reported through $self->throw.
sub set_seq_coord {
    my ($self,$feat,$seq) = @_;

    if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
        $self->throw("Unable to process non locatable sequences [".ref($seq)."]");
    }

    # Complaining about inadequate feature object. The ref() guard makes an
    # undefined or non-reference $feat raise this exception instead of dying
    # inside the ->isa call with an unrelated Perl error.
    $self->throw("Coordinates must be a Bio::SeqFeature::Generic object!")
        unless ( ref $feat && $feat->isa("Bio::SeqFeature::Generic") );
    $self->throw("Sequence coordinates must have an end!")
        unless (defined $feat->end);
    $self->throw("Sequence coordinates must have a start!")
        unless (defined $feat->start);

    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;

    # Same id but a different object: drop the old sequence before adding.
    if (exists( $self->{'_elem'}{$seqID} ) &&
        exists( $self->{'_elem'}{$seqID}{'_seq'} ) &&
        defined( $self->{'_elem'}{$seqID}{'_seq'} ) &&
        ($seq ne $self->{'_elem'}{$seqID}{'_seq'}) ) {
        $self->warn("Replacing sequence $seqID\n");
        $self->remove_seq($self->{'_elem'}{$seqID}{'_seq'});
    }
    $self->add_seq($seq);

    # Remove previous coordinates, if any.
    # NOTE(review): this passes the new feature, not the one previously
    # stored under "_aligned_coord:$seqID" -- confirm that is intended.
    $self->remove_features($feat);

    # Add new Bio::SeqFeature::Generic
    $feat->add_tag_value('contig',$self->id)
        unless ( $feat->has_tag('contig') );
    $feat->primary_tag("_aligned_coord:$seqID");
    $feat->attach_seq($seq);
    $self->{'_elem'}{$seqID}{'_feat'}{"_aligned_coord:$seqID"} = $feat;
    $self->add_features([ $feat ]);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 =head1 Bio::Assembly::Contig consensus methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 =head2 set_consensus_sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 Title : set_consensus_sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 Usage : $contig->set_consensus_sequence($seq)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 Function : Set the consensus sequence object for this contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 Returns : consensus length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 Argument : Bio::LocatableSeq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Install the consensus sequence object of this contig.
#   $seq : Bio::LocatableSeq holding the (gapped) consensus.
# The sequence is re-anchored to span 1..length, a fresh gap registry is
# created in $self->{'_consensus_gaps'} and handed, together with the
# sequence string, to $self->_register_gaps.
# Returns the consensus length as reported by $seq->length.
# Anything that is not a Bio::LocatableSeq is reported through $self->throw.
sub set_consensus_sequence {
    my $self = shift;
    my $seq  = shift;

    # ref() guard: an undefined or non-reference argument must raise the
    # exception below rather than die inside the ->isa call.
    $self->throw("Consensus sequence must be a Bio::LocatableSeq!")
        unless ( ref $seq && $seq->isa("Bio::LocatableSeq") );

    my $con_len = $seq->length;
    $seq->start(1); $seq->end($con_len);

    $self->{'_consensus_gaps'} = []; # Consensus Gap registry
    $self->_register_gaps($seq->seq,
                          $self->{'_consensus_gaps'});
    $self->{'_consensus_sequence'} = $seq;

    return $con_len;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799 =head2 set_consensus_quality
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 Title : set_consensus_quality
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802 Usage : $contig->set_consensus_quality($qual)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803 Function : Set the quality object for consensus sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 Returns : nothing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 Argument : Bio::Seq::QualI object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Attach a quality object to the consensus sequence of this contig.
#   $qual : Bio::Seq::QualI object.
# The consensus sequence must have been set beforehand. Both a wrong
# argument type and a missing consensus sequence are reported through
# $self->throw.
sub set_consensus_quality {
    my $self = shift;
    my $qual = shift;

    # ref() guard: an undefined or non-reference argument must raise the
    # exception below rather than die inside the ->isa call.
    $self->throw("Consensus quality must be a Bio::Seq::QualI object!")
        unless ( ref $qual && $qual->isa("Bio::Seq::QualI") );

    $self->throw("Consensus quality can't be added before you set the consensus sequence!")
        unless (defined $self->{'_consensus_sequence'});

    $self->{'_consensus_quality'} = $qual;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822 =head2 get_consensus_length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824 Title : get_consensus_length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825 Usage : $contig->get_consensus_length()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826 Function : Get consensus sequence length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828 Argument : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Report the length of the consensus sequence by delegating to the stored
# consensus object's length() method. Takes no arguments.
sub get_consensus_length {
    my ($self) = @_;

    my $consensus = $self->{'_consensus_sequence'};
    return $consensus->length();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838 =head2 get_consensus_sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840 Title : get_consensus_sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841 Usage : $contig->get_consensus_sequence()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842 Function : Get a reference to the consensus sequence object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 for this contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844 Returns : Bio::SeqI object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845 Argument : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Hand back the consensus sequence object stored in this contig (undef when
# none has been stored). Extra arguments are ignored.
sub get_consensus_sequence {
    my $self = shift;

    return $self->{'_consensus_sequence'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855 =head2 get_consensus_quality
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857 Title : get_consensus_quality
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858 Usage : $contig->get_consensus_quality()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859 Function : Get a reference to the consensus quality object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860 for this contig.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861 Returns : A Bio::Seq::QualI object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862 Argument : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Hand back the consensus quality object stored in this contig (undef when
# none has been stored). Extra arguments are ignored.
sub get_consensus_quality {
    my $self = shift;

    return $self->{'_consensus_quality'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872 =head1 Bio::Assembly::Contig aligned sequences methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874 =head2 set_seq_qual
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876 Title : set_seq_qual
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877 Usage : $contig->set_seq_qual($seq,$qual);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878 Function : Adds quality to an aligned sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879 Returns : nothing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880 Argument : a Bio::LocatableSeq object and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881 a Bio::Seq::QualI object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883 See L<Bio::LocatableSeq> for more information.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Attach per-base quality values to an aligned read, padding them so that
# every column of the gapped read (up to its last base) has a score.
#   $seq  : Bio::LocatableSeq already added to the contig and given
#           coordinates with set_seq_coord.
#   $qual : Bio::Seq::QualI object; $qual->qual() is read as an array
#           reference of scores for the ungapped read.
# The padded scores are stored as a new Bio::Seq::PrimaryQual under the
# read's '_qual' slot. Invalid arguments, an unknown read, or a read without
# coordinates are reported through $self->throw.
sub set_seq_qual {
    my ($self,$seq,$qual) = @_;

    if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
        # Single concatenated message: throw() is called with one string
        # everywhere else in this file.
        $self->throw("Unable to process non locatable sequences [".ref($seq)."]");
    }
    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;

    # ref() guard: an undefined or non-reference $qual must raise this
    # exception rather than die inside the ->isa call.
    $self->throw("Sequence quality must be a Bio::Seq::QualI object!")
        unless ( ref $qual && $qual->isa("Bio::Seq::QualI") );

    # Check each level separately so that an unknown read is not silently
    # registered in $self->{'_elem'} by the test itself.
    $self->throw("Use add_seq first: aligned sequence qualities can't be added before you load the sequence!")
        unless ( exists $self->{'_elem'}{$seqID} &&
                 exists $self->{'_elem'}{$seqID}{'_seq'} );

    my $coord = $self->get_seq_coord($seq);
    $self->throw("Use set_seq_coord first: aligned sequence qualities can't be added before you add coordinates for the sequence!")
        unless (defined $coord);

    # Adding gaps to quality object
    my $sequence = $self->{'_elem'}{$seqID}{'_seq'}->seq();

    # Work on a copy: reversing the array returned by $qual->qual() in place
    # would alter the caller's quality object.
    my @scores = @{ $qual->qual() || [] };
    my $strand = $coord->strand();
    @scores = reverse(@scores) if (defined $strand && $strand == -1);

    my @quality = ();
    my $i = 0;   # index of the next score not yet consumed
    my $j = 0;   # current column in the gapped read
    while ($i <= $#scores) {
        if (substr($sequence,$j,1) eq '-') {
            # A gap gets the average of its flanking bases: the last score
            # consumed ($i-1, or 0 when the gap precedes every base) and the
            # next score to be consumed ($i).
            my $previous = ($i == 0) ? 0 : $scores[$i-1];
            my $next     = $scores[$i];
            push(@quality,int( ($previous+$next)/2 ));
        } else {
            push(@quality,$scores[$i]);
            $i++;
        }
        $j++;
    }

    $self->{'_elem'}{$seqID}{'_qual'} = Bio::Seq::PrimaryQual->new(-qual=>join(" ",@quality),
                                                                   -id=>$seqID);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
929
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
930 =head2 get_seq_ids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
931
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
932 Title : get_seq_ids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
933 Usage : $contig->get_seq_ids(-start=>$start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
934 -end=>$end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935 -type=>"gapped A0QR67B08.b");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936 Function : Get list of sequence IDs overlapping interval [$start, $end]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
937 The default interval is [1,$contig->length]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
938 Default coordinate system is "gapped consensus"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
939 Returns : An array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
940 Argument : A hash with optional elements:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
941 -start : consensus subsequence start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
942 -end : consensus subsequence end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
943 -type : the coordinate system type for $start and $end arguments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
944 Coordinate systems available are:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945 "gapped consensus" : consensus coordinates with gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
946 "ungapped consensus" : consensus coordinates without gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
947 "aligned $ReadID" : read $ReadID coordinates with gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
948 "unaligned $ReadID" : read $ReadID coordinates without gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# List the ids of the reads in this contig.
#   -start, -end : optional interval; only honoured when both are given.
#   -type        : coordinate system of -start/-end. Any value other than
#                  'gapped consensus' is converted with change_coord.
# With an interval, returns the ids of the reads whose "_aligned_coord:"
# features are reported for that range by the feature collection in
# $self->{'_sfc'}. Without one, returns every value of $self->{'_order'},
# ordered by string comparison of its keys.
sub get_seq_ids {
    my ($self, @args) = @_;

    my ($type, $start, $end) = $self->_rearrange([qw(TYPE START END)], @args);

    # Entire aligned sequences list
    unless ( defined($start) && defined($end) ) {
        return map { $self->{'_order'}{$_} } sort( keys %{ $self->{'_order'} } );
    }

    # Express the interval in gapped consensus coordinates.
    if ( defined($type) && ($type ne 'gapped consensus') ) {
        $start = $self->change_coord($type,'gapped consensus',$start);
        $end   = $self->change_coord($type,'gapped consensus',$end);
    }

    my @in_range = $self->{'_sfc'}->features_in_range(-start=>$start,
                                                      -end=>$end,
                                                      -contain=>0,
                                                      -strandmatch=>'ignore');

    # Keep only the read-location features, then map them to read ids.
    my @ids = map  { $_->entire_seq->id }
              grep { $_->isa("Bio::SeqFeature::Generic") &&
                     ($_->primary_tag =~ /^_aligned_coord:/) }
              @in_range;
    return @ids;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979 =head2 get_seq_feat_by_tag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
980
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
981 Title : get_seq_feat_by_tag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
982 Usage : $seq = $contig->get_seq_feat_by_tag($seq,"_aligned_coord:$seqID")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
983 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
984
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
985 Get a sequence feature based on its primary_tag.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
986 When you add
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
987
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
988 Returns : a Bio::SeqFeature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989 Argument : a Bio::LocatableSeq and a string (feature primary tag)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Fetch the feature stored for a read under a given primary tag.
#   $seq : Bio::LocatableSeq identifying the read.
#   $tag : primary tag the feature was stored under
#          (e.g. "_aligned_coord:$seqID").
# Returns the stored feature, or undef when the read or the tag is unknown.
# Anything that is not a Bio::LocatableSeq is reported through $self->throw.
sub get_seq_feat_by_tag {
    my ($self,$seq,$tag) = @_;

    if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
        # Single concatenated message: throw() is called with one string
        # everywhere else in this file.
        $self->throw("Unable to process non locatable sequences [".ref($seq)."]");
    }
    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;

    # Check each level before descending: a plain nested lookup would create
    # an empty $self->{'_elem'}{$seqID} entry and make an unknown read look
    # registered to later "exists" tests.
    return undef unless ( exists $self->{'_elem'}{$seqID} &&
                          exists $self->{'_elem'}{$seqID}{'_feat'} );

    return $self->{'_elem'}{$seqID}{'_feat'}{$tag};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004 =head2 get_seq_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006 Title : get_seq_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007 Usage : $seq = $contig->get_seq_by_name('Seq1')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008 Function : Gets a sequence based on its id.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009 Returns : a Bio::LocatableSeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010 (an exception is thrown if name is not found)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 Argument : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Fetch the sequence object stored under a read id.
#   $seqID : id the read was stored under.
# Returns the stored sequence object. An unknown id is reported through
# $self->throw.
sub get_seq_by_name {
    my $self = shift;
    my ($seqID) = @_;

    # Check each level before descending: testing the nested '_seq' key
    # directly would create an empty $self->{'_elem'}{$seqID} entry, so a
    # caller that traps the exception would be left with a phantom read.
    unless ( exists $self->{'_elem'}{$seqID} &&
             exists $self->{'_elem'}{$seqID}{'_seq'} ) {
        $self->throw("Could not find sequence $seqID in contig ".$self->id);
        return undef;
    }

    return $self->{'_elem'}{$seqID}{'_seq'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1026
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1027 =head2 get_qual_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1028
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1029 Title : get_qual_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1030 Usage : $seq = $contig->get_qual_by_name('Seq1')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1031 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1032
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1033 Gets Bio::Seq::QualI object for a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1034 through its id ( as given by $qual->id() ).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1035
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1036 Returns : a Bio::Seq::QualI object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1037 undef if name is not found
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1038 Argument : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Fetch the quality object stored under a read id.
#   $seqID : id the read was stored under.
# Returns the stored quality object, or undef (after a warning through
# $self->warn) when no quality is stored for that id.
sub get_qual_by_name {
    my $self = shift;
    my ($seqID) = @_;

    # Check each level before descending: testing the nested '_qual' key
    # directly would create an empty $self->{'_elem'}{$seqID} entry and make
    # an unknown read look registered to later "exists" tests.
    unless ( exists $self->{'_elem'}{$seqID} &&
             exists $self->{'_elem'}{$seqID}{'_qual'} ) {
        $self->warn("Could not find quality for $seqID in contig!");
        return undef;
    }

    return $self->{'_elem'}{$seqID}{'_qual'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054 =head1 Bio::Align::AlignI compatible methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1056 =head2 Modifier methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1057
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1058 These methods modify the MSE by adding, removing or shuffling complete
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059 sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061 =head2 add_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063 Title : add_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064 Usage : $contig->add_seq($newseq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067 Adds a sequence to the contig. *Does*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068 *not* align it - just adds it to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069 hashes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1071 Returns : nothing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1072 Argument : a Bio::LocatableSeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1073
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1074 See L<Bio::LocatableSeq> for more information.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1075
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Adds a locatable sequence to the contig's internal hashes. The sequence is
# NOT aligned, it is only registered.
#
# Argument : $seq - an object for which isa('Bio::LocatableSeq') is true
# Returns  : 1
# Errors   : calls $self->throw when $seq is not such an object
#
# Bookkeeping touched on $self: _elem (per-sequence record holding _seq,
# _feat and _gaps), _order, _nof_seqs, _nof_residues and _symbols.
# Side effect on $seq: its start/end are reset to 1 and its full length.
sub add_seq {
    my ($self, $seq) = @_;

    if ( !ref $seq || !$seq->isa('Bio::LocatableSeq') ) {
        # Hand throw() one complete message string; the earlier call passed
        # the three message fragments as separate list items.
        $self->throw( "Unable to process non locatable sequences ["
                      . ref($seq) . "]" );
    }

    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;

    # Create the per-sequence record only when it does not exist yet. The
    # lookup has to use the same '_elem' key that is written to; testing a
    # different key wipes any existing record, so the duplicate and
    # replacement checks below could never fire.
    $self->{'_elem'}{$seqID} = {} unless exists $self->{'_elem'}{$seqID};
    my $elem = $self->{'_elem'}{$seqID};

    my $previous  = $elem->{'_seq'};
    my $replacing = ref $previous;

    if ( $replacing && $seq eq $previous ) {
        $self->warn("Adding sequence $seqID, which has already been added");
    }

    # Our locatable sequences are always considered to be complete sequences
    $seq->start(1);
    $seq->end( $seq->length() );

    my $alphabet = lc( $seq->alphabet );
    $self->warn( "Adding non-nucleotidic sequence " . $seqID )
        if $alphabet ne 'dna' && $alphabet ne 'rna';

    # Build the symbol list for this sequence; gap and missing/match
    # characters are pruned out later, when the symbol list is actually
    # asked for.
    if ( defined $seq->seq ) {
        $self->{'_symbols'}{$_} = 1 for split //, $seq->seq;
    }
    else {
        $self->{'_symbols'} = {};
    }

    if ($replacing) {
        $self->warn("Replacing one sequence [$seqID]\n");

        # A replaced sequence keeps its existing slot in _order and is not
        # counted a second time. Take back the residues that were counted
        # for the old sequence, using the same length-minus-gaps formula
        # that is applied to the new sequence below.
        # NOTE(review): assumes _register_gaps only fills the array it is
        # given and returns the number of gaps -- confirm against its
        # definition elsewhere in this file.
        $self->{'_nof_residues'} -=
            $previous->length - $self->_register_gaps( $previous->seq, [] );
    }
    else {
        # New sequence: it takes the next 1-based position in the ordering.
        my $seq_no = ++$self->{'_nof_seqs'};
        $self->{'_order'}->{$seq_no} = $seqID;
    }

    $elem->{'_seq'}  = $seq;
    $elem->{'_feat'} = {};
    $elem->{'_gaps'} = [];
    my $nofgaps = $self->_register_gaps( $seq->seq, $elem->{'_gaps'} );

    # Updating residue count
    $self->{'_nof_residues'} += $seq->length - $nofgaps;

    return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1134 =head2 remove_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1136 Title : remove_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1137 Usage : $contig->remove_seq($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1138 Function : Removes a single sequence from an alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1139 Returns : 1 on success, 0 otherwise
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1140 Argument : a Bio::LocatableSeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1141
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1142 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1143
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Removes a single sequence, and every record kept for it, from the contig.
#
# Argument : $seq - an object for which isa('Bio::LocatableSeq') is true
# Returns  : 1 on success, 0 (after a warning) when no sequence with that
#            ID is registered
# Errors   : calls $self->throw when $seq is not such an object
sub remove_seq {
    my ($self, $seq) = @_;

    if ( !ref $seq || !$seq->isa('Bio::LocatableSeq') ) {
        # Hand throw() one complete message string; the earlier call passed
        # the three message fragments as separate list items.
        $self->throw( "Unable to process non locatable sequences ["
                      . ref($seq) . "]" );
    }

    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
    unless ( exists $self->{'_elem'}{$seqID} ) {
        $self->warn("No sequence named $seqID [$seq]");
        return 0;
    }
    my $elem = $self->{'_elem'}{$seqID};

    # Updating residue count: add_seq counted (length - gaps) residues for
    # this sequence, so the same quantity is taken back here.
    # NOTE(review): assumes _nof_gaps returns the number of registered gaps
    # up to the given position -- confirm against its definition.
    $self->{'_nof_residues'} -=
        $seq->length() - _nof_gaps( $elem->{'_gaps'}, $seq->length );

    # Drop the sequence from the ordering and renumber what is left from 1,
    # so positions stay contiguous and add_seq (which uses ++_nof_seqs as
    # the next position) continues from the right place.
    my $order = $self->{'_order'} ||= {};
    my @kept_ids =
        grep { defined $_ && $_ ne $seqID }
        map  { $order->{$_} }
        sort { $a <=> $b } keys %{$order};
    %{$order} = ();
    my $position = 0;
    $order->{ ++$position } = $_ for @kept_ids;
    $self->{'_nof_seqs'} = scalar @kept_ids;

    # Remove all references to features of this sequence
    my @feats = values %{ $elem->{'_feat'} };
    $self->{'_sfc'}->remove_features( \@feats );
    delete $self->{'_elem'}{$seqID};

    return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1172 =head2 purge
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1174 Title : purge
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1175 Usage : $contig->purge(0.7);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1176 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1178 Removes sequences above whatever %id.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1179
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1180 This function will grind on large alignments. Beware!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1181 (perhaps not ideally implemented)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1182
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1183 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1184 Returns : An array of the removed sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1185 Argument:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1186
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1188 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1189
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: purging is not available for contigs. The call is handed
# straight to $self->throw_not_implemented(); any arguments are ignored.
sub purge {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1195 =head2 sort_alphabetically
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1197 Title : sort_alphabetically
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1198 Usage : $contig->sort_alphabetically
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1199 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1201 Changes the order of the alignment to alphabetical on name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1202 followed by numerical by number.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1204 Returns :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1205 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1207 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: alphabetical re-ordering is not available for contigs. The
# call is handed straight to $self->throw_not_implemented().
sub sort_alphabetically {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1214 =head2 Sequence selection methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1215
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1216 Methods returning one or more sequences objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1217
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218 =head2 each_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220 Title : each_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221 Usage : foreach $seq ( $contig->each_seq() )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1222 Function : Gets an array of Seq objects from the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1223 Returns : an array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1224 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1226 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns the stored sequence objects as a list, ordered by the numeric
# keys of the _order hash (ascending).
sub each_seq {
    my $self = shift;

    # Sequence IDs, arranged by their numeric position key.
    my @ordered_ids =
        map  { $self->{'_order'}{$_} }
        sort { $a <=> $b }
        keys %{ $self->{'_order'} };

    # Look up the sequence object stored for each ID.
    my @seqs = map { $self->{'_elem'}{$_}{'_seq'} } @ordered_ids;

    return @seqs;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1240 =head2 each_alphabetically
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1241
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1242 Title : each_alphabetically
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1243 Usage : foreach $seq ( $contig->each_alphabetically() )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1244 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1246 Returns an array of sequence objects sorted alphabetically
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1247 by name and then by start point.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1248 Does not change the order of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1249
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1250 Returns :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1251 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1252
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1253 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1254
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: alphabetical iteration is not available for contigs. The
# call is handed straight to $self->throw_not_implemented().
sub each_alphabetically {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1259
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1260 =head2 each_seq_with_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1261
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1262 Title : each_seq_with_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1263 Usage : foreach $seq ( $contig->each_seq_with_id() )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1264 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1265
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1266 Gets an array of Seq objects from the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1267 alignment, the contents being those sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1268 with the given name (there may be more than one)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1270 Returns : an array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1271 Argument : a seq name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1273 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: lookup of sequences by name is not available for contigs.
# The call is handed straight to $self->throw_not_implemented(); the name
# argument is ignored.
sub each_seq_with_id {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1280 =head2 get_seq_by_pos
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1282 Title : get_seq_by_pos
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1283 Usage : $seq = $contig->get_seq_by_pos(3)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1284 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1285
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1286 Gets a sequence based on its position in the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1287 Numbering starts from 1. Sequence positions larger than
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1288 no_sequences() will throw an error.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1290 Returns : a Bio::LocatableSeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1291 Argument : positive integer for the sequence position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1293 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1294
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns the sequence object stored at a 1-based position in the contig.
#
# Argument : $pos - positive integer, at most $self->no_sequences
# Returns  : whatever is stored under _seq for the sequence at that position
# Errors   : calls $self->throw when $pos is missing, is not a positive
#            integer, or is larger than $self->no_sequences
sub get_seq_by_pos {
    my ($self, $pos) = @_;

    # An undefined position is reported like any other invalid value instead
    # of reaching the pattern match.
    my $shown = defined $pos ? $pos : '';
    $self->throw("Sequence position has to be a positive integer, not [$shown]")
        unless defined $pos and $pos =~ /\A\d+\z/ and $pos > 0;
    $self->throw("No sequence at position [$pos]")
        unless $pos <= $self->no_sequences;

    # add_seq stores each ID in _order under ++_nof_seqs, so (with the counter
    # starting from zero) the keys are 1-based like $pos: look the position up
    # directly rather than decrementing it first. Adding 0 normalises inputs
    # such as "007" to the numeric key.
    my $seqID = $self->{'_order'}->{ $pos + 0 };
    return $self->{'_elem'}{$seqID}{'_seq'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1307
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1308 =head2 Create new alignments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1310 The result of these methods are horizontal or vertical subsets of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1311 current MSE.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1313 =head2 select
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1314
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1315 Title : select
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1316 Usage : $contig2 = $contig->select(1, 3) # three first sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1317 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1318
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1319 Creates a new alignment from a continuous subset of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1320 sequences. Numbering starts from 1. Sequence positions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1321 larger than no_sequences() will throw an error.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1323 Returns : a Bio::Assembly::Contig object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1324 Argument : positive integer for the first sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1325 positive integer for the last sequence to include (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1327 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1328
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: selecting a continuous subset of sequences is not available
# for contigs. The call is handed straight to
# $self->throw_not_implemented(); the position arguments are ignored.
sub select {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1334
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1335 =head2 select_noncont
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1336
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1337 Title : select_noncont
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1338 Usage : $contig2 = $contig->select_noncont(1, 3) # first and 3rd sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1339 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1341 Creates a new alignment from a subset of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1342 sequences. Numbering starts from 1. Sequence positions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1343 larger than no_sequences() will throw an error.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1344
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1345 Returns : a Bio::Assembly::Contig object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1346 Args : array of integers for the sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1348 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1349
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: selecting an arbitrary subset of sequences is not available
# for contigs. The call is handed straight to
# $self->throw_not_implemented(); the position arguments are ignored.
sub select_noncont {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1355 =head2 slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1357 Title : slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1358 Usage : $contig2 = $contig->slice(20, 30)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1359 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1360
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1361 Creates a slice from the alignment inclusive of start and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1362 end columns. Sequences with no residues in the slice are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1363 excluded from the new alignment and a warning is printed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1364 Slice beyond the length of the sequence does not do
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1365 padding.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1367 Returns : a Bio::Assembly::Contig object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1368 Argument : positive integer for start column
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1369 positive integer for end column
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1371 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: column slicing is not available for contigs. The call is
# handed straight to $self->throw_not_implemented(); the start and end
# column arguments are ignored.
sub slice {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1378 =head2 Change sequences within the MSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1379
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1380 These methods affect characters in all sequences without changing the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1381 alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1383
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1384 =head2 map_chars
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1385
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1386 Title : map_chars
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1387 Usage : $contig->map_chars('\.','-')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1388 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1389
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1390 Does a s/$arg1/$arg2/ on the sequences. Useful for gap
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1391 characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1392
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1393 Notice that the from (arg1) is interpreted as a regex,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1394 so be careful about quoting meta characters (eg
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1395 $contig->map_chars('.','-') won't do what you want)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1397 Returns :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1398 Argument : 'from' regexp
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1399 'to' string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1400
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1401 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: character substitution across sequences is not available
# for contigs. The call is handed straight to
# $self->throw_not_implemented(); the 'from' and 'to' arguments are ignored.
sub map_chars {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1407
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1408 =head2 uppercase
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1409
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1410 Title : uppercase()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1411 Usage : $contig->uppercase()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1412 Function : Sets all the sequences to uppercase
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1413 Returns :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1414 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1415
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1416 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: upper-casing all sequences is not available for contigs.
# The call is handed straight to $self->throw_not_implemented().
sub uppercase {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1422
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1423 =head2 match_line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1424
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1425 Title : match_line()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1426 Usage : $contig->match_line()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1427 Function : Generates a match line - much like consensus string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1428 except that a line indicating the '*' for a match.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1429 Argument : (optional) Match line characters ('*' by default)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1430 (optional) Strong match char (':' by default)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1431 (optional) Weak match char ('.' by default)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1433 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: match-line generation is not available for contigs. The
# call is handed straight to $self->throw_not_implemented(); the optional
# match character arguments are ignored.
sub match_line {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1439
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1440 =head2 match
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1442 Title : match()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1443 Usage : $contig->match()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1444 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1446 Goes through all columns and changes residues that are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1447 identical to residue in first sequence to match '.'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1448 character. Sets match_char.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1450 USE WITH CARE: Most MSE formats do not support match
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1451 characters in sequences, so this is mostly for output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1452 only. NEXUS format (Bio::AlignIO::nexus) can handle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1453 it.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1454
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1455 Returns : 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1456 Argument : a match character, optional, defaults to '.'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1458 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: replacing residues identical to the first sequence with a
# match character is not available for contigs. The call is handed
# straight to $self->throw_not_implemented(); the optional match character
# argument is ignored.
sub match {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1464
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1465 =head2 unmatch
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1466
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1467 Title : unmatch()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1468 Usage : $contig->unmatch()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1469 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1471 Undoes the effect of method match. Unsets match_char.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1472
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1473 Returns : 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1474 Argument : a match character, optional, defaults to '.'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1475
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1476 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1477
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder: undoing the effect of match() is not available for contigs.
# The call is handed straight to $self->throw_not_implemented(); the
# optional match character argument is ignored.
sub unmatch {
    my $self = shift;
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1482
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1484 =head2 MSE attributes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1485
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1486 Methods for setting and reading the MSE attributes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1487
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1488 Note that the methods defining character semantics depend on the user
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1489 to set them sensibly. They are needed only by certain input/output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1490 methods. Unset them by setting to an empty string ('').
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1491
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1492 =head2 id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1493
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1494 Title : id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1495 Usage : $contig->id("Ig")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1496 Function : Gets/sets the id field of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1497 Returns : An id string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1498 Argument : An id string (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1500 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1501
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub id {
    my $self        = shift;
    my $contig_name = shift;

    # Only a defined argument overwrites the stored id, so a plain getter
    # call (or an explicit undef) leaves the current value untouched.
    $self->{'_id'} = $contig_name if defined $contig_name;

    return $self->{'_id'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1512 =head2 missing_char
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1513
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1514 Title : missing_char
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1515 Usage : $contig->missing_char("?")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1516 Function : Gets/sets the missing_char attribute of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1517 It is generally recommended to set it to 'n' or 'N'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1518 for nucleotides and to 'X' for protein.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1519 Returns : A missing_char string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1520 Argument : A missing_char string (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1521
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1522 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1523
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub missing_char {
    my $self = shift;

    # Stub: the missing_char attribute is not supported here; the call is
    # forwarded to throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1528
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1529 =head2 match_char
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1530
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1531 Title : match_char
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1532 Usage : $contig->match_char('.')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1533 Function : Gets/sets the match_char attribute of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1534 Returns : A match_char string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1535 Argument : A match_char string (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1537 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub match_char {
    my $self = shift;

    # Stub: the match_char attribute is not supported here; the call is
    # forwarded to throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1544 =head2 gap_char
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1545
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1546 Title : gap_char
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1547 Usage : $contig->gap_char('-')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1548 Function : Gets/sets the gap_char attribute of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1549 Returns : A gap_char string, defaults to '-'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1550 Argument : A gap_char string (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1551
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1552 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1553
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub gap_char {
    my $self = shift;

    # Stub: the gap_char attribute is not supported here; the call is
    # forwarded to throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1558
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1559 =head2 symbol_chars
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1561 Title : symbol_chars
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1562 Usage : my @symbolchars = $contig->symbol_chars;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1563 Function: Returns all the seen symbols (other than gaps)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1564 Returns : array of characters that are the seen symbols
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1565 Argument: boolean to include the gap/missing/match characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1567 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub symbol_chars {
    my $self = shift;

    # Stub: listing the seen symbols is not provided here; the call is
    # forwarded to throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1573
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1574 =head2 Alignment descriptors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1576 These read only methods describe the MSE in various ways.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1577
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1579 =head2 consensus_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1580
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1581 Title : consensus_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1582 Usage : $str = $contig->consensus_string($threshold_percent)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1583 Function : Makes a strict consensus
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1584 Returns :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1585 Argument : Optional threshold ranging from 0 to 100.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1586 The consensus residue has to appear at least threshold %
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1587 of the sequences at a given location, otherwise a '?'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1588 character will be placed at that location.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1589 (Default value = 0%)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1590
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1591 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub consensus_string {
    my $self = shift;

    # Stub: no strict consensus is built here; the call is forwarded to
    # throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1597
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1598 =head2 consensus_iupac
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1599
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1600 Title : consensus_iupac
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1601 Usage : $str = $contig->consensus_iupac()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1602 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1603
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1604 Makes a consensus using IUPAC ambiguity codes from DNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1605 and RNA. The output is in upper case except when gaps in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1606 a column force output to be in lower case.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1607
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1608 Note that if your alignment sequences contain a lot of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1609 IUPAC ambiguity codes you often have to manually set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1610 alphabet. Bio::PrimarySeq::_guess_type thinks they
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1611 indicate a protein sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1612
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1613 Returns : consensus string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1614 Argument : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1615 Throws : on protein sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1616
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1618 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1619
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub consensus_iupac {
    my $self = shift;

    # Stub: no IUPAC consensus is built here; the call is forwarded to
    # throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1624
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1625 =head2 is_flush
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1626
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1627 Title : is_flush
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1628 Usage : if( $contig->is_flush() )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1629 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1630 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1631 Function : Tells you whether the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1632 : is flush, ie all of the same length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1633 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1634 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1635 Returns : 1 or 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1636 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1637
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1638 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1639
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub is_flush {
    my $self = shift;

    # Stub: the flush check is not provided here; the call is forwarded
    # to throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1644
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1645 =head2 length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1646
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1647 Title : length()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1648 Usage : $len = $contig->length()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1649 Function : Returns the maximum length of the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1650 To be sure the alignment is a block, use is_flush
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1651 Returns :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1652 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1653
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1654 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub length {
    my $self = shift;

    # Stub: no alignment length is computed here; the call is forwarded
    # to throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1661
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1662 =head2 maxdisplayname_length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1663
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1664 Title : maxdisplayname_length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1665 Usage : $contig->maxdisplayname_length()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1666 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1667
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1668 Gets the maximum length of the displayname in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1669 alignment. Used in writing out various MSE formats.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1670
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1671 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1672 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1673
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1674 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub maxname_length {
    my $self = shift;

    # Stub: no display-name length is computed here; the call is
    # forwarded to throw_not_implemented() on the object.
    # NOTE(review): the POD heading for this method documents the name
    # maxdisplayname_length -- confirm which name callers rely on.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1681 =head2 no_residues
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1682
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1683 Title : no_residues
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1684 Usage : $no = $contig->no_residues
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1685 Function : number of residues in total in the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1686 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1687 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1689 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1690
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub no_residues {
    my $self = shift;

    # Plain read of the stored '_nof_residues' value; nothing is
    # recomputed here.
    my $total = $self->{'_nof_residues'};
    return $total;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1697 =head2 no_sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1698
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1699 Title : no_sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1700 Usage : $depth = $contig->no_sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1701 Function : number of sequences in the sequence alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1702 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1703 Argument : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1704
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1705 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1706
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub no_sequences {
    my $self = shift;

    # Count the keys of the '_elem' hash. The hash is dereferenced
    # directly from $self (not through a temporary) so that the access
    # behaves exactly as before when '_elem' has not been set yet.
    my $depth = keys %{ $self->{'_elem'} };
    return $depth;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1712
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1713 =head2 percentage_identity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1714
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1715 Title : percentage_identity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1716 Usage : $id = $contig->percentage_identity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1717 Function: The function calculates the percentage identity of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1718 Returns : The percentage identity of the alignment (as defined by the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1719 implementation)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1720 Argument: None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1721
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1722 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1723
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub percentage_identity {
    my ($self) = @_;

    # Stub: percentage identity is not computed here. The call is
    # forwarded to throw_not_implemented(), the same handler used by the
    # other stubbed alignment methods in this file. The method name was
    # previously misspelled ("throw_not_implemeneted"), so the call did
    # not reach that handler.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1729
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1730 =head2 overall_percentage_identity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1732 Title : overall_percentage_identity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1733 Usage : $id = $contig->overall_percentage_identity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1734 Function: The function calculates the percentage identity of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1735 the conserved columns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1736 Returns : The percentage identity of the conserved columns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1737 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1738
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1739 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1740
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub overall_percentage_identity {
    my $self = shift;

    # Stub: identity over conserved columns is not computed here; the
    # call is forwarded to throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1745
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1746
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1747 =head2 average_percentage_identity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1748
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1749 Title : average_percentage_identity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1750 Usage : $id = $contig->average_percentage_identity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1751 Function: The function uses a fast method to calculate the average
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1752 percentage identity of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1753 Returns : The average percentage identity of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1754 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1755
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1756 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1757
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub average_percentage_identity {
    my $self = shift;

    # Stub: average identity is not computed here; the call is forwarded
    # to throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1762
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1763 =head2 Alignment positions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1764
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1765 Methods to map a sequence position into an alignment column and back.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1766 column_from_residue_number() does the former. The latter is really a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1767 property of the sequence object and can be done using
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1768 L<Bio::LocatableSeq::location_from_column>:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1769
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1770 # select somehow a sequence from the alignment, e.g.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1771 my $seq = $contig->get_seq_by_pos(1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1772 #$loc is undef or Bio::LocationI object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1773 my $loc = $seq->location_from_column(5);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1774
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1775
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1776 =head2 column_from_residue_number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1777
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1778 Title : column_from_residue_number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1779 Usage : $col = $contig->column_from_residue_number( $seqname, $resnumber)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1780 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1781
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1782 This function gives the position in the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1783 (i.e. column number) of the given residue number in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1784 sequence with the given name. For example, for the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1785 alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1786
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1787 Seq1/91-97 AC..DEF.GH
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1788 Seq2/24-30 ACGG.RTY..
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1789 Seq3/43-51 AC.DDEFGHI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1790
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1791 column_from_residue_number( "Seq1", 94 ) returns 5.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1792 column_from_residue_number( "Seq2", 25 ) returns 2.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1793 column_from_residue_number( "Seq3", 50 ) returns 9.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1794
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1795 An exception is thrown if the residue number would lie
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1796 outside the length of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1797 (e.g. column_from_residue_number( "Seq2", 22 ))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1798
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1799 Note: If the parent sequence is represented by more than
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1800 one alignment sequence and the residue number is present in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1801 them, this method finds only the first one.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1802
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1803 Returns : A column number for the position in the alignment of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1804 given residue in the given sequence (1 = first column)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1805 Args : A sequence id/name (not a name/start-end)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1806 A residue number in the whole sequence (not just that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1807 segment of it in the alignment)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1808
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1809 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1810
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub column_from_residue_number {
    my $self = shift;

    # Stub: residue-to-column mapping is not provided here; any further
    # arguments are ignored and the call is forwarded to
    # throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1815
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1816 =head2 Sequence names
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1817
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1818 Methods to manipulate the display name. The default name based on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1819 sequence id and subsequence positions can be overridden in various
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1820 ways.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1821
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1822 =head2 displayname
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1823
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1824 Title : displayname
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1825 Usage : $contig->displayname("Ig", "IgA")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1826 Function : Gets/sets the display name of a sequence in the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1827 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1828 Returns : A display name string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1829 Argument : name of the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1830 displayname of the sequence (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1831
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1832 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1833
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub displayname {
    # Deliberate no-op: arguments are ignored, nothing is stored, and
    # nothing is returned (empty list / undef depending on context).
    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1836
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1837 =head2 set_displayname_count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1838
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1839 Title : set_displayname_count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1840 Usage : $contig->set_displayname_count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1841 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1842
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1843 Sets the names to be name_# where # is the number of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1844 times this name has been used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1845
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1846 Returns : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1847 Argument : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1848
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1849 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1850
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub set_displayname_count {
    my $self = shift;

    # Stub: counted display names are not provided here; the call is
    # forwarded to throw_not_implemented() on the object.
    return $self->throw_not_implemented();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1855
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1856 =head2 set_displayname_flat
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1857
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1858 Title : set_displayname_flat
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1859 Usage : $contig->set_displayname_flat()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1860 Function : Makes all the sequences be displayed as just their name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1861 not name/start-end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1862 Returns : 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1863 Argument : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1864
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1865 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1866
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub set_displayname_flat {
    # Deliberate no-op: nothing is changed and nothing is returned
    # (empty list / undef depending on context).
    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1869
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1870 =head2 set_displayname_normal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1871
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1872 Title : set_displayname_normal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1873 Usage : $contig->set_displayname_normal()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1874 Function : Makes all the sequences be displayed as name/start-end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1875 Returns : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1876 Argument : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1877
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1878 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1879
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub set_displayname_normal {
    # Deliberate no-op: nothing is changed and nothing is returned
    # (empty list / undef depending on context).
    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1882
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1883 =head1 Internal Methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1884
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1885 =head2 _binary_search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1886
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1887 Title : _binary_search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1888 Usage : _binary_search($list,$query)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1889 Function :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1890
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1891 Find a number in a sorted list of numbers. Return values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1892 may be one or two integers. One positive integer or zero
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1893 (>=0) is the index of the element that stores the queried
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1894 value. Two positive integers (or zero and another
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1895 number) are the indexes of elements among which the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1896 queried value should be placed. Negative single values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1897 mean:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1898
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1899 -1: $query is smaller than smallest element in list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1900 -2: $query is greater than greatest element in list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1901
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1902 Returns : array of integers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1903 Argument :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1904 $list : array reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1905 $query : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1906
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1907 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1908
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _binary_search {
    # Locate $query in the sorted list referenced by $list.
    #
    # Arguments:
    #   $list  - reference to an array sorted in ascending order
    #   $query - value to look for (compared with _compare())
    #
    # Returns a list holding either:
    #   ($index)       - $query is stored at $list->[$index]
    #   ($low, $high)  - $query lies between these two adjacent elements
    #   (-1)           - $query is smaller than the first element
    #   (-2)           - $query is greater than the last element, or the
    #                    list is empty
    #
    # If the list holds duplicates of $query, the index of any one of
    # them may be returned.
    my ($list, $query) = @_;

    my $start = 0;
    my $end   = $#{$list};

    # An empty list has no elements to compare against. Answer (-2)
    # directly instead of comparing $query with undef, which only
    # produced "uninitialized value" warnings before reaching the same
    # result.
    return (-2) if $end < 0;

    # Check both boundaries first. All comparisons go through _compare()
    # so that a one-element list is treated exactly like any other list
    # (no separate numeric-only shortcut).
    my $vs_first = _compare($query, $list->[$start]);
    return ($start) if $vs_first == 0;
    my $vs_last = _compare($query, $list->[$end]);
    return ($end) if $vs_last == 0;
    return (-1) if $vs_first < 0;
    return (-2) if $vs_last > 0;

    # Invariant from here on: $list->[$start] < $query < $list->[$end].
    while ($end - $start > 1) {
        # The midpoint must be taken between the *current* bounds;
        # deriving it from a value reset to 0 lets probes fall below
        # $start and makes the lower bound move backwards.
        my $middle    = int(($start + $end) / 2);
        my $vs_middle = _compare($query, $list->[$middle]);

        return ($middle) if $vs_middle == 0;

        if ($vs_middle < 0) {
            $end = $middle;      # query is below the probe: shrink from above
        }
        else {
            $start = $middle;    # query is above the probe: move region beginning
        }
    }

    # Bounds are adjacent and neither matches: $query belongs between them.
    return ($start, $end);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1931
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1932 =head2 _compare
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1933
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1934 Title : _compare
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1935 Usage : _compare($arg1,$arg2)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1936 Function: Perform numeric or string comparisons
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1937 Returns : integer (0, 1 or -1)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1938 Args : values to be compared
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1939
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1940 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1941
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _compare {
    # Three-way comparison of two values.
    #
    # When both arguments look like integers (an optional leading minus
    # sign followed only by digits) they are compared numerically with
    # <=>; in every other case they are compared as strings with cmp.
    #
    # Accepting the leading minus sign matters: without it, negative
    # integers fell through to the string comparison, so for example
    # -5 sorted before -50.
    #
    # Returns -1, 0 or 1.
    my ($arg1, $arg2) = @_;

    my $integer = qr/^-?\d+$/;

    if (($arg1 =~ $integer) && ($arg2 =~ $integer)) {
        return $arg1 <=> $arg2;
    }
    return $arg1 cmp $arg2;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1949
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1950 =head2 _nof_gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1951
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1952 Title : _nof_gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1953 Usage : _nof_gaps($array_ref, $query)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1954 Function: number of gaps found before position $query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1955 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1956 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1957 $array_ref : gap registry reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1958 $query : [integer] a position in a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1959
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1960 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1962 #' emacs...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _nof_gaps {
    # Count the gaps registered before position $query.
    #
    # Arguments:
    #   $list  - reference to a gap registry (sorted gap positions);
    #            may be undef or empty
    #   $query - position in the sequence
    #
    # Returns an integer: the number of registry entries located before
    # $query.
    my ($list, $query) = @_;

    # No registry, or an empty one: there are no gaps at all
    return 0 if !defined($list) || !scalar(@{$list});

    # Find where $query falls within the registry
    my @hit   = _binary_search($list, $query);
    my $first = $hit[0];

    # Query lies beyond the last gap: every registered gap precedes it
    return scalar(@{$list}) if $first == -2;

    # Query lies before the first gap: none precede it
    return 0 if $first == -1;

    if ($first >= 0) {
        # Two indexes: query sits between two gaps, so everything up to
        # and including the lower one precedes it
        return $first + 1 if $#hit > 0;

        # One index: query sits exactly on a gap, so only the gaps
        # before that one are counted
        return $first if $#hit == 0;
    }

    # Any other search result leaves the query value untouched
    return $query;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1983
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1984 =head2 _padded_unpadded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1985
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1986 Title : _padded_unpadded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1987 Usage : _padded_unpadded($array_ref, $query)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1988 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1989
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1990 Returns a coordinate corresponding to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1991 position $query after gaps were
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1992 removed from a sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1993
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1994 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1995 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1996 $array_ref : reference to this gap registry
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1997 $query : [integer] coordinate to change
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1998
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1999 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _padded_unpadded {
    # Translate a padded (gapped) coordinate into the matching
    # coordinate of the sequence with its gaps removed.
    #
    # Arguments:
    #   $list  - reference to the gap registry of the sequence
    #   $query - padded coordinate to translate
    #
    # Returns the unpadded coordinate (integer).
    my ($list, $query) = @_;

    my $gaps_before = _nof_gaps($list, $query);

    # If the query itself sits on a gap, step back one position before
    # discounting the gaps that precede it
    if (defined($list->[$gaps_before]) && ($list->[$gaps_before] == $query)) {
        $query--;
    }

    return $query - $gaps_before;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2011
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2012 =head2 _unpadded_padded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2013
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2014 Title : _unpadded_padded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2015 Usage : _unpadded_padded($array_ref, $query)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2016 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2017
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2018 Returns the value corresponding to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2019 ungapped position $query when gaps are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2020 counted as valid sites in a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2021
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2022 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2023 Args : $array_ref = a reference to this sequence's gap registry
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2024 $query = [integer] location to change
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2025
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2026 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2027
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2028 #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _unpadded_padded {
    # Translate an unpadded (gap-free) coordinate into the matching
    # coordinate of the sequence when gaps are counted as valid sites.
    #
    # Arguments:
    #   $list  - reference to the gap registry of the sequence
    #   $query - unpadded coordinate to translate
    #
    # Returns the padded coordinate (integer).
    my ($list, $query) = @_;

    # First estimate: shift right by the gaps found before the query
    my $counted = _nof_gaps($list, $query);
    $query = $query + $counted;

    # Shifting right may move the position past further gaps; keep
    # adding the newly crossed ones until the count stops growing
    while (1) {
        my $recount = _nof_gaps($list, $query);
        last unless $recount - $counted > 0;
        $query   = $query + $recount - $counted;
        $counted = $recount;
    }

    # If the position reached is itself a gap, advance past it (and past
    # any gaps immediately following) to the next base
    while (defined($list->[$counted]) && ($list->[$counted] == $query)) {
        $query++;
        $counted++;
    }

    return $query;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2048
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2049 =head2 _register_gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2050
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2051 Title : _register_gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2052 Usage : $self->_register_gaps($seq, $array_ref)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2053 Function: stores gap locations for a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2054 Returns : number of gaps found
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2055 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2056 $seq : sequence string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2057 $array_ref : a reference to an array,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2058 where gap locations will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2059 be stored
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2060
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2061 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2062
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _register_gaps {
    # Record the position of every "-" found in an aligned sequence.
    #
    # Arguments:
    #   $sequence - aligned sequence as a plain string (not a reference);
    #               may be undef
    #   $dbref    - array reference that receives the gap positions; any
    #               previous content is discarded
    #
    # Positions are stored 1-based, in ascending order.
    # Returns the number of gaps found (0 for an undefined sequence).
    # Throws if $sequence is a reference or $dbref is not an array
    # reference.
    my ($self, $sequence, $dbref) = @_;

    if (ref($sequence)) {
        $self->throw("Not an aligned sequence string to register gaps");
    }
    if (ref($dbref) ne 'ARRAY') {
        $self->throw("Not an array reference for gap registry");
    }

    # Start from an empty registry, even when there is nothing to scan
    @{$dbref} = ();
    return 0 unless defined $sequence;

    # Hop from one "-" to the next; index() yields -1 once none are left
    for (my $at = index($sequence, "-");
         $at != -1;
         $at = index($sequence, "-", $at + 1)) {
        push(@{$dbref}, $at + 1);    # convert 0-based offset to 1-based position
    }

    return scalar(@{$dbref});
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2090
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2091 1;