annotate variant_effect_predictor/Bio/SimpleAlign.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: SimpleAlign.pm,v 1.65.2.1 2003/07/02 16:00:19 jason Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2 # BioPerl module for SimpleAlign
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 # Copyright Ewan Birney
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8 # You may distribute this module under the same terms as perl itself
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10 # POD documentation - main docs before the code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12 # History:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 # 11/3/00 Added threshold feature to consensus and consensus_aa - PS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14 # May 2001 major rewrite - Heikki Lehvaslaiho
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18 Bio::SimpleAlign - Multiple alignments held as a set of sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22 # use Bio::AlignIO to read in the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 $str = Bio::AlignIO->new('-file' => 't/data/testaln.pfam');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24 $aln = $str->next_aln();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26 # some descriptors
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27 print $aln->length, "\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 print $aln->no_residues, "\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29 print $aln->is_flush, "\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30 print $aln->no_sequences, "\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31 print $aln->percentage_identity, "\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32 print $aln->consensus_string(50), "\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34 # find the position in the alignment for a sequence location
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35 $pos = $aln->column_from_residue_number('1433_LYCES', 14); # = 6;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 # extract sequences and check values for the alignment column $pos
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38 foreach $seq ($aln->each_seq) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 $res = $seq->subseq($pos, $pos);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 $count{$res}++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 foreach $res (keys %count) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43 printf "Res: %s Count: %2d\n", $res, $count{$res};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49 SimpleAlign handles multiple alignments of sequences. It is very
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50 permissive of types (it won't insist on things being all same length
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 etc): really it is a SequenceSet explicitly held in memory with a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52 whole series of built in manipulations and especially file format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53 systems for read/writing alignments.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55 SimpleAlign basically views an alignment as an immutable block of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56 text. SimpleAlign *is not* the object to be using if you want to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 perform complex alignment manipulations.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59 However for lightweight display/formatting and minimal manipulation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60 (e.g. removing all-gaps columns) - this is the one to use.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62 SimpleAlign uses a subclass of L<Bio::PrimarySeq> class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63 L<Bio::LocatableSeq> to store its sequences. These are subsequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64 with a start and end positions in the parent reference sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66 Tricky concepts. SimpleAlign expects name,start,end to be 'unique' in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67 the alignment, and this is the key for the internal hashes.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68 (name,start,end is abbreviated nse in the code). However, in many
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69 cases people don't want the name/start-end to be displayed: either
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70 multiple names in an alignment or names specific to the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 (ROA1_HUMAN_1, ROA1_HUMAN_2 etc). These names are called
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72 'displayname', and generally is what is used to print out the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73 alignment. They default to name/start-end.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75 The SimpleAlign Module came from Ewan Birney's Align module.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77 =head1 PROGRESS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79 SimpleAlign is being slowly converted to bioperl coding standards,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80 mainly by Ewan.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82 =over 3
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84 =item Use Bio::Root::Object - done
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86 =item Use proper exceptions - done
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88 =item Use hashed constructor - not done!
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90 =back
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92 =head1 FEEDBACK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94 =head2 Mailing Lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96 User feedback is an integral part of the evolution of this and other
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97 Bioperl modules. Send your comments and suggestions preferably to one
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98 of the Bioperl mailing lists. Your participation is much appreciated.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100 bioperl-l@bioperl.org - General discussion
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 http://bioperl.org/MailList.shtml - About the mailing lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 =head2 Reporting Bugs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
105 Report bugs to the Bioperl bug tracking system to help us keep track
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
106 the bugs and their resolution. Bug reports can be submitted via email
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
107 or the web:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
108
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
109 bioperl-bugs@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
110 http://bugzilla.bioperl.org/
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
111
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
112 =head1 AUTHOR
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
113
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
114 Ewan Birney, birney@sanger.ac.uk
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
115
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
116 =head1 CONTRIBUTORS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
117
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
118 Richard Adams, Richard.Adams@ed.ac.uk,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
119 David J. Evans, David.Evans@vir.gla.ac.uk,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
120 Heikki Lehvaslaiho, heikki@ebi.ac.uk,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
121 Allen Smith, allens@cpan.org,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
122 Jason Stajich, jason@bioperl.org,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
123 Anthony Underwood, aunderwood@phls.org.uk,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
124 Xintao Wei & Giri Narasimhan, giri@cs.fiu.edu
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
125
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
126
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
127 =head1 SEE ALSO
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
128
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
129 L<Bio::LocatableSeq.pm>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
130
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
131 =head1 APPENDIX
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
132
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
133 The rest of the documentation details each of the object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
134 methods. Internal methods are usually preceded with a _
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
135
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
136 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
137
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
138 # 'Let the code begin...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
139
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
140 package Bio::SimpleAlign;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
141 use vars qw(@ISA %CONSERVATION_GROUPS);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
142 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
143
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
144 use Bio::Root::Root;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
145 use Bio::LocatableSeq; # uses Seq's as list
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
146 use Bio::Align::AlignI;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
147
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148 BEGIN {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149 # This data should probably be in a more centralized module...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
150 # it is taken from Clustalw documentation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
151 # These are all the positively scoring groups that occur in the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
152 # Gonnet Pam250 matrix. The strong and weak groups are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
153 # defined as strong score >0.5 and weak score =<0.5 respectively.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
154
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
155 %CONSERVATION_GROUPS = ( 'strong' => [ qw(STA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
156 NEQK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
157 NHQK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
158 NDEQ
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
159 QHRK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
160 MILV
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
161 MILF
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
162 HY
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
163 FYW)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
164 ],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
165 'weak' => [ qw(CSA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
166 ATV
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
167 SAG
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
168 STNK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
169 STPA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
170 SGND
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
171 SNDEQK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
172 NDEQHK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
173 NEQHRK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
174 FVLIM
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
175 HFY) ],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
176 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
177
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
178 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
179
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
180 @ISA = qw(Bio::Root::Root Bio::Align::AlignI);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
181
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
182 =head2 new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
183
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
184 Title : new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
185 Usage : my $aln = new Bio::SimpleAlign();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
186 Function : Creates a new simple align object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
187 Returns : Bio::SimpleAlign
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
188 Args : -source => string representing the source program
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
189 where this alignment came from
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
190
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
191 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
192
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
193
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
194 sub new {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
195 my($class,@args) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
196
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
197 my $self = $class->SUPER::new(@args);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
198
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
199 my ($src) = $self->_rearrange([qw(SOURCE)], @args);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
200 $src && $self->source($src);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
201 # we need to set up internal hashs first!
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
202
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
203 $self->{'_seq'} = {};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
204 $self->{'_order'} = {};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
205 $self->{'_start_end_lists'} = {};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
206 $self->{'_dis_name'} = {};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
207 $self->{'_id'} = 'NoName';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
208 $self->{'_symbols'} = {};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
209 # maybe we should automatically read in from args. Hmmm...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
210
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
211 return $self; # success - we hope!
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
212 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
213
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
214 =head1 Modifier methods
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
215
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
216 These methods modify the MSE by adding, removing or shuffling complete
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
217 sequences.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
218
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
219 =head2 add_seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
220
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
221 Title : add_seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
222 Usage : $myalign->add_seq($newseq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
223 Function : Adds another sequence to the alignment. *Does not* align
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
224 it - just adds it to the hashes.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
225 Returns : nothing
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
226 Args : a Bio::LocatableSeq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
227 order (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
228
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
229 See L<Bio::LocatableSeq> for more information
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
230
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
231 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
232
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
233 sub addSeq {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
234 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
235 $self->warn(ref($self). "::addSeq - deprecated method. Use add_seq() instead.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
236 $self->add_seq(@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
237 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
238
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
239 sub add_seq {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
240 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
241 my $seq = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
242 my $order = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
243 my ($name,$id,$start,$end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
244
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
245 if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
246 $self->throw("Unable to process non locatable sequences [", ref($seq), "]");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
247 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
248
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
249 $id = $seq->id() ||$seq->display_id || $seq->primary_id;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
250 $start = $seq->start();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
251 $end = $seq->end();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
252
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
253 # build the symbol list for this sequence,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
254 # will prune out the gap and missing/match chars
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
255 # when actually asked for the symbol list in the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
256 # symbol_chars
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
257 map { $self->{'_symbols'}->{$_} = 1; } split(//,$seq->seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
258
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
259 if( !defined $order ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
260 $order = keys %{$self->{'_seq'}};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
261 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
262 $name = sprintf("%s/%d-%d",$id,$start,$end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
263
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
264 if( $self->{'_seq'}->{$name} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
265 $self->warn("Replacing one sequence [$name]\n");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
266 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
267 else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
268 #print STDERR "Assigning $name to $order\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
269
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
270 $self->{'_order'}->{$order} = $name;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
271
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
272 unless( exists( $self->{'_start_end_lists'}->{$id})) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
273 $self->{'_start_end_lists'}->{$id} = [];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
274 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
275 push @{$self->{'_start_end_lists'}->{$id}}, $seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
276 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
277
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
278 $self->{'_seq'}->{$name} = $seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
279
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
280 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
281
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
282
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
283 =head2 remove_seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
284
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
285 Title : remove_seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
286 Usage : $aln->remove_seq($seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
287 Function : Removes a single sequence from an alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
288 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
289 Argument : a Bio::LocatableSeq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
290
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
291 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
292
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
293 sub removeSeq {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
294 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
295 $self->warn(ref($self). "::removeSeq - deprecated method. Use remove_seq() instead.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
296 $self->remove_seq(@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
297 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
298
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
299 sub remove_seq {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
300 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
301 my $seq = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
302 my ($name,$id,$start,$end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
303
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
304 $self->throw("Need Bio::Locatable seq argument ")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
305 unless ref $seq && $seq->isa('Bio::LocatableSeq');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
306
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
307 $id = $seq->id();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
308 $start = $seq->start();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
309 $end = $seq->end();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
310 $name = sprintf("%s/%d-%d",$id,$start,$end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
311
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
312 if( !exists $self->{'_seq'}->{$name} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
313 $self->throw("Sequence $name does not exist in the alignment to remove!");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
314 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
315
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
316 delete $self->{'_seq'}->{$name};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
317
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
318 # we need to remove this seq from the start_end_lists hash
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
319
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
320 if (exists $self->{'_start_end_lists'}->{$id}) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
321 # we need to find the sequence in the array.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
322
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
323 my ($i, $found);;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
324 for ($i=0; $i < @{$self->{'_start_end_lists'}->{$id}}; $i++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
325 if (${$self->{'_start_end_lists'}->{$id}}[$i] eq $seq) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
326 $found = 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
327 last;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
328 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
329 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
330 if ($found) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
331 splice @{$self->{'_start_end_lists'}->{$id}}, $i, 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
332 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333 else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334 $self->throw("Could not find the sequence to remoce from the start-end list");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
337 else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
338 $self->throw("There is no seq list for the name $id");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
339 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340 return 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341 # we can't do anything about the order hash but that is ok
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342 # because each_seq will handle it
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
343 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
344
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
345
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
346 =head2 purge
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
347
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
348 Title : purge
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
349 Usage : $aln->purge(0.7);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
350 Function:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
351
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
352 Removes sequences above given sequence similarity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
353 This function will grind on large alignments. Beware!
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
354
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
355 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
356 Returns : An array of the removed sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
357 Args : float, threshold for similarity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
358
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
359 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
360
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
361 sub purge {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
362 my ($self,$perc) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
363 my (%duplicate, @dups);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
364
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
365 my @seqs = $self->each_seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
366
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
367 for (my $i=0;$i< @seqs - 1;$i++ ) { #for each seq in alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
368 my $seq = $seqs[$i];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
369
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
370 #skip if already in duplicate hash
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
371 next if exists $duplicate{$seq->display_id} ;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
372 my $one = $seq->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
373
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
374 my @one = split '', $one; #split to get 1aa per array element
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
375
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
376 for (my $j=$i+1;$j < @seqs;$j++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
377 my $seq2 = $seqs[$j];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
378
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
379 #skip if already in duplicate hash
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
380 next if exists $duplicate{$seq2->display_id} ;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
381
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
382 my $two = $seq2->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
383 my @two = split '', $two;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
384
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
385 my $count = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
386 my $res = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
387 for (my $k=0;$k<@one;$k++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
388 if ( $one[$k] ne '.' && $one[$k] ne '-' && defined($two[$k]) &&
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
389 $one[$k] eq $two[$k]) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
390 $count++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
391 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
392 if ( $one[$k] ne '.' && $one[$k] ne '-' && defined($two[$k]) &&
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
393 $two[$k] ne '.' && $two[$k] ne '-' ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
394 $res++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
395 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
396 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
397
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
398 my $ratio = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
399 $ratio = $count/$res unless $res == 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
400
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
401 # if above threshold put in duplicate hash and push onto
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
402 # duplicate array for returning to get_unique
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
403 if ( $ratio > $perc ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
404 print STDERR "duplicate!", $seq2->display_id, "\n" if $self->verbose > 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
405 $duplicate{$seq2->display_id} = 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
406 push @dups, $seq2;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
407 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
408 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
409 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
410 foreach my $seq (@dups) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
411 $self->remove_seq($seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
412 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
413 return @dups;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
414 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
415
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
416 =head2 sort_alphabetically
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
417
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
418 Title : sort_alphabetically
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
419 Usage : $ali->sort_alphabetically
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
420 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
421
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
422 Changes the order of the alignemnt to alphabetical on name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
423 followed by numerical by number.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
424
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
425 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
426 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
427
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
428 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
429
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
430 sub sort_alphabetically {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
431 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
432 my ($seq,$nse,@arr,%hash,$count);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
433
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
434 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
435 $nse = $seq->get_nse;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
436 $hash{$nse} = $seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
437 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
438
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
439 $count = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
440
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
441 %{$self->{'_order'}} = (); # reset the hash;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
442
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
443 foreach $nse ( sort _alpha_startend keys %hash) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
444 $self->{'_order'}->{$count} = $nse;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
445
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
446 $count++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
447 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
448 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
449 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
450
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
451 =head1 Sequence selection methods
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
452
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
453 Methods returning one or more sequences objects.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
454
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
455 =head2 each_seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
456
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
457 Title : each_seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
458 Usage : foreach $seq ( $align->each_seq() )
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
459 Function : Gets an array of Seq objects from the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
460 Returns : an array
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
461 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
462
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
463 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
464
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
465 sub eachSeq {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
466 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
467 $self->warn(ref($self). "::eachSeq - deprecated method. Use each_seq() instead.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
468 $self->each_seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
469 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
470
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
471 sub each_seq {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
472 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
473 my (@arr,$order);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
474
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
475 foreach $order ( sort { $a <=> $b } keys %{$self->{'_order'}} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
476 if( exists $self->{'_seq'}->{$self->{'_order'}->{$order}} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
477 push(@arr,$self->{'_seq'}->{$self->{'_order'}->{$order}});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
478 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
479 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
480
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
481 return @arr;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
482 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
483
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
484
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
485 =head2 each_alphabetically
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
486
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
487 Title : each_alphabetically
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
488 Usage : foreach $seq ( $ali->each_alphabetically() )
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
489 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
490
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
491 Returns an array of sequence object sorted alphabetically
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
492 by name and then by start point.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
493 Does not change the order of the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
494
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
495 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
496 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
497
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
498 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
499
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
500 sub each_alphabetically {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
501 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
502 my ($seq,$nse,@arr,%hash,$count);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
503
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
504 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
505 $nse = $seq->get_nse;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
506 $hash{$nse} = $seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
507 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
508
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
509 foreach $nse ( sort _alpha_startend keys %hash) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
510 push(@arr,$hash{$nse});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
511 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
512
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
513 return @arr;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
514
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
515 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
516
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
517 sub _alpha_startend {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
518 my ($aname,$astart,$bname,$bstart);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
519 ($aname,$astart) = split (/-/,$a);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
520 ($bname,$bstart) = split (/-/,$b);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
521
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
522 if( $aname eq $bname ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
523 return $astart <=> $bstart;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
524 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
525 else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
526 return $aname cmp $bname;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
527 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
528
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
529 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
530
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
531 =head2 each_seq_with_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
532
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
533 Title : each_seq_with_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
534 Usage : foreach $seq ( $align->each_seq_with_id() )
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
535 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
536
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
537 Gets an array of Seq objects from the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
538 alignment, the contents being those sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
539 with the given name (there may be more than one)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
540
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
541 Returns : an array
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
542 Argument : a seq name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
543
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
544 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
545
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
546 sub eachSeqWithId {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
547 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
548 $self->warn(ref($self). "::eachSeqWithId - deprecated method. Use each_seq_with_id() instead.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
549 $self->each_seq_with_id(@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
550 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
551
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
552 sub each_seq_with_id {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
553 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
554 my $id = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
555
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
556 $self->throw("Method each_seq_with_id needs a sequence name argument")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
557 unless defined $id;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
558
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
559 my (@arr, $seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
560
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
561 if (exists($self->{'_start_end_lists'}->{$id})) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
562 @arr = @{$self->{'_start_end_lists'}->{$id}};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
563 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
564 return @arr;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
565 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
566
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
567 =head2 get_seq_by_pos
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
568
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
569 Title : get_seq_by_pos
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
570 Usage : $seq = $aln->get_seq_by_pos(3) # third sequence from the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
571 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
572
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
573 Gets a sequence based on its position in the alignment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
574 Numbering starts from 1. Sequence positions larger than
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
575 no_sequences() will thow an error.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
576
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
577 Returns : a Bio::LocatableSeq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
578 Args : positive integer for the sequence osition
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
579
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
580 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
581
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
582 sub get_seq_by_pos {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
583
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
584 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
585 my ($pos) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
586
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
587 $self->throw("Sequence position has to be a positive integer, not [$pos]")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
588 unless $pos =~ /^\d+$/ and $pos > 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
589 $self->throw("No sequence at position [$pos]")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
590 unless $pos <= $self->no_sequences ;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
591
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
592 my $nse = $self->{'_order'}->{--$pos};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
593 return $self->{'_seq'}->{$nse};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
594 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
595
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
596 =head1 Create new alignments
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
597
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
598 The result of these methods are horizontal or vertical subsets of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
599 current MSE.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
600
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
601 =head2 select
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
602
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
603 Title : select
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
604 Usage : $aln2 = $aln->select(1, 3) # three first sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
605 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
606
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
607 Creates a new alignment from a continuous subset of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
608 sequences. Numbering starts from 1. Sequence positions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
609 larger than no_sequences() will thow an error.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
610
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
611 Returns : a Bio::SimpleAlign object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
612 Args : positive integer for the first sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
613 positive integer for the last sequence to include (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
614
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
615 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
616
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
617 sub select {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
618 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
619 my ($start, $end) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
620
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
621 $self->throw("Select start has to be a positive integer, not [$start]")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
622 unless $start =~ /^\d+$/ and $start > 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
623 $self->throw("Select end has to be a positive integer, not [$end]")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
624 unless $end =~ /^\d+$/ and $end > 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
625 $self->throw("Select $start [$start] has to be smaller than or equal to end [$end]")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
626 unless $start <= $end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
627
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
628 my $aln = new $self;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
629 foreach my $pos ($start .. $end) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
630 $aln->add_seq($self->get_seq_by_pos($pos));
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
631 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
632 $aln->id($self->id);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
633 return $aln;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
634 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
635
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
636 =head2 select_noncont
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
637
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
638 Title : select_noncont
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
639 Usage : $aln2 = $aln->select_noncont(1, 3) # first and 3rd sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
640 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
641
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
642 Creates a new alignment from a subset of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
643 sequences. Numbering starts from 1. Sequence positions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
644 larger than no_sequences() will thow an error.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
645
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
646 Returns : a Bio::SimpleAlign object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
647 Args : array of integers for the sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
648
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
649 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
650
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
651 sub select_noncont {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
652 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
653 my (@pos) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
654 my $end = $self->no_sequences;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
655 foreach ( @pos ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
656 $self->throw("position must be a positive integer, > 0 and <= $end not [$_]")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
657 unless( /^\d+$/ && $_ > 0 && $_ <= $end );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
658 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
659 my $aln = new $self;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
660 foreach my $p (@pos) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
661 $aln->add_seq($self->get_seq_by_pos($p));
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
662 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
663 $aln->id($self->id);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
664 return $aln;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
665 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
666
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
667 =head2 slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
668
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
669 Title : slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
670 Usage : $aln2 = $aln->slice(20, 30)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
671 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
672
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
673 Creates a slice from the alignment inclusive of start and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
674 end columns. Sequences with no residues in the slice are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
675 excluded from the new alignment and a warning is printed.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
676 Slice beyond the length of the sequence does not do
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
677 padding.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
678
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
679 Returns : a Bio::SimpleAlign object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
680 Args : positive integer for start column
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
681 positive integer for end column
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
682
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
683 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
684
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
685 sub slice {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
686 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
687 my ($start, $end) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
688
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
689 $self->throw("Slice start has to be a positive integer, not [$start]")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
690 unless $start =~ /^\d+$/ and $start > 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
691 $self->throw("Slice end has to be a positive integer, not [$end]")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
692 unless $end =~ /^\d+$/ and $end > 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
693 $self->throw("Slice $start [$start] has to be smaller than or equal to end [$end]")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
694 unless $start <= $end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
695 my $aln_length = $self->length;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
696 $self->throw("This alignment has only ". $self->length.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
697 " residues. Slice start [$start] is too bigger.")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
698 if $start > $self->length;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
699
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
700 my $aln = new $self;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
701 $aln->id($self->id);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
702 foreach my $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
703
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
704 my $new_seq = new Bio::LocatableSeq (-id => $seq->id);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
705
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
706 # seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
707 my $seq_end = $end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
708 $seq_end = $seq->length if $end > $seq->length;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
709 my $slice_seq = $seq->subseq($start, $seq_end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
710 $new_seq->seq( $slice_seq );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
711
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
712 # start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
713 if ($start > 1) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
714 my $pre_start_seq = $seq->subseq(1, $start - 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
715 $pre_start_seq =~ s/\W//g; #print "$pre_start_seq\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
716 $new_seq->start( $seq->start + CORE::length($pre_start_seq) );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
717 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
718 $new_seq->start( $seq->start);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
719 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
720
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
721 # end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
722 $slice_seq =~ s/\W//g;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
723 $new_seq->end( $new_seq->start + CORE::length($slice_seq) - 1 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
724
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
725 if ($new_seq->start and $new_seq->end >= $new_seq->start) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
726 $aln->add_seq($new_seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
727 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
728 my $nse = $seq->get_nse();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
729 $self->warn("Slice [$start-$end] of sequence [$nse] contains no residues.".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
730 " Sequence excluded from the new alignment.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
731 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
732
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
733 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
734
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
735 return $aln;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
736 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
737
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
738 =head2 remove_columns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
739
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
740 Title : remove_column
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
741 Usage : $aln2 = $aln->remove_columns(['mismatch','weak'])
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
742 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
743 Creates an aligment with columns removed corresponding to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
744 the specified criteria.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
745 Returns : a L<Bio::SimpleAlign> object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
746 Args : array ref of types, 'match'|'weak'|'strong'|'mismatch'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
747
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
748 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
749
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
750 sub remove_columns{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
751 my ($self,$type) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
752 my %matchchars = ( 'match' => '\*',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
753 'weak' => '\.',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
754 'strong' => ':',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
755 'mismatch'=> ' ',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
756 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
757 #get the characters to delete against
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
758 my $del_char;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
759 foreach my $type(@{$type}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
760 $del_char.= $matchchars{$type};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
761 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
762
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
763 my $match_line = $self->match_line;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
764 my $aln = new $self;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
765
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
766 my @remove;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
767 my $length = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
768
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
769 #do the matching to get the segments to remove
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
770 while($match_line=~m/[$del_char]/g){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
771 my $start = pos($match_line)-1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
772 $match_line=~/\G[$del_char]+/gc;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
773 my $end = pos($match_line)-1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
774
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
775 #have to offset the start and end for subsequent removes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
776 $start-=$length;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
777 $end -=$length;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
778 $length += ($end-$start+1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
779 push @remove, [$start,$end];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
780 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
781
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
782 #remove the segments
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
783 $aln = $self->_remove_col($aln,\@remove);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
784
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
785 return $aln;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
786 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
787
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
788 sub _remove_col {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
789 my ($self,$aln,$remove) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
790 my @new;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
791
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
792 #splice out the segments and create new seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
793 foreach my $seq($self->each_seq){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
794 my $new_seq = new Bio::LocatableSeq(-id=>$seq->id);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
795 my $sequence;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
796 foreach my $pair(@{$remove}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
797 my $start = $pair->[0];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
798 my $end = $pair->[1];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
799 $sequence = $seq->seq unless $sequence;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
800 my $spliced;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
801 $spliced .= $start > 0 ? substr($sequence,0,$start) : '';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
802 $spliced .= substr($sequence,$end+1,$seq->length-$end+1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
803 $sequence = $spliced;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
804 if ($start == 1) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
805 $new_seq->start($end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
806 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
807 else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
808 $new_seq->start( $seq->start);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
809 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
810 # end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
811 if($end >= $seq->end){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
812 $new_seq->end( $start);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
813 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
814 else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
815 $new_seq->end($seq->end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
816 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
817 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
818 $new_seq->seq($sequence);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
819 push @new, $new_seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
820 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
821 #add the new seqs to the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
822 foreach my $new(@new){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
823 $aln->add_seq($new);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
824 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
825 return $aln;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
826 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
827
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
828 =head1 Change sequences within the MSE
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
829
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
830 These methods affect characters in all sequences without changeing the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
831 alignment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
832
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
833
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
834 =head2 map_chars
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
835
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
836 Title : map_chars
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
837 Usage : $ali->map_chars('\.','-')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
838 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
839
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
840 Does a s/$arg1/$arg2/ on the sequences. Useful for gap
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
841 characters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
842
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
843 Notice that the from (arg1) is interpretted as a regex,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
844 so be careful about quoting meta characters (eg
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
845 $ali->map_chars('.','-') wont do what you want)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
846
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
847 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
848 Argument : 'from' rexexp
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
849 'to' string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
850
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
851 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
852
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
853 sub map_chars {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
854 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
855 my $from = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
856 my $to = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
857 my ($seq,$temp);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
858
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
859 $self->throw("Need exactly two arguments")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
860 unless defined $from and defined $to;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
861
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
862 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
863 $temp = $seq->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
864 $temp =~ s/$from/$to/g;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
865 $seq->seq($temp);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
866 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
867 return 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
868 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
869
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
870
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
871 =head2 uppercase
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
872
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
873 Title : uppercase()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
874 Usage : $ali->uppercase()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
875 Function : Sets all the sequences to uppercase
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
876 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
877 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
878
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
879 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
880
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
881 sub uppercase {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
882 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
883 my $seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
884 my $temp;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
885
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
886 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
887 $temp = $seq->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
888 $temp =~ tr/[a-z]/[A-Z]/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
889
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
890 $seq->seq($temp);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
891 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
892 return 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
893 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
894
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
895 =head2 cigar_line
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
896
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
897 Title : cigar_line()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
898 Usage : $align->cigar_line()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
899 Function : Generates a "cigar" line for each sequence in the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
900 The format is simply A-1,60;B-1,1:4,60;C-5,10:12,58
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
901 where A,B,C,etc. are the sequence identifiers, and the numbers
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
902 refer to conserved positions within the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
903 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
904
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
905 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
906
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
907 sub cigar_line {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
908 my ($self) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
909
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
910 my %cigar;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
911 my %clines;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
912 my @seqchars;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
913 my $seqcount = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
914 my $sc;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
915 foreach my $seq ( $self->each_seq ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
916 push @seqchars, [ split(//, uc ($seq->seq)) ];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
917 $sc = scalar(@seqchars);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
918 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
919
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
920 foreach my $pos ( 0..$self->length ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
921 my $i=0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
922 foreach my $seq ( @seqchars ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
923 $i++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
924 # print STDERR "Seq $i at pos $pos: ".$seq->[$pos]."\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
925 if ($seq->[$pos] eq '.') {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
926 if (defined $cigar{$i} && $clines{$i} !~ $cigar{$i}) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
927 $clines{$i}.=$cigar{$i};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
928 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
929 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
930 else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
931 if (! defined $cigar{$i}) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
932 $clines{$i}.=($pos+1).",";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
933 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
934 $cigar{$i}=$pos+1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
935 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
936 if ($pos+1 == $self->length && ($clines{$i} =~ /\,$/) ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
937 $clines{$i}.=$cigar{$i};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
938 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
939 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
940 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
941 for(my $i=1; $i<$sc+1;$i++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
942 print STDERR "Seq $i cigar line ".$clines{$i}."\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
943 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
944 return %clines;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
945 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
946
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
947 =head2 match_line
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
948
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
949 Title : match_line()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
950 Usage : $align->match_line()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
951 Function : Generates a match line - much like consensus string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
952 except that a line indicating the '*' for a match.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
953 Args : (optional) Match line characters ('*' by default)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
954 (optional) Strong match char (':' by default)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
955 (optional) Weak match char ('.' by default)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
956
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
957 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
958
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
959 sub match_line {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
960 my ($self,$matchlinechar, $strong, $weak) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
961 my %matchchars = ( 'match' => $matchlinechar || '*',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
962 'weak' => $weak || '.',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
963 'strong' => $strong || ':',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
964 'mismatch'=> ' ',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
965 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
966
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
967
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
968 my @seqchars;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
969 my $seqcount = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
970 my $alphabet;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
971 foreach my $seq ( $self->each_seq ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
972 push @seqchars, [ split(//, uc ($seq->seq)) ];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
973 $alphabet = $seq->alphabet unless defined $alphabet;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
974 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
975 my $refseq = shift @seqchars;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
976 # let's just march down the columns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
977 my $matchline;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
978 POS: foreach my $pos ( 0..$self->length ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
979 my $refchar = $refseq->[$pos];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
980 next unless $refchar; # skip ''
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
981 my %col = ($refchar => 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
982 my $dash = ($refchar eq '-' || $refchar eq '.' || $refchar eq ' ');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
983 foreach my $seq ( @seqchars ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
984 $dash = 1 if( $seq->[$pos] eq '-' || $seq->[$pos] eq '.' ||
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
985 $seq->[$pos] eq ' ' );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
986 $col{$seq->[$pos]}++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
987 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
988 my @colresidues = sort keys %col;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
989 my $char = $matchchars{'mismatch'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
990 # if all the values are the same
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
991 if( $dash ) { $char = $matchchars{'mismatch'} }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
992 elsif( @colresidues == 1 ) { $char = $matchchars{'match'} }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
993 elsif( $alphabet eq 'protein' ) { # only try to do weak/strong
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
994 # matches for protein seqs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
995 TYPE: foreach my $type ( qw(strong weak) ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
996 # iterate through categories
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
997 my %groups;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
998 # iterate through each of the aa in the col
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
999 # look to see which groups it is in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1000 foreach my $c ( @colresidues ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1001 foreach my $f ( grep /\Q$c/, @{$CONSERVATION_GROUPS{$type}} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1002 push @{$groups{$f}},$c;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1003 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1004 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1005 GRP: foreach my $cols ( values %groups ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1006 @$cols = sort @$cols;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1007 # now we are just testing to see if two arrays
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1008 # are identical w/o changing either one
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1009
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1010 # have to be same len
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1011 next if( scalar @$cols != scalar @colresidues );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1012 # walk down the length and check each slot
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1013 for($_=0;$_ < (scalar @$cols);$_++ ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1014 next GRP if( $cols->[$_] ne $colresidues[$_] );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1015 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1016 $char = $matchchars{$type};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1017 last TYPE;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1018 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1019 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1020 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1021 $matchline .= $char;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1022 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1023 return $matchline;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1024 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1025
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1026 =head2 match
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1027
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1028 Title : match()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1029 Usage : $ali->match()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1030 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1031
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1032 Goes through all columns and changes residues that are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1033 identical to residue in first sequence to match '.'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1034 character. Sets match_char.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1035
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1036 USE WITH CARE: Most MSE formats do not support match
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1037 characters in sequences, so this is mostly for output
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1038 only. NEXUS format (Bio::AlignIO::nexus) can handle
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1039 it.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1040
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1041 Returns : 1
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1042 Argument : a match character, optional, defaults to '.'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1043
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1044 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1045
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1046 sub match {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1047 my ($self, $match) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1048
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1049 $match ||= '.';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1050 my ($matching_char) = $match;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1051 $matching_char = "\\$match" if $match =~ /[\^.$|()\[\]]/ ; #';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1052 $self->map_chars($matching_char, '-');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1053
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1054 my @seqs = $self->each_seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1055 return 1 unless scalar @seqs > 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1056
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1057 my $refseq = shift @seqs ;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1058 my @refseq = split //, $refseq->seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1059 my $gapchar = $self->gap_char;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1060
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1061 foreach my $seq ( @seqs ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1062 my @varseq = split //, $seq->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1063 for ( my $i=0; $i < scalar @varseq; $i++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1064 $varseq[$i] = $match if defined $refseq[$i] &&
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1065 ( $refseq[$i] =~ /[A-Za-z\*]/ ||
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1066 $refseq[$i] =~ /$gapchar/ )
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1067 && $refseq[$i] eq $varseq[$i];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1068 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1069 $seq->seq(join '', @varseq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1070 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1071 $self->match_char($match);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1072 return 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1073 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1074
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1075
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1076 =head2 unmatch
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1077
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1078 Title : unmatch()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1079 Usage : $ali->unmatch()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1080 Function : Undoes the effect of method match. Unsets match_char.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1081
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1082 Returns : 1
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1083 Argument : a match character, optional, defaults to '.'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1084
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1085 See L<match> and L<match_char>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1086
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1087 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1088
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1089 sub unmatch {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1090 my ($self, $match) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1091
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1092 $match ||= '.';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1093
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1094 my @seqs = $self->each_seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1095 return 1 unless scalar @seqs > 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1096
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1097 my $refseq = shift @seqs ;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1098 my @refseq = split //, $refseq->seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1099 my $gapchar = $self->gap_char;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1100 foreach my $seq ( @seqs ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1101 my @varseq = split //, $seq->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1102 for ( my $i=0; $i < scalar @varseq; $i++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1103 $varseq[$i] = $refseq[$i] if defined $refseq[$i] &&
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1104 ( $refseq[$i] =~ /[A-Za-z\*]/ ||
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1105 $refseq[$i] =~ /$gapchar/ ) &&
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1106 $varseq[$i] eq $match;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1107 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1108 $seq->seq(join '', @varseq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1109 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1110 $self->match_char('');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1111 return 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1112 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1113
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1114 =head1 MSE attibutes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1115
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1116 Methods for setting and reading the MSE attributes.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1117
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1118 Note that the methods defining character semantics depend on the user
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1119 to set them sensibly. They are needed only by certain input/output
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1120 methods. Unset them by setting to an empty string ('').
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1121
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1122 =head2 id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1123
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1124 Title : id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1125 Usage : $myalign->id("Ig")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1126 Function : Gets/sets the id field of the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1127 Returns : An id string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1128 Argument : An id string (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1129
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1130 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1131
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1132 sub id {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1133 my ($self, $name) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1134
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1135 if (defined( $name )) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1136 $self->{'_id'} = $name;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1137 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1138
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1139 return $self->{'_id'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1140 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1141
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1142 =head2 missing_char
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1143
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1144 Title : missing_char
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1145 Usage : $myalign->missing_char("?")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1146 Function : Gets/sets the missing_char attribute of the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1147 It is generally recommended to set it to 'n' or 'N'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1148 for nucleotides and to 'X' for protein.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1149 Returns : An missing_char string,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1150 Argument : An missing_char string (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1151
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1152 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1153
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1154 sub missing_char {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1155 my ($self, $char) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1156
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1157 if (defined $char ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1158 $self->throw("Single missing character, not [$char]!") if CORE::length($char) > 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1159 $self->{'_missing_char'} = $char;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1160 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1161
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1162 return $self->{'_missing_char'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1163 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1164
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1165 =head2 match_char
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1166
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1167 Title : match_char
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1168 Usage : $myalign->match_char('.')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1169 Function : Gets/sets the match_char attribute of the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1170 Returns : An match_char string,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1171 Argument : An match_char string (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1172
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1173 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1174
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1175 sub match_char {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1176 my ($self, $char) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1177
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1178 if (defined $char ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1179 $self->throw("Single match character, not [$char]!") if CORE::length($char) > 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1180 $self->{'_match_char'} = $char;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1181 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1182
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1183 return $self->{'_match_char'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1184 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1185
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1186 =head2 gap_char
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1187
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1188 Title : gap_char
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1189 Usage : $myalign->gap_char('-')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1190 Function : Gets/sets the gap_char attribute of the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1191 Returns : An gap_char string, defaults to '-'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1192 Argument : An gap_char string (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1193
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1194 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1195
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1196 sub gap_char {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1197 my ($self, $char) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1198
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1199 if (defined $char || ! defined $self->{'_gap_char'} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1200 $char= '-' unless defined $char;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1201 $self->throw("Single gap character, not [$char]!") if CORE::length($char) > 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1202 $self->{'_gap_char'} = $char;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1203 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1204 return $self->{'_gap_char'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1205 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1206
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1207 =head2 symbol_chars
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1208
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1209 Title : symbol_chars
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1210 Usage : my @symbolchars = $aln->symbol_chars;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1211 Function: Returns all the seen symbols (other than gaps)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1212 Returns : array of characters that are the seen symbols
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1213 Args : boolean to include the gap/missing/match characters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1214
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1215 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1216
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1217 sub symbol_chars{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1218 my ($self,$includeextra) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1219 if( ! defined $self->{'_symbols'} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1220 $self->warn("Symbol list was not initialized");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1221 return ();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1222 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1223 my %copy = %{$self->{'_symbols'}};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1224 if( ! $includeextra ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1225 foreach my $char ( $self->gap_char, $self->match_char,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1226 $self->missing_char) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1227 delete $copy{$char} if( defined $char );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1228 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1229 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1230 return keys %copy;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1231 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1232
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1233 =head1 Alignment descriptors
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1234
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1235 These read only methods describe the MSE in various ways.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1236
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1237
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1238 =head2 consensus_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1239
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1240 Title : consensus_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1241 Usage : $str = $ali->consensus_string($threshold_percent)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1242 Function : Makes a strict consensus
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1243 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1244 Argument : Optional treshold ranging from 0 to 100.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1245 The consensus residue has to appear at least threshold %
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1246 of the sequences at a given location, otherwise a '?'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1247 character will be placed at that location.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1248 (Default value = 0%)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1249
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1250 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1251
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1252 sub consensus_string {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1253 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1254 my $threshold = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1255 my $len;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1256 my ($out,$count);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1257
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1258 $out = "";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1259
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1260 $len = $self->length - 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1261
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1262 foreach $count ( 0 .. $len ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1263 $out .= $self->_consensus_aa($count,$threshold);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1264 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1265 return $out;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1266 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1267
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1268 sub _consensus_aa {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1269 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1270 my $point = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1271 my $threshold_percent = shift || -1 ;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1272 my ($seq,%hash,$count,$letter,$key);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1273
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1274 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1275 $letter = substr($seq->seq,$point,1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1276 $self->throw("--$point-----------") if $letter eq '';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1277 ($letter =~ /\./) && next;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1278 # print "Looking at $letter\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1279 $hash{$letter}++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1280 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1281 my $number_of_sequences = $self->no_sequences();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1282 my $threshold = $number_of_sequences * $threshold_percent / 100. ;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1283 $count = -1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1284 $letter = '?';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1285
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1286 foreach $key ( sort keys %hash ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1287 # print "Now at $key $hash{$key}\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1288 if( $hash{$key} > $count && $hash{$key} >= $threshold) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1289 $letter = $key;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1290 $count = $hash{$key};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1291 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1292 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1293 return $letter;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1294 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1295
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1296
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1297 =head2 consensus_iupac
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1298
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1299 Title : consensus_iupac
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1300 Usage : $str = $ali->consensus_iupac()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1301 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1302
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1303 Makes a consensus using IUPAC ambiguity codes from DNA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1304 and RNA. The output is in upper case except when gaps in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1305 a column force output to be in lower case.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1306
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1307 Note that if your alignment sequences contain a lot of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1308 IUPAC ambiquity codes you often have to manually set
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1309 alphabet. Bio::PrimarySeq::_guess_type thinks they
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1310 indicate a protein sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1311
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1312 Returns : consensus string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1313 Argument : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1314 Throws : on protein sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1315
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1316 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1317
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1318 sub consensus_iupac {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1319 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1320 my $out = "";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1321 my $len = $self->length-1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1322
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1323 # only DNA and RNA sequences are valid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1324 foreach my $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1325 $self->throw("Seq [". $seq->get_nse. "] is a protein")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1326 if $seq->alphabet eq 'protein';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1327 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1328 # loop over the alignment columns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1329 foreach my $count ( 0 .. $len ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1330 $out .= $self->_consensus_iupac($count);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1331 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1332 return $out;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1333 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1334
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1335 sub _consensus_iupac {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1336 my ($self, $column) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1337 my ($string, $char, $rna);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1338
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1339 #determine all residues in a column
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1340 foreach my $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1341 $string .= substr($seq->seq, $column, 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1342 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1343 $string = uc $string;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1344
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1345 # quick exit if there's an N in the string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1346 if ($string =~ /N/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1347 $string =~ /\W/ ? return 'n' : return 'N';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1348 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1349 # ... or if there are only gap characters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1350 return '-' if $string =~ /^\W+$/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1351
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1352 # treat RNA as DNA in regexps
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1353 if ($string =~ /U/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1354 $string =~ s/U/T/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1355 $rna = 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1356 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1357
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1358 # the following s///'s only need to be done to the _first_ ambiguity code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1359 # as we only need to see the _range_ of characters in $string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1360
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1361 if ($string =~ /[VDHB]/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1362 $string =~ s/V/AGC/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1363 $string =~ s/D/AGT/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1364 $string =~ s/H/ACT/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1365 $string =~ s/B/CTG/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1366 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1367
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1368 if ($string =~ /[SKYRWM]/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1369 $string =~ s/S/GC/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1370 $string =~ s/K/GT/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1371 $string =~ s/Y/CT/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1372 $string =~ s/R/AG/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1373 $string =~ s/W/AT/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1374 $string =~ s/M/AC/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1375 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1376
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1377 # and now the guts of the thing
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1378
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1379 if ($string =~ /A/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1380 $char = 'A'; # A A
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1381 if ($string =~ /G/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1382 $char = 'R'; # A and G (purines) R
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1383 if ($string =~ /C/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1384 $char = 'V'; # A and G and C V
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1385 if ($string =~ /T/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1386 $char = 'N'; # A and G and C and T N
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1387 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1388 } elsif ($string =~ /T/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1389 $char = 'D'; # A and G and T D
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1390 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1391 } elsif ($string =~ /C/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1392 $char = 'M'; # A and C M
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1393 if ($string =~ /T/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1394 $char = 'H'; # A and C and T H
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1395 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1396 } elsif ($string =~ /T/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1397 $char = 'W'; # A and T W
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1398 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1399 } elsif ($string =~ /C/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1400 $char = 'C'; # C C
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1401 if ($string =~ /T/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1402 $char = 'Y'; # C and T (pyrimidines) Y
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1403 if ($string =~ /G/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1404 $char = 'B'; # C and T and G B
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1405 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1406 } elsif ($string =~ /G/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1407 $char = 'S'; # C and G S
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1408 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1409 } elsif ($string =~ /G/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1410 $char = 'G'; # G G
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1411 if ($string =~ /C/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1412 $char = 'S'; # G and C S
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1413 } elsif ($string =~ /T/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1414 $char = 'K'; # G and T K
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1415 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1416 } elsif ($string =~ /T/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1417 $char = 'T'; # T T
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1418 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1419
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1420 $char = 'U' if $rna and $char eq 'T';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1421 $char = lc $char if $string =~ /\W/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1422
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1423 return $char;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1424 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1425
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1426 =head2 is_flush
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1427
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1428 Title : is_flush
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1429 Usage : if( $ali->is_flush() )
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1430 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1431 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1432 Function : Tells you whether the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1433 : is flush, ie all of the same length
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1434 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1435 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1436 Returns : 1 or 0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1437 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1438
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1439 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1440
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1441 sub is_flush {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1442 my ($self,$report) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1443 my $seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1444 my $length = (-1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1445 my $temp;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1446
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1447 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1448 if( $length == (-1) ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1449 $length = CORE::length($seq->seq());
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1450 next;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1451 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1452
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1453 $temp = CORE::length($seq->seq());
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1454 if( $temp != $length ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1455 $self->warn("expecting $length not $temp from ".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1456 $seq->display_id) if( $report );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1457 $self->debug("expecting $length not $temp from ".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1458 $seq->display_id);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1459 $self->debug($seq->seq(). "\n");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1460 return 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1461 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1462 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1463
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1464 return 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1465 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1466
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1467
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1468 =head2 length
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1469
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1470 Title : length()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1471 Usage : $len = $ali->length()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1472 Function : Returns the maximum length of the alignment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1473 To be sure the alignment is a block, use is_flush
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1474 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1475 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1476
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1477 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1478
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1479 sub length_aln {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1480 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1481 $self->warn(ref($self). "::length_aln - deprecated method. Use length() instead.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1482 $self->length(@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1483 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1484
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1485 sub length {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1486 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1487 my $seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1488 my $length = (-1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1489 my ($temp,$len);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1490
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1491 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1492 $temp = CORE::length($seq->seq());
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1493 if( $temp > $length ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1494 $length = $temp;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1495 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1496 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1497
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1498 return $length;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1499 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1500
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1501
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1502 =head2 maxdisplayname_length
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1503
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1504 Title : maxdisplayname_length
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1505 Usage : $ali->maxdisplayname_length()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1506 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1507
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1508 Gets the maximum length of the displayname in the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1509 alignment. Used in writing out various MSE formats.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1510
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1511 Returns : integer
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1512 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1513
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1514 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1515
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1516 sub maxname_length {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1517 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1518 $self->warn(ref($self). "::maxname_length - deprecated method.".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1519 " Use maxdisplayname_length() instead.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1520 $self->maxdisplayname_length();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1521 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1522
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1523 sub maxnse_length {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1524 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1525 $self->warn(ref($self). "::maxnse_length - deprecated method.".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1526 " Use maxnse_length() instead.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1527 $self->maxdisplayname_length();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1528 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1529
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1530 sub maxdisplayname_length {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1531 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1532 my $maxname = (-1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1533 my ($seq,$len);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1534
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1535 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1536 $len = CORE::length $self->displayname($seq->get_nse());
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1537
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1538 if( $len > $maxname ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1539 $maxname = $len;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1540 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1541 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1542
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1543 return $maxname;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1544 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1545
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1546 =head2 no_residues
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1547
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1548 Title : no_residues
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1549 Usage : $no = $ali->no_residues
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1550 Function : number of residues in total in the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1551 Returns : integer
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1552 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1553
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1554 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1555
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1556 sub no_residues {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1557 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1558 my $count = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1559
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1560 foreach my $seq ($self->each_seq) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1561 my $str = $seq->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1562
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1563 $count += ($str =~ s/[^A-Za-z]//g);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1564 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1565
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1566 return $count;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1567 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1568
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1569 =head2 no_sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1570
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1571 Title : no_sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1572 Usage : $depth = $ali->no_sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1573 Function : number of sequence in the sequence alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1574 Returns : integer
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1575 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1576
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1577 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1578
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1579 sub no_sequences {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1580 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1581
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1582 return scalar($self->each_seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1583 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1584
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1585
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1586 =head2 average_percentage_identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1587
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1588 Title : average_percentage_identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1589 Usage : $id = $align->average_percentage_identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1590 Function: The function uses a fast method to calculate the average
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1591 percentage identity of the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1592 Returns : The average percentage identity of the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1593 Args : None
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1594 Notes : This method implemented by Kevin Howe calculates a figure that is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1595 designed to be similar to the average pairwise identity of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1596 alignment (identical in the absence of gaps), without having to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1597 explicitly calculate pairwise identities proposed by Richard Durbin.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1598 Validated by Ewan Birney ad Alex Bateman.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1599
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1600 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1601
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1602 sub average_percentage_identity{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1603 my ($self,@args) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1604
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1605 my @alphabet = ('A','B','C','D','E','F','G','H','I','J','K','L','M',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1606 'N','O','P','Q','R','S','T','U','V','W','X','Y','Z');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1607
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1608 my ($len, $total, $subtotal, $divisor, $subdivisor, @seqs, @countHashes);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1609
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1610 if (! $self->is_flush()) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1611 $self->throw("All sequences in the alignment must be the same length");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1612 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1613
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1614 @seqs = $self->each_seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1615 $len = $self->length();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1616
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1617 # load the each hash with correct keys for existence checks
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1618
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1619 for( my $index=0; $index < $len; $index++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1620 foreach my $letter (@alphabet) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1621 $countHashes[$index]->{$letter} = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1622 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1623 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1624 foreach my $seq (@seqs) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1625 my @seqChars = split //, $seq->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1626 for( my $column=0; $column < @seqChars; $column++ ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1627 my $char = uc($seqChars[$column]);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1628 if (exists $countHashes[$column]->{$char}) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1629 $countHashes[$column]->{$char}++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1630 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1631 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1632 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1633
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1634 $total = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1635 $divisor = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1636 for(my $column =0; $column < $len; $column++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1637 my %hash = %{$countHashes[$column]};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1638 $subdivisor = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1639 foreach my $res (keys %hash) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1640 $total += $hash{$res}*($hash{$res} - 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1641 $subdivisor += $hash{$res};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1642 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1643 $divisor += $subdivisor * ($subdivisor - 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1644 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1645 return $divisor > 0 ? ($total / $divisor )*100.0 : 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1646 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1647
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1648 =head2 percentage_identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1649
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1650 Title : percentage_identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1651 Usage : $id = $align->percentage_identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1652 Function: The function calculates the average percentage identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1653 (aliased for average_percentage_identity)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1654 Returns : The average percentage identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1655 Args : None
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1656
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1657 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1658
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1659 sub percentage_identity {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1660 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1661 return $self->average_percentage_identity();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1662 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1663
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1664 =head2 overall_percentage_identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1665
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1666 Title : percentage_identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1667 Usage : $id = $align->percentage_identity
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1668 Function: The function calculates the percentage identity of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1669 the conserved columns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1670 Returns : The percentage identity of the conserved columns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1671 Args : None
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1672
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1673 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1674
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1675 sub overall_percentage_identity{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1676 my ($self,@args) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1677
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1678 my @alphabet = ('A','B','C','D','E','F','G','H','I','J','K','L','M',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1679 'N','O','P','Q','R','S','T','U','V','W','X','Y','Z');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1680
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1681 my ($len, $total, @seqs, @countHashes);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1682
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1683 if (! $self->is_flush()) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1684 $self->throw("All sequences in the alignment must be the same length");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1685 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1686
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1687 @seqs = $self->each_seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1688 $len = $self->length();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1689
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1690 # load the each hash with correct keys for existence checks
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1691 for( my $index=0; $index < $len; $index++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1692 foreach my $letter (@alphabet) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1693 $countHashes[$index]->{$letter} = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1694 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1695 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1696 foreach my $seq (@seqs) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1697 my @seqChars = split //, $seq->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1698 for( my $column=0; $column < @seqChars; $column++ ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1699 my $char = uc($seqChars[$column]);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1700 if (exists $countHashes[$column]->{$char}) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1701 $countHashes[$column]->{$char}++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1702 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1703 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1704 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1705
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1706 $total = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1707 for(my $column =0; $column < $len; $column++) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1708 my %hash = %{$countHashes[$column]};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1709 foreach ( values %hash ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1710 next if( $_ == 0 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1711 $total++ if( $_ == scalar @seqs );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1712 last;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1713 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1714 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1715 return ($total / $len ) * 100.0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1716 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1717
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1718 =head1 Alignment positions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1719
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1720 Methods to map a sequence position into an alignment column and back.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1721 column_from_residue_number() does the former. The latter is really a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1722 property of the sequence object and can done using
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1723 L<Bio::LocatableSeq::location_from_column>:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1724
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1725 # select somehow a sequence from the alignment, e.g.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1726 my $seq = $aln->get_seq_by_pos(1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1727 #$loc is undef or Bio::LocationI object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1728 my $loc = $seq->location_from_column(5);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1729
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1730
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1731 =head2 column_from_residue_number
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1732
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1733 Title : column_from_residue_number
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1734 Usage : $col = $ali->column_from_residue_number( $seqname, $resnumber)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1735 Function:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1736
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1737 This function gives the position in the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1738 (i.e. column number) of the given residue number in the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1739 sequence with the given name. For example, for the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1740 alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1741
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1742 Seq1/91-97 AC..DEF.GH
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1743 Seq2/24-30 ACGG.RTY..
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1744 Seq3/43-51 AC.DDEFGHI
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1745
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1746 column_from_residue_number( "Seq1", 94 ) returns 5.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1747 column_from_residue_number( "Seq2", 25 ) returns 2.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1748 column_from_residue_number( "Seq3", 50 ) returns 9.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1749
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1750 An exception is thrown if the residue number would lie
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1751 outside the length of the aligment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1752 (e.g. column_from_residue_number( "Seq2", 22 )
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1753
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1754 Note: If the the parent sequence is represented by more than
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1755 one alignment sequence and the residue number is present in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1756 them, this method finds only the first one.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1757
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1758 Returns : A column number for the position in the alignment of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1759 given residue in the given sequence (1 = first column)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1760 Args : A sequence id/name (not a name/start-end)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1761 A residue number in the whole sequence (not just that
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1762 segment of it in the alignment)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1763
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1764 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1765
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1766 sub column_from_residue_number {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1767 my ($self, $name, $resnumber) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1768
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1769 $self->throw("No sequence with name [$name]") unless $self->{'_start_end_lists'}->{$name};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1770 $self->throw("Second argument residue number missing") unless $resnumber;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1771
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1772 foreach my $seq ($self->each_seq_with_id($name)) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1773 my $col;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1774 eval {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1775 $col = $seq->column_from_residue_number($resnumber);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1776 };
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1777 next if $@;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1778 return $col;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1779 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1780
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1781 $self->throw("Could not find a sequence segment in $name ".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1782 "containing residue number $resnumber");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1783
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1784 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1785
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1786 =head1 Sequence names
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1787
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1788 Methods to manipulate the display name. The default name based on the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1789 sequence id and subsequence positions can be overridden in various
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1790 ways.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1791
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1792 =head2 displayname
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1793
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1794 Title : displayname
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1795 Usage : $myalign->displayname("Ig", "IgA")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1796 Function : Gets/sets the display name of a sequence in the alignment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1797 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1798 Returns : A display name string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1799 Argument : name of the sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1800 displayname of the sequence (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1801
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1802 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1803
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1804 sub get_displayname {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1805 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1806 $self->warn(ref($self). "::get_displayname - deprecated method. Use displayname() instead.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1807 $self->displayname(@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1808 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1809
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1810 sub set_displayname {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1811 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1812 $self->warn(ref($self). "::set_displayname - deprecated method. Use displayname() instead.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1813 $self->displayname(@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1814 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1815
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1816 sub displayname {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1817 my ($self, $name, $disname) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1818
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1819 $self->throw("No sequence with name [$name]") unless $self->{'_seq'}->{$name};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1820
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1821 if( $disname and $name) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1822 $self->{'_dis_name'}->{$name} = $disname;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1823 return $disname;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1824 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1825 elsif( defined $self->{'_dis_name'}->{$name} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1826 return $self->{'_dis_name'}->{$name};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1827 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1828 return $name;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1829 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1830 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1831
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1832 =head2 set_displayname_count
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1833
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1834 Title : set_displayname_count
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1835 Usage : $ali->set_displayname_count
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1836 Function :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1837
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1838 Sets the names to be name_# where # is the number of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1839 times this name has been used.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1840
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1841 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1842 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1843
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1844 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1845
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1846 sub set_displayname_count {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1847 my $self= shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1848 my (@arr,$name,$seq,$count,$temp,$nse);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1849
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1850 foreach $seq ( $self->each_alphabetically() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1851 $nse = $seq->get_nse();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1852
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1853 #name will be set when this is the second
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1854 #time (or greater) is has been seen
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1855
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1856 if( defined $name and $name eq ($seq->id()) ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1857 $temp = sprintf("%s_%s",$name,$count);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1858 $self->displayname($nse,$temp);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1859 $count++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1860 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1861 $count = 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1862 $name = $seq->id();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1863 $temp = sprintf("%s_%s",$name,$count);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1864 $self->displayname($nse,$temp);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1865 $count++;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1866 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1867 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1868 return 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1869 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1870
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1871 =head2 set_displayname_flat
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1872
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1873 Title : set_displayname_flat
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1874 Usage : $ali->set_displayname_flat()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1875 Function : Makes all the sequences be displayed as just their name,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1876 not name/start-end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1877 Returns : 1
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1878 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1879
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1880 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1881
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1882 sub set_displayname_flat {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1883 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1884 my ($nse,$seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1885
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1886 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1887 $nse = $seq->get_nse();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1888 $self->displayname($nse,$seq->id());
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1889 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1890 return 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1891 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1892
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1893 =head2 set_displayname_normal
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1894
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1895 Title : set_displayname_normal
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1896 Usage : $ali->set_displayname_normal()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1897 Function : Makes all the sequences be displayed as name/start-end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1898 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1899 Argument :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1900
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1901 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1902
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1903 sub set_displayname_normal {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1904 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1905 my ($nse,$seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1906
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1907 foreach $seq ( $self->each_seq() ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1908 $nse = $seq->get_nse();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1909 $self->displayname($nse,$nse);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1910 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1911 return 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1912 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1913
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1914 =head2 source
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1915
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1916 Title : source
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1917 Usage : $obj->source($newval)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1918 Function: sets the Alignment source program
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1919 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1920 Returns : value of source
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1921 Args : newvalue (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1922
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1923
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1924 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1925
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1926 sub source{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1927 my ($self,$value) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1928 if( defined $value) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1929 $self->{'_source'} = $value;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1930 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1931 return $self->{'_source'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1932 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1933
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1934 1;