annotate variant_effect_predictor/Bio/EnsEMBL/DnaDnaAlignFeature.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::DnaDnaAlignFeature - Ensembl specific dna-dna pairwise
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24 alignment feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 See BaseAlignFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33 package Bio::EnsEMBL::DnaDnaAlignFeature;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 use Bio::EnsEMBL::BaseAlignFeature;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 use vars qw(@ISA);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 use Bio::SimpleAlign;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41 use Bio::LocatableSeq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44 @ISA = qw( Bio::EnsEMBL::BaseAlignFeature );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47 =head2 new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49 Arg [..] : List of named arguments. (-pair_dna_align_feature_id) defined
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50 in this constructor, others defined in BaseFeaturePair and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 SeqFeature superclasses.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52 Example : $daf = new DnaDnaAlignFeature(-cigar_string => '3M3I12M');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53 Description: Creates a new DnaDnaAlignFeature using either a cigarstring or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54 a list of ungapped features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55 Returntype : Bio::EnsEMBL::DnaDnaAlignFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub new {
  # The invocant may be either a class name or an existing object.
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  # The superclass constructor is handed the complete argument list.
  my $self = $class->SUPER::new(@_);

  # Only the named argument specific to this class is extracted here; it is
  # stored on the object only when the caller actually supplied it.
  my ($pair_id) = rearrange( ['PAIR_DNA_ALIGN_FEATURE_ID'], @_ );
  $self->{'pair_dna_align_feature_id'} = $pair_id if defined $pair_id;

  return $self;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78 =head2 pair_dna_align_feature_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80 Arg[1] : (optional) String $arg - value to set
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81 Example : $self->pair_dna_align_feature_id($pair_feature_id);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82 Description: Getter/setter for attribute 'pair_dna_align_feature_id'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83 The id of the dna feature aligned
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84 Returntype : String
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub pair_dna_align_feature_id {
  my $self = shift;
  my $arg  = shift;

  # Acts as a setter only for a defined value; an undefined argument leaves
  # the stored id untouched, so the attribute cannot be reset to undef here.
  $self->{pair_dna_align_feature_id} = $arg if defined $arg;

  return $self->{pair_dna_align_feature_id};
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99 =head2 _hit_unit
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 Arg [1] : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102 Description: PRIVATE implementation of abstract superclass method. Returns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 1 as the 'unit' used for the hit sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104 Returntype : int
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
105 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
106 Caller : Bio::EnsEMBL::BaseAlignFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
107 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
108
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
109 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
110
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Unit used for the hit sequence; always 1 for this class.
sub _hit_unit { return 1; }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
114
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
115
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
116
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
117 =head2 _query_unit
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
118
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
119 Arg [1] : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
120 Description: PRIVATE implementation of abstract superclass method Returns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
121 1 as the 'unit' used for the query sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
122 Returntype : int
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
123 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
124 Caller : Bio::EnsEMBL::BaseAlignFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
125 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
126
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
127 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
128
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Unit used for the query sequence; always 1 for this class.
sub _query_unit { return 1; }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
132
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
133 =head2 restrict_between_positions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
134
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
135 Arg [1] : int $start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
136 Arg [2] : int $end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
137 Arg [3] : string $flags
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
138 SEQ = $start and $end apply to the seq sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
139 i.e. start and end methods
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
140 HSEQ = $start and $end apply to the hseq sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
141 i.e. hstart and hend methods
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
142 Example : $daf->restrict_between_positions(150,543,"SEQ")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
143 Description: Build a new DnaDnaAlignFeature object that fits within
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
144 the new specified coordinates and sequence reference, cutting
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
145 any pieces hanging upstream and downstream.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
146 Returntype : Bio::EnsEMBL::DnaDnaAlignFeature object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
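147 Exceptions : thrown if $start or $end is not defined or not a string of digits, or if $flags is not 'SEQ' or 'HSEQ'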
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148 Caller :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
150
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
151 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
152
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub restrict_between_positions {
  my ($self, $start, $end, $seqref) = @_;

  # Validate the arguments up front; every failure is reported via throw().
  unless (defined $start && $start =~ /^\d+$/) {
    $self->throw("The first argument is not defined or is not an integer");
  }
  unless (defined $end && $end =~ /^\d+$/) {
    $self->throw("The second argument is not defined or is not an integer");
  }
  unless (defined $seqref &&
          ($seqref eq "SEQ" || $seqref eq "HSEQ")) {
    $self->throw("The third argument is not defined or is not equal to 'SEQ' or 'HSEQ'");
  }

  # Accessors are selected by name at run time: the "1" set belongs to the
  # sequence that $start/$end refer to, the "2" set to the other sequence.
  # TODO: calling methods through name strings should be rewritten at some
  # stage.
  my ($start_method1, $end_method1, $strand_method1,
      $start_method2, $end_method2, $strand_method2) =
    $seqref eq "HSEQ"
      ? qw(hstart hend hstrand start end strand)
      : qw(start end strand hstart hend hstrand);

  my @restricted_features;

  foreach my $ungapped_feature ($self->ungapped_features) {

    # Entirely outside the requested range: drop the piece.
    next if ($ungapped_feature->$start_method1() > $end ||
             $ungapped_feature->$end_method1() < $start);

    # A piece lying entirely inside the range is kept untouched; one that
    # hangs over either boundary is trimmed in place first.
    if ($ungapped_feature->$start_method1() < $start ||
        $ungapped_feature->$end_method1() > $end) {

      if ($ungapped_feature->$strand_method1() eq $ungapped_feature->$strand_method2()) {

        # Same orientation: trimming the start (end) of one sequence trims
        # the start (end) of the other by the same amount.
        if ($ungapped_feature->$start_method1() < $start) {
          my $offset = $start - $ungapped_feature->$start_method1();
          $ungapped_feature->$start_method1($start);
          $ungapped_feature->$start_method2($ungapped_feature->$start_method2() + $offset);
        }
        if ($ungapped_feature->$end_method1() > $end) {
          my $offset = $ungapped_feature->$end_method1() - $end;
          $ungapped_feature->$end_method1($end);
          $ungapped_feature->$end_method2($ungapped_feature->$end_method2() - $offset);
        }

      } else {

        # Opposite orientation: trimming the start of one sequence trims the
        # end of the other, and vice versa.
        if ($ungapped_feature->$start_method1() < $start) {
          my $offset = $start - $ungapped_feature->$start_method1();
          $ungapped_feature->$start_method1($start);
          $ungapped_feature->$end_method2($ungapped_feature->$end_method2() - $offset);
        }
        if ($ungapped_feature->$end_method1() > $end) {
          my $offset = $ungapped_feature->$end_method1() - $end;
          $ungapped_feature->$end_method1($end);
          $ungapped_feature->$start_method2($ungapped_feature->$start_method2() + $offset);
        }
      }
    }

    push @restricted_features, $ungapped_feature;
  }

  # Nothing overlaps the requested range.  An explicit undef (rather than a
  # bare return) is kept on purpose so that callers in list context still
  # receive exactly one element, as they always have.
  return undef unless (scalar @restricted_features);

  # Explicit class-method call: the indirect-object form "new Class(...)" is
  # resolved heuristically by the parser and is fragile inside a package that
  # defines its own new().
  my $restricted_feature =
    Bio::EnsEMBL::DnaDnaAlignFeature->new('-features' => \@restricted_features);

  # Carry over the slices when the original feature has them.
  if (defined $self->slice) {
    $restricted_feature->slice($self->slice);
  }
  if (defined $self->hslice) {
    $restricted_feature->hslice($self->hslice);
  }

  return $restricted_feature;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
245
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
246 =head2 alignment_strings
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
247
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
248 Arg [1] : list of string $flags
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
249 FIX_SEQ = does not introduce gaps (dashes) in seq aligned sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
250 and delete the corresponding insertions in hseq aligned sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
251 FIX_HSEQ = does not introduce gaps (dashes) in hseq aligned sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
252 and delete the corresponding insertions in seq aligned sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
253 NO_SEQ = return the seq aligned sequence as an empty string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
254 NO_HSEQ = return the hseq aligned sequence as an empty string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
255 These last 2 flags save a bit of time, since no query to the core
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
256 database is done to get the sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
257 Example : $daf->alignment_strings or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
258 $daf->alignment_strings("FIX_HSEQ") or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
259 $daf->alignment_strings("NO_SEQ","FIX_SEQ")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
260 Description: Allows to rebuild the alignment string of both the seq and hseq sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
261 using the cigar_string information and the slice and hslice objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
262 Returntype : array reference containing 2 strings
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
263 the first corresponds to seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
264 the second corresponds to hseq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
265 Exceptions :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
266 Caller :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
267 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
268
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
269 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
270
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
271
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub alignment_strings {
  my ( $self, @flags ) = @_;

  # Flag defaults: emit both sequences, and introduce gaps in both.
  my $seq_flag = 1;
  my $hseq_flag = 1;
  my $fix_seq_flag = 0;
  my $fix_hseq_flag = 0;

  for my $flag ( @flags ) {
    $seq_flag = 0 if ($flag eq "NO_SEQ");
    $hseq_flag = 0 if ($flag eq "NO_HSEQ");
    $fix_seq_flag = 1 if ($flag eq "FIX_SEQ");
    $fix_hseq_flag = 1 if ($flag eq "FIX_HSEQ");
  }

  # Fetch a sequence only when it is going to be emitted.  The FIX_* flags
  # merely decide whether gap columns are produced; they never read the
  # sequence themselves, so e.g. ("NO_SEQ","FIX_SEQ") needs no seq lookup.
  my ($seq, $hseq);
  $seq = $self->slice->subseq($self->start, $self->end, $self->strand) if ($seq_flag);
  $hseq = $self->hslice->subseq($self->hstart, $self->hend, $self->hstrand) if ($hseq_flag);

  # Result strings for seq and hseq, and the read positions in the ungapped
  # source strings.
  my $rseq = "";
  my $rhseq = "";
  my $seq_pos = 0;
  my $hseq_pos = 0;

  my @cig = ( $self->cigar_string =~ /(\d*[DIM])/g );

  for my $cigElem ( @cig ) {
    my $cigType = substr( $cigElem, -1, 1 );
    my $cigCount = substr( $cigElem, 0 ,-1 );
    # An element without a count stands for a single position.
    $cigCount = 1 unless $cigCount;

    if( $cigType eq "M" ) {
      # Match: both sequences advance together.
      $rseq .= substr( $seq, $seq_pos, $cigCount ) if ($seq_flag);
      $rhseq .= substr( $hseq, $hseq_pos, $cigCount ) if ($hseq_flag);
      $seq_pos += $cigCount;
      $hseq_pos += $cigCount;
    } elsif( $cigType eq "D" ) {
      # Gap in seq against bases of hseq; with FIX_SEQ the whole column is
      # dropped, but hseq is still consumed.
      if( ! $fix_seq_flag ) {
        $rseq .= "-" x $cigCount if ($seq_flag);
        $rhseq .= substr( $hseq, $hseq_pos, $cigCount ) if ($hseq_flag);
      }
      $hseq_pos += $cigCount;
    } elsif( $cigType eq "I" ) {
      # Bases of seq against a gap in hseq; with FIX_HSEQ the whole column
      # is dropped, but seq is still consumed.
      if( ! $fix_hseq_flag ) {
        $rseq .= substr( $seq, $seq_pos, $cigCount ) if ($seq_flag);
        $rhseq .= "-" x $cigCount if ($hseq_flag);
      }
      $seq_pos += $cigCount;
    }
  }

  # Array reference: first the seq string, then the hseq string.
  return [ $rseq,$rhseq ];
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
328
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
329 =head2 get_SimpleAlign
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
330
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
331 Arg [1] : list of string $flags
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
332 translated = by default, the sequence alignment will be on nucleotide. With translated flag
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333 the aligned sequences are translated.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334 uc = by default aligned sequences are given in lower cases. With uc flag, the aligned
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335 sequences are given in upper cases.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336 Example : $daf->get_SimpleAlign or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
337 $daf->get_SimpleAlign("translated") or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
338 $daf->get_SimpleAlign("translated","uc")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
339 Description: Allows to rebuild the alignment string of both the seq and hseq sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340 using the cigar_string information and the slice and hslice objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341 Returntype : a Bio::SimpleAlign object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342 Exceptions :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
343 Caller :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
344 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
345
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
346 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
347
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub get_SimpleAlign {
  my ( $self, @flags ) = @_;

  # Flags are matched case-insensitively; anything else is ignored.
  my $uc         = scalar( grep { $_ =~ /^uc$/i } @flags )         ? 1 : 0;
  my $translated = scalar( grep { $_ =~ /^translated$/i } @flags ) ? 1 : 0;

  # Case conversion applied to every sequence string: upper case with the
  # 'uc' flag, lower case otherwise.
  my $recase = $uc ? sub { return uc $_[0] } : sub { return lc $_[0] };

  my $sa = Bio::SimpleAlign->new();

  # Hack to try to work with both bioperl 0.7 and 1.2: the method that adds
  # a sequence is called 'add_seq' when available and 'addSeq' (bioperl 0.7)
  # otherwise.
  my $add_method = $sa->can('add_seq') ? 'add_seq' : 'addSeq';

  # First string belongs to seq, second to hseq.
  my ($sb_seq, $qy_seq) = @{ $self->alignment_strings };

  my $loc_sb_seq = Bio::LocatableSeq->new(
    -SEQ    => $recase->($sb_seq),
    -START  => $self->seq_region_start,
    -END    => $self->seq_region_end,
    -ID     => $self->seqname,
    -STRAND => $self->strand,
  );
  if ($translated) {
    $loc_sb_seq->seq( $recase->( scalar $loc_sb_seq->translate->seq ) );
  }

  my $loc_qy_seq = Bio::LocatableSeq->new(
    -SEQ    => $recase->($qy_seq),
    -START  => $self->hseq_region_start,
    -END    => $self->hseq_region_end,
    -ID     => $self->hseqname,
    -STRAND => $self->hstrand,
  );
  if ($translated) {
    $loc_qy_seq->seq( $recase->( scalar $loc_qy_seq->translate->seq ) );
  }

  # seq goes in first, hseq second.
  $sa->$add_method($loc_sb_seq);
  $sa->$add_method($loc_qy_seq);

  return $sa;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
399
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
400 1;