Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/LRGSlice.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::LRGSlice - Arbitary Slice of a genome | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 $sa = $db->get_SliceAdaptor; | |
28 | |
29 $slice = | |
30 $sa->fetch_by_region( 'LRG', 'LRG3'); | |
31 | |
32 # get some attributes of the slice | |
33 my $seqname = $slice->seq_region_name(); | |
34 my $start = $slice->start(); | |
35 my $end = $slice->end(); | |
36 | |
37 # get the sequence from the slice | |
38 my $seq = $slice->seq(); | |
39 | |
40 # get some features from the slice | |
41 foreach $gene ( @{ $slice->get_all_Genes } ) { | |
42 # do something with a gene | |
43 } | |
44 | |
45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) { | |
46 # do something with dna-dna alignments | |
47 } | |
48 | |
49 =head1 DESCRIPTION | |
50 | |
51 A LRG Slice object represents a region of a genome. It can be used to retrieve | |
52 sequence or features from an area of interest. | |
53 | |
54 =head1 METHODS | |
55 | |
56 =cut | |
57 | |
58 package Bio::EnsEMBL::LRGSlice; | |
59 use vars qw(@ISA); | |
60 use strict; | |
61 | |
62 use Bio::PrimarySeqI; | |
63 | |
64 use Bio::EnsEMBL::Slice; | |
65 | |
66 use vars qw(@ISA); | |
67 | |
68 @ISA = qw(Bio::EnsEMBL::Slice); | |
69 | |
70 sub new{ | |
71 my $class = shift; | |
72 | |
73 my $self = bless {}, $class ; | |
74 | |
75 my $slice = $self = $class->SUPER::new( @_); | |
76 | |
77 return $self; | |
78 } | |
79 | |
80 sub stable_id { | |
81 my $self = shift; | |
82 return $self->seq_region_name; | |
83 } | |
84 | |
85 | |
86 sub display_xref { | |
87 my $self = shift; | |
88 return $self->seq_region_name; | |
89 } | |
90 | |
91 sub feature_Slice { | |
92 my $self = shift; | |
93 return $self->{_chrom_slice} if defined($self->{_chrom_slice}); | |
94 | |
95 my $max=-99999999999; | |
96 my $min=9999999999; | |
97 my $chrom; | |
98 my $strand; | |
99 | |
100 # print STDERR "working out feature slcie\n"; | |
101 foreach my $segment (@{$self->project('chromosome')}) { | |
102 my $from_start = $segment->from_start(); | |
103 my $from_end = $segment->from_end(); | |
104 my $to_name = $segment->to_Slice->seq_region_name(); | |
105 $chrom = $to_name; | |
106 | |
107 my $to_start = $segment->to_Slice->start(); | |
108 my $to_end = $segment->to_Slice->end(); | |
109 if($to_start > $max){ | |
110 $max = $to_start; | |
111 } | |
112 if($to_start < $min){ | |
113 $min = $to_start; | |
114 } | |
115 if($to_end > $max){ | |
116 $max = $to_end; | |
117 } | |
118 if($to_end < $min){ | |
119 $min = $to_end; | |
120 } | |
121 my $ori = $segment->to_Slice->strand(); | |
122 $strand = $ori; | |
123 } | |
124 if(!defined($chrom)){ | |
125 warn "Could not project to chromosome for ".$self->name."??\n"; | |
126 return undef; | |
127 } | |
128 my $chrom_slice = $self->adaptor->fetch_by_region("chromosome",$chrom, $min, $max, $strand); | |
129 $self->{_chrom_slice} = $chrom_slice; | |
130 return $self->{_chrom_slice}; | |
131 } | |
132 | |
133 sub DESTROY{ | |
134 } | |
135 | |
136 sub get_all_differences{ | |
137 my $self = shift; | |
138 | |
139 my @results; | |
140 | |
141 # get seq_region_attrib diffs (always same-length substitutions) | |
142 ################################################################ | |
143 | |
144 my $sth = $self->adaptor->prepare(qq{ | |
145 SELECT sra.value | |
146 FROM seq_region_attrib sra, attrib_type at | |
147 WHERE at.code = '_rna_edit' | |
148 AND at.attrib_type_id = sra.attrib_type_id | |
149 AND sra.seq_region_id = ? | |
150 }); | |
151 | |
152 $sth->execute($self->get_seq_region_id); | |
153 | |
154 my $edit_string; | |
155 | |
156 $sth->bind_columns(\$edit_string); | |
157 | |
158 while($sth->fetch()) { | |
159 my ($start, $end, $edit) = split " ", $edit_string; | |
160 | |
161 my $slice = $self->sub_Slice($start, $end); | |
162 my $chr_proj = $slice->project("chromosome"); | |
163 my $ref_seq = '-'; | |
164 if(scalar @$chr_proj == 1) { | |
165 $ref_seq = $chr_proj->[0]->[2]->seq; | |
166 } | |
167 | |
168 | |
169 my $diff = { | |
170 'start' => $start, | |
171 'end' => $end, | |
172 'type' => 'substitution', | |
173 'seq' => $edit, | |
174 'ref' => $ref_seq, | |
175 }; | |
176 | |
177 push @results, $diff; | |
178 } | |
179 | |
180 # get more complex differences via projections | |
181 ############################################## | |
182 | |
183 # project the LRG slice to contig coordinates | |
184 my @segs = @{$self->project("contig")}; | |
185 | |
186 # if the LRG projects into more than one segment | |
187 if(scalar @segs > 1) { | |
188 | |
189 my ($prev_end, $prev_chr_start, $prev_chr_end, $prev_was_chr); | |
190 | |
191 foreach my $seg(@segs) { | |
192 | |
193 # is this a novel LRG contig, or does it project to a chromosome? | |
194 my @chr_proj = @{$seg->[2]->project("chromosome")}; | |
195 | |
196 # if it is a normal contig | |
197 if(scalar @chr_proj) { | |
198 | |
199 # check if there has been a deletion in LRG | |
200 if($prev_was_chr && $prev_end == $seg->[0] - 1) { | |
201 | |
202 # check it's not just a break in contigs | |
203 unless( | |
204 ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_start == $chr_proj[0]->[2]->end + 1) || | |
205 ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_end == $chr_proj[0]->[2]->start - 1) | |
206 ) { | |
207 | |
208 # now get deleted slice coords, depends on the strand rel to LRG | |
209 my ($s, $e); | |
210 | |
211 # opposite strand | |
212 if($chr_proj[0]->[2]->strand != $self->strand) { | |
213 ($s, $e) = ($prev_chr_start - 1, $chr_proj[0]->[2]->end + 1); | |
214 } | |
215 | |
216 # same strand | |
217 else { | |
218 ($s, $e) = ($prev_chr_end + 1, $chr_proj[0]->[2]->start - 1); | |
219 } | |
220 | |
221 if($s > $e) { | |
222 warn "Oops, trying to create a slice from $s to $e (could have been ", $prev_chr_start - 1, "-", $chr_proj[0]->[2]->end + 1, " or ", $prev_chr_end + 1, "-", $chr_proj[0]->[2]->start - 1, ")"; | |
223 } | |
224 | |
225 else { | |
226 # get a slice representing the sequence that was deleted | |
227 my $deleted_slice = $self->adaptor->fetch_by_region("chromosome", $chr_proj[0]->[2]->seq_region_name, $s, $e); | |
228 | |
229 my $diff = { | |
230 'start' => $seg->[0], | |
231 'end' => $prev_end, | |
232 'type' => 'deletion', | |
233 'seq' => '-', | |
234 'ref' => $deleted_slice->seq." ".$deleted_slice->start.'-'.$deleted_slice->end, | |
235 }; | |
236 | |
237 push @results, $diff; | |
238 } | |
239 } | |
240 } | |
241 | |
242 $prev_was_chr = 1; | |
243 | |
244 $prev_chr_start = $chr_proj[0]->[2]->start; | |
245 $prev_chr_end = $chr_proj[0]->[2]->end; | |
246 } | |
247 | |
248 # if it is an LRG made-up contig for an insertion | |
249 else { | |
250 $prev_was_chr = 0; | |
251 | |
252 my $diff = { | |
253 'start' => $seg->[0], | |
254 'end' => $seg->[1], | |
255 'type' => 'insertion', | |
256 'seq' => substr($self->seq, $seg->[0] - 1, $seg->[1] - $seg->[0] + 1), | |
257 'ref' => '-', | |
258 }; | |
259 | |
260 push @results, $diff; | |
261 } | |
262 | |
263 $prev_end = $seg->[1]; | |
264 } | |
265 } | |
266 | |
267 # return results sorted by start, then end position | |
268 return [sort {$a->{start} <=> $b->{start} || $a->{end} <=> $b->{end}} @results]; | |
269 } | |
270 | |
271 1; |