Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/LRGSlice.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::LRGSlice - Arbitary Slice of a genome | |
| 24 | |
| 25 =head1 SYNOPSIS | |
| 26 | |
| 27 $sa = $db->get_SliceAdaptor; | |
| 28 | |
| 29 $slice = | |
| 30 $sa->fetch_by_region( 'LRG', 'LRG3'); | |
| 31 | |
| 32 # get some attributes of the slice | |
| 33 my $seqname = $slice->seq_region_name(); | |
| 34 my $start = $slice->start(); | |
| 35 my $end = $slice->end(); | |
| 36 | |
| 37 # get the sequence from the slice | |
| 38 my $seq = $slice->seq(); | |
| 39 | |
| 40 # get some features from the slice | |
| 41 foreach $gene ( @{ $slice->get_all_Genes } ) { | |
| 42 # do something with a gene | |
| 43 } | |
| 44 | |
| 45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) { | |
| 46 # do something with dna-dna alignments | |
| 47 } | |
| 48 | |
| 49 =head1 DESCRIPTION | |
| 50 | |
| 51 A LRG Slice object represents a region of a genome. It can be used to retrieve | |
| 52 sequence or features from an area of interest. | |
| 53 | |
| 54 =head1 METHODS | |
| 55 | |
| 56 =cut | |
| 57 | |
| 58 package Bio::EnsEMBL::LRGSlice; | |
| 59 use vars qw(@ISA); | |
| 60 use strict; | |
| 61 | |
| 62 use Bio::PrimarySeqI; | |
| 63 | |
| 64 use Bio::EnsEMBL::Slice; | |
| 65 | |
| 66 use vars qw(@ISA); | |
| 67 | |
| 68 @ISA = qw(Bio::EnsEMBL::Slice); | |
| 69 | |
| 70 sub new{ | |
| 71 my $class = shift; | |
| 72 | |
| 73 my $self = bless {}, $class ; | |
| 74 | |
| 75 my $slice = $self = $class->SUPER::new( @_); | |
| 76 | |
| 77 return $self; | |
| 78 } | |
| 79 | |
| 80 sub stable_id { | |
| 81 my $self = shift; | |
| 82 return $self->seq_region_name; | |
| 83 } | |
| 84 | |
| 85 | |
| 86 sub display_xref { | |
| 87 my $self = shift; | |
| 88 return $self->seq_region_name; | |
| 89 } | |
| 90 | |
| 91 sub feature_Slice { | |
| 92 my $self = shift; | |
| 93 return $self->{_chrom_slice} if defined($self->{_chrom_slice}); | |
| 94 | |
| 95 my $max=-99999999999; | |
| 96 my $min=9999999999; | |
| 97 my $chrom; | |
| 98 my $strand; | |
| 99 | |
| 100 # print STDERR "working out feature slcie\n"; | |
| 101 foreach my $segment (@{$self->project('chromosome')}) { | |
| 102 my $from_start = $segment->from_start(); | |
| 103 my $from_end = $segment->from_end(); | |
| 104 my $to_name = $segment->to_Slice->seq_region_name(); | |
| 105 $chrom = $to_name; | |
| 106 | |
| 107 my $to_start = $segment->to_Slice->start(); | |
| 108 my $to_end = $segment->to_Slice->end(); | |
| 109 if($to_start > $max){ | |
| 110 $max = $to_start; | |
| 111 } | |
| 112 if($to_start < $min){ | |
| 113 $min = $to_start; | |
| 114 } | |
| 115 if($to_end > $max){ | |
| 116 $max = $to_end; | |
| 117 } | |
| 118 if($to_end < $min){ | |
| 119 $min = $to_end; | |
| 120 } | |
| 121 my $ori = $segment->to_Slice->strand(); | |
| 122 $strand = $ori; | |
| 123 } | |
| 124 if(!defined($chrom)){ | |
| 125 warn "Could not project to chromosome for ".$self->name."??\n"; | |
| 126 return undef; | |
| 127 } | |
| 128 my $chrom_slice = $self->adaptor->fetch_by_region("chromosome",$chrom, $min, $max, $strand); | |
| 129 $self->{_chrom_slice} = $chrom_slice; | |
| 130 return $self->{_chrom_slice}; | |
| 131 } | |
| 132 | |
| 133 sub DESTROY{ | |
| 134 } | |
| 135 | |
| 136 sub get_all_differences{ | |
| 137 my $self = shift; | |
| 138 | |
| 139 my @results; | |
| 140 | |
| 141 # get seq_region_attrib diffs (always same-length substitutions) | |
| 142 ################################################################ | |
| 143 | |
| 144 my $sth = $self->adaptor->prepare(qq{ | |
| 145 SELECT sra.value | |
| 146 FROM seq_region_attrib sra, attrib_type at | |
| 147 WHERE at.code = '_rna_edit' | |
| 148 AND at.attrib_type_id = sra.attrib_type_id | |
| 149 AND sra.seq_region_id = ? | |
| 150 }); | |
| 151 | |
| 152 $sth->execute($self->get_seq_region_id); | |
| 153 | |
| 154 my $edit_string; | |
| 155 | |
| 156 $sth->bind_columns(\$edit_string); | |
| 157 | |
| 158 while($sth->fetch()) { | |
| 159 my ($start, $end, $edit) = split " ", $edit_string; | |
| 160 | |
| 161 my $slice = $self->sub_Slice($start, $end); | |
| 162 my $chr_proj = $slice->project("chromosome"); | |
| 163 my $ref_seq = '-'; | |
| 164 if(scalar @$chr_proj == 1) { | |
| 165 $ref_seq = $chr_proj->[0]->[2]->seq; | |
| 166 } | |
| 167 | |
| 168 | |
| 169 my $diff = { | |
| 170 'start' => $start, | |
| 171 'end' => $end, | |
| 172 'type' => 'substitution', | |
| 173 'seq' => $edit, | |
| 174 'ref' => $ref_seq, | |
| 175 }; | |
| 176 | |
| 177 push @results, $diff; | |
| 178 } | |
| 179 | |
| 180 # get more complex differences via projections | |
| 181 ############################################## | |
| 182 | |
| 183 # project the LRG slice to contig coordinates | |
| 184 my @segs = @{$self->project("contig")}; | |
| 185 | |
| 186 # if the LRG projects into more than one segment | |
| 187 if(scalar @segs > 1) { | |
| 188 | |
| 189 my ($prev_end, $prev_chr_start, $prev_chr_end, $prev_was_chr); | |
| 190 | |
| 191 foreach my $seg(@segs) { | |
| 192 | |
| 193 # is this a novel LRG contig, or does it project to a chromosome? | |
| 194 my @chr_proj = @{$seg->[2]->project("chromosome")}; | |
| 195 | |
| 196 # if it is a normal contig | |
| 197 if(scalar @chr_proj) { | |
| 198 | |
| 199 # check if there has been a deletion in LRG | |
| 200 if($prev_was_chr && $prev_end == $seg->[0] - 1) { | |
| 201 | |
| 202 # check it's not just a break in contigs | |
| 203 unless( | |
| 204 ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_start == $chr_proj[0]->[2]->end + 1) || | |
| 205 ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_end == $chr_proj[0]->[2]->start - 1) | |
| 206 ) { | |
| 207 | |
| 208 # now get deleted slice coords, depends on the strand rel to LRG | |
| 209 my ($s, $e); | |
| 210 | |
| 211 # opposite strand | |
| 212 if($chr_proj[0]->[2]->strand != $self->strand) { | |
| 213 ($s, $e) = ($prev_chr_start - 1, $chr_proj[0]->[2]->end + 1); | |
| 214 } | |
| 215 | |
| 216 # same strand | |
| 217 else { | |
| 218 ($s, $e) = ($prev_chr_end + 1, $chr_proj[0]->[2]->start - 1); | |
| 219 } | |
| 220 | |
| 221 if($s > $e) { | |
| 222 warn "Oops, trying to create a slice from $s to $e (could have been ", $prev_chr_start - 1, "-", $chr_proj[0]->[2]->end + 1, " or ", $prev_chr_end + 1, "-", $chr_proj[0]->[2]->start - 1, ")"; | |
| 223 } | |
| 224 | |
| 225 else { | |
| 226 # get a slice representing the sequence that was deleted | |
| 227 my $deleted_slice = $self->adaptor->fetch_by_region("chromosome", $chr_proj[0]->[2]->seq_region_name, $s, $e); | |
| 228 | |
| 229 my $diff = { | |
| 230 'start' => $seg->[0], | |
| 231 'end' => $prev_end, | |
| 232 'type' => 'deletion', | |
| 233 'seq' => '-', | |
| 234 'ref' => $deleted_slice->seq." ".$deleted_slice->start.'-'.$deleted_slice->end, | |
| 235 }; | |
| 236 | |
| 237 push @results, $diff; | |
| 238 } | |
| 239 } | |
| 240 } | |
| 241 | |
| 242 $prev_was_chr = 1; | |
| 243 | |
| 244 $prev_chr_start = $chr_proj[0]->[2]->start; | |
| 245 $prev_chr_end = $chr_proj[0]->[2]->end; | |
| 246 } | |
| 247 | |
| 248 # if it is an LRG made-up contig for an insertion | |
| 249 else { | |
| 250 $prev_was_chr = 0; | |
| 251 | |
| 252 my $diff = { | |
| 253 'start' => $seg->[0], | |
| 254 'end' => $seg->[1], | |
| 255 'type' => 'insertion', | |
| 256 'seq' => substr($self->seq, $seg->[0] - 1, $seg->[1] - $seg->[0] + 1), | |
| 257 'ref' => '-', | |
| 258 }; | |
| 259 | |
| 260 push @results, $diff; | |
| 261 } | |
| 262 | |
| 263 $prev_end = $seg->[1]; | |
| 264 } | |
| 265 } | |
| 266 | |
| 267 # return results sorted by start, then end position | |
| 268 return [sort {$a->{start} <=> $b->{start} || $a->{end} <=> $b->{end}} @results]; | |
| 269 } | |
| 270 | |
| 271 1; |
