Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/LRGSlice.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/LRGSlice.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,271 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::LRGSlice - Arbitary Slice of a genome + +=head1 SYNOPSIS + + $sa = $db->get_SliceAdaptor; + + $slice = + $sa->fetch_by_region( 'LRG', 'LRG3'); + + # get some attributes of the slice + my $seqname = $slice->seq_region_name(); + my $start = $slice->start(); + my $end = $slice->end(); + + # get the sequence from the slice + my $seq = $slice->seq(); + + # get some features from the slice + foreach $gene ( @{ $slice->get_all_Genes } ) { + # do something with a gene + } + + foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) { + # do something with dna-dna alignments + } + +=head1 DESCRIPTION + +A LRG Slice object represents a region of a genome. It can be used to retrieve +sequence or features from an area of interest. + +=head1 METHODS + +=cut + +package Bio::EnsEMBL::LRGSlice; +use vars qw(@ISA); +use strict; + +use Bio::PrimarySeqI; + +use Bio::EnsEMBL::Slice; + +use vars qw(@ISA); + +@ISA = qw(Bio::EnsEMBL::Slice); + +sub new{ + my $class = shift; + + my $self = bless {}, $class ; + + my $slice = $self = $class->SUPER::new( @_); + + return $self; +} + +sub stable_id { + my $self = shift; + return $self->seq_region_name; +} + + +sub display_xref { + my $self = shift; + return $self->seq_region_name; +} + +sub feature_Slice { + my $self = shift; + return $self->{_chrom_slice} if defined($self->{_chrom_slice}); + + my $max=-99999999999; + my $min=9999999999; + my $chrom; + my $strand; + +# print STDERR "working out feature slcie\n"; + foreach my $segment (@{$self->project('chromosome')}) { + my $from_start = $segment->from_start(); + my $from_end = $segment->from_end(); + my $to_name = $segment->to_Slice->seq_region_name(); + $chrom = $to_name; + + my $to_start = $segment->to_Slice->start(); + my $to_end = $segment->to_Slice->end(); + if($to_start > $max){ + $max = $to_start; + } + if($to_start < $min){ + $min = $to_start; + } + if($to_end > $max){ + $max = $to_end; + } + if($to_end < $min){ + $min = $to_end; + } + my $ori = $segment->to_Slice->strand(); + $strand = $ori; + } + if(!defined($chrom)){ + warn "Could not project to chromosome for ".$self->name."??\n"; + return undef; + } + my $chrom_slice = $self->adaptor->fetch_by_region("chromosome",$chrom, $min, $max, $strand); + $self->{_chrom_slice} = $chrom_slice; + return $self->{_chrom_slice}; +} + +sub DESTROY{ +} + +sub get_all_differences{ + my $self = shift; + + my @results; + + # get seq_region_attrib diffs (always same-length substitutions) + ################################################################ + + my $sth = $self->adaptor->prepare(qq{ + SELECT sra.value + FROM seq_region_attrib sra, attrib_type at + WHERE at.code = '_rna_edit' + AND at.attrib_type_id = sra.attrib_type_id + AND sra.seq_region_id = ? + }); + + $sth->execute($self->get_seq_region_id); + + my $edit_string; + + $sth->bind_columns(\$edit_string); + + while($sth->fetch()) { + my ($start, $end, $edit) = split " ", $edit_string; + + my $slice = $self->sub_Slice($start, $end); + my $chr_proj = $slice->project("chromosome"); + my $ref_seq = '-'; + if(scalar @$chr_proj == 1) { + $ref_seq = $chr_proj->[0]->[2]->seq; + } + + + my $diff = { + 'start' => $start, + 'end' => $end, + 'type' => 'substitution', + 'seq' => $edit, + 'ref' => $ref_seq, + }; + + push @results, $diff; + } + + # get more complex differences via projections + ############################################## + + # project the LRG slice to contig coordinates + my @segs = @{$self->project("contig")}; + + # if the LRG projects into more than one segment + if(scalar @segs > 1) { + + my ($prev_end, $prev_chr_start, $prev_chr_end, $prev_was_chr); + + foreach my $seg(@segs) { + + # is this a novel LRG contig, or does it project to a chromosome? + my @chr_proj = @{$seg->[2]->project("chromosome")}; + + # if it is a normal contig + if(scalar @chr_proj) { + + # check if there has been a deletion in LRG + if($prev_was_chr && $prev_end == $seg->[0] - 1) { + + # check it's not just a break in contigs + unless( + ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_start == $chr_proj[0]->[2]->end + 1) || + ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_end == $chr_proj[0]->[2]->start - 1) + ) { + + # now get deleted slice coords, depends on the strand rel to LRG + my ($s, $e); + + # opposite strand + if($chr_proj[0]->[2]->strand != $self->strand) { + ($s, $e) = ($prev_chr_start - 1, $chr_proj[0]->[2]->end + 1); + } + + # same strand + else { + ($s, $e) = ($prev_chr_end + 1, $chr_proj[0]->[2]->start - 1); + } + + if($s > $e) { + warn "Oops, trying to create a slice from $s to $e (could have been ", $prev_chr_start - 1, "-", $chr_proj[0]->[2]->end + 1, " or ", $prev_chr_end + 1, "-", $chr_proj[0]->[2]->start - 1, ")"; + } + + else { + # get a slice representing the sequence that was deleted + my $deleted_slice = $self->adaptor->fetch_by_region("chromosome", $chr_proj[0]->[2]->seq_region_name, $s, $e); + + my $diff = { + 'start' => $seg->[0], + 'end' => $prev_end, + 'type' => 'deletion', + 'seq' => '-', + 'ref' => $deleted_slice->seq." ".$deleted_slice->start.'-'.$deleted_slice->end, + }; + + push @results, $diff; + } + } + } + + $prev_was_chr = 1; + + $prev_chr_start = $chr_proj[0]->[2]->start; + $prev_chr_end = $chr_proj[0]->[2]->end; + } + + # if it is an LRG made-up contig for an insertion + else { + $prev_was_chr = 0; + + my $diff = { + 'start' => $seg->[0], + 'end' => $seg->[1], + 'type' => 'insertion', + 'seq' => substr($self->seq, $seg->[0] - 1, $seg->[1] - $seg->[0] + 1), + 'ref' => '-', + }; + + push @results, $diff; + } + + $prev_end = $seg->[1]; + } + } + + # return results sorted by start, then end position + return [sort {$a->{start} <=> $b->{start} || $a->{end} <=> $b->{end}} @results]; +} + +1;