annotate variant_effect_predictor/Bio/EnsEMBL/Utils/TranscriptAlleles.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 TranscriptAlleles - A utility class used to obtain information about the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 relationships between a transcript and Alleles
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 use Bio::EnsEMBL::Utils::TranscriptAlleles;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 # get the peptide variations caused by a set of Alleles
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 %variations = %{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 Bio::EnsEMBL::Utils::TranscriptAlleles::get_all_peptide_variations(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 $transcript, $alleles ) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 This is a utility class which can be used to find consequence type of an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 AlleleFeature in a transcript, and to determine the amino acid changes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 caused by the AlleleFeature in the Transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 package Bio::EnsEMBL::Utils::TranscriptAlleles;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 use Bio::EnsEMBL::Variation::ConsequenceType;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 use vars qw(@ISA @EXPORT_OK);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 use Data::Dumper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 @ISA = qw(Exporter);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 @EXPORT_OK = qw(&get_all_ConsequenceType &type_variation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 =head2 get_all_ConsequenceType
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 Arg [1] : $transcript the transcript to obtain the peptide variations for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 Arg [2] : $alleles listref of AlleleFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 Example : $consequence_types = get_all_ConsequenceType($transcript, \@alleles);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 foreach my $ct (@{$consequence_types}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 print "Allele : ", $ct->allele_string, " has a consequence type of :",$ct->type;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 print " and is affecting the transcript with ",@{$ct->aa_alleles}, "in position ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 $ct->aa_start,"-", $ct->aa_end if (defined $ct->aa_alleles);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 print "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 Description: Takes a list of AlleleFeatures and a Transcritpt, and return a list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 of ConsequenceType of the alleles in the given Transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 Returntype : listref of Bio::EnsEMBL::Variation::ConsequenceType
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 sub get_all_ConsequenceType {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 my $transcript = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 my $alleles = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 if(!ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 throw('Bio::EnsEMBL::Transcript argument is required.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 if(!ref($alleles) || (ref($alleles) ne 'ARRAY')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 throw('Reference to a list of Bio::EnsEMBL::Variation::AlleleFeature objects is required');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 my @alleles_ordered = sort { $a->start <=> $b->start} @$alleles; #sort the alleles by the genomic position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 my @same_codon; #contains up to 3 allele features, that are in the same codon, but each position can contain more than 1 allele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 my @out; #array containing the consequence types of the alleles in the transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 foreach my $allele (@alleles_ordered) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 # foreach my $allele (@{$alleles}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 #get consequence type of the AlleleFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 # my $new_allele = $allele->transform('chromosome');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 #my $consequence_type = Bio::EnsEMBL::Variation::ConsequenceType->new($transcript->dbID(),'',$allele->start,$allele->end,$allele->strand,[$allele->allele_string]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 ### REAL HACK BY js5 because something is borked in TranscriptMapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 ### This relies on the Allele being of the form i.e. a SNP! [ACGT-](/[ACGT-])+
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 ### The rest don't work anyway until we have a AlignStrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 ### MUST BE SORTED....
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 #we have to consider het alleles
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 my $allele_string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 if ($allele->allele_string =~ /[\|\\\/]/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 my @alleles = split /[\|\\\/]/,$allele->allele_string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 if ($alleles[0] ne $allele->ref_allele_string){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 $allele_string = $alleles[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 $allele_string = $alleles[1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 $allele_string = $allele->allele_string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 my $opposite_strand = 0; #to indicate wether transcript and allele and in different strands
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 my $transcript_allele = $allele_string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 if( $transcript->strand != $allele->strand ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 $transcript_allele =~tr/ACGT/TGCA/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 $opposite_strand = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 my $consequence_type = Bio::EnsEMBL::Variation::ConsequenceType->new($transcript->dbID(),'',$allele->start, $allele->end, $transcript->strand, [$transcript_allele]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 #calculate the consequence type of the Allele if different from the reference Allele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 #if (($opposite_strand && $allele->ref_allele_string eq $allele_string) || (!$opposite_strand && $allele->ref_allele_string eq $allele_string)){ #same allele as reference, there is no consequence, called SARA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 if ($allele->ref_allele_string eq $allele_string) { #same allele as reference, there is no consequence, called SARA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 #same allele as reference, there is no consequence, called SARA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 #we have to calculate if there are more than 2 in the same codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 empty_codon(\@out,\@same_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 $consequence_type->type('SARA');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 push @out, $consequence_type;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 my $ref_consequences = type_variation($transcript,"",$consequence_type);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 if ($allele->start != $allele->end){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 empty_codon(\@out,\@same_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 #do not calculate for indels effects of 2 or more in same codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 push @out, @{$ref_consequences};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 my $new_consequence = shift @{$ref_consequences};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 if (! defined $new_consequence ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 empty_codon(\@out,\@same_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 push @out, $consequence_type; # should be empty
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 if ( !defined $new_consequence->aa_start){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 empty_codon(\@out,\@same_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 push @out, $new_consequence;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 #first element of the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 if (!defined $same_codon[0]){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 push @{$same_codon[0]}, $new_consequence; #goes to the first position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 #for alleles with aa effect, find out if they are in the same codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 if ($same_codon[-1]->[0]->aa_start == $new_consequence->aa_start){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 #they are in the same codon, find out if it is the same position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 if ($same_codon[-1]->[0]->start == $new_consequence->start){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 #it is the same position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 push @{$same_codon[-1]},$new_consequence; #push in the last
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 push @{$same_codon[$#same_codon + 1]},$new_consequence; #this is a new element in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 #if there is more than one element in the same_codon array, calculate the effect of the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 if (@same_codon > 1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 calculate_same_codon(\@same_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 map {push @out, @{$_}} @same_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 @same_codon = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 push @{$same_codon[0]}, $new_consequence; #push the element not in the same codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 #add last consequence_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 empty_codon(\@out,\@same_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 return \@out;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 sub empty_codon{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 my $out = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 my $same_codon = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 if (@{$same_codon} == 1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 map {push @{$out}, @{$_}} @{$same_codon};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 elsif (@{$same_codon} > 1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 calculate_same_codon($same_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 map {push @{$out}, @{$_}} @{$same_codon};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 @{$same_codon} = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 # recalculates the effect of 2 or 3 SNPs in the same codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 sub calculate_same_codon{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 my $same_codon = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 my $new_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 my $old_aa;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 my $codon_table = Bio::Tools::CodonTable->new;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 if (@{$same_codon} == 3){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 #if there are 3 alleles in the same codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 map {$new_codon .= @{$_->[0]->alleles};$old_aa = $_->[0]->aa_alleles()->[0]} @{$same_codon};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 #if there are 2 alleles affecting the same codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 my $first_pos = ($same_codon->[0]->[0]->cdna_start -1) % 3; #position of the first allele in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 my $second_pos = ($same_codon->[1]->[0]->cdna_start -1)% 3; #position of the second allele in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 if ($first_pos == 0){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 #codon starts with first allele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 $new_codon = $same_codon->[0]->[0]->alleles->[0]; #first base in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 if ($second_pos == 1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 $new_codon .= $same_codon->[1]->[0]->alleles->[0]; #second base in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 $new_codon .= substr($same_codon->[1]->[0]->codon,2,1); #third base in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 $new_codon .= substr($same_codon->[1]->[0]->codon,1,1); #second base in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 $new_codon .= $same_codon->[1]->[0]->alleles->[0]; #third base in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 #alleles are in position 1 and 2 in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 $new_codon = substr($same_codon->[1]->[0]->codon,0,1); #first base in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 $new_codon .= $same_codon->[0]->[0]->alleles->[0]; #second base in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 $new_codon .= $same_codon->[1]->[0]->alleles->[0]; #third base in the codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 $old_aa = $same_codon->[0]->[0]->aa_alleles->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 #calculate the new_aa
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 my $new_aa = $codon_table->translate($new_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 #and update the aa_alleles field in all the codons
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 foreach my $codon (@{$same_codon}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 map {$_->aa_alleles([$old_aa,$new_aa])} @{$codon};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 # Classifies a variation which is in the vicinity of a transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 sub type_variation {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 my $tr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 my $g = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 my $var = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 my $UPSTREAM = 5000;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 my $DOWNSTREAM = 5000;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 #empty type first in the case of recursive call
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 $var->empty_type if defined $var->type;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 if (!$var->isa('Bio::EnsEMBL::Variation::ConsequenceType')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 throw("Not possible to calculate the consequence type for ",ref($var)," : Bio::EnsEMBL::Variation::ConsequenceType object expected");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 if (($var->start < $tr->start - $UPSTREAM) || ($var->end > $tr->end + $DOWNSTREAM)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 #since the variation is more than UPSTREAM and DOWNSTREAM of the transcript, there is no effect in the transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 return [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 # check the cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 my $tran_features = $tr->{_variation_effect_feature_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 # populate it if not found
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 unless ($tran_features) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 $tran_features = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 mapper => $tr->get_TranscriptMapper,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 my ($attrib) = @{$tr->slice()->get_all_Attributes('codon_table')}; #for mithocondrial dna it is necessary to change the table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 my $codon_table;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 $codon_table = $attrib->value() if($attrib);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 $codon_table ||= 1; # default vertebrate codon table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 if ($tran_features->{translation} = $tr->translate(undef, undef, undef, $codon_table)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 $tran_features->{translateable_seq} = $tr->translateable_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 # to include the stop codon we need to translate the Bio::Seq sequence, not just
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 # $tr->translation, this is the source of the missing STOP_LOSTs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 my $mrna_seqobj = Bio::Seq->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 -seq => $tran_features->{translateable_seq},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 -moltype => 'dna',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 -alphabet => 'dna'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 $tran_features->{peptide} = $mrna_seqobj->translate(undef, undef, undef, $codon_table)->seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 $tr->{_variation_effect_feature_cache} = $tran_features;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 if ( !defined( $tran_features->{translation} ) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 { # for other biotype rather than coding/IG genes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 # check if the variation is completely outside the transcript:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 if ( $var->end() < $tr->start() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 $var->type( ( $tr->strand() == 1 ) ? 'UPSTREAM' : 'DOWNSTREAM' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 if ( $var->start() > $tr->end() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 $var->type( ( $tr->strand() == 1 ) ? 'DOWNSTREAM' : 'UPSTREAM' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 if ( $var->start() >= $tr->start() and $var->end() <= $tr->end() )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 { # within the transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 if ( $tr->biotype() eq "miRNA" ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 my ($attribute) = @{ $tr->get_all_Attributes('miRNA') };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 # the value is the mature miRNA coordinate within miRNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 # transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 if ( defined($attribute)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 && $attribute->value() =~ /(\d+)-(\d+)/ )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 # transfer cdna value to genomic coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 my @mapper_objs = $tr->cdna2genomic( $1, $2, $tr->strand() );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 foreach my $obj (@mapper_objs)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 { #Note you can get more than one mature seq per miRNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 if ( $obj->isa("Bio::EnsEMBL::Mapper::Coordinate") ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 if ( $var->start() >= $obj->start()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 and $var->end() <= $obj->end() )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 $var->type("WITHIN_MATURE_miRNA");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 $var->type("WITHIN_NON_CODING_GENE");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 } ## end if ( $var->start() >= ...)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 } ## end if ( !defined( $tr->translation...))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 # get a transcript mapper object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 my $tm = $tran_features->{mapper};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 # map to CDNA coords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 my @cdna_coords = $tm->genomic2cdna($var->start,$var->end,$var->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 # map to CDS cooords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 my @cds_coords = $tm->genomic2cds($var->start, $var->end,$var->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 # map to peptide coords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 my @pep_coords = $tm->genomic2pep($var->start, $var->end, $var->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 # get the phase of the first exon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 my $exon_phase = $tr->start_Exon->phase;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 # check for partial codon consequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 if(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 @pep_coords == 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 && @cds_coords == 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 && !($cds_coords[0]->isa('Bio::EnsEMBL::Mapper::Gap'))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 && !($pep_coords[0]->isa('Bio::EnsEMBL::Mapper::Gap'))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 # get the CDS sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 my $cds = $tran_features->{translateable_seq};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 my $start = $pep_coords[0]->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 my $codon_cds_start = ($start * 3) - 2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 my $last_codon_length = length($cds) - ($codon_cds_start - 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 if($last_codon_length < 3 && $last_codon_length > 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 $var->type("PARTIAL_CODON");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 # add the CDS coords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 $var->cds_start($cds_coords[0]->start + ($exon_phase > 0 ? $exon_phase : 0));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 $var->cds_end($cds_coords[0]->end + ($exon_phase > 0 ? $exon_phase : 0));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 # add the cDNA coords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 $var->cdna_start($cdna_coords[0]->start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 $var->cdna_end($cdna_coords[0]->end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 # Handle simple cases where the variation is not split into parts.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 # Call method recursively with component parts in complicated case.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 # E.g. a single multi-base variation may be both intronic and coding
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 if(@cdna_coords > 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 my @out;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 #this will be a new type, complex_indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 $var->type('COMPLEX_INDEL');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 # foreach my $c (@coords) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 # my %new_var = %{$var};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 # $new_var{'end'} = $var->start + $c->length() - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 # $var->start( $new_var{'end'} + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 # #empty the type before re-run
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 # $var->empty_type ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 # push @out, @{type_variation($tr, $g, bless \%new_var, ref($var))};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 # return \@out;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 # look at different splice distances
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 my @coords_splice_2 = $tm->genomic2cdna($var->start -2, $var->end +2, $var->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 my @coords_splice_3 = $tm->genomic2cdna($var->start -3, $var->end +3, $var->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 my @coords_splice_8 = $tm->genomic2cdna($var->start -8, $var->end +8, $var->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 my ($splice_site_2, $splice_site_3, $splice_site_8);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 if (scalar @coords_splice_2 >1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 $splice_site_2=1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 elsif (scalar @coords_splice_3 >1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 $splice_site_3=1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 elsif (scalar @coords_splice_8 >1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 $splice_site_8=1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 my $c = $cdna_coords[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 if($c->isa('Bio::EnsEMBL::Mapper::Gap')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 # check if the variation is completely outside the transcript:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 if($var->end < $tr->start()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 $var->type( ($tr->strand() == 1) ? 'UPSTREAM' : 'DOWNSTREAM' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 if($var->start > $tr->end()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 $var->type( ($tr->strand() == 1) ? 'DOWNSTREAM' : 'UPSTREAM' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 # nonsense-mediated decay transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 if($tr->biotype() eq 'nonsense_mediated_decay') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 $var->type("NMD_TRANSCRIPT");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 #return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 # variation must be intronic since mapped to cdna gap, but is within
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 # transcript, note that ESSENTIAL_SPLICE_SITE only consider first (AG) and last (GT) 2 bases inside the intron.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 # if variation is in intron, we need to check the lenth of intron, if it's shoter than 6, we call it SYNONYMOUS_CODING rather then INTRONIC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 foreach my $intron (@{$tran_features->{introns}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 if ($intron->length <=5) {#the length of frameshift intron could be 1,2,4,5 bases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 if ($var->start>=$intron->start and $var->end<=$intron->end) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 #this is a type of SYNONYMOUS_CODING since changes happen in frameshift intron, which don't change exon structure
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 $var->type('SYNONYMOUS_CODING');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 #if it's not in frameshift intron, then it's in normal intron
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 $var->type('INTRONIC');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 if ($splice_site_2) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 $var->type('ESSENTIAL_SPLICE_SITE');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 elsif ($splice_site_3 or $splice_site_8) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 $var->type('SPLICE_SITE');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 # nonsense-mediated decay transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 if($tr->biotype() eq 'nonsense_mediated_decay') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 $var->type("NMD_TRANSCRIPT");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 #return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 #now variation must be in exons, the first 3 bs into exon could be splice_site
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 if ($splice_site_2 or $splice_site_3) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 my ($se_s, $se_e, $ee_s, $ee_e) = ($tr->start_Exon->start, $tr->start_Exon->end, $tr->end_Exon->start, $tr->end_Exon->end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 ($se_s, $se_e, $ee_s, $ee_e) = ($se_e, $se_s, $ee_e, $ee_s) if $tr->strand < 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 # check coord relative to first exon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 # near beginning of first exon is obv not a splice site
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 if($var->start <= $se_e) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 if(abs($se_e - $var->start) <= 3) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 $var->type('SPLICE_SITE');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 # also check relative to last exon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 # near end of last exon is also not a splice site
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 elsif($var->start >= $ee_s) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 if(abs($ee_s - $var->start) <= 3) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 $var->type('SPLICE_SITE');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 # if not near either end of transcript, then it is definitely a splice site
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 $var->type('SPLICE_SITE');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 $var->cdna_start( $c->start() );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 $var->cdna_end( $c->end() );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 if(@cds_coords > 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 # my @out;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 #this is a new type, complex_indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 $var->type('COMPLEX_INDEL');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 # foreach my $c (@coords) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 # my %new_var = %{$var};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 # $new_var{'end'} = $var->start + $c->length() - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 # $var->start( $new_var{'end'} + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 # #empty the type before re-run
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 # $var->empty_type ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 # push @out, @{type_variation($tr, $g, bless \%new_var, ref($var))};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 # return \@out;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 $c = $cds_coords[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 if($c->isa('Bio::EnsEMBL::Mapper::Gap')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 # mapped successfully to CDNA but not to CDS, must be UTR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543 if($var->end < $tr->coding_region_start()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 $var->type( ($tr->strand() == 1) ? '5PRIME_UTR' : '3PRIME_UTR' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 elsif($var->start > $tr->coding_region_end()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 $var->type( ($tr->strand() == 1) ? '3PRIME_UTR' : '5PRIME_UTR');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 throw('Unexpected: CDNA variation which is not in CDS is not in UTR');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 # we need to add the exon phase on in case of weird transcripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 # where the first exon is not in normal phase
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 $var->cds_start( $c->start() + ($exon_phase > 0 ? $exon_phase : 0));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 $var->cds_end( $c->end() + ($exon_phase > 0 ? $exon_phase : 0));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 if(@pep_coords != 1 || $pep_coords[0]->isa('Bio::EnsEMBL::Mapper::Gap')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 throw("Unexpected: Could map to CDS but not to peptide coordinates.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 $c = $pep_coords[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 $var->aa_start( $c->start());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568 $var->aa_end( $c->end());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 apply_aa_change($tr, $var);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 # Determines the effect of a coding variation on the peptide sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 sub apply_aa_change {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 my $tr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 my $var = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 my ($attrib) = @{$tr->slice()->get_all_Attributes('codon_table')}; #for mithocondrial dna it is necessary to change the table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 my $codon_table;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 $codon_table = $attrib->value() if($attrib);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 $codon_table ||= 1; # default vertebrate codon table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 # check the cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 my $tran_features = $tr->{_variation_effect_feature_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 # populate it if not found
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 unless ($tran_features) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 $tran_features = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 mapper => $tr->get_TranscriptMapper,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 if ($tran_features->{translation} = $tr->translate(undef, undef, undef, $codon_table)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 $tran_features->{translateable_seq} = $tr->translateable_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601 # to include the stop codon we need to translate the Bio::Seq sequence, not just
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 # $tr->translation, this is the source of the missing STOP_LOSTs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 my $mrna_seqobj = Bio::Seq->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 -seq => $tran_features->{translateable_seq},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 -moltype => 'dna',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606 -alphabet => 'dna'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609 $tran_features->{peptide} = $mrna_seqobj->translate(undef, undef, undef, $codon_table)->seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 $tr->{_variation_effect_feature_cache} = $tran_features;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615 my $mrna = $tran_features->{translateable_seq}; # get from cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 my $peptide = $tran_features->{peptide}; # get from cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619 my $len = $var->aa_end - $var->aa_start + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 my $old_aa = substr($peptide, $var->aa_start -1 , $len);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 my $codon_cds_start = $var->aa_start * 3 - 2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 my $codon_cds_end = $var->aa_end * 3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 my $codon_len = $codon_cds_end - $codon_cds_start + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 my @alleles = @{$var->alleles};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 my $var_len = $var->cds_end - $var->cds_start + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 my @aa_alleles = ($old_aa);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 my $ref_codon = substr($mrna, $codon_cds_start-1, $codon_len);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 my @codons;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 push @codons, $ref_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 #here could generate multi type if have multi-allele change: "ACTAGT/-/T"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 foreach my $a (@alleles) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 $a =~ s/\-//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 my $cds = $mrna;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 if($var_len != length($a)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642 if(abs(length($a) - $var_len) % 3) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 # frameshifting variation, do not set peptide_allele string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 # since too complicated and could be very long
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646 $var->type('FRAMESHIFT_CODING');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 return [$var];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 if($codon_len == 0) { # insertion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 $aa_alleles[0] = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 $old_aa = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 my $new_aa;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658 # change sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 substr($cds, $var->cds_start-1, $var_len) = $a;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661 # get the new codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 my $codon_str = substr($cds, $codon_cds_start-1, $codon_len + length($a)-$var_len);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664 push @codons, $codon_str;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 $var->codon($codon_str); #add the codon to the ConsequenceType object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666 my $codon_seq = Bio::Seq->new(-seq => $codon_str,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 -moltype => 'dna',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668 -alphabet => 'dna');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 $new_aa = $codon_seq->translate(undef,undef,undef,$codon_table)->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672 if(length($new_aa)<1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 $new_aa='-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 if(uc($new_aa) ne uc($old_aa)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677 push @aa_alleles, $new_aa;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 if ($new_aa =~ /\*/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 $var->type('STOP_GAINED');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 elsif ($old_aa =~ /\*/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 $var->type('STOP_LOST');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 #note if type is already defined as SOTP_GAINED OR STOP_LOST, then even @aa_alleles > 1, we are not given type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688 # of 'NON_SYNONYMOUS_CODING'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 if(@aa_alleles > 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 if (!$var->type or (join ' ',@{$var->type}) !~ /STOP/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 $var->type('NON_SYNONYMOUS_CODING');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695 $var->type('SYNONYMOUS_CODING');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698 #$var->codons(\@codons);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 $var->aa_alleles(\@aa_alleles);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 1;