annotate variant_effect_predictor/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 package Bio::EnsEMBL::Utils::VegaCuration::Translation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 use Data::Dumper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 use Bio::EnsEMBL::Utils::VegaCuration::Transcript;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 @ISA = qw(Bio::EnsEMBL::Utils::VegaCuration::Transcript);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 =head2 check_CDS_start_end_remarks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 Args : B::E::Transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 Example : my $results = $support->check_CDS_start_end_remarks($transcript)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 Description: identifies incorrect 'CDS end...' transcript remarks in a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 otter-derived Vega database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 Returntype : hashref
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub check_CDS_start_end_remarks {
  my ($self, $trans) = @_;

  # Collect everything needed from the transcript before doing any checks.
  my @remark_attribs = @{ $trans->get_all_Attributes('remark') };
  my $cds_end        = $trans->cdna_coding_end;
  my $cds_start      = $trans->cdna_coding_start;
  my $cdna_length    = $trans->length;
  my $cdna           = $trans->seq->seq;

  # Final and first three bases of the CDS. The -3 / -1 adjustments turn the
  # cDNA coordinates (apparently 1-based) into 0-based substr offsets.
  my $last_codon  = substr($cdna, $cds_end - 3, 3);
  my $first_codon = substr($cdna, $cds_start - 1, 3);

  # Facts the four checks below are built from (grep in scalar context
  # yields a match count, used here purely as a boolean).
  my $end_remark_count   = grep { $_->value eq 'CDS end not found' }   @remark_attribs;
  my $start_remark_count = grep { $_->value eq 'CDS start not found' } @remark_attribs;
  my $ends_in_stop       = grep { $_ eq $last_codon } qw(TGA TAA TAG);
  my $begins_with_atg    = ($first_codon eq 'ATG');
  my $cds_hits_end       = ($cds_end == $cdna_length);
  my $cds_from_base_one  = ($cds_start == 1);

  # Hashref of findings; stays undef when nothing is wrong.
  my $results;

  if ($end_remark_count) {
    # Remark present although the CDS stops short of the transcript end
    # on a recognised stop codon -> the remark is superfluous.
    $results->{'END_EXTRA'} = 1 if !$cds_hits_end && $ends_in_stop;
  }
  elsif ($cds_hits_end) {
    # No remark although the CDS runs to the very end of the transcript.
    if ($ends_in_stop) {
      $results->{'END_MISSING_2'} = 1;
    }
    else {
      $results->{'END_MISSING_1'} = $last_codon;
    }
  }

  if ($start_remark_count) {
    # Remark present although the CDS starts downstream of base 1 on ATG.
    $results->{'START_EXTRA'} = 1 if !$cds_from_base_one && $begins_with_atg;
  }
  elsif ($cds_from_base_one) {
    # No remark although the CDS starts at the first base of the transcript.
    if ($begins_with_atg) {
      $results->{'START_MISSING_2'} = 1;
    }
    else {
      $results->{'START_MISSING_1'} = $first_codon;
    }
  }

  return $results;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 =head2 check_CDS_start_end_remarks_loutre
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 Args : B::E::Transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 Example : my $results = $support->check_CDS_start_end_remarks_loutre($transcript)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 Description: identifies incorrect 'CDS end...' transcript attribs in a loutre
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 or a loutre-derived Vega database.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 Returntype : hashref
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub check_CDS_start_end_remarks_loutre {
  my $self  = shift;
  my $trans = shift;

  # Codons treated as a proper CDS terminator.
  my @stops = qw(TGA TAA TAG);

  # Group every attribute of the transcript by its code, so that
  # $attributes{CODE} is an arrayref of attribute objects (or does not exist).
  my %attributes;
  foreach my $attribute (@{$trans->get_all_Attributes()}) {
    push @{$attributes{$attribute->code}}, $attribute;
  }

  my $coding_end         = $trans->cdna_coding_end;
  my $coding_start       = $trans->cdna_coding_start;
  my $trans_end          = $trans->length;
  my $trans_seq          = $trans->seq->seq;
  my $initial_exon_phase = $trans->translation->start_Exon->phase;

  # Last and first codon of the CDS; the -3 / -1 adjustments turn the cDNA
  # coordinates (apparently 1-based) into 0-based substr offsets.
  my $stop_codon  = substr($trans_seq, $coding_end-3, 3);
  my $start_codon = substr($trans_seq, $coding_start-1, 3);

  # Evaluate once what several checks below need.
  my $has_stop        = grep { $_ eq $stop_codon } @stops;
  my $cds_reaches_end = ($coding_end == $trans_end);
  my $cds_at_base_one = ($coding_start == 1);

  # A start codon is accepted if it is ATG, or if it is CTG and some remark
  # on the transcript mentions a "non-ATG start" / "non ATG start".
  my $start_codon_incorrect = 1;
  if ($start_codon eq 'ATG') {
    $start_codon_incorrect = 0;
  }
  elsif ($start_codon eq 'CTG') {
    foreach my $attrib (@{ $attributes{'remark'} || [] }) {
      if ($attrib->value =~ /non[- ]ATG start/) {
        $start_codon_incorrect = 0;
        last;    # one matching remark is enough
      }
    }
  }

  # Hashref of findings; stays undef when nothing is wrong.
  my $results;

  my $end_nf   = $attributes{'cds_end_NF'};
  my $start_nf = $attributes{'cds_start_NF'};

  # Extra "CDS end not found": flagged although the CDS stops short of the
  # transcript end on a recognised stop codon.
  if ( $end_nf
    && ($end_nf->[0]->value == 1)
    && !$cds_reaches_end
    && $has_stop ) {
    $results->{'END_EXTRA'} = $stop_codon;
  }

  # Missing "CDS end not found": CDS runs to the end of the transcript
  # without a stop codon, and the attribute is either 0 or absent.
  if ($cds_reaches_end) {
    if ($end_nf) {
      if ( ($end_nf->[0]->value == 0) && !$has_stop ) {
        $results->{'END_MISSING'}{'WRONG'} = $stop_codon;
      }
    }
    elsif (!$has_stop) {
      $results->{'END_MISSING'}{'ABSENT'} = $stop_codon;
    }
  }

  # Extra "CDS start not found": flagged although the start codon is
  # acceptable, unless the CDS begins at base 1 in a non-zero phase.
  if ( $start_nf
    && ($start_nf->[0]->value == 1)
    && !$start_codon_incorrect ) {
    unless ( $cds_at_base_one && ($initial_exon_phase > 0) ) {
      $results->{'START_EXTRA'} = $start_codon;
    }
  }

  # Missing "CDS start not found": CDS begins at base 1 of the transcript.
  if ($cds_at_base_one) {
    if ($start_nf) {
      if ($start_nf->[0]->value == 0) {
        if ($start_codon_incorrect) {
          $results->{'START_MISSING'}{'ABSENT'} = $start_codon;
        }
        elsif ($initial_exon_phase > 0) {
          $results->{'START_MISSING'}{'INITIAL_PHASE'} = $initial_exon_phase;
        }
      }
    }
    # NOTE(review): with the attribute absent only a literal ATG is accepted;
    # the CTG + "non-ATG start" allowance is not applied here. Kept as in the
    # original - confirm whether that asymmetry is intended.
    elsif ($start_codon ne 'ATG') {
      $results->{'START_MISSING'}{'ABSENT'} = $start_codon;
    }
  }

  return $results;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 =head2 get_havana_seleno_comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 Example : my $results = $support->get_havana_seleno_comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 Description: parses the HEREDOC containing Havana comments in this module
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 Returntype : hashref
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub get_havana_seleno_comments {
  # Reads "identifier = comment" lines from this package's DATA handle and
  # returns a hashref: identifier => [ comment, "notsec-havana" ].
  # Returns undef if no usable line is read (e.g. DATA already consumed by
  # an earlier call).
  my $seen_translations;

  # Use a lexical line variable: a bare while (<DATA>) assigns to the global
  # $_ without localising it and would clobber the caller's $_.
  while (my $line = <DATA>) {
    # Skip whitespace-only lines and any line containing a '#'.
    next if $line =~ /^\s+$/ or $line =~ /#+/;

    # Split on the first '=' only, so a comment that itself contains '='
    # is kept whole instead of being cut at the second '='.
    my ($obj, $comment) = split /=/, $line, 2;
    $obj =~ s/^\s+|\s+$//g;

    # A line without '=' leaves $comment undef; keep it undef but do not
    # run the trim on it (that would raise an uninitialized-value warning).
    $comment =~ s/^\s+|\s+$//g if defined $comment;

    # We add the origin as now "seen" can come from a number of places, and have
    # a number of consequences in different cases, not just discounted Secs from this method. -- ds23
    $seen_translations->{$obj} = [ $comment, "notsec-havana" ];
  }
  return $seen_translations;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 sub check_for_stops {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 my $support = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 my ($gene,$seen_transcripts,$log_object) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 my $transcripts;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 my $has_log_object=defined($log_object);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 if($has_log_object){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 my @help = $log_object->species_params->get_trans($gene->stable_id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 $transcripts=\@help;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 }else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 $log_object=$support;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 $transcripts=$gene->get_all_Transcripts;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 my $gname = $gene->get_all_Attributes('name')->[0]->value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 my $gsi = $gene->stable_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 my $scodon = 'TGA';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 my $mod_date = $support->date_format( $gene->modified_date,'%d/%m/%y' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 my $hidden_remak_ttributes;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 TRANS:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 foreach my $trans (@$transcripts) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 my $tsi = $trans->stable_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 my $tID = $trans->dbID;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 my $tname = $trans->get_all_Attributes('name')->[0]->value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 if($has_log_object){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 $hidden_remak_ttributes=$log_object->species_params->get_attributes->{$tsi}->{'hidden_remark'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 }else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 $hidden_remak_ttributes=$trans->get_all_Attributes('hidden_remark');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 foreach my $rem (@$hidden_remak_ttributes) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 if ($rem->value =~ /not_for_Vega/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 #$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 $log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Skipping transcript $tname ($tsi) since 'not_for_Vega'");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 next TRANS;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 # go no further if there is a ribosomal framshift attribute
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 foreach my $attrib (@{$trans->get_all_Attributes('_rib_frameshift')}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 if ($attrib->value) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 $log_object->_save_log('log', '', $gsi, '', $tsi, '', "Skipping $tsi ($tname) since it has a ribosomal frameshift attribute");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 next TRANS;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 #$support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 $log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Studying transcript $tsi ($tname, $tID)");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 my $peptide;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 # go no further if the transcript doesn't translate or if there are no stops
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 next TRANS unless ($peptide = $trans->translate);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 my $pseq = $peptide->seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 my $orig_seq = $pseq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 # (translate method trims stops from sequence end)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 next TRANS unless ($pseq =~ /\*/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 # warn sprintf("Stop codon is '%s'\n",substr($trans->translateable_seq,-3));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 #$support->log_verbose("Stops found in $tsi ($tname)\n",1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 $log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Stops found in $tsi ($tname)");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 # find out where and how many stops there are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 my @found_stops;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 my $mrna = $trans->translateable_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 my $offset = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 my $tstop;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 while ($pseq =~ /^([^\*]*)\*(.*)/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 my $pseq1_f = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 $pseq = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 my $seq_flag = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 $offset += length($pseq1_f) * 3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 my $stop = substr($mrna, $offset, 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 my $aaoffset = int($offset / 3)+1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 push(@found_stops, [ $stop, $aaoffset ]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 $tstop .= "$aaoffset ";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 $offset += 3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 # are all stops TGA...?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 my $num_stops = scalar(@found_stops);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 my $num_tga = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 my $positions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 foreach my $stop (@found_stops) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 $positions .= $stop->[0]."(".$stop->[1].") ";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 if ($stop->[0] eq $scodon) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 $num_tga++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 my $source = $gene->source;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 #...no - an internal stop codon error in the database...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 if ($num_tga < $num_stops) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 if ($source eq 'havana') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 #$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 $log_object->_save_log('log_warning', '', $gsi, 'TRANSCRIPT', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]: Sequence = $orig_seq Stops at $positions)");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 #$support->log_warning("INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 $log_object->_save_log('log_warning', '', $gsi, 'TRANSCRIPT', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]: Sequence = $orig_seq Stops at $positions)");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 #...yes - check remarks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 my $flag_remark = 0; # 1 if word seleno has been used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 my $flag_remark2 = 0; # 1 if existing remark has correct numbering
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 my $alabel = 'Annotation_remark- selenocysteine ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 my $alabel2 = 'selenocysteine ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 my $annot_stops;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 my $remarks;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 my $att;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 #get both hidden_remarks and remarks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 foreach my $remark_type ('remark','hidden_remark') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 if($has_log_object){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 $att=$log_object->species_params->get_attributes->{$trans->stable_id}->{$remark_type};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 }else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 $att=$trans->get_all_Attributes($remark_type)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 foreach my $attrib ( @$att) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 push @{$remarks->{$remark_type}}, $attrib->value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 #parse remarks to check syntax for location of edits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 while (my ($attrib,$remarks)= each %$remarks) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 foreach my $text (@{$remarks}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 if ( ($attrib eq 'remark') && ($text=~/^$alabel(.*)/) ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 #$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 $log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_wrong_selC_coord', "seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 $annot_stops=$1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 elsif ($text =~ /^$alabel2(.*)/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 my $maybe = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 if($maybe =~ /^\s*\d+(\s+\d+)*\s*$/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 $annot_stops=$maybe;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 $log_object->_save_log('log', '', $gene->stable_id, '', $tsi, '', "Maybe annotated stop in incorrect format, maybe just a remark that happens to begin '$alabel2'".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 " -- might need to investigate: '$alabel2$maybe' [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 #check the location of the annotated edits matches actual stops in the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 my @annotated_stops;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 if ($annot_stops){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 my $i = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 foreach my $offset (split(/\s+/, $annot_stops)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 #OK if it matches a known stop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 if (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && ($found_stops[$i]->[1] == $offset)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 push @annotated_stops, $offset;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 # catch old annotations where number was in DNA not peptide coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1] * 3) == $offset)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 $log_object->_save_log('log_warning', '', $gene->stable_id, 'DNA', $tsi, 'VQCT_wrong_selC_coord', "DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 # catch old annotations where number off by one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1]) == $offset+1)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 $log_object->_save_log('log_warning', '', $gene->stable_id, 'PEPTIDE', $tsi, 'VQCT_wrong_selC_coord', "PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 elsif (defined($offset) && ($offset=~/^\d+$/)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 if ($offset == length($orig_seq)+1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 if($seen_transcripts->{$tsi} && $seen_transcripts->{$tsi}->[1] eq 'known-tga-stop') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 $log_object->_save_log('log', '', $gene->stable_id, 'TRANSCRIPT', $tsi, '', "Annotated stop for transcript $tsi ($tname) known to be a stop codon. Ok. [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 } elsif($seen_transcripts->{$tsi} && $seen_transcripts->{$tsi}->[1] eq 'known-terminal-sec') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 $log_object->_save_log('log', '', $gene->stable_id, 'TRANSCRIPT', $tsi, '', "Annotated stop for transcript $tsi ($tname) known to be a terminal Sec. Ok. [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 $log_object->_save_log('log_warning', '', $gene->stable_id, 'TRANSCRIPT', $tsi, '', "Annotated stop for transcript $tsi ($tname) \"$offset\" matches actual stop codon yet has no entry in script config to disambiguate it. Please investigate and add appropriate entry to config arrays in add_selcys.pl. [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 $log_object->_save_log('log_warning', '', $gene->stable_id, 'TRANSCRIPT', $tsi, 'VQCT_wrong_selC_coord', "Annotated stop for transcript $tsi ($tname) \"$offset\" does not match a TGA codon) [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 push @annotated_stops, $offset;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 $i++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 #check location of found stops matches annotated ones - any new ones are reported
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 foreach my $stop ( @found_stops ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 my $pos = $stop->[1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 my $seq = $stop->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 unless ( grep { $pos == $_} @annotated_stops) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 if ($seen_transcripts->{$tsi} && $seen_transcripts->{$tsi}->[1] eq 'notsec-havana') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 #$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 $log_object->_save_log('log_verbose', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators: ".$seen_transcripts->{$tsi}->[0].") [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 #$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 $log_object->_save_log('log', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# _save_log - forward a message to one of the invocant's logging methods.
#
# Positional arguments (after the invocant):
#   $log_type   - name of the method to invoke on the invocant; the callers
#                 visible in this file pass 'log', 'log_warning' and
#                 'log_verbose'
#   $chrom_name, $gsi, $type, $tsi, $tag
#               - accepted so every call site can share one signature;
#                 this implementation does not use them
#   $txt        - message text; any false value (undef, '', 0) is treated
#                 as the empty string
#
# A newline is appended to the message before it is handed over.
# Returns whatever the invoked logging method returns.
sub _save_log {
    my ($self, $log_type, $chrom_name, $gsi, $type, $tsi, $tag, $txt) = @_;

    # Fall back to an empty message so the concatenation below never
    # operates on undef.
    my $message = $txt || '';

    # $log_type holds a method name, so this is a dynamic method call.
    return $self->$log_type($message . "\n");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 #details of annotators' comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 __DATA__
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 OTTHUMT00000144659 = FIXED- changed to transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 OTTHUMT00000276377 = FIXED- changed to transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 OTTHUMT00000257741 = FIXED- changed to nmd
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 OTTHUMT00000155694 = NOT_FIXED- should be nmd but external annotation but cannot be fixed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 OTTHUMT00000155695 = NOT_FIXED- should be nmd but external annotation but cannot be fixed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 OTTHUMT00000282573 = FIXED- changed to unprocessed pseudogene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 OTTHUMT00000285227 = FIXED- changed start site
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 OTTHUMT00000151008 = FIXED- incorrect trimming of CDS, removed extra stop codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 OTTHUMT00000157999 = FIXED- changed incorrect stop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 OTTHUMT00000150523 = FIXED- incorrect trimming of CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 OTTHUMT00000150525 = FIXED- incorrect trimming of CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 OTTHUMT00000150522 = FIXED- incorrect trimming of CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 OTTHUMT00000150521 = FIXED- incorrect trimming of CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 OTTHUMT00000246819 = FIXED- corrected frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 OTTHUMT00000314078 = FIXED- corrected frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 OTTHUMT00000080133 = FIXED- corrected frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 OTTHUMT00000286423 = FIXED- changed to transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 OTTMUST00000055509 = FIXED- error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 OTTMUST00000038729 = FIXED- corrected frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 OTTMUST00000021760 = FIXED- corrected frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 OTTMUST00000023057 = FIXED- corrected frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 OTTMUST00000015207 = FIXED- corrected frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 OTTMUST00000056646 = FIXED- error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 OTTMUST00000059686 = FIXED- corrected frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 OTTMUST00000013426 = FIXED- corrected frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 OTTMUST00000044412 = FIXED- error