annotate variant_effect_predictor/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 package Bio::EnsEMBL::Utils::VegaCuration::Transcript;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 no warnings 'uninitialized';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 use Bio::EnsEMBL::Utils::VegaCuration::Gene;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 use Data::Dumper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 @ISA = qw(Bio::EnsEMBL::Utils::VegaCuration::Gene);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 =head2 find_non_overlaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 Args : arrayref of B::E::Transcripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 Example : find_non_overlaps($all_transcripts)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 Description: identifies any non-overlapping transcripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 Returntype : arrayref of stable IDs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Collect the stable IDs of transcripts that do not overlap one another.
#
# Every ordered pair of entries in the list is examined. Whenever the first
# member of a pair ends before the second one starts, the stable IDs of both
# are appended to the result (first member, then second). A stable ID is
# therefore repeated once for every such pair it takes part in.
#
# Args       : arrayref of objects providing start, end and stable_id
# Returntype : arrayref of stable IDs (empty when no pair qualifies)
sub find_non_overlaps {
    my ($self, $all_transcripts) = @_;

    my @ids;
    for my $upstream (@$all_transcripts) {
        # Keep only the entries lying entirely downstream of $upstream and
        # emit one (upstream, downstream) ID pair for each of them.
        push @ids,
            map  { ($upstream->stable_id, $_->stable_id) }
            grep { $upstream->end < $_->start } @$all_transcripts;
    }
    return \@ids;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 =head2 check_remarks_and_update_names
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 Arg[1] : B::E::Gene (with potentially duplicated transcript names)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 Arg[2] : counter 1 (no. of patched genes)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 Arg[3] : counter 2 (no. of patched transcripts)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 Example : ($study_more,$c1,$c2) = $support->check_remarks_and_update_names($gene,$c1,$c2)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 Description: - checks remarks and patches transcripts with identical names according to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 CDS and length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 Returntype : list of (flag that is false when the gene has a 'fragmented locus' remark and true otherwise, counter1, counter2)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Check a gene's remarks and rename its transcripts according to CDS and
# length.
#
# The gene is expected to carry a 'fragmented locus' remark; a warning is
# logged when it does not. All transcripts of the gene are then renamed to
# "<gene name>-NNN", numbering the translating transcripts first and the
# remaining ones afterwards, each group ordered from longest to shortest.
# Unless the 'dry_run' parameter is set, the display label of the matching
# Vega_transcript xref is updated in the database.
#
# Arg[1]     : gene object (adaptor, stable_id, display_xref,
#              get_all_Attributes and get_all_Transcripts are used)
# Arg[2]     : counter of patched genes (plain number, passed by value)
# Arg[3]     : counter of patched transcripts (plain number, passed by value)
# Returntype : list of
#                - flag that is 0 when the gene has a 'fragmented locus'
#                  remark and 1 otherwise
#                - gene counter, incremented by one
#                - transcript counter, incremented once per transcript
sub check_remarks_and_update_names {
    my $self = shift;
    my ($gene, $gene_c, $trans_c) = @_;

    my $dry_run = $self->param('dry_run');
    my $dbh     = $gene->adaptor->db->dbc->db_handle;

    # NOTE(review): the returned list of IDs previously sent to annotators is
    # not used anywhere below. The call is kept because it reads the DATA
    # handle, which other code may depend on - confirm before removing.
    $self->get_havana_fragmented_loci_comments;

    my $gsi = $gene->stable_id;

    # Prefer the display xref; fall back to the 'name' attribute when the
    # xref lookup throws (e.g. because the gene has no display xref).
    my $g_name = eval { $gene->display_xref->display_id };
    if ($@) {
        $g_name = $gene->get_all_Attributes('name')->[0]->value;
    }

    # Shout if there is no remark to identify this gene as being fragmented.
    my $study_more = 1;
    my @remarks = map { $_->value } @{ $gene->get_all_Attributes('remark') };
    if (grep { $_ eq 'fragmented locus' } @remarks) {
        $study_more = 0;
    }
    else {
        $self->log_warning("Gene $gsi should have a fragmented locus remark\n");
    }

    # Patch transcript names according to length and CDS.
    $gene_c++;

    # Separate coding and non-coding transcripts.
    my (@coding_trans, @noncoding_trans);
    foreach my $trans (@{ $gene->get_all_Transcripts() }) {
        if ($trans->translate) {
            push @coding_trans, $trans;
        }
        else {
            push @noncoding_trans, $trans;
        }
    }

    # Names and stable IDs are passed as bind values rather than being
    # interpolated into the statement, so quotes or other special characters
    # in them cannot break or alter the SQL.
    my $update_sql = q{UPDATE xref x, external_db edb
                          SET x.display_label  = ?
                        WHERE x.external_db_id = edb.external_db_id
                          AND x.dbprimary_acc  = ?
                          AND edb.db_name      = 'Vega_transcript'};

    # Number transcripts coding > non-coding, then on length.
    my $c = 0;
    $self->log("\nPatching names according to CDS and length:\n",1);
    foreach my $group (\@coding_trans, \@noncoding_trans) {
        foreach my $trans (sort { $b->length <=> $a->length } @$group) {
            $trans_c++;
            my $tsi = $trans->stable_id;

            # Same fallback as for the gene name above.
            my $t_name = eval { $trans->display_xref->display_id };
            if ($@) {
                $t_name = $trans->get_all_Attributes('name')->[0]->value;
            }

            $c++;
            my $new_name = $g_name . '-' . sprintf("%03d", $c);
            $self->log(sprintf("%-20s%-3s%-20s", "$t_name ", "-->", "$new_name")."\n",1);

            next if $dry_run;

            # Update the transcript display xref.
            $dbh->do($update_sql, undef, $new_name, $tsi);
        }
    }
    return ($study_more, $gene_c, $trans_c);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 =head2 check_names_and_overlap
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 Arg[1] : hashref whose values are arrayrefs of duplicated names (each entry a 'name|stable_id' string)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 Arg[2] : B::E::Gene (with potentially duplicated transcript names)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 Arg[3] : FH (to log new duplicates)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 Example : $support->check_names_and_overlap($transcripts,$gene,$fh)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 Description: checks pairs of transcripts identified as having duplicate Vega names:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 - to see if they have identical names in loutre (shouldn't have)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 - distinguish between overlapping and non overlapping transcripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Report sets of transcripts that share a Vega name.
#
# Each value of the first argument is an arrayref of 'name|stable_id'
# strings. Sets holding exactly one entry are skipped. For every other set
# the transcripts are fetched by stable ID and handed to find_non_overlaps:
#   - if that call throws, a warning is logged and the set is skipped;
#   - if it returns any IDs, a 'Non-overlapping' warning is logged;
#   - otherwise an 'Overlapping' warning is logged.
# In the last two cases the gene stable ID is also printed to the filehandle.
#
# Arg[1]     : hashref of arrayrefs of 'name|stable_id' strings
# Arg[2]     : gene object (adaptor, stable_id and get_all_Attributes are used)
# Arg[3]     : filehandle the gene stable ID is printed to
# Returntype : none
sub check_names_and_overlap {
    my ($self, $transcript_info, $gene, $n_flist_fh) = @_;

    my $transcript_adaptor = $gene->adaptor->db->get_TranscriptAdaptor;
    my $gsi                = $gene->stable_id;
    my $g_name             = $gene->get_all_Attributes('name')->[0]->value;

    NAME_SET:
    for my $name_set (values %$transcript_info) {
        next NAME_SET if @$name_set == 1;

        my (@fetched, %name_for, $all_t_names);
        for my $entry (@$name_set) {
            my ($t_name, $tsi) = split /\|/, $entry;
            $name_for{$tsi} = $t_name;
            $all_t_names .= "$tsi [$t_name] ";
            # scalar(): keep one slot per entry even if the fetch returns
            # an empty list.
            push @fetched, scalar $transcript_adaptor->fetch_by_stable_id($tsi);
        }

        my $non_overlaps = eval { $self->find_non_overlaps(\@fetched) };
        if ($@) {
            $self->log_warning("Problem looking for overlapping transcripts for gene $gsi (is_current = 0 ?). Skipping this bit\n");
            next NAME_SET;
        }

        if (@$non_overlaps) {
            # The transcripts don't overlap.
            my $tsi_string = join '', map { " $_ [ $name_for{$_} ] " } @$non_overlaps;
            $self->log_warning("NEW: Non-overlapping: $gsi ($g_name) has non-overlapping transcripts ($tsi_string) with duplicated Vega names, and it has no \'fragmented locus\' gene remark. Neither has it been OKeyed by Havana before. Transcript names are being patched but this needs checking by Havana.\n");
        }
        else {
            # ...otherwise the transcripts do overlap.
            $self->log_warning("NEW: Overlapping: $gsi ($g_name) has overlapping transcripts ($all_t_names) with duplicated Vega names and it has no \'fragmented locus\' gene_remark. Neither has it been OKeyed by Havana before. Transcript names are being patched but this could be checked by Havana if they were feeling keen.\n");
        }

        # Log gsi (to be sent to Havana).
        print $n_flist_fh "$gsi\n";
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 =head2 get_havana_fragmented_loci_comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 Example : my $results = $support->get_havana_fragmented_loci_comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 Description: parses the __DATA__ section at the end of this module, which contains the Havana comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 Returntype : hashref
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
{
    # Parsed comments, filled in by the first call. The DATA handle can only
    # be read through once, so without this every later call would find it
    # exhausted and return nothing.
    my $seen_genes_cache;

    # Parse the "<ID> = <comment>" lines of this module's DATA section.
    #
    # Lines consisting only of whitespace and lines containing a '#' are
    # skipped. Each remaining line is split on '='; the first field is the
    # key and the second the value, both with surrounding whitespace
    # removed. A line without '=' is stored with an undefined value. When a
    # key occurs more than once the last occurrence wins.
    #
    # Args       : none
    # Returntype : hashref mapping ID to comment. The same hashref is
    #              returned by every call, so callers should not modify it.
    sub get_havana_fragmented_loci_comments {
        return $seen_genes_cache if $seen_genes_cache;

        my %comment_for;
        # Read into a lexical so the caller's $_ is left untouched.
        while (my $line = <DATA>) {
            next if $line =~ /^\s+$/ or $line =~ /#+/;
            my ($obj, $comment) = split /=/, $line;
            $obj     =~ s/^\s+|\s+$//g;
            $comment =~ s/^\s+|\s+$//g if defined $comment;
            $comment_for{$obj} = $comment;
        }

        $seen_genes_cache = \%comment_for;
        return $seen_genes_cache;
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 #details of genes with duplicated transcript names that have already been reported to Havana
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 #identified as either fragmented or as being OK to patch
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 __DATA__
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 OTTMUSG00000005478 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 OTTMUSG00000001936 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 OTTMUSG00000017081 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 OTTMUSG00000011441 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 OTTMUSG00000013335 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 OTTMUSG00000011654 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 OTTMUSG00000001835 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 OTTHUMG00000035221 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 OTTHUMG00000037378 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 OTTHUMG00000060732 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 OTTHUMG00000132441 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 OTTHUMG00000031383 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 OTTHUMG00000012716 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 OTTHUMG00000031102 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 OTTHUMG00000148816 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 OTTHUMG00000149059 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 OTTHUMG00000149221 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 OTTHUMG00000149326 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 OTTHUMG00000149644 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 OTTHUMG00000149574 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 OTTHUMG00000058101 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 OTTHUMG00000150119 = OK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 OTTHUMG00000149850 = OK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 OTTHUMG00000058101 = OK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 OTTHUMG00000058907 = OK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 OTTMUSG00000011654 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 OTTMUSG00000019369 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 OTTMUSG00000017081 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 OTTMUSG00000001835 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 OTTMUSG00000011499 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 OTTMUSG00000013335 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 OTTMUSG00000008023 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 OTTMUSG00000019369 = fragmented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 OTTMUSG00000022266
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 OTTMUSG00000006697
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 OTTMUSG00000012302 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 OTTMUSG00000013368 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 OTTMUSG00000015766 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 OTTMUSG00000016025 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 OTTMUSG00000001066 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 OTTMUSG00000016331 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 OTTMUSG00000006935 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 OTTMUSG00000007263 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 OTTMUSG00000000304 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 OTTMUSG00000009150 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 OTTMUSG00000008023 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 OTTMUSG00000017077 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 OTTMUSG00000003440 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 OTTMUSG00000016310 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 OTTMUSG00000026199 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 OTTMUSG00000028423 =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 OTTMUSG00000007427 =