comparison variant_effect_predictor/Bio/EnsEMBL/TranscriptFactory.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::TranscriptFactory - Module having the fset2transcript*
24 subroutines
25
26 =head1 SYNOPSIS
27
28 use Bio::EnsEMBL::TranscriptFactory;
29
30 &Bio::EnsEMBL::TranscriptFactory::fset2transcript($fset_id);
31
32 =head1 DESCRIPTION
33
34 Module containing the subroutines fset2transcript*,
35 which create transcripts from features (formally housed in
36 Bio::EnsEMBL::DBSQL::Utils).
37
38 =head1 METHODS
39
40 =cut
41
42 package Bio::EnsEMBL::TranscriptFactory;
43
44 use strict;
45
46 use Bio::EnsEMBL::Exon;
47 use Bio::EnsEMBL::Translation;
48 use Bio::EnsEMBL::Transcript;
49
50 sub fset2transcript {
51 my ($genscan,$contig)=@_;
52
53
54 unless ($genscan->isa ("Bio::EnsEMBL::SeqFeatureI"))
55 {print "$genscan must be Bio::EnsEMBL::SeqFeatureI\n";}
56
57 my $transcript = new Bio::EnsEMBL::Transcript;
58 $transcript->temporary_id($contig->id . "." . $genscan->seqname);
59
60 my @exons;
61 my $count= 1;
62
63 foreach my $f ($genscan->sub_SeqFeature) {
64
65 my $exon = new Bio::EnsEMBL::Exon;
66 $transcript->add_Exon($exon);
67 $exon->contig ($contig);
68 $exon->start ($f->start);
69 $exon->end ($f->end );
70 $exon->strand ($f->strand);
71 $exon->phase ($f->phase);
72 $exon->end_phase( ($exon->phase + $exon->length)%3 );
73 #$exon->score($f->score);
74 # $exon->p_value($f->p_value);
75 $exon->slice($contig->primary_seq);
76
77 push(@exons,$exon);
78 $count++;
79
80 }
81
82 if( $count == 1 ) {
83 $genscan->throw("Got a 0 exon genscan");
84 }
85
86 my $translation = new Bio::EnsEMBL::Translation;
87 #
88 # This code got changed due to Translation convention changing. Should work...
89 #
90
91 if ($exons[0]->strand == 1) {
92 @exons = sort {$a->start <=> $b->start} @exons;
93 } else {
94 @exons = sort {$b->start <=> $a->start} @exons;
95 }
96
97 $translation->start(1);
98 $translation->end($exons[scalar(@exons)-1]->length);
99
100 $translation->start_Exon($exons[0]);
101 $translation->end_Exon($exons[$#exons]);
102
103 my $endphase = $exons[0]->end_phase;
104
105 foreach my $exon (@exons) {
106
107 if ( $exon == $exons[0] ){
108 next;
109 }
110 $exon->phase ($endphase);
111 $endphase = $exon->end_phase;
112 }
113
114 $transcript->translation($translation);
115
116 return $transcript;
117 }
118
119 sub fset2transcript_guess_phases {
120 my ($fset,$contig) = @_;
121
122 my $transcript = new Bio::EnsEMBL::Transcript;
123
124 $transcript->temporary_id($contig->id . "." . $fset->id);
125
126
127 my @exons;
128 my $count = 1;
129
130 foreach my $f ($fset->sub_SeqFeature) {
131
132 my $exon = new Bio::EnsEMBL::Exon;
133 $exon->contig ($contig);
134 $exon->start ($f->start);
135 $exon->end ($f->end );
136 $exon->strand ($f->strand);
137 #$exon->score($f->score);
138 # $exon->p_value($f->p_value);
139 $exon->slice($contig);
140 $exon->phase($f->phase);
141 push(@exons,$exon);
142 $count++;
143
144 }
145
146 my $translation = new Bio::EnsEMBL::Translation;
147
148 if ($exons[0]->strand == 1) {
149 @exons = sort {$a->start <=> $b->start} @exons;
150 } else {
151 @exons = sort {$b->start <=> $a->start} @exons;
152 }
153
154 $translation->start (1);
155 $translation->end ($exons[$#exons]->end - $exons[$#exons]->start + 1);
156 $translation->start_Exon($exons[0]);
157 $translation->end_Exon($exons[$#exons]);
158 $transcript->translation($translation);
159
160 my $endphase = 0;
161
162 foreach my $exon (@exons) {
163
164 $exon ->phase ($endphase);
165 $transcript->add_Exon($exon);
166
167 $endphase = $exon->end_phase(($exon->phase + $exon->length)%3);
168
169 }
170
171
172 if ($transcript->translate->seq !~ /\*/) {
173 return $transcript;
174 }
175
176 $endphase = 1;
177
178 foreach my $exon (@exons) {
179 $exon->phase($endphase);
180 $endphase = $exon->end_phase(($exon->phase + $exon->length)%3);
181 }
182
183 if ($transcript->translate->seq !~ /\*/) {
184 return $transcript;
185 }
186
187 $endphase = 2;
188
189 foreach my $exon (@exons) {
190 $exon->phase($endphase);
191 $endphase = $exon->end_phase(($exon->phase + $exon->length)%3);
192 }
193
194 if ($transcript->translate->seq !~ /\*/) {
195 return $transcript;
196 }
197 }
198
199 sub fset2transcript_3frame {
200 my ($fset,$contig) = @_;
201
202 my @f = $fset->sub_SeqFeature;
203
204 if ($f[0]->strand == 1) {
205 @f = sort {$a->start <=> $b->start} @f;
206 } else {
207 @f = sort {$b->start <=> $a->start} @f;
208 }
209
210 my @transcripts;
211
212 my $startphase = 0;
213
214 while ($startphase < 3) {
215 my $endphase = $startphase;
216
217 my $transcript = new Bio::EnsEMBL::Transcript;
218
219 push(@transcripts,$transcript);
220
221 $transcript->temporary_id($contig->id . "." . $endphase);
222
223 my $count = 1;
224 my @exons;
225
226
227 foreach my $f (@f) {
228 #print "exon seqname = ".$f->seqname."\n";
229 my $exon = new Bio::EnsEMBL::Exon;
230 #print STDERR "exon ".$f->gffstring."\n";
231 push(@exons,$exon);
232 $exon->seqname($f->seqname);
233 $exon->temporary_id ($contig->id . ".$count");
234 $exon->contig ($contig);
235 $exon->start ($f->start);
236 $exon->end ($f->end );
237 $exon->strand ($f->strand);
238 $exon->slice($contig);
239 $exon->phase ($endphase);
240 $exon->end_phase( ($exon->phase + $exon->length)%3 );
241 #$exon->score ($f->score);
242 # $exon->p_value ($f->p_value);
243 $endphase = $exon->end_phase;
244
245 $transcript->add_Exon($exon);
246 $count++;
247
248 #print STDERR "Added exon start " . $exon->start . " end " . $exon->end . " strand " . $exon->strand . " score " . $exon->score . " pvalue " . $exon->p_value . "\n";
249 }
250
251 my $translation = new Bio::EnsEMBL::Translation;
252
253 my $contig_id = "";
254 my $fset_id = "";
255
256 if (defined($contig->id)) {
257 $contig_id = $contig->id;
258 }
259 if (defined($fset->id)) {
260 $fset_id = $fset->id;
261 }
262
263 $translation->temporary_id($contig_id . "." . $fset_id);
264 $translation->start (1);
265 $translation->end ($exons[$#exons]->end - $exons[$#exons]->start + 1);
266 $translation->start_Exon($exons[0]);
267 $translation->end_Exon ($exons[$#exons]);
268 $transcript->translation($translation);
269
270 # print STDERR "Phase $startphase " . $transcript->translate->seq . "\n";
271
272 $startphase++;
273 }
274 #print "finshed fset2transcript_3frame\n";
275 return @transcripts;
276 }
277
278
279 sub fset2transcript_with_seq {
280 my ($genscan,$seq)=@_;
281
282
283 unless ($genscan->isa ("Bio::EnsEMBL::SeqFeatureI"))
284 {print "$genscan must be Bio::EnsEMBL::SeqFeatureI\n";}
285 unless ($seq->isa ("Bio::PrimarySeqI") || $seq->isa ("Bio::SeqI"))
286 {print "$seq must be Bio::SeqI or a Bio::PrimarySeqI\n";}
287
288 #print STDERR "running fset2transcript\n";
289 my $transcript = new Bio::EnsEMBL::Transcript;
290 $transcript->temporary_id($seq->id . "." . $genscan->seqname);
291
292 my @exons;
293 my $count= 1;
294
295 foreach my $f ($genscan->sub_SeqFeature) {
296
297 my $exon = new Bio::EnsEMBL::Exon;
298 $exon->contig ($seq);
299 $exon->start ($f->start);
300 $exon->end ($f->end );
301 $exon->strand ($f->strand);
302 $exon->phase ($f->phase);
303 $exon->end_phase( ($exon->phase + $exon->length)%3 );
304 #$exon->score ($f->score);
305 #print STDERR "contig is a = ".$seq."\n";
306 $exon->slice($seq);
307
308 push(@exons,$exon);
309 $count++;
310
311 }
312
313 foreach my $exon (@exons) {
314
315 $transcript->add_Exon($exon);
316
317
318 }
319 return $transcript;
320
321 }
322
323
324
325 1;