Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqIO/chado.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: chado.pm,v 1.1 2002/12/03 08:13:55 cjm Exp $ | |
2 # | |
3 # BioPerl module for Bio::SeqIO::chado | |
4 # | |
5 # Chris Mungall <cjm@fruitfly.org> | |
6 # | |
7 # You may distribute this module under the same terms as perl itself | |
8 | |
9 # POD documentation - main docs before the code | |
10 | |
11 =head1 NAME | |
12 | |
13 Bio::SeqIO::chado - chado sequence input/output stream | |
14 | |
15 =head1 SYNOPSIS | |
16 | |
17 It is probably best not to use this object directly, but | |
18 rather go through the SeqIO handler system. Go: | |
19 | |
20 $stream = Bio::SeqIO->new(-file => $filename, -format => 'chado'); | |
21 | |
22 while ( my $seq = $stream->next_seq() ) { | |
23 # do something with $seq | |
24 } | |
25 | |
26 =head1 DESCRIPTION | |
27 | |
28 This object can transform Bio::Seq objects to and from chado flat | |
29 file databases. CURRENTLY ONLY TO | |
30 | |
31 | |
32 =head2 Optional functions | |
33 | |
34 =over 3 | |
35 | |
36 =item _show_dna() | |
37 | |
38 (output only) shows the dna or not | |
39 | |
40 =item _post_sort() | |
41 | |
42 (output only) provides a sorting func which is applied to the FTHelpers | |
43 before printing | |
44 | |
45 | |
46 =back | |
47 | |
48 =head1 FEEDBACK | |
49 | |
50 =head2 Mailing Lists | |
51 | |
52 User feedback is an integral part of the evolution of this | |
53 and other Bioperl modules. Send your comments and suggestions preferably | |
54 to one of the Bioperl mailing lists. | |
55 Your participation is much appreciated. | |
56 | |
57 bioperl-l@bioperl.org - General discussion | |
58 http://www.bioperl.org/MailList.shtml - About the mailing lists | |
59 | |
60 =head2 Reporting Bugs | |
61 | |
62 Report bugs to the Bioperl bug tracking system to help us keep track | |
63 the bugs and their resolution. | |
64 Bug reports can be submitted via email or the web: | |
65 | |
66 bioperl-bugs@bio.perl.org | |
67 http://bio.perl.org/bioperl-bugs/ | |
68 | |
69 =head1 AUTHOR - Chris Mungall | |
70 | |
71 Email cjm@fruitfly.org | |
72 | |
73 =head1 APPENDIX | |
74 | |
75 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
76 | |
77 =cut | |
78 | |
79 # Let the code begin... | |
80 | |
81 package Bio::SeqIO::chado; | |
82 use vars qw(@ISA); | |
83 use strict; | |
84 | |
85 use Bio::SeqIO; | |
86 use Bio::SeqFeature::Generic; | |
87 use Bio::Species; | |
88 use Bio::Seq::SeqFactory; | |
89 use Bio::Annotation::Collection; | |
90 use Bio::Annotation::Comment; | |
91 use Bio::Annotation::Reference; | |
92 use Bio::Annotation::DBLink; | |
93 | |
94 use Data::Stag qw(:all); | |
95 | |
96 @ISA = qw(Bio::SeqIO); | |
97 | |
98 sub _initialize { | |
99 my($self,@args) = @_; | |
100 | |
101 $self->SUPER::_initialize(@args); | |
102 if( ! defined $self->sequence_factory ) { | |
103 $self->sequence_factory(new Bio::Seq::SeqFactory | |
104 (-verbose => $self->verbose(), | |
105 -type => 'Bio::Seq::RichSeq')); | |
106 } | |
107 my $wclass = $self->default_handler_class; | |
108 $self->handler($wclass->new); | |
109 $self->{_end_of_data} = 0; | |
110 $self->handler->S("chado"); | |
111 return; | |
112 } | |
113 | |
114 sub DESTROY { | |
115 my $self = shift; | |
116 $self->end_of_data(); | |
117 $self->SUPER::DESTROY(); | |
118 } | |
119 | |
120 sub end_of_data { | |
121 my $self = shift; | |
122 $self->{_end_of_data} = 1; | |
123 $self->handler->E("chado"); | |
124 } | |
125 | |
126 sub default_handler_class { | |
127 return "Data::Stag::BaseHandler"; | |
128 } | |
129 | |
130 =head2 next_seq | |
131 | |
132 Title : next_seq | |
133 Usage : $seq = $stream->next_seq() | |
134 Function: returns the next sequence in the stream | |
135 Returns : Bio::Seq object | |
136 Args : | |
137 | |
138 =cut | |
139 | |
140 sub next_seq { | |
141 my ($self,@args) = @_; | |
142 my $seq = $self->sequence_factory->create | |
143 ( | |
144 # '-verbose' =>$self->verbose(), | |
145 # %params, | |
146 # -seq => $seqc, | |
147 # -annotation => $annotation, | |
148 # -features => \@features | |
149 ); | |
150 return $seq; | |
151 } | |
152 | |
153 sub handler { | |
154 my $self = shift; | |
155 $self->{_handler} = shift if @_; | |
156 return $self->{_handler}; | |
157 } | |
158 | |
159 | |
160 =head2 write_seq | |
161 | |
162 Title : write_seq | |
163 Usage : $stream->write_seq($seq) | |
164 Function: writes the $seq object (must be seq) to the stream | |
165 Returns : 1 for success and 0 for error | |
166 Args : Bio::Seq | |
167 | |
168 | |
169 =cut | |
170 | |
171 sub write_seq { | |
172 my ($self,$seq) = @_; | |
173 | |
174 if( !defined $seq ) { | |
175 $self->throw("Attempting to write with no seq!"); | |
176 } | |
177 | |
178 if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) { | |
179 $self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!"); | |
180 } | |
181 | |
182 # get a handler - must inherit from Data::Stag::BaseHandler; | |
183 my $w = $self->handler; | |
184 | |
185 # start of data | |
186 $w->S("seqset"); | |
187 | |
188 # my $seq_temp_uid = $self->get_temp_uid($seq); | |
189 | |
190 my $seq_temp_uid = $seq->accession . '.' . ($seq->can('seq_version') ? $seq->seq_version : $seq->version); | |
191 | |
192 # data structure representing the core sequence for this record | |
193 my $seqnode = | |
194 Data::Stag->new(feature=>[ | |
195 [feature_id=>$seq_temp_uid], | |
196 [dbxrefstr=>$seq->accession_number], | |
197 [name=>$seq->display_name], | |
198 [residues=>$seq->seq], | |
199 ]); | |
200 | |
201 # soft properties | |
202 my %prop = (); | |
203 | |
204 my ($div, $mol); | |
205 my $len = $seq->length(); | |
206 | |
207 if ( $seq->can('division') ) { | |
208 $div=$seq->division; | |
209 } | |
210 if( !defined $div || ! $div ) { $div = 'UNK'; } | |
211 | |
212 if( !$seq->can('molecule') || ! defined ($mol = $seq->molecule()) ) { | |
213 $mol = $seq->alphabet || 'DNA'; | |
214 } | |
215 | |
216 | |
217 my $circular = 'linear '; | |
218 $circular = 'circular' if $seq->is_circular; | |
219 | |
220 # cheeky hack - access symbol table | |
221 no strict 'refs'; | |
222 map { | |
223 $prop{$_} = | |
224 $ {*$_}; | |
225 } qw(mol div circular); | |
226 use strict 'refs'; | |
227 | |
228 map { | |
229 $prop{$_} = $seq->$_() if $seq->can($_); | |
230 } qw(desc keywords); | |
231 | |
232 local($^W) = 0; # supressing warnings about uninitialized fields. | |
233 | |
234 # Organism lines | |
235 if (my $spec = $seq->species) { | |
236 my ($species, $genus, @class) = $spec->classification(); | |
237 my $OS; | |
238 if( $spec->common_name ) { | |
239 $OS = $spec->common_name; | |
240 } else { | |
241 $OS = "$genus $species"; | |
242 } | |
243 if (my $ssp = $spec->sub_species) { | |
244 $OS .= " $ssp"; | |
245 } | |
246 } | |
247 | |
248 # Reference lines | |
249 my $count = 1; | |
250 foreach my $ref ( $seq->annotation->get_Annotations('reference') ) { | |
251 # TODO | |
252 } | |
253 # Comment lines | |
254 | |
255 foreach my $comment ( $seq->annotation->get_Annotations('comment') ) { | |
256 $seqnode->add_featureprop([[pkey=>'comment'],[pval=>$comment->text]]); | |
257 } | |
258 | |
259 # throw the writer an event | |
260 $w->ev(@$seqnode); | |
261 | |
262 $seqnode = undef; # free memory | |
263 | |
264 # make events for all the features within the record | |
265 foreach my $sf ( $seq->top_SeqFeatures ) { | |
266 $self->write_sf($sf, $seq_temp_uid); | |
267 } | |
268 | |
269 # data end | |
270 $w->E("seqset"); | |
271 return 1; | |
272 } | |
273 | |
274 # ---- | |
275 # writes a seq feature | |
276 # ---- | |
277 | |
278 sub write_sf { | |
279 my $self = shift; | |
280 my $sf = shift; | |
281 my $seq_temp_uid = shift; | |
282 | |
283 my $w = $self->handler; | |
284 | |
285 my %props = | |
286 map { | |
287 $_=>[$sf->each_tag_value($_)] | |
288 } $sf->all_tags; | |
289 | |
290 my $loc = $sf->location; | |
291 my $name = $sf->display_name; | |
292 my $type = $sf->primary_tag; | |
293 my @subsfs = $sf->sub_SeqFeature; | |
294 my @locnodes = (); | |
295 my $sid = $loc->is_remote ? $loc->seq_id : $seq_temp_uid; | |
296 if( $loc->isa("Bio::Location::SplitLocationI") ) { | |
297 # turn splitlocs into subfeatures | |
298 my $n = 1; | |
299 push(@subsfs, | |
300 map { | |
301 my $ssf = | |
302 Bio::SeqFeature::Generic->new( | |
303 | |
304 -start=>$_->start, | |
305 -end=>$_->end, | |
306 -strand=>$_->strand, | |
307 -primary=>$self->subpartof($type), | |
308 ); | |
309 if ($_->is_remote) { | |
310 $ssf->location->is_remote(1); | |
311 $ssf->location->seq_id($_->seq_id); | |
312 } | |
313 $ssf; | |
314 } $loc->each_Location); | |
315 } | |
316 elsif( $loc->isa("Bio::Location::RemoteLocationI") ) { | |
317 # turn splitlocs into subfeatures | |
318 my $n = 1; | |
319 push(@subsfs, | |
320 map { | |
321 Bio::SeqFeature::Generic->new( | |
322 # -name=>$name.'.'.$n++, | |
323 -start=>$_->start, | |
324 -end=>$_->end, | |
325 -strand=>$_->strand, | |
326 -primary=>$self->subpartof($type), | |
327 ) | |
328 } $loc->each_Location); | |
329 } | |
330 else { | |
331 my ($beg, $end, $strand) = $self->bp2ib($loc); | |
332 @locnodes = ( | |
333 [featureloc=>[ | |
334 [nbeg=>$beg], | |
335 [nend=>$end], | |
336 [strand=>$strand], | |
337 [srcfeature_id=>$sid], | |
338 [group=>0], | |
339 [rank=>0], | |
340 ] | |
341 ] | |
342 ); | |
343 } | |
344 my $feature_id = $self->get_temp_uid($sf); | |
345 | |
346 my $fnode = | |
347 [feature=>[ | |
348 [feature_id=>$feature_id], | |
349 [name=>$name], | |
350 [typename=>$type], | |
351 @locnodes, | |
352 (map { | |
353 my $k = $_; | |
354 map { [featureprop=>[[pkey=>$k],[pval=>$_]]] } @{$props{$k}} | |
355 } keys %props), | |
356 ]]; | |
357 $w->ev(@$fnode); | |
358 | |
359 foreach my $ssf (@subsfs) { | |
360 my $ssfid = $self->write_sf($ssf, $sid); | |
361 $w->ev(feature_relationship=>[ | |
362 [subjfeature_id=>$ssfid], | |
363 [objfeature_id=>$feature_id] | |
364 ] | |
365 ); | |
366 } | |
367 return $feature_id; | |
368 } | |
369 | |
370 # private; | |
371 # an ID for this session that should be | |
372 # unique... hmm | |
373 sub session_id { | |
374 my $self = shift; | |
375 $self->{_session_id} = shift if @_; | |
376 if (!$self->{_session_id}) { | |
377 $self->{_session_id} = $$.time; | |
378 } | |
379 return $self->{_session_id}; | |
380 } | |
381 | |
382 | |
383 our $next_id = 1; | |
384 our %obj2id_hash = (); | |
385 sub get_temp_uid { | |
386 my $self = shift; | |
387 my $ob = shift; | |
388 my $id = $obj2id_hash{$ob}; | |
389 if (!$id) { | |
390 $id = $next_id++; | |
391 $obj2id_hash{$ob} = $id; | |
392 } | |
393 return $self->session_id.'.'.$id; | |
394 } | |
395 | |
396 # interbase and directional semantics | |
397 sub bp2ib { | |
398 my $self = shift; | |
399 my $loc = shift; | |
400 my ($s, $e, $str) = | |
401 ref($loc) eq "ARRAY" ? (@$loc) : ($loc->start, $loc->end, $loc->strand); | |
402 if ($str < 0) { | |
403 ($s, $e) = ($e, $s); | |
404 } | |
405 $s--; | |
406 return ($s, $e, $str); | |
407 } | |
408 | |
409 sub subpartof { | |
410 my $self = shift; | |
411 my $type = 'partof_'.shift; | |
412 $type =~ s/partof_CDS/CDS_exon/; | |
413 $type =~ s/partof_\wRNA/exon/; | |
414 return $type; | |
415 } | |
416 | |
417 1; |