comparison variant_effect_predictor/Bio/SeqIO/chado.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: chado.pm,v 1.1 2002/12/03 08:13:55 cjm Exp $
2 #
3 # BioPerl module for Bio::SeqIO::chado
4 #
5 # Chris Mungall <cjm@fruitfly.org>
6 #
7 # You may distribute this module under the same terms as perl itself
8
9 # POD documentation - main docs before the code
10
11 =head1 NAME
12
13 Bio::SeqIO::chado - chado sequence input/output stream
14
15 =head1 SYNOPSIS
16
17 It is probably best not to use this object directly, but
18 rather go through the SeqIO handler system. Go:
19
20 $stream = Bio::SeqIO->new(-file => $filename, -format => 'chado');
21
22 while ( my $seq = $stream->next_seq() ) {
23 # do something with $seq
24 }
25
26 =head1 DESCRIPTION
27
28 This object can transform Bio::Seq objects to and from chado flat
29 file databases. CURRENTLY ONLY TO
30
31
32 =head2 Optional functions
33
34 =over 3
35
36 =item _show_dna()
37
38 (output only) shows the dna or not
39
40 =item _post_sort()
41
42 (output only) provides a sorting func which is applied to the FTHelpers
43 before printing
44
45
46 =back
47
48 =head1 FEEDBACK
49
50 =head2 Mailing Lists
51
52 User feedback is an integral part of the evolution of this
53 and other Bioperl modules. Send your comments and suggestions preferably
54 to one of the Bioperl mailing lists.
55 Your participation is much appreciated.
56
57 bioperl-l@bioperl.org - General discussion
58 http://www.bioperl.org/MailList.shtml - About the mailing lists
59
60 =head2 Reporting Bugs
61
62 Report bugs to the Bioperl bug tracking system to help us keep track
63 the bugs and their resolution.
64 Bug reports can be submitted via email or the web:
65
66 bioperl-bugs@bio.perl.org
67 http://bio.perl.org/bioperl-bugs/
68
69 =head1 AUTHOR - Chris Mungall
70
71 Email cjm@fruitfly.org
72
73 =head1 APPENDIX
74
75 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
76
77 =cut
78
79 # Let the code begin...
80
81 package Bio::SeqIO::chado;
82 use vars qw(@ISA);
83 use strict;
84
85 use Bio::SeqIO;
86 use Bio::SeqFeature::Generic;
87 use Bio::Species;
88 use Bio::Seq::SeqFactory;
89 use Bio::Annotation::Collection;
90 use Bio::Annotation::Comment;
91 use Bio::Annotation::Reference;
92 use Bio::Annotation::DBLink;
93
94 use Data::Stag qw(:all);
95
96 @ISA = qw(Bio::SeqIO);
97
98 sub _initialize {
99 my($self,@args) = @_;
100
101 $self->SUPER::_initialize(@args);
102 if( ! defined $self->sequence_factory ) {
103 $self->sequence_factory(new Bio::Seq::SeqFactory
104 (-verbose => $self->verbose(),
105 -type => 'Bio::Seq::RichSeq'));
106 }
107 my $wclass = $self->default_handler_class;
108 $self->handler($wclass->new);
109 $self->{_end_of_data} = 0;
110 $self->handler->S("chado");
111 return;
112 }
113
114 sub DESTROY {
115 my $self = shift;
116 $self->end_of_data();
117 $self->SUPER::DESTROY();
118 }
119
120 sub end_of_data {
121 my $self = shift;
122 $self->{_end_of_data} = 1;
123 $self->handler->E("chado");
124 }
125
126 sub default_handler_class {
127 return "Data::Stag::BaseHandler";
128 }
129
130 =head2 next_seq
131
132 Title : next_seq
133 Usage : $seq = $stream->next_seq()
134 Function: returns the next sequence in the stream
135 Returns : Bio::Seq object
136 Args :
137
138 =cut
139
140 sub next_seq {
141 my ($self,@args) = @_;
142 my $seq = $self->sequence_factory->create
143 (
144 # '-verbose' =>$self->verbose(),
145 # %params,
146 # -seq => $seqc,
147 # -annotation => $annotation,
148 # -features => \@features
149 );
150 return $seq;
151 }
152
153 sub handler {
154 my $self = shift;
155 $self->{_handler} = shift if @_;
156 return $self->{_handler};
157 }
158
159
160 =head2 write_seq
161
162 Title : write_seq
163 Usage : $stream->write_seq($seq)
164 Function: writes the $seq object (must be seq) to the stream
165 Returns : 1 for success and 0 for error
166 Args : Bio::Seq
167
168
169 =cut
170
171 sub write_seq {
172 my ($self,$seq) = @_;
173
174 if( !defined $seq ) {
175 $self->throw("Attempting to write with no seq!");
176 }
177
178 if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) {
179 $self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!");
180 }
181
182 # get a handler - must inherit from Data::Stag::BaseHandler;
183 my $w = $self->handler;
184
185 # start of data
186 $w->S("seqset");
187
188 # my $seq_temp_uid = $self->get_temp_uid($seq);
189
190 my $seq_temp_uid = $seq->accession . '.' . ($seq->can('seq_version') ? $seq->seq_version : $seq->version);
191
192 # data structure representing the core sequence for this record
193 my $seqnode =
194 Data::Stag->new(feature=>[
195 [feature_id=>$seq_temp_uid],
196 [dbxrefstr=>$seq->accession_number],
197 [name=>$seq->display_name],
198 [residues=>$seq->seq],
199 ]);
200
201 # soft properties
202 my %prop = ();
203
204 my ($div, $mol);
205 my $len = $seq->length();
206
207 if ( $seq->can('division') ) {
208 $div=$seq->division;
209 }
210 if( !defined $div || ! $div ) { $div = 'UNK'; }
211
212 if( !$seq->can('molecule') || ! defined ($mol = $seq->molecule()) ) {
213 $mol = $seq->alphabet || 'DNA';
214 }
215
216
217 my $circular = 'linear ';
218 $circular = 'circular' if $seq->is_circular;
219
220 # cheeky hack - access symbol table
221 no strict 'refs';
222 map {
223 $prop{$_} =
224 $ {*$_};
225 } qw(mol div circular);
226 use strict 'refs';
227
228 map {
229 $prop{$_} = $seq->$_() if $seq->can($_);
230 } qw(desc keywords);
231
232 local($^W) = 0; # supressing warnings about uninitialized fields.
233
234 # Organism lines
235 if (my $spec = $seq->species) {
236 my ($species, $genus, @class) = $spec->classification();
237 my $OS;
238 if( $spec->common_name ) {
239 $OS = $spec->common_name;
240 } else {
241 $OS = "$genus $species";
242 }
243 if (my $ssp = $spec->sub_species) {
244 $OS .= " $ssp";
245 }
246 }
247
248 # Reference lines
249 my $count = 1;
250 foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
251 # TODO
252 }
253 # Comment lines
254
255 foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
256 $seqnode->add_featureprop([[pkey=>'comment'],[pval=>$comment->text]]);
257 }
258
259 # throw the writer an event
260 $w->ev(@$seqnode);
261
262 $seqnode = undef; # free memory
263
264 # make events for all the features within the record
265 foreach my $sf ( $seq->top_SeqFeatures ) {
266 $self->write_sf($sf, $seq_temp_uid);
267 }
268
269 # data end
270 $w->E("seqset");
271 return 1;
272 }
273
274 # ----
275 # writes a seq feature
276 # ----
277
278 sub write_sf {
279 my $self = shift;
280 my $sf = shift;
281 my $seq_temp_uid = shift;
282
283 my $w = $self->handler;
284
285 my %props =
286 map {
287 $_=>[$sf->each_tag_value($_)]
288 } $sf->all_tags;
289
290 my $loc = $sf->location;
291 my $name = $sf->display_name;
292 my $type = $sf->primary_tag;
293 my @subsfs = $sf->sub_SeqFeature;
294 my @locnodes = ();
295 my $sid = $loc->is_remote ? $loc->seq_id : $seq_temp_uid;
296 if( $loc->isa("Bio::Location::SplitLocationI") ) {
297 # turn splitlocs into subfeatures
298 my $n = 1;
299 push(@subsfs,
300 map {
301 my $ssf =
302 Bio::SeqFeature::Generic->new(
303
304 -start=>$_->start,
305 -end=>$_->end,
306 -strand=>$_->strand,
307 -primary=>$self->subpartof($type),
308 );
309 if ($_->is_remote) {
310 $ssf->location->is_remote(1);
311 $ssf->location->seq_id($_->seq_id);
312 }
313 $ssf;
314 } $loc->each_Location);
315 }
316 elsif( $loc->isa("Bio::Location::RemoteLocationI") ) {
317 # turn splitlocs into subfeatures
318 my $n = 1;
319 push(@subsfs,
320 map {
321 Bio::SeqFeature::Generic->new(
322 # -name=>$name.'.'.$n++,
323 -start=>$_->start,
324 -end=>$_->end,
325 -strand=>$_->strand,
326 -primary=>$self->subpartof($type),
327 )
328 } $loc->each_Location);
329 }
330 else {
331 my ($beg, $end, $strand) = $self->bp2ib($loc);
332 @locnodes = (
333 [featureloc=>[
334 [nbeg=>$beg],
335 [nend=>$end],
336 [strand=>$strand],
337 [srcfeature_id=>$sid],
338 [group=>0],
339 [rank=>0],
340 ]
341 ]
342 );
343 }
344 my $feature_id = $self->get_temp_uid($sf);
345
346 my $fnode =
347 [feature=>[
348 [feature_id=>$feature_id],
349 [name=>$name],
350 [typename=>$type],
351 @locnodes,
352 (map {
353 my $k = $_;
354 map { [featureprop=>[[pkey=>$k],[pval=>$_]]] } @{$props{$k}}
355 } keys %props),
356 ]];
357 $w->ev(@$fnode);
358
359 foreach my $ssf (@subsfs) {
360 my $ssfid = $self->write_sf($ssf, $sid);
361 $w->ev(feature_relationship=>[
362 [subjfeature_id=>$ssfid],
363 [objfeature_id=>$feature_id]
364 ]
365 );
366 }
367 return $feature_id;
368 }
369
370 # private;
371 # an ID for this session that should be
372 # unique... hmm
373 sub session_id {
374 my $self = shift;
375 $self->{_session_id} = shift if @_;
376 if (!$self->{_session_id}) {
377 $self->{_session_id} = $$.time;
378 }
379 return $self->{_session_id};
380 }
381
382
383 our $next_id = 1;
384 our %obj2id_hash = ();
385 sub get_temp_uid {
386 my $self = shift;
387 my $ob = shift;
388 my $id = $obj2id_hash{$ob};
389 if (!$id) {
390 $id = $next_id++;
391 $obj2id_hash{$ob} = $id;
392 }
393 return $self->session_id.'.'.$id;
394 }
395
396 # interbase and directional semantics
397 sub bp2ib {
398 my $self = shift;
399 my $loc = shift;
400 my ($s, $e, $str) =
401 ref($loc) eq "ARRAY" ? (@$loc) : ($loc->start, $loc->end, $loc->strand);
402 if ($str < 0) {
403 ($s, $e) = ($e, $s);
404 }
405 $s--;
406 return ($s, $e, $str);
407 }
408
409 sub subpartof {
410 my $self = shift;
411 my $type = 'partof_'.shift;
412 $type =~ s/partof_CDS/CDS_exon/;
413 $type =~ s/partof_\wRNA/exon/;
414 return $type;
415 }
416
417 1;