Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqIO/chado.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: chado.pm,v 1.1 2002/12/03 08:13:55 cjm Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::SeqIO::chado | |
| 4 # | |
| 5 # Chris Mungall <cjm@fruitfly.org> | |
| 6 # | |
| 7 # You may distribute this module under the same terms as perl itself | |
| 8 | |
| 9 # POD documentation - main docs before the code | |
| 10 | |
| 11 =head1 NAME | |
| 12 | |
| 13 Bio::SeqIO::chado - chado sequence input/output stream | |
| 14 | |
| 15 =head1 SYNOPSIS | |
| 16 | |
| 17 It is probably best not to use this object directly, but | |
| 18 rather go through the SeqIO handler system. Go: | |
| 19 | |
| 20 $stream = Bio::SeqIO->new(-file => $filename, -format => 'chado'); | |
| 21 | |
| 22 while ( my $seq = $stream->next_seq() ) { | |
| 23 # do something with $seq | |
| 24 } | |
| 25 | |
| 26 =head1 DESCRIPTION | |
| 27 | |
| 28 This object can transform Bio::Seq objects to and from chado flat | |
| 29 file databases. CURRENTLY ONLY TO | |
| 30 | |
| 31 | |
| 32 =head2 Optional functions | |
| 33 | |
| 34 =over 3 | |
| 35 | |
| 36 =item _show_dna() | |
| 37 | |
| 38 (output only) shows the dna or not | |
| 39 | |
| 40 =item _post_sort() | |
| 41 | |
| 42 (output only) provides a sorting func which is applied to the FTHelpers | |
| 43 before printing | |
| 44 | |
| 45 | |
| 46 =back | |
| 47 | |
| 48 =head1 FEEDBACK | |
| 49 | |
| 50 =head2 Mailing Lists | |
| 51 | |
| 52 User feedback is an integral part of the evolution of this | |
| 53 and other Bioperl modules. Send your comments and suggestions preferably | |
| 54 to one of the Bioperl mailing lists. | |
| 55 Your participation is much appreciated. | |
| 56 | |
| 57 bioperl-l@bioperl.org - General discussion | |
| 58 http://www.bioperl.org/MailList.shtml - About the mailing lists | |
| 59 | |
| 60 =head2 Reporting Bugs | |
| 61 | |
| 62 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 63 the bugs and their resolution. | |
| 64 Bug reports can be submitted via email or the web: | |
| 65 | |
| 66 bioperl-bugs@bio.perl.org | |
| 67 http://bio.perl.org/bioperl-bugs/ | |
| 68 | |
| 69 =head1 AUTHOR - Chris Mungall | |
| 70 | |
| 71 Email cjm@fruitfly.org | |
| 72 | |
| 73 =head1 APPENDIX | |
| 74 | |
| 75 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
| 76 | |
| 77 =cut | |
| 78 | |
| 79 # Let the code begin... | |
| 80 | |
| 81 package Bio::SeqIO::chado; | |
| 82 use vars qw(@ISA); | |
| 83 use strict; | |
| 84 | |
| 85 use Bio::SeqIO; | |
| 86 use Bio::SeqFeature::Generic; | |
| 87 use Bio::Species; | |
| 88 use Bio::Seq::SeqFactory; | |
| 89 use Bio::Annotation::Collection; | |
| 90 use Bio::Annotation::Comment; | |
| 91 use Bio::Annotation::Reference; | |
| 92 use Bio::Annotation::DBLink; | |
| 93 | |
| 94 use Data::Stag qw(:all); | |
| 95 | |
| 96 @ISA = qw(Bio::SeqIO); | |
| 97 | |
| 98 sub _initialize { | |
| 99 my($self,@args) = @_; | |
| 100 | |
| 101 $self->SUPER::_initialize(@args); | |
| 102 if( ! defined $self->sequence_factory ) { | |
| 103 $self->sequence_factory(new Bio::Seq::SeqFactory | |
| 104 (-verbose => $self->verbose(), | |
| 105 -type => 'Bio::Seq::RichSeq')); | |
| 106 } | |
| 107 my $wclass = $self->default_handler_class; | |
| 108 $self->handler($wclass->new); | |
| 109 $self->{_end_of_data} = 0; | |
| 110 $self->handler->S("chado"); | |
| 111 return; | |
| 112 } | |
| 113 | |
| 114 sub DESTROY { | |
| 115 my $self = shift; | |
| 116 $self->end_of_data(); | |
| 117 $self->SUPER::DESTROY(); | |
| 118 } | |
| 119 | |
| 120 sub end_of_data { | |
| 121 my $self = shift; | |
| 122 $self->{_end_of_data} = 1; | |
| 123 $self->handler->E("chado"); | |
| 124 } | |
| 125 | |
| 126 sub default_handler_class { | |
| 127 return "Data::Stag::BaseHandler"; | |
| 128 } | |
| 129 | |
| 130 =head2 next_seq | |
| 131 | |
| 132 Title : next_seq | |
| 133 Usage : $seq = $stream->next_seq() | |
| 134 Function: returns the next sequence in the stream | |
| 135 Returns : Bio::Seq object | |
| 136 Args : | |
| 137 | |
| 138 =cut | |
| 139 | |
| 140 sub next_seq { | |
| 141 my ($self,@args) = @_; | |
| 142 my $seq = $self->sequence_factory->create | |
| 143 ( | |
| 144 # '-verbose' =>$self->verbose(), | |
| 145 # %params, | |
| 146 # -seq => $seqc, | |
| 147 # -annotation => $annotation, | |
| 148 # -features => \@features | |
| 149 ); | |
| 150 return $seq; | |
| 151 } | |
| 152 | |
| 153 sub handler { | |
| 154 my $self = shift; | |
| 155 $self->{_handler} = shift if @_; | |
| 156 return $self->{_handler}; | |
| 157 } | |
| 158 | |
| 159 | |
| 160 =head2 write_seq | |
| 161 | |
| 162 Title : write_seq | |
| 163 Usage : $stream->write_seq($seq) | |
| 164 Function: writes the $seq object (must be seq) to the stream | |
| 165 Returns : 1 for success and 0 for error | |
| 166 Args : Bio::Seq | |
| 167 | |
| 168 | |
| 169 =cut | |
| 170 | |
| 171 sub write_seq { | |
| 172 my ($self,$seq) = @_; | |
| 173 | |
| 174 if( !defined $seq ) { | |
| 175 $self->throw("Attempting to write with no seq!"); | |
| 176 } | |
| 177 | |
| 178 if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) { | |
| 179 $self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!"); | |
| 180 } | |
| 181 | |
| 182 # get a handler - must inherit from Data::Stag::BaseHandler; | |
| 183 my $w = $self->handler; | |
| 184 | |
| 185 # start of data | |
| 186 $w->S("seqset"); | |
| 187 | |
| 188 # my $seq_temp_uid = $self->get_temp_uid($seq); | |
| 189 | |
| 190 my $seq_temp_uid = $seq->accession . '.' . ($seq->can('seq_version') ? $seq->seq_version : $seq->version); | |
| 191 | |
| 192 # data structure representing the core sequence for this record | |
| 193 my $seqnode = | |
| 194 Data::Stag->new(feature=>[ | |
| 195 [feature_id=>$seq_temp_uid], | |
| 196 [dbxrefstr=>$seq->accession_number], | |
| 197 [name=>$seq->display_name], | |
| 198 [residues=>$seq->seq], | |
| 199 ]); | |
| 200 | |
| 201 # soft properties | |
| 202 my %prop = (); | |
| 203 | |
| 204 my ($div, $mol); | |
| 205 my $len = $seq->length(); | |
| 206 | |
| 207 if ( $seq->can('division') ) { | |
| 208 $div=$seq->division; | |
| 209 } | |
| 210 if( !defined $div || ! $div ) { $div = 'UNK'; } | |
| 211 | |
| 212 if( !$seq->can('molecule') || ! defined ($mol = $seq->molecule()) ) { | |
| 213 $mol = $seq->alphabet || 'DNA'; | |
| 214 } | |
| 215 | |
| 216 | |
| 217 my $circular = 'linear '; | |
| 218 $circular = 'circular' if $seq->is_circular; | |
| 219 | |
| 220 # cheeky hack - access symbol table | |
| 221 no strict 'refs'; | |
| 222 map { | |
| 223 $prop{$_} = | |
| 224 $ {*$_}; | |
| 225 } qw(mol div circular); | |
| 226 use strict 'refs'; | |
| 227 | |
| 228 map { | |
| 229 $prop{$_} = $seq->$_() if $seq->can($_); | |
| 230 } qw(desc keywords); | |
| 231 | |
| 232 local($^W) = 0; # supressing warnings about uninitialized fields. | |
| 233 | |
| 234 # Organism lines | |
| 235 if (my $spec = $seq->species) { | |
| 236 my ($species, $genus, @class) = $spec->classification(); | |
| 237 my $OS; | |
| 238 if( $spec->common_name ) { | |
| 239 $OS = $spec->common_name; | |
| 240 } else { | |
| 241 $OS = "$genus $species"; | |
| 242 } | |
| 243 if (my $ssp = $spec->sub_species) { | |
| 244 $OS .= " $ssp"; | |
| 245 } | |
| 246 } | |
| 247 | |
| 248 # Reference lines | |
| 249 my $count = 1; | |
| 250 foreach my $ref ( $seq->annotation->get_Annotations('reference') ) { | |
| 251 # TODO | |
| 252 } | |
| 253 # Comment lines | |
| 254 | |
| 255 foreach my $comment ( $seq->annotation->get_Annotations('comment') ) { | |
| 256 $seqnode->add_featureprop([[pkey=>'comment'],[pval=>$comment->text]]); | |
| 257 } | |
| 258 | |
| 259 # throw the writer an event | |
| 260 $w->ev(@$seqnode); | |
| 261 | |
| 262 $seqnode = undef; # free memory | |
| 263 | |
| 264 # make events for all the features within the record | |
| 265 foreach my $sf ( $seq->top_SeqFeatures ) { | |
| 266 $self->write_sf($sf, $seq_temp_uid); | |
| 267 } | |
| 268 | |
| 269 # data end | |
| 270 $w->E("seqset"); | |
| 271 return 1; | |
| 272 } | |
| 273 | |
| 274 # ---- | |
| 275 # writes a seq feature | |
| 276 # ---- | |
| 277 | |
| 278 sub write_sf { | |
| 279 my $self = shift; | |
| 280 my $sf = shift; | |
| 281 my $seq_temp_uid = shift; | |
| 282 | |
| 283 my $w = $self->handler; | |
| 284 | |
| 285 my %props = | |
| 286 map { | |
| 287 $_=>[$sf->each_tag_value($_)] | |
| 288 } $sf->all_tags; | |
| 289 | |
| 290 my $loc = $sf->location; | |
| 291 my $name = $sf->display_name; | |
| 292 my $type = $sf->primary_tag; | |
| 293 my @subsfs = $sf->sub_SeqFeature; | |
| 294 my @locnodes = (); | |
| 295 my $sid = $loc->is_remote ? $loc->seq_id : $seq_temp_uid; | |
| 296 if( $loc->isa("Bio::Location::SplitLocationI") ) { | |
| 297 # turn splitlocs into subfeatures | |
| 298 my $n = 1; | |
| 299 push(@subsfs, | |
| 300 map { | |
| 301 my $ssf = | |
| 302 Bio::SeqFeature::Generic->new( | |
| 303 | |
| 304 -start=>$_->start, | |
| 305 -end=>$_->end, | |
| 306 -strand=>$_->strand, | |
| 307 -primary=>$self->subpartof($type), | |
| 308 ); | |
| 309 if ($_->is_remote) { | |
| 310 $ssf->location->is_remote(1); | |
| 311 $ssf->location->seq_id($_->seq_id); | |
| 312 } | |
| 313 $ssf; | |
| 314 } $loc->each_Location); | |
| 315 } | |
| 316 elsif( $loc->isa("Bio::Location::RemoteLocationI") ) { | |
| 317 # turn splitlocs into subfeatures | |
| 318 my $n = 1; | |
| 319 push(@subsfs, | |
| 320 map { | |
| 321 Bio::SeqFeature::Generic->new( | |
| 322 # -name=>$name.'.'.$n++, | |
| 323 -start=>$_->start, | |
| 324 -end=>$_->end, | |
| 325 -strand=>$_->strand, | |
| 326 -primary=>$self->subpartof($type), | |
| 327 ) | |
| 328 } $loc->each_Location); | |
| 329 } | |
| 330 else { | |
| 331 my ($beg, $end, $strand) = $self->bp2ib($loc); | |
| 332 @locnodes = ( | |
| 333 [featureloc=>[ | |
| 334 [nbeg=>$beg], | |
| 335 [nend=>$end], | |
| 336 [strand=>$strand], | |
| 337 [srcfeature_id=>$sid], | |
| 338 [group=>0], | |
| 339 [rank=>0], | |
| 340 ] | |
| 341 ] | |
| 342 ); | |
| 343 } | |
| 344 my $feature_id = $self->get_temp_uid($sf); | |
| 345 | |
| 346 my $fnode = | |
| 347 [feature=>[ | |
| 348 [feature_id=>$feature_id], | |
| 349 [name=>$name], | |
| 350 [typename=>$type], | |
| 351 @locnodes, | |
| 352 (map { | |
| 353 my $k = $_; | |
| 354 map { [featureprop=>[[pkey=>$k],[pval=>$_]]] } @{$props{$k}} | |
| 355 } keys %props), | |
| 356 ]]; | |
| 357 $w->ev(@$fnode); | |
| 358 | |
| 359 foreach my $ssf (@subsfs) { | |
| 360 my $ssfid = $self->write_sf($ssf, $sid); | |
| 361 $w->ev(feature_relationship=>[ | |
| 362 [subjfeature_id=>$ssfid], | |
| 363 [objfeature_id=>$feature_id] | |
| 364 ] | |
| 365 ); | |
| 366 } | |
| 367 return $feature_id; | |
| 368 } | |
| 369 | |
| 370 # private; | |
| 371 # an ID for this session that should be | |
| 372 # unique... hmm | |
| 373 sub session_id { | |
| 374 my $self = shift; | |
| 375 $self->{_session_id} = shift if @_; | |
| 376 if (!$self->{_session_id}) { | |
| 377 $self->{_session_id} = $$.time; | |
| 378 } | |
| 379 return $self->{_session_id}; | |
| 380 } | |
| 381 | |
| 382 | |
| 383 our $next_id = 1; | |
| 384 our %obj2id_hash = (); | |
| 385 sub get_temp_uid { | |
| 386 my $self = shift; | |
| 387 my $ob = shift; | |
| 388 my $id = $obj2id_hash{$ob}; | |
| 389 if (!$id) { | |
| 390 $id = $next_id++; | |
| 391 $obj2id_hash{$ob} = $id; | |
| 392 } | |
| 393 return $self->session_id.'.'.$id; | |
| 394 } | |
| 395 | |
| 396 # interbase and directional semantics | |
| 397 sub bp2ib { | |
| 398 my $self = shift; | |
| 399 my $loc = shift; | |
| 400 my ($s, $e, $str) = | |
| 401 ref($loc) eq "ARRAY" ? (@$loc) : ($loc->start, $loc->end, $loc->strand); | |
| 402 if ($str < 0) { | |
| 403 ($s, $e) = ($e, $s); | |
| 404 } | |
| 405 $s--; | |
| 406 return ($s, $e, $str); | |
| 407 } | |
| 408 | |
| 409 sub subpartof { | |
| 410 my $self = shift; | |
| 411 my $type = 'partof_'.shift; | |
| 412 $type =~ s/partof_CDS/CDS_exon/; | |
| 413 $type =~ s/partof_\wRNA/exon/; | |
| 414 return $type; | |
| 415 } | |
| 416 | |
| 417 1; |
