ensembl: variant_effect_predictor/Bio/SeqIO/chado.pm comparison

comparison variant_effect_predictor/Bio/SeqIO/chado.pm @ 0:1f6dce3d34e0

Uploaded

author	mahtabm
date	Thu, 11 Apr 2013 02:01:53 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:1f6dce3d34e0
+# $Id: chado.pm,v 1.1 2002/12/03 08:13:55 cjm Exp $
+#
+# BioPerl module for Bio::SeqIO::chado
+#
+# Chris Mungall <cjm@fruitfly.org>
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+=head1 NAME
+Bio::SeqIO::chado - chado sequence input/output stream
+=head1 SYNOPSIS
+It is probably best not to use this object directly, but
+rather go through the SeqIO handler system. Go:
+$stream = Bio::SeqIO->new(-file => $filename, -format => 'chado');
+while ( my $seq = $stream->next_seq() ) {
+	# do something with $seq
+}
+=head1 DESCRIPTION
+This object can transform Bio::Seq objects to and from chado flat
+file databases. CURRENTLY ONLY WRITING (Bio::Seq TO chado) IS IMPLEMENTED.
+=head2 Optional functions
+=over 3
+=item _show_dna()
+(output only) shows the dna or not
+=item _post_sort()
+(output only) provides a sorting func which is applied to the FTHelpers
+before printing
+=back
+=head1 FEEDBACK
+=head2 Mailing Lists
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+bioperl-l@bioperl.org                  - General discussion
+http://www.bioperl.org/MailList.shtml  - About the mailing lists
+=head2 Reporting Bugs
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via email or the web:
+bioperl-bugs@bio.perl.org
+http://bio.perl.org/bioperl-bugs/
+=head1 AUTHOR - Chris Mungall
+Email cjm@fruitfly.org
+=head1 APPENDIX
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+=cut
+# Let the code begin...
+package Bio::SeqIO::chado;
+use vars qw(@ISA);
+use strict;
+use Bio::SeqIO;
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+use Bio::Annotation::Reference;
+use Bio::Annotation::DBLink;
+use Data::Stag qw(:all);
+@ISA = qw(Bio::SeqIO);
+# Prepare a freshly constructed stream: run the parent initialisation,
+# make sure a sequence factory is available, create the event handler and
+# emit the opening "chado" start event.
+#
+# Args    : argument list, forwarded unchanged to SUPER::_initialize
+# Returns : nothing
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+
+    # Install a default factory only when none has been set already.
+    # Direct method-call syntax is used instead of "new Class (...)",
+    # whose parsing depends on what else is in scope.
+    if ( !defined $self->sequence_factory ) {
+        $self->sequence_factory(
+            Bio::Seq::SeqFactory->new(
+                -verbose => $self->verbose(),
+                -type    => 'Bio::Seq::RichSeq',
+            )
+        );
+    }
+
+    my $wclass = $self->default_handler_class;
+    $self->handler( $wclass->new );
+
+    # The stream is open: nothing has been closed yet.
+    $self->{_end_of_data} = 0;
+    $self->handler->S("chado");
+    return;
+}
+# Destructor: closes the event stream through end_of_data() and then
+# chains to the destructor of the parent class.
+sub DESTROY {
+    my ($self) = @_;
+    $self->end_of_data();
+    $self->SUPER::DESTROY();
+}
+# Mark the stream as finished and send the closing "chado" end event to
+# the handler.
+#
+# The end event is emitted at most once per stream. DESTROY always calls
+# this method, so without the guard an explicit call followed by object
+# destruction would close the "chado" element twice.
+#
+# Returns : the value of the handler's E() call on the first invocation;
+#           nothing on any later invocation
+sub end_of_data {
+    my $self = shift;
+
+    # Already closed: do not emit a second end event.
+    return if $self->{_end_of_data};
+
+    $self->{_end_of_data} = 1;
+    return $self->handler->E("chado");
+}
+# Name of the class that _initialize instantiates as the event handler.
+sub default_handler_class {
+    my $class_name = "Data::Stag::BaseHandler";
+    return $class_name;
+}
+=head2 next_seq
+Title   : next_seq
+Usage   : $seq = $stream->next_seq()
+Function: returns the next sequence in the stream
+Returns : Bio::Seq object
+Args    :
+=cut
+sub next_seq {
+    my $self = shift;
+
+    # No input is read or parsed here: the sequence factory is asked for
+    # an object without any constructor arguments (no -seq, -annotation
+    # or -features are supplied) and that object is returned as-is.
+    my $factory = $self->sequence_factory;
+    my $seq     = $factory->create();
+    return $seq;
+}
+# Combined getter/setter for the event handler object.
+#
+# Args    : optional new handler; stored whenever an argument is supplied,
+#           even an undefined one
+# Returns : the handler currently stored on the object
+sub handler {
+    my ($self, @new_value) = @_;
+    if (@new_value) {
+        $self->{_handler} = $new_value[0];
+    }
+    return $self->{_handler};
+}
+=head2 write_seq
+Title   : write_seq
+Usage   : $stream->write_seq($seq)
+Function: writes the $seq object (must be seq) to the stream
+Returns : 1 for success and 0 for error
+Args    : Bio::Seq
+=cut
+# Writes one sequence record to the handler as a "seqset" element: a
+# "feature" node for the sequence itself (with one featureprop per
+# comment annotation) followed by the events produced by write_sf() for
+# every top-level sequence feature.
+#
+# Args    : the sequence object to write
+# Returns : 1
+# Throws  : via $self->throw when no sequence is given
+sub write_seq {
+    my ($self, $seq) = @_;
+
+    if ( !defined $seq ) {
+        $self->throw("Attempting to write with no seq!");
+    }
+
+    # Deliberately best-effort: a non-compliant object only triggers a
+    # warning and the dump is attempted anyway.
+    if ( !ref $seq || !$seq->isa('Bio::SeqI') ) {
+        $self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!");
+    }
+
+    # get a handler - must inherit from Data::Stag::BaseHandler;
+    my $w = $self->handler;
+
+    # start of data
+    $w->S("seqset");
+
+    # Temporary id of the record: "<accession>.<version>".  The same
+    # accession_number() accessor is used for the id and for dbxrefstr so
+    # that both always agree.
+    my $accession    = $seq->accession_number;
+    my $version      = $seq->can('seq_version') ? $seq->seq_version : $seq->version;
+    my $seq_temp_uid = $accession . '.' . $version;
+
+    # data structure representing the core sequence for this record
+    my $seqnode = Data::Stag->new(
+        feature => [
+            [ feature_id => $seq_temp_uid ],
+            [ dbxrefstr  => $accession ],
+            [ name       => $seq->display_name ],
+            [ residues   => $seq->seq ],
+        ]
+    );
+
+    # soft properties
+    my $div;
+    if ( $seq->can('division') ) {
+        $div = $seq->division;
+    }
+    $div = 'UNK' if !$div;
+
+    my $mol;
+    if ( !$seq->can('molecule') || !defined( $mol = $seq->molecule() ) ) {
+        $mol = $seq->alphabet || 'DNA';
+    }
+
+    my $circular = $seq->is_circular ? 'circular' : 'linear  ';
+
+    # The values are taken straight from the lexical variables.  Looking
+    # them up through the symbol table only ever sees package globals,
+    # never "my" variables, and therefore stored undef for all three.
+    # NOTE(review): nothing below writes %prop to the handler yet.
+    my %prop = (
+        mol      => $mol,
+        div      => $div,
+        circular => $circular,
+    );
+    for my $accessor (qw(desc keywords)) {
+        $prop{$accessor} = $seq->$accessor() if $seq->can($accessor);
+    }
+
+    local $^W = 0;    # suppressing warnings about uninitialized fields.
+
+    # Organism lines
+    # NOTE(review): the organism string is assembled but not emitted.
+    if ( my $spec = $seq->species ) {
+        my ( $species, $genus, @class ) = $spec->classification();
+        my $OS = $spec->common_name || "$genus $species";
+        if ( my $ssp = $spec->sub_species ) {
+            $OS .= " $ssp";
+        }
+    }
+
+    # Reference lines
+    foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
+        # TODO
+    }
+
+    # Comment lines
+    foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
+        $seqnode->add_featureprop(
+            [ [ pkey => 'comment' ], [ pval => $comment->text ] ] );
+    }
+
+    # throw the writer an event
+    $w->ev(@$seqnode);
+
+    $seqnode = undef;    # free memory
+
+    # make events for all the features within the record
+    foreach my $sf ( $seq->top_SeqFeatures ) {
+        $self->write_sf( $sf, $seq_temp_uid );
+    }
+
+    # data end
+    $w->E("seqset");
+    return 1;
+}
+# ----
+# writes a seq feature
+# ----
+# Emits a "feature" event for $sf, then recursively writes every
+# sub-feature and links it to its parent with a "feature_relationship"
+# event.  Split and remote locations are not written as a featureloc of
+# the feature itself; each of their parts becomes an extra sub-feature.
+#
+# Args    : $sf           - the sequence feature to write
+#           $seq_temp_uid - id of the feature the location refers to; it
+#                           is used as srcfeature_id unless the location
+#                           is remote
+# Returns : the temporary id assigned to $sf
+sub write_sf {
+    my ( $self, $sf, $seq_temp_uid ) = @_;
+
+    my $w = $self->handler;
+
+    # tag name => list of all values stored under that tag
+    my %props = map { ( $_ => [ $sf->each_tag_value($_) ] ) } $sf->all_tags;
+
+    my $loc      = $sf->location;
+    my $name     = $sf->display_name;
+    my $type     = $sf->primary_tag;
+    my @subsfs   = $sf->sub_SeqFeature;
+    my @locnodes = ();
+
+    # A remote location names its own source sequence.
+    my $sid = $loc->is_remote ? $loc->seq_id : $seq_temp_uid;
+
+    if ( $loc->isa("Bio::Location::SplitLocationI") ) {
+
+        # turn splitlocs into subfeatures
+        my $part_type = $self->subpartof($type);
+        for my $part ( $loc->each_Location ) {
+            my $ssf = Bio::SeqFeature::Generic->new(
+                -start   => $part->start,
+                -end     => $part->end,
+                -strand  => $part->strand,
+                -primary => $part_type,
+            );
+
+            # keep the reference to the foreign sequence on the part
+            if ( $part->is_remote ) {
+                $ssf->location->is_remote(1);
+                $ssf->location->seq_id( $part->seq_id );
+            }
+            push @subsfs, $ssf;
+        }
+    }
+    elsif ( $loc->isa("Bio::Location::RemoteLocationI") ) {
+
+        # turn the parts of a remote location into subfeatures; the
+        # source sequence reaches them through $sid in the recursion
+        my $part_type = $self->subpartof($type);
+        for my $part ( $loc->each_Location ) {
+            push @subsfs,
+              Bio::SeqFeature::Generic->new(
+                -start   => $part->start,
+                -end     => $part->end,
+                -strand  => $part->strand,
+                -primary => $part_type,
+              );
+        }
+    }
+    else {
+
+        # plain location: one featureloc with converted coordinates
+        my ( $beg, $end, $strand ) = $self->bp2ib($loc);
+        @locnodes = (
+            [
+                featureloc => [
+                    [ nbeg          => $beg ],
+                    [ nend          => $end ],
+                    [ strand        => $strand ],
+                    [ srcfeature_id => $sid ],
+                    [ group         => 0 ],
+                    [ rank          => 0 ],
+                ]
+            ]
+        );
+    }
+
+    my $feature_id = $self->get_temp_uid($sf);
+
+    # one featureprop node per tag value
+    my @propnodes;
+    for my $key ( keys %props ) {
+        for my $value ( @{ $props{$key} } ) {
+            push @propnodes,
+              [ featureprop => [ [ pkey => $key ], [ pval => $value ] ] ];
+        }
+    }
+
+    $w->ev(
+        feature => [
+            [ feature_id => $feature_id ],
+            [ name       => $name ],
+            [ typename   => $type ],
+            @locnodes,
+            @propnodes,
+        ]
+    );
+
+    # write the children and tie each one to this feature
+    foreach my $ssf (@subsfs) {
+        my $ssfid = $self->write_sf( $ssf, $sid );
+        $w->ev(
+            feature_relationship => [
+                [ subjfeature_id => $ssfid ],
+                [ objfeature_id  => $feature_id ],
+            ]
+        );
+    }
+    return $feature_id;
+}
+# private;
+# Getter/setter for the id of this session.  When no true value is
+# stored, an id is generated by concatenating the process id and the
+# current time, and is remembered for later calls.
+#
+# Args    : optional new session id
+# Returns : the session id
+sub session_id {
+    my ($self, @new_value) = @_;
+    $self->{_session_id} = $new_value[0] if @new_value;
+    $self->{_session_id} ||= $$ . time;
+    return $self->{_session_id};
+}
+use Scalar::Util ();
+
+# Next numeric id to hand out.
+our $next_id = 1;
+
+# Stringified object => numeric id already assigned to it.
+our %obj2id_hash = ();
+
+# Stringified object => weak reference to that object.  The weak
+# reference becomes undef once the object is destroyed, which marks the
+# matching %obj2id_hash entry as stale.
+my %live_obj_for = ();
+
+# Returns a temporary id for $ob of the form "<session id>.<number>".
+# The same live object always gets the same id.
+#
+# Ids are looked up by the stringified reference.  Once an object is
+# destroyed, a new object can land on the same address and stringify
+# identically, so an entry whose object is gone is discarded rather than
+# reused for an unrelated object.
+#
+# Args    : the object (or plain value) that needs an id
+# Returns : the id string
+sub get_temp_uid {
+    my $self = shift;
+    my $ob   = shift;
+
+    my $id = $obj2id_hash{$ob};
+
+    # Stale entry: it was recorded for a reference that has since died.
+    if (   $id
+        && ref $ob
+        && exists $live_obj_for{$ob}
+        && !defined $live_obj_for{$ob} )
+    {
+        $id = undef;
+    }
+
+    if ( !$id ) {
+        $id = $next_id++;
+        $obj2id_hash{$ob} = $id;
+        if ( ref $ob ) {
+            $live_obj_for{$ob} = $ob;
+            Scalar::Util::weaken( $live_obj_for{$ob} );
+        }
+    }
+    return $self->session_id . '.' . $id;
+}
+# interbase and directional semantics
+#
+# Converts a 1-based, inclusive (start, end, strand) triple into
+# interbase coordinates that run in the direction of the strand.
+#
+# The start is decremented BEFORE the two ends are swapped for the
+# reverse strand.  Decrementing after the swap would lower the end
+# coordinate instead and shift every reverse-strand feature by one base
+# (10..20 on the minus strand must become 20 -> 9, not 19 -> 10).
+#
+# Args    : a location object providing start, end and strand, or an
+#           array reference [start, end, strand]
+# Returns : list (begin, end, strand)
+sub bp2ib {
+    my ( $self, $loc ) = @_;
+
+    my ( $s, $e, $str ) =
+      ref($loc) eq "ARRAY"
+      ? @$loc
+      : ( $loc->start, $loc->end, $loc->strand );
+
+    # base -> interbase: only the lower boundary moves
+    $s--;
+
+    # directional: begin > end on the reverse strand; an undefined strand
+    # is treated as "not reverse"
+    if ( defined $str && $str < 0 ) {
+        ( $s, $e ) = ( $e, $s );
+    }
+    return ( $s, $e, $str );
+}
+# Derives the type name used for the parts of a feature of the given
+# type: the type is prefixed with "partof_", after which "partof_CDS" is
+# rewritten to "CDS_exon" and "partof_" followed by one word character
+# and "RNA" is rewritten to "exon".
+#
+# Args    : the primary tag of the parent feature
+# Returns : the type name for its parts
+sub subpartof {
+    my ( $self, $parent_type ) = @_;
+    my $part_type = 'partof_' . $parent_type;
+    for ($part_type) {
+        s/partof_CDS/CDS_exon/;
+        s/partof_\wRNA/exon/;
+    }
+    return $part_type;
+}
+1;

Mercurial > repos > mahtabm > ensembl

comparison variant_effect_predictor/Bio/SeqIO/chado.pm @ 0:1f6dce3d34e0