view variant_effect_predictor/Bio/SeqIO/chado.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

# $Id: chado.pm,v 1.1 2002/12/03 08:13:55 cjm Exp $
#
# BioPerl module for Bio::SeqIO::chado
#
# Chris Mungall <cjm@fruitfly.org>
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::SeqIO::chado - chado sequence input/output stream

=head1 SYNOPSIS

It is probably best not to use this object directly, but
rather go through the SeqIO handler system. Go:

    $stream = Bio::SeqIO->new(-file => $filename, -format => 'chado');

    while ( my $seq = $stream->next_seq() ) {
	# do something with $seq
    }

=head1 DESCRIPTION

This object can transform Bio::Seq objects to and from chado flat
file databases. CURRENTLY ONLY TO


=head2 Optional functions

=over 3

=item _show_dna()

(output only) shows the dna or not

=item _post_sort()

(output only) provides a sorting func which is applied to the FTHelpers
before printing


=back

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this
and other Bioperl modules. Send your comments and suggestions preferably
 to one of the Bioperl mailing lists.
Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://www.bioperl.org/MailList.shtml  - About the mailing lists

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
 the bugs and their resolution.
 Bug reports can be submitted via email or the web:

  bioperl-bugs@bio.perl.org
  http://bio.perl.org/bioperl-bugs/

=head1 AUTHOR - Chris Mungall

Email cjm@fruitfly.org

=head1 APPENDIX

The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _

=cut

# Let the code begin...

package Bio::SeqIO::chado;
use vars qw(@ISA);
use strict;

use Bio::SeqIO;
use Bio::SeqFeature::Generic;
use Bio::Species;
use Bio::Seq::SeqFactory;
use Bio::Annotation::Collection;
use Bio::Annotation::Comment;
use Bio::Annotation::Reference;
use Bio::Annotation::DBLink;

use Data::Stag qw(:all);

@ISA = qw(Bio::SeqIO);
 
sub _initialize {
    my($self,@args) = @_;
    
    $self->SUPER::_initialize(@args); 
    if( ! defined $self->sequence_factory ) {
	$self->sequence_factory(new Bio::Seq::SeqFactory
				(-verbose => $self->verbose(), 
				 -type => 'Bio::Seq::RichSeq'));
    }
    my $wclass = $self->default_handler_class;
    $self->handler($wclass->new);
    $self->{_end_of_data} = 0;
    $self->handler->S("chado");
    return;
}

sub DESTROY {
    my $self = shift;
    $self->end_of_data();
    $self->SUPER::DESTROY();
}

sub end_of_data {
    my $self = shift;
    $self->{_end_of_data} = 1;
    $self->handler->E("chado");
}

sub default_handler_class {
    return "Data::Stag::BaseHandler";
} 

=head2 next_seq

 Title   : next_seq
 Usage   : $seq = $stream->next_seq()
 Function: returns the next sequence in the stream
 Returns : Bio::Seq object
 Args    :

=cut

sub next_seq {
    my ($self,@args) = @_;
    my $seq = $self->sequence_factory->create
	(
         #         '-verbose' =>$self->verbose(), 
         #	 %params,
         #	 -seq => $seqc,
         #	 -annotation => $annotation,
         #	 -features => \@features
        );
    return $seq;
}

sub handler {
    my $self = shift;
    $self->{_handler} = shift if @_;
    return $self->{_handler};
}


=head2 write_seq

 Title   : write_seq
 Usage   : $stream->write_seq($seq)
 Function: writes the $seq object (must be seq) to the stream
 Returns : 1 for success and 0 for error
 Args    : Bio::Seq


=cut

sub write_seq {
    my ($self,$seq) = @_;
    
    if( !defined $seq ) {
	$self->throw("Attempting to write with no seq!");
    }
    
    if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) {
	$self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!");
    }

    # get a handler - must inherit from Data::Stag::BaseHandler;
    my $w = $self->handler;

    # start of data
    $w->S("seqset");

    #    my $seq_temp_uid = $self->get_temp_uid($seq);

    my $seq_temp_uid = $seq->accession . '.' . ($seq->can('seq_version') ? $seq->seq_version : $seq->version);

    # data structure representing the core sequence for this record
    my $seqnode =
      Data::Stag->new(feature=>[
                                    [feature_id=>$seq_temp_uid],
                                    [dbxrefstr=>$seq->accession_number],
                                    [name=>$seq->display_name],
                                    [residues=>$seq->seq],
                                   ]);
    
    # soft properties
    my %prop = ();

    my ($div, $mol);
    my $len = $seq->length();

    if ( $seq->can('division') ) {
	$div=$seq->division;
    } 
    if( !defined $div || ! $div ) { $div = 'UNK'; }

    if( !$seq->can('molecule') || ! defined ($mol = $seq->molecule()) ) {
	$mol = $seq->alphabet || 'DNA';
    }
    
    
    my $circular = 'linear  ';
    $circular = 'circular' if $seq->is_circular;

    # cheeky hack - access symbol table
    no strict 'refs';
    map {
        $prop{$_} = 
          $ {*$_};
    } qw(mol div circular);
    use strict 'refs';

    map {
        $prop{$_} = $seq->$_() if $seq->can($_);
    } qw(desc keywords);

    local($^W) = 0;   # supressing warnings about uninitialized fields.
    
    # Organism lines
    if (my $spec = $seq->species) {
        my ($species, $genus, @class) = $spec->classification();
	my $OS;
        if( $spec->common_name ) {
	    $OS = $spec->common_name;
	} else { 
	    $OS = "$genus $species";
	}
        if (my $ssp = $spec->sub_species) {
            $OS .= " $ssp";
        }
    }
    
    # Reference lines
    my $count = 1;
    foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
        # TODO
    }
    # Comment lines
    
    foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
        $seqnode->add_featureprop([[pkey=>'comment'],[pval=>$comment->text]]);
    }

    # throw the writer an event
    $w->ev(@$seqnode);

    $seqnode = undef;      # free memory

    # make events for all the features within the record
    foreach my $sf ( $seq->top_SeqFeatures ) {
        $self->write_sf($sf, $seq_temp_uid);
    }

    # data end
    $w->E("seqset");
    return 1;
}

# ----
# writes a seq feature
# ----

sub write_sf {
    my $self = shift;
    my $sf = shift;
    my $seq_temp_uid = shift;

    my $w = $self->handler;

    my %props =
      map {
          $_=>[$sf->each_tag_value($_)]
      } $sf->all_tags;

    my $loc = $sf->location;
    my $name = $sf->display_name;
    my $type = $sf->primary_tag;
    my @subsfs = $sf->sub_SeqFeature;
    my @locnodes = ();
    my $sid = $loc->is_remote ? $loc->seq_id : $seq_temp_uid;
    if( $loc->isa("Bio::Location::SplitLocationI") ) {
        # turn splitlocs into subfeatures
        my $n = 1;
        push(@subsfs,
             map {
                 my $ssf =
                   Bio::SeqFeature::Generic->new(

                                                 -start=>$_->start,
                                                 -end=>$_->end,
                                                 -strand=>$_->strand,
                                                 -primary=>$self->subpartof($type),
                                              );
                 if ($_->is_remote) {
                     $ssf->location->is_remote(1);
                     $ssf->location->seq_id($_->seq_id);
                 }
                 $ssf;
               } $loc->each_Location);
    }
    elsif( $loc->isa("Bio::Location::RemoteLocationI") ) {
        # turn splitlocs into subfeatures
        my $n = 1;
        push(@subsfs,
             map {
                 Bio::SeqFeature::Generic->new(
#                                               -name=>$name.'.'.$n++,
                                               -start=>$_->start,
                                               -end=>$_->end,
                                               -strand=>$_->strand,
                                               -primary=>$self->subpartof($type),
                                              )
               } $loc->each_Location);
    }
    else {
        my ($beg, $end, $strand) = $self->bp2ib($loc);
        @locnodes = (
                     [featureloc=>[
                                   [nbeg=>$beg],
                                   [nend=>$end],
                                   [strand=>$strand],
                                   [srcfeature_id=>$sid],
                                   [group=>0],
                                   [rank=>0],
                                  ]
                     ]
                    );
    }
    my $feature_id = $self->get_temp_uid($sf);
    
    my $fnode =
      [feature=>[
                 [feature_id=>$feature_id],
                 [name=>$name],
                 [typename=>$type],
                 @locnodes,
                 (map {
                     my $k = $_;
                     map { [featureprop=>[[pkey=>$k],[pval=>$_]]] } @{$props{$k}}
                 } keys %props),
                ]];
    $w->ev(@$fnode);

    foreach my $ssf (@subsfs) {
        my $ssfid = $self->write_sf($ssf, $sid);
        $w->ev(feature_relationship=>[
                                      [subjfeature_id=>$ssfid],
                                      [objfeature_id=>$feature_id]
                                     ]
              );
    }
    return $feature_id;
}

# private;
# an ID for this session that should be
# unique... hmm
sub session_id {
    my $self = shift;
    $self->{_session_id} = shift if @_;
    if (!$self->{_session_id}) {
        $self->{_session_id} = $$.time;
    }
    return $self->{_session_id};
}


our $next_id = 1;
our %obj2id_hash = ();
sub get_temp_uid {
    my $self = shift;
    my $ob = shift;
    my $id = $obj2id_hash{$ob};
    if (!$id) {
        $id = $next_id++;
        $obj2id_hash{$ob} = $id;
    }
    return $self->session_id.'.'.$id;
}

# interbase and directional semantics
sub bp2ib {
    my $self = shift;
    my $loc = shift;
    my ($s, $e, $str) = 
      ref($loc) eq "ARRAY" ? (@$loc) : ($loc->start, $loc->end, $loc->strand);
    if ($str < 0) {
        ($s, $e) = ($e, $s);
    }
    $s--;
    return ($s, $e, $str);
}

sub subpartof {
    my $self = shift;
    my $type = 'partof_'.shift;
    $type =~ s/partof_CDS/CDS_exon/;
    $type =~ s/partof_\wRNA/exon/;
    return $type;
}

1;