view variant_effect_predictor/Bio/SeqIO/game.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

# $Id: game.pm,v 1.26.2.1 2003/06/28 22:23:15 jason Exp $
#
# BioPerl module for Bio::SeqIO::game
#
# Cared for by Brad Marshall <bradmars@yahoo.com>
#
# Copyright Ewan Birney & Lincoln Stein & Brad Marshall
#
# You may distribute this module under the same terms as perl itself
# _history
# June 25, 2000     written by Brad Marshall 
#
# POD documentation - main docs before the code

=head1 NAME

Bio::SeqIO::game - Parses GAME XML 0.1 and higher into and out of Bio::Seq objects.  

=head1 SYNOPSIS

To use this module you need XML::Parser, XML::Parser::PerlSAX
and XML::Writer.

Do not use this module directly.  Use it via the Bio::SeqIO class.

=head1 DESCRIPTION

This object can transform Bio::Seq objects to and from bioxml seq,
computation, feature and annotation dtds,versions 0.1 and higher.
These can be found at http://www.bioxml.org/dtds/current.  It does
this using the idHandler, seqHandler and featureHandler modules you
should have gotten with this one.

The idea is that any bioxml features can be turned into bioperl
annotations.  When Annotations and computations are parsed in, they
gain additional info in the bioperl SeqFeature tag attribute.  These
can be used to reconstitute a computation or annotation by the bioxml
with the bx-handler module when write_seq is called.

If you use this to write SeqFeatures that were not generated from
computations or annotations, it will output a list of bioxml features.
Some data may be lost in this step, since bioxml features just have a
span, type and description - nothing about the anlysis performed.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this
and other Bioperl modules. Send your comments and suggestions preferably
 to one of the Bioperl mailing lists.
Your participation is much appreciated.

  bioperl-l@bioperl.org        - Technical bioperl discussion
  bioxml-dev@bioxml.org        - Technical discussion - Moderate volume
  bioxml-announce@bioxml.org   - General Announcements - Pretty dead
  http://www.bioxml.org/MailingLists/         - About the mailing lists

=head1 AUTHOR - Brad Marshall & Ewan Birney & Lincoln Stein

Email: bradmars@yahoo.com
       birney@sanger.ac.uk
       lstein@cshl.org


=head1 CONTRIBUTORS

Jason Stajich E<lt>jason@bioperl.orgE<gt>

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut

# Let the code begin...

package Bio::SeqIO::game;
use vars qw(@ISA);
use strict;
# Object preamble - inherits from Bio::Root::Object

use Bio::SeqIO;
use Bio::SeqIO::game::seqHandler;
use Bio::SeqIO::game::featureHandler;
use Bio::SeqIO::game::idHandler;
use XML::Parser::PerlSAX;
use Bio::SeqFeature::Generic;
use XML::Writer;

use Bio::Seq;

@ISA = qw(Bio::SeqIO);


sub _initialize {        
  my($self,@args) = @_;
  $self->SUPER::_initialize(@args);
  my $xmlfile           = "";
  $self->{'counter'}    = 0;
  $self->{'id_counter'} = 1;  
  $self->{'leftovers'}  = undef;
  $self->{'header'}     = undef;
  $self->{'chunkable'}  = undef;
  $self->{'xmldoc'}     = undef;

  $self->_export_subfeatures(1);
  $self->_group_subfeatures(1);
  $self->_subfeature_types('exons', 'promoters','poly_A_sites',
			   'utrs','introns','sub_SeqFeature');
  
  # filehandle is stored by superclass _initialize
}


=head2 _export_subfeatures

 Title   : _export_subfeatures
 Usage   : $obj->_export_subfeatures
 Function: export all subfeatures (also in the geneprediction structure)
 Returns : value of _export_subfeatures
 Args    : newvalue (optional)

=cut

sub _export_subfeatures{
   my $obj = shift;
   if( @_ ) {
      my $value = shift;
      $obj->{'_export_subfeatures'} = $value;
    }
    return $obj->{'_export_subfeatures'};

} 

=head2 _group_subfeatures

 Title   : _group_subfeatures
 Usage   : $obj->_group_subfeatures
 Function: Groups all subfeatures in separate feature_sets
 Returns : value of _group_subfeatures
 Args    : newvalue (optional)

=cut

sub _group_subfeatures{
   my $obj = shift;
   if( @_ ) {
      my $value = shift;
      $obj->{'_group_subfeatures'} = $value;
    }
    return $obj->{'_group_subfeatures'};
}

=head2 _subfeature_types

 Title   : _subfeature_types
 Usage   : $obj->_subfeature_types
 Function: array of all possible subfeatures, it should be a 
           name of a function which
         : returns an arrau of sub_seqfeatures when called: 
           @array = $feature->subfeaturetype()
 Returns : array of _subfeature_types
 Args    : array of subfeature types (optional)

=cut

sub _subfeature_types{
   my $obj = shift;
   if( @_ ) {
      my @values = @_;
      $obj->{'_subfeature_types'} = \@values;
    }
    return @{$obj->{'_subfeature_types'}};

} 

=head2 _add_subfeature_type
 
 Title   : _add_subfeature_type
 Usage   : $obj->_add_subfeature_type
 Function: add one possible subfeature, it should be a name of a function which
         : returns an arrau of sub_seqfeatures when called: @array = $feature->subfeaturetyp()
 Returns : 1
 Args    : one subfeature type (optional)

=cut

sub _add_subfeature_type{
   my $obj = shift;
   if( @_ ) {
      my @values = @_;
      push @{$obj->{'_subfeature_types'}}, @values;
    }
    return 1;

} 


=head2 next_seq

 Title   : next_seq
 Usage   : $seq = $stream->next_seq()
 Function: returns the next sequence in the stream
 Returns : Bio::Seq object
 Args    : NONE

=cut

sub next_seq {
  my $self = shift; 


  #  The header is the top level stuff in the XML file.
  #  IE  before the first <bx-seq:seq> tag.
  #  If you don't include this in each 'chunk', the
  #  parser will barf.
  my $header;
  unless ($self->{'header'}) {
    while (my $next_line = $self->_readline) {
      if($next_line=~/<bx-seq:seq?/) {
	$header .= $`;
	$self->{'header'}=$header;
	$self->{'leftovers'} .= "<bx-seq:seq".$';
	last;
      } else {
	$header .= $next_line;
      }      
    }
    if ($self->{'header'}=~m|<bx-game:flavor>.*chunkable.*</bx-game:flavor>|) {
      $self->{'chunkable'}=1;
    }
    
  }

  my $not_top_level;
  my $xmldoc;
  my $seq;
  #  If chunkable, we read in the document until the next 
  #  TOP LEVEL sequence.
  if ($self->{'chunkable'}) {
    $xmldoc = $self->{'header'}.$self->{'leftovers'};
    while (my $next_line = $self->_readline) {
      # Maintain depth of computations and annotations. 
      # We only want TOP LEVEL seqs if chunkable.
      while ($next_line=~ m|<bx-computation:computation|g) {
	$not_top_level++;
      }
      while ($next_line=~ m|<bx-annotation:annotation|g) {
	$not_top_level++;
      }
      while ($next_line=~ m|</bx-computation:computation|g) {
	$not_top_level--;
      }
      while ($next_line=~ m|</bx-annotation:annotation|g) {
	$not_top_level--;
      }
      if($next_line=~/<bx-seq:seq?/) {
	if (!$not_top_level) {
	  $xmldoc .= $`;
	  $self->{'leftovers'} .= "<bx-seq:seq".$';
	  last;
	}
      } else {
	$xmldoc .= $next_line;
      }  
    }
    #  Make sure the 'doc chunk' has a closing tag 
    #  to make the parser happy.
    if (!$xmldoc=~m|</bx-game:game>|){
      $xmldoc .= "</bx-game:game>";
    }
    # Grab the TOP LEVEL seq..
    if ($xmldoc =~ m|</bx-seq:seq|) {
      my $handler = Bio::SeqIO::game::idHandler->new();
      my $options = {Handler=>$handler};
      my $parser  = XML::Parser::PerlSAX->new($options);
      $self->{'seqs'} = $parser->parse(Source => { String => $xmldoc });
    } else { # No sequences.
      return 0;
    }
    # Get the seq out of the array.
    $seq = @{$self->{'seqs'}}[0];
  # If not chunkable,
  # only read document into memory once!
  } elsif (!$self->{'xmldoc'}) {
    $self->{'xmldoc'}=$self->{'header'}.$self->{'leftovers'};
    while (my $next_line = $self->_readline) {
      $self->{'xmldoc'} .= $next_line;
    }
    $xmldoc=$self->{'xmldoc'};
    # Get the seq id index.
    if ($xmldoc =~ m|</bx-seq:seq|) {
      my $handler = Bio::SeqIO::game::idHandler->new();
      my $options = {Handler=>$handler};
      my $parser  = XML::Parser::PerlSAX->new($options);
      $self->{'seqs'} = $parser->parse(Source => { String => $xmldoc });
      $seq = shift @{$self->{'seqs'}};
    } else { # No sequences.
      return 0;
    }
    my $seq = @{$self->{'seqs'}}[0];
  # if we already have the doc in memory, 
  # just get the doc.
  } elsif ($self->{'xmldoc'}) {
    $xmldoc=$self->{'xmldoc'};
    $seq = shift @{$self->{'seqs'}};
  }
  #  If there's more sequences:
  if ($seq) {
    # Get the next seq.
    my $handler = Bio::SeqIO::game::seqHandler->new(-seq => $seq);
    my $options = {Handler=>$handler};
    my $parser  = XML::Parser::PerlSAX->new($options);
    my $pseq = $parser->parse(Source => { String => $xmldoc });
    # get the features.
    my $fhandler = Bio::SeqIO::game::featureHandler->new($pseq->id(),
							 $pseq->length(), 
							 $pseq->alphabet());
    $options = {Handler=>$fhandler};
    
    $parser = XML::Parser::PerlSAX->new($options);
    my $features = $parser->parse(Source => { String => $xmldoc });
    my $seq = Bio::Seq->new();
    # Build the Bioperl Seq and return it.
    foreach my $feature (@{$features}) {
      $seq->add_SeqFeature($feature);
    }
    $seq->primary_seq($pseq);
    return $seq;
  } else {
    return 0;
  }
}

=head2 next_primary_seq

 Title   : next_primary_seq
 Usage   : $seq = $stream->next_primary_seq()
 Function: returns the next primary sequence (ie no seq_features) in the stream
 Returns : Bio::PrimarySeq object
 Args    : NONE

=cut

sub next_primary_seq {
  my $self=shift;

  #  The header is the top level stuff in the XML file.
  #  IE  before the first <bx-seq:seq> tag.
  #  If you don't include this in each 'chunk', the
  #  parser will barf.
  my $header;
  unless ($self->{'header'}) {
    while (my $next_line = $self->_readline) {
      if($next_line=~/<bx-seq:seq?/) {
	$header .= $`;
	$self->{'header'}=$header;
	$self->{'leftovers'} .= "<bx-seq:seq".$';
	last;
      } else {
	$header .= $next_line;
      }      
    }
    if ($self->{'header'}=~m|<bx-game:flavor>.*chunkable.*</bx-game:flavor>|) {
      $self->{'chunkable'}=1;
    }
      
  }

  my $not_top_level = 0;
  my $xmldoc;
  my $seq;
  #  If chunkable, we read in the document until the next 
  #  TOP LEVEL sequence.
  if ($self->{'chunkable'}) {
    $xmldoc = $self->{'header'}.$self->{'leftovers'};
    while (my $next_line = $self->_readline) {
      # Maintain depth of computations and annotations. 
      # We only want TOP LEVEL seqs if chunkable.
      while ($next_line=~ m|<bx-computation:computation|g) {
	$not_top_level++;
      }
      while ($next_line=~ m|<bx-annotation:annotationn|g) {
	$not_top_level++;
      }
      while ($next_line=~ m|</bx-computation:computation|g) {
	$not_top_level--;
      }
      while ($next_line=~ m|</bx-annotation:annotationn|g) {
	$not_top_level--;
      }
      if($next_line=~/<bx-seq:seq?/) {
	if (!$not_top_level) {
	  $xmldoc .= $`;
	  $self->{'leftovers'} .= "<bx-seq:seq".$';
	  last;
	}
      } else {
	$xmldoc .= $next_line;
      }  
    }
    #  Make sure the 'doc chunk' has a closing tag 
    #  to make the parser happy.
    if (!$xmldoc=~m|</bx-game:game>|){
      $xmldoc .= "</bx-game:game>";
    }
    # Grab the TOP LEVEL seq..
    if ($xmldoc =~ m|</bx-seq:seq|) {
      my $handler = Bio::SeqIO::game::idHandler->new();
      my $options = {Handler=>$handler};
      my $parser  = XML::Parser::PerlSAX->new($options);
      $self->{'seqs'} = $parser->parse(Source => { String => $xmldoc });
    } else { # No sequences.
      return 0;
    }
    $seq = @{$self->{'seqs'}}[0];
  # If not chunkable,
  # only read document into memory once!
  } elsif (!$self->{'xmldoc'}) {
    $self->{'xmldoc'}=$self->{'header'}.$self->{'leftovers'};
    while (my $next_line = $self->_readline) {
      $self->{'xmldoc'} .= $next_line;
    }
    $xmldoc=$self->{'xmldoc'};
    # Get the seq id index.
    if ($xmldoc =~ m|</bx-seq:seq|) {
      my $handler = Bio::SeqIO::game::idHandler->new();
      my $options = {Handler=>$handler};
      my $parser  = XML::Parser::PerlSAX->new($options);
      $self->{'seqs'} = $parser->parse(Source => { String => $xmldoc });
      $seq = shift @{$self->{'seqs'}};
    } else { # No sequences.
      return 0;
    }
    my $seq = @{$self->{'seqs'}}[0];
  # if we already have the doc in memory, 
  # just get the doc.
  } elsif ($self->{'xmldoc'}) {
    $xmldoc=$self->{'xmldoc'};
    $seq = shift @{$self->{'seqs'}};
  }

  #print $xmldoc;
    
  if ($seq) {
    # Get the next seq.
    my $handler = Bio::SeqIO::game::seqHandler->new(-seq => $seq);
    my $options = {Handler=>$handler};
    my $parser  = XML::Parser::PerlSAX->new($options);
    my $pseq = $parser->parse(Source => { String => $xmldoc });
    return $pseq;
  } else {
    return 0;
  }
}


=head2 write_seq

 Title   : write_seq
 Usage   : Not Yet Implemented! $stream->write_seq(@seq)
 Function: writes the $seq object into the stream
 Returns : 1 for success and 0 for error
 Args    : Bio::Seq object


=cut

sub write_seq {
    my ($self,@seqs) = @_;

    my $bxfeat  = "http://www.bioxml.org/dtds/current/feature.dtd";
    my $bxann   = "http://www.bioxml.org/dtds/current/annotation.dtd";
    my $bxcomp  = "http://www.bioxml.org/dtds/current/computation.dtd";
    my $bxgame  = "http://www.bioxml.org/dtds/current/game.dtd";
    my $bxlink  = "http://www.bioxml.org/dtds/current/link.dtd";
    my $bxseq   = "http://www.bioxml.org/dtds/current/seq.dtd";

    my $writer = new XML::Writer(OUTPUT      => $self->_fh || \*STDOUT,
				 NAMESPACES  => 1,
				 DATA_MODE   => 1,
				 DATA_INDENT => 4,
				 PREFIX_MAP  => {
				     ''     => '', # to keep undef warnings away in XML::Writer, fill in with something as a default prefix later?
				     $bxfeat => 'bx-feature',
				     $bxann =>  'bx-annotation',
				     $bxcomp => 'bx-computation',
				     $bxgame => 'bx-game',
				     $bxlink => 'bx-link',
				     $bxseq =>  'bx-seq'
				     });
    $writer->xmlDecl("UTF-8");
    $writer->doctype("bx-game:game", 'game', $bxgame);
    $writer ->startTag ([$bxgame, 'game']);
    $writer->startTag  ([$bxgame, 'flavor']);
    $writer->characters('chunkable');
    $writer->endTag    ([$bxgame, 'flavor']);

    foreach my $seq (@seqs) {
	$writer->startTag([$bxseq, 'seq'], 
			  [$bxseq, 'id'] => $seq->display_id,
			  [$bxseq, 'length'] => $seq->length,
			  [$bxseq, 'type'] => $seq->alphabet);
	if ($seq->length > 0) {
	    $writer->startTag([$bxseq, 'residues']);
	    $writer->characters($seq->seq);
	    $writer->endTag([$bxseq, 'residues']);
	}
	$writer->endTag([$bxseq, 'seq']);

	my @feats = $seq->all_SeqFeatures;

	my $features;
	foreach my $feature (@feats) {
	    if ($feature->has_tag('annotation_id')) {
		my @ann_id = $feature->each_tag_value('annotation_id');
		push (@{$features->{'annotations'}->{$ann_id[0]}}, $feature);
	    } elsif ($feature->has_tag('computation_id')) {
		my @comp_id = $feature->each_tag_value('computation_id');
		push (@{$features->{'computations'}->{$comp_id[0]}}, $feature);
	    } else {
		push (@{$features->{'everybody_else'}}, $feature);
	    }
	}
	foreach my $key (keys %{$features->{'annotations'}}) {
	    $writer->startTag([$bxann, 'annotation'],
			      [$bxann, 'id']=>$key
			      );
	    $writer->startTag([$bxann, 'seq_link']);
	    $writer->startTag([$bxlink, 'link']);
	    $writer->emptyTag([$bxlink, 'ref_link'],
			      [$bxlink, 'ref'] => $seq->display_id());
	    $writer->endTag([$bxlink, 'link']);
	    $writer->endTag([$bxann, 'seq_link']);					   
	    $self->__draw_feature_set($writer, $seq, $bxann, "", @{$features->{'annotations'}->{$key}});
	    $writer->endTag([$bxann, 'annotation']);
	}

	foreach my $key (keys %{$features->{'computations'}}) {
	    $writer->startTag([$bxcomp, 'computation'],
			      [$bxcomp, 'id']=>$key
			      );
	    $writer->startTag([$bxcomp, 'seq_link']);
	    $writer->startTag([$bxlink, 'link']);
	    $writer->emptyTag([$bxlink, 'ref_link'],
			      [$bxlink, 'ref'] => $seq->display_id());
	    $writer->endTag([$bxlink, 'link']);
	    $writer->endTag([$bxcomp, 'seq_link']);
	    $self->__draw_feature_set($writer, $seq, $bxcomp, "", @{$features->{'computations'}->{$key}});   
	    $writer->endTag([$bxcomp, 'computation']);
	}
	foreach my $feature (@{$features->{'everybody_else'}}) {
	    $self->__draw_feature($writer, $feature, $seq, "", 
				  $self->_export_subfeatures());
	}
    }
    $writer->endTag([$bxgame, 'game']);

    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return 1;
}


#these two subroutines are very specific!

sub __draw_feature_set {
    my ($self, $writer, $seq, $namespace, $parent, @features) = @_;
    my ($feature_set_id);

    my $bxfeat = "http://www.bioxml.org/dtds/current/feature.dtd";

    if ($self->_export_subfeatures() && $self->_group_subfeatures()) {
	$feature_set_id = $self->{'id_counter'}; $self->{'id_counter'}++;
	$writer->startTag([$namespace, 'feature_set'],
			  [$namespace, 'id'] => $feature_set_id);
	foreach my $feature (@features) {
	    $self->__draw_feature($writer, $feature, $seq, $parent , 0);  
	}
	$writer->endTag([$namespace, 'feature_set']);
	foreach my $feature (@features) {
	    foreach my $subset ($self->_subfeature_types()) {
		if (my @subfeatures = eval ( '$feature->' . $subset . '()' )) {
		    my @id = $feature->each_tag_value('id');
		    $self->__draw_feature_set($writer, $seq, $namespace, $id[0], @subfeatures);     
		}
	    }	        
	}

    } else {
	$feature_set_id = $self->{'id_counter'}; $self->{'id_counter'}++;
	$writer->startTag([$namespace, 'feature_set'],
			  [$namespace, 'id'] => $feature_set_id);
	foreach my $feature (@features) {
	    $self->__draw_feature($writer, $feature, $seq, "" , $self->_export_subfeatures());
	}
	$writer->endTag([$namespace, 'feature_set']);
    }
}


sub __draw_feature {
    my ($self, $writer, $feature, $seq, $parent, $recursive) = @_;
    my ($subfeature, $subset, @subfeatures, $score, $score_val, $score_no);
    my $bxfeat = "http://www.bioxml.org/dtds/current/feature.dtd";
    
    if (!$feature->has_tag('id')) {
	$feature->add_tag_value('id', $self->{'id_counter'});
	$self->{'id_counter'}++;
    }
    
    my @id = $feature->each_tag_value('id');
    if ($parent) {
	$writer->startTag([$bxfeat, 'feature'],
			  [$bxfeat, 'id'] => $id[0]
			  );
    } else {
	$writer->startTag([$bxfeat, 'feature'],
			  [$bxfeat, 'id'] => $id[0],
			  [$bxfeat, 'parent'] => $parent
			  );
    }    
    $writer->startTag([$bxfeat, 'type']);
    $writer->characters($feature->primary_tag());
    $writer->endTag([$bxfeat, 'type']);
    foreach $score ($feature->all_tags()) {
	next if ($score eq 'id');
	$writer->startTag([$bxfeat, 'score'],
			  [$bxfeat, 'type'] => $score 
			  );
	$score_no = 0;
	foreach $score_val ($feature->each_tag_value($score)) {
	    next unless defined $score_val;
	    $writer->characters(' ') if ($score_no > 0);
	    $writer->characters($score_val);
	    $score_no++;
	}
	$writer->endTag([$bxfeat, 'score']);
    }

    $writer->startTag([$bxfeat, 'seq_relationship'],
		      [$bxfeat, 'seq'] => $seq->display_id,
		      [$bxfeat, 'type'] => 'query'
		      );

    $writer->startTag([$bxfeat, 'span']);
    $writer->startTag([$bxfeat, 'start']);
    $writer->characters($feature->start());
    $writer->endTag([$bxfeat, 'start']);
    $writer->startTag([$bxfeat, 'end']);
    $writer->characters($feature->end());
    $writer->endTag([$bxfeat, 'end']);
    $writer->endTag([$bxfeat, 'span']);
    $writer->endTag([$bxfeat, 'seq_relationship']);
    $writer->endTag([$bxfeat, 'feature']);

    #proces subseqfeature's, exons, introns, promotors, whatever...
    if ($recursive) {
	foreach $subset ($self->_subfeature_types()) {
	    #determine if it exists
	    if (@subfeatures = eval ( '$feature->' . $subset . '()' )) {
		foreach $subfeature (@subfeatures) {
		    $self->__draw_feature ($writer, $subfeature, $seq, $id[0], 1);
		}        
	    }
	}
    }
}

1;