diff variant_effect_predictor/Bio/SeqIO/ace.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/SeqIO/ace.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,196 @@
+# $Id: ace.pm,v 1.15 2002/10/25 16:23:16 jason Exp $
+#
+# BioPerl module for Bio::SeqIO::ace
+#
+# Cared for by James Gilbert <jgrg@sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::ace - ace sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and
+from ace file format.  It only parses the DNA or
+Peptide objects contained in the ace file,
+producing PrimarySeq objects from them.  All
+other objects in the files will be ignored.  It
+doesn't attempt to parse any annotation attached
+to the containing Sequence or Protein objects,
+which would probably be impossible, since
+everyone's ACeDB schema can be different.
+
+It won't parse ace files containing Timestamps
+correctly either.  This can easily be added if
+considered necessary.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://www.bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via email or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHORS - James Gilbert
+
+Email: jgrg@sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::SeqIO::ace;
+use strict;
+use vars qw(@ISA);
+
+use Bio::SeqIO;
+use Bio::Seq;
+use Bio::Seq::SeqFactory;
+
+@ISA = qw(Bio::SeqIO);
+
+sub _initialize {    # chain to parent, then default to a Bio::PrimarySeq factory
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    return if defined $self->sequence_factory;    # keep a factory that is already set
+    my %factory_args = (-verbose => $self->verbose(), -type => 'Bio::PrimarySeq');
+    $self->sequence_factory(Bio::Seq::SeqFactory->new(%factory_args));    # arrow call, not indirect "new Class"
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next DNA or Peptide object in the stream,
+           skipping every other kind of ace object
+ Returns : a sequence object from the stream's sequence factory, or
+           nothing once no DNA or Peptide object is left
+ Args    : NONE
+
+=cut
+
+{
+    # Maps the lower-cased ace class name to the alphabet given to the factory
+    my %bio_mol_type = (
+        'dna'       => 'dna',
+        'peptide'   => 'protein',
+    );
+
+    sub next_seq {
+        my( $self ) = @_;
+        local $/ = "";  # Split input on blank lines
+
+        my $fh = $self->_filehandle;
+        my( $type, $id, $residues );
+        # Read into a lexical: a bare while (<$fh>) assigns to the global $_
+        # without localizing it, which would clobber the caller's $_.
+        while (defined( my $entry = <$fh> )) {
+            ($type, $id) = $entry =~ /^(DNA|Peptide)[\s:]+(.+?)\s*\n/si or next;
+            ($residues = $entry) =~ s/^.+$//m;  # Remove first line
+            $residues =~ s/\s+//g;              # Remove whitespace
+            last;
+        }
+        # Return if there weren't any DNA or peptide objects
+        return unless $type;
+
+        # Choose the molecule type
+        my $mol_type = $bio_mol_type{lc $type}
+            or $self->throw("Can't get Bio::Seq molecule type for '$type'");
+
+        # Remove quotes from $id
+        $id =~ s/^"|"$//g;
+
+        # Un-escape forward slashes, double quotes, percent signs,
+        # semi-colons, tabs, and backslashes in the object name.
+        $id =~ s/\\([\/"%;\t\\])/$1/g;
+        return $self->sequence_factory->create(
+            -seq        => $residues,
+            -primary_id => $id,
+            -display_id => $id,
+            -alphabet   => $mol_type,
+        );
+    }
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes each $seq object into the stream in ace format
+ Returns : 1 for success; calls throw() if an argument is not a
+           Bio::PrimarySeqI or its alphabet is neither 'dna' nor 'protein'
+ Args    : Bio::PrimarySeqI object(s)
+
+=cut
+
+sub write_seq {
+    my ($self, @seq) = @_;
+
+    foreach my $seq (@seq) {
+        $self->throw("Did not provide a valid Bio::PrimarySeqI object")
+            unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
+        my $mol_type = $seq->alphabet;
+        my $id = $seq->display_id;
+
+        # Escape special characters in id
+        $id =~ s/([\/"%;\t\\])/\\$1/g;
+
+        # Print header for DNA or Protein object
+        if ($mol_type eq 'dna') {
+            $self->_print(
+                qq{\nSequence : "$id"\nDNA "$id"\n},
+                qq{\nDNA : "$id"\n},
+            );
+        }
+        elsif ($mol_type eq 'protein') {
+            $self->_print(
+                qq{\nProtein : "$id"\nPeptide "$id"\n},
+                qq{\nPeptide : "$id"\n},
+            );
+        }
+        else {
+            $self->throw("Don't know how to produce ACeDB output for '$mol_type'");
+        }
+
+        # Print the sequence, at most 60 residues per line; the '' defaults
+        # keep an undefined or empty sequence from passing undef to _print.
+        my $str = $seq->seq;
+        $str = '' unless defined $str;
+        my $formatted_seq = '';
+        $formatted_seq .= "$1\n" while $str =~ /(.{1,60})/g;
+        $self->_print($formatted_seq, "\n");
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;