Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Variation/IO.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Variation/IO.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,349 @@ +# $Id: IO.pm,v 1.14 2002/11/04 09:07:45 heikki Exp $ +# +# BioPerl module for Bio::Variation::IO +# +# Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> +# +# Copyright Heikki Lehvaslaiho +# +# You may distribute this module under the same terms as perl itself +# +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Variation::IO - Handler for sequence variation IO Formats + +=head1 SYNOPSIS + + use Bio::Variation::IO; + + $in = Bio::Variation::IO->new(-file => "inputfilename" , '-format' => 'flat'); + $out = Bio::Variation::IO->new(-file => ">outputfilename" , '-format' => 'xml'); + # note: we quote -format to keep older perl's from complaining. + + while ( my $seq = $in->next() ) { + $out->write($seq); + } + +or + + use Bio::Variation::IO; + + #input file format can be read from the file extension (dat|xml) + $in = Bio::Variation::IO->newFh(-file => "inputfilename"); + $out = Bio::Variation::IO->newFh('-format' => 'xml'); + + # World's shortest flat<->xml format converter: + print $out $_ while <$in>; + +=head1 DESCRIPTION + +Bio::Variation::IO is a handler module for the formats in the Variation IO set (eg, +Bio::Variation::IO::flat). It is the officially sanctioned way of getting at +the format objects, which most people should use. + +The structure, conventions and most of the code is inherited from +L<Bio::SeqIO> module. The main difference is that instead of using +methods next_seq and write_seq, you drop '_seq' from the method names. + +The idea is that you request a stream object for a particular format. +All the stream objects have a notion of an internal file that is read +from or written to. A particular SeqIO object instance is configured +for either input or output. A specific example of a stream object is +the Bio::Variation::IO::flat object. + +Each stream object has functions + + $stream->next(); + +and + + $stream->write($seqDiff); + +also + + $stream->type() # returns 'INPUT' or 'OUTPUT' + +As an added bonus, you can recover a filehandle that is tied to the +SeqIO object, allowing you to use the standard E<lt>E<gt> and print operations +to read and write sequence objects: + + use Bio::Variation::IO; + + $stream = Bio::Variation::IO->newFh(-format => 'flat'); # read from standard input + + while ( $seq = <$stream> ) { + # do something with $seq + } + +and + + print $stream $seq; # when stream is in output mode + +This makes the simplest ever reformatter + + #!/usr/local/bin/perl + + $format1 = shift; + $format2 = shift || die "Usage: reformat format1 format2 < input > output"; + + use Bio::Variation::IO; + + $in = Bio::Variation::IO->newFh(-format => $format1 ); + $out = Bio::Variation::IO->newFh(-format => $format2 ); + #note: you might want to quote -format to keep older perl's from complaining. + + print $out $_ while <$in>; + + +=head1 CONSTRUCTORS + +=head2 Bio::Variation::IO-E<gt>new() + + $seqIO = Bio::Variation::IO->new(-file => 'filename', -format=>$format); + $seqIO = Bio::Variation::IO->new(-fh => \*FILEHANDLE, -format=>$format); + $seqIO = Bio::Variation::IO->new(-format => $format); + +The new() class method constructs a new Bio::Variation::IO object. The +returned object can be used to retrieve or print BioSeq objects. new() +accepts the following parameters: + +=over 4 + +=item -file + +A file path to be opened for reading or writing. The usual Perl +conventions apply: + + 'file' # open file for reading + '>file' # open file for writing + '>>file' # open file for appending + '+<file' # open file read/write + 'command |' # open a pipe from the command + '| command' # open a pipe to the command + +=item -fh + +You may provide new() with a previously-opened filehandle. For +example, to read from STDIN: + + $seqIO = Bio::Variation::IO->new(-fh => \*STDIN); + +Note that you must pass filehandles as references to globs. + +If neither a filehandle nor a filename is specified, then the module +will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt> +semantics. + +=item -format + +Specify the format of the file. Supported formats include: + + flat pseudo EMBL format + xml seqvar xml format + +If no format is specified and a filename is given, then the module +will attempt to deduce it from the filename. If this is unsuccessful, +Fasta format is assumed. + +The format name is case insensitive. 'FLAT', 'Flat' and 'flat' are +all supported. + +=back + +=head2 Bio::Variation::IO-E<gt>newFh() + + $fh = Bio::Variation::IO->newFh(-fh => \*FILEHANDLE, -format=>$format); + $fh = Bio::Variation::IO->newFh(-format => $format); + # etc. + + #e.g. + $out = Bio::Variation::IO->newFh( '-FORMAT' => 'flat'); + print $out $seqDiff; + +This constructor behaves like new(), but returns a tied filehandle +rather than a Bio::Variation::IO object. You can read sequences from this +object using the familiar E<lt>E<gt> operator, and write to it using print(). +The usual array and $_ semantics work. For example, you can read all +sequence objects into an array like this: + + @mutations = <$fh>; + +Other operations, such as read(), sysread(), write(), close(), and printf() +are not supported. + +=head1 OBJECT METHODS + +See below for more detailed summaries. The main methods are: + +=head2 $sequence = $seqIO-E<gt>next() + +Fetch the next sequence from the stream. + +=head2 $seqIO-E<gt>write($sequence [,$another_sequence,...]) + +Write the specified sequence(s) to the stream. + +=head2 TIEHANDLE(), READLINE(), PRINT() + +These provide the tie interface. See L<perltie> for more details. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to the +Bioperl mailing lists Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +report bugs to the Bioperl bug tracking system to help us keep track + the bugs and their resolution. Bug reports can be submitted via + email or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Heikki Lehvaslaiho + +Email: heikki@ebi.ac.uk +Address: + + EMBL Outstation, European Bioinformatics Institute + Wellcome Trust Genome Campus, Hinxton + Cambs. CB10 1SD, United Kingdom + + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + +# Let the code begin... + +package Bio::Variation::IO; +my $VERSION=1.0; + +use strict; +use vars '@ISA'; + +use Bio::SeqIO; + +@ISA = 'Bio::SeqIO'; + +=head2 new + + Title : new + Usage : $stream = Bio::Variation::IO->new(-file => $filename, -format => 'Format') + Function: Returns a new seqstream + Returns : A Bio::Variation::IO::Handler initialised with the appropriate format + Args : -file => $filename + -format => format + -fh => filehandle to attach to + +=cut + + +sub new { + my ($class, %param) = @_; + my ($format); + + @param{ map { lc $_ } keys %param } = values %param; # lowercase keys + $format = $param{'-format'} + || $class->_guess_format( $param{-file} || $ARGV[0] ) + || 'flat'; + $format = "\L$format"; # normalize capitalization to lower case + + return undef unless $class->_load_format_module($format); + return "Bio::Variation::IO::$format"->new(%param); +} + + +sub _load_format_module { + my ($class, $format) = @_; + my $module = "Bio::Variation::IO::" . $format; + my $ok; + eval { + $ok = $class->_load_module($module); + }; + if ( $@ ) { + print STDERR <<END; +$class: $format cannot be found +Exception $@ +For more information about the IO system please see the IO docs. +This includes ways of checking for formats at compile time, not run time +END + ; + } + return $ok; +} + +=head2 next + + Title : next + Usage : $seqDiff = $stream->next + Function: reads the next $seqDiff object from the stream + Returns : a Bio::Variation::SeqDiff object + Args : + +=cut + +sub next { + my ($self, $seq) = @_; + $self->throw("Sorry, you cannot read from a generic Bio::Variation::IO object."); +} + +sub next_seq { + my ($self, $seq) = @_; + $self->throw("These are not sequence objects. Use method 'next' instead of 'next_seq'."); + $self->next($seq); +} + +=head2 write + + Title : write + Usage : $stream->write($seq) + Function: writes the $seq object into the stream + Returns : 1 for success and 0 for error + Args : Bio::Variation::SeqDiff object + +=cut + +sub write { + my ($self, $seq) = @_; + $self->throw("Sorry, you cannot write to a generic Bio::Variation::IO object."); +} + +sub write_seq { + my ($self, $seq) = @_; + $self->warn("These are not sequence objects. Use method 'write' instead of 'write_seq'."); + $self->write($seq); +} + +=head2 _guess_format + + Title : _guess_format + Usage : $obj->_guess_format($filename) + Function: + Example : + Returns : guessed format of filename (lower case) + Args : + +=cut + +sub _guess_format { + my $class = shift; + return unless $_ = shift; + return 'flat' if /\.dat$/i; + return 'xml' if /\.xml$/i; +} + + +1;