Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Variation/IO.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: IO.pm,v 1.14 2002/11/04 09:07:45 heikki Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::Variation::IO | |
| 4 # | |
| 5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
| 6 # | |
| 7 # Copyright Heikki Lehvaslaiho | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Variation::IO - Handler for sequence variation IO Formats | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 use Bio::Variation::IO; | |
| 20 | |
| 21 $in = Bio::Variation::IO->new(-file => "inputfilename" , '-format' => 'flat'); | |
| 22 $out = Bio::Variation::IO->new(-file => ">outputfilename" , '-format' => 'xml'); | |
| 23 # note: we quote -format to keep older perl's from complaining. | |
| 24 | |
| 25 while ( my $seq = $in->next() ) { | |
| 26 $out->write($seq); | |
| 27 } | |
| 28 | |
| 29 or | |
| 30 | |
| 31 use Bio::Variation::IO; | |
| 32 | |
| 33 #input file format can be read from the file extension (dat|xml) | |
| 34 $in = Bio::Variation::IO->newFh(-file => "inputfilename"); | |
| 35 $out = Bio::Variation::IO->newFh('-format' => 'xml'); | |
| 36 | |
| 37 # World's shortest flat<->xml format converter: | |
| 38 print $out $_ while <$in>; | |
| 39 | |
| 40 =head1 DESCRIPTION | |
| 41 | |
| 42 Bio::Variation::IO is a handler module for the formats in the Variation IO set (eg, | |
| 43 Bio::Variation::IO::flat). It is the officially sanctioned way of getting at | |
| 44 the format objects, which most people should use. | |
| 45 | |
| 46 The structure, conventions and most of the code is inherited from | |
| 47 L<Bio::SeqIO> module. The main difference is that instead of using | |
| 48 methods next_seq and write_seq, you drop '_seq' from the method names. | |
| 49 | |
| 50 The idea is that you request a stream object for a particular format. | |
| 51 All the stream objects have a notion of an internal file that is read | |
| 52 from or written to. A particular SeqIO object instance is configured | |
| 53 for either input or output. A specific example of a stream object is | |
| 54 the Bio::Variation::IO::flat object. | |
| 55 | |
| 56 Each stream object has functions | |
| 57 | |
| 58 $stream->next(); | |
| 59 | |
| 60 and | |
| 61 | |
| 62 $stream->write($seqDiff); | |
| 63 | |
| 64 also | |
| 65 | |
| 66 $stream->type() # returns 'INPUT' or 'OUTPUT' | |
| 67 | |
| 68 As an added bonus, you can recover a filehandle that is tied to the | |
| 69 SeqIO object, allowing you to use the standard E<lt>E<gt> and print operations | |
| 70 to read and write sequence objects: | |
| 71 | |
| 72 use Bio::Variation::IO; | |
| 73 | |
| 74 $stream = Bio::Variation::IO->newFh(-format => 'flat'); # read from standard input | |
| 75 | |
| 76 while ( $seq = <$stream> ) { | |
| 77 # do something with $seq | |
| 78 } | |
| 79 | |
| 80 and | |
| 81 | |
| 82 print $stream $seq; # when stream is in output mode | |
| 83 | |
| 84 This makes the simplest ever reformatter | |
| 85 | |
| 86 #!/usr/local/bin/perl | |
| 87 | |
| 88 $format1 = shift; | |
| 89 $format2 = shift || die "Usage: reformat format1 format2 < input > output"; | |
| 90 | |
| 91 use Bio::Variation::IO; | |
| 92 | |
| 93 $in = Bio::Variation::IO->newFh(-format => $format1 ); | |
| 94 $out = Bio::Variation::IO->newFh(-format => $format2 ); | |
| 95 #note: you might want to quote -format to keep older perl's from complaining. | |
| 96 | |
| 97 print $out $_ while <$in>; | |
| 98 | |
| 99 | |
| 100 =head1 CONSTRUCTORS | |
| 101 | |
| 102 =head2 Bio::Variation::IO-E<gt>new() | |
| 103 | |
| 104 $seqIO = Bio::Variation::IO->new(-file => 'filename', -format=>$format); | |
| 105 $seqIO = Bio::Variation::IO->new(-fh => \*FILEHANDLE, -format=>$format); | |
| 106 $seqIO = Bio::Variation::IO->new(-format => $format); | |
| 107 | |
| 108 The new() class method constructs a new Bio::Variation::IO object. The | |
| 109 returned object can be used to retrieve or print Bio::Variation::SeqDiff objects. new() | |
| 110 accepts the following parameters: | |
| 111 | |
| 112 =over 4 | |
| 113 | |
| 114 =item -file | |
| 115 | |
| 116 A file path to be opened for reading or writing. The usual Perl | |
| 117 conventions apply: | |
| 118 | |
| 119 'file' # open file for reading | |
| 120 '>file' # open file for writing | |
| 121 '>>file' # open file for appending | |
| 122 '+<file' # open file read/write | |
| 123 'command |' # open a pipe from the command | |
| 124 '| command' # open a pipe to the command | |
| 125 | |
| 126 =item -fh | |
| 127 | |
| 128 You may provide new() with a previously-opened filehandle. For | |
| 129 example, to read from STDIN: | |
| 130 | |
| 131 $seqIO = Bio::Variation::IO->new(-fh => \*STDIN); | |
| 132 | |
| 133 Note that you must pass filehandles as references to globs. | |
| 134 | |
| 135 If neither a filehandle nor a filename is specified, then the module | |
| 136 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt> | |
| 137 semantics. | |
| 138 | |
| 139 =item -format | |
| 140 | |
| 141 Specify the format of the file. Supported formats include: | |
| 142 | |
| 143 flat pseudo EMBL format | |
| 144 xml seqvar xml format | |
| 145 | |
| 146 If no format is specified and a filename is given, then the module | |
| 147 will attempt to deduce it from the filename. If this is unsuccessful, | |
| 148 the 'flat' format is assumed. | |
| 149 | |
| 150 The format name is case insensitive. 'FLAT', 'Flat' and 'flat' are | |
| 151 all supported. | |
| 152 | |
| 153 =back | |
| 154 | |
| 155 =head2 Bio::Variation::IO-E<gt>newFh() | |
| 156 | |
| 157 $fh = Bio::Variation::IO->newFh(-fh => \*FILEHANDLE, -format=>$format); | |
| 158 $fh = Bio::Variation::IO->newFh(-format => $format); | |
| 159 # etc. | |
| 160 | |
| 161 #e.g. | |
| 162 $out = Bio::Variation::IO->newFh( '-FORMAT' => 'flat'); | |
| 163 print $out $seqDiff; | |
| 164 | |
| 165 This constructor behaves like new(), but returns a tied filehandle | |
| 166 rather than a Bio::Variation::IO object. You can read sequences from this | |
| 167 object using the familiar E<lt>E<gt> operator, and write to it using print(). | |
| 168 The usual array and $_ semantics work. For example, you can read all | |
| 169 sequence objects into an array like this: | |
| 170 | |
| 171 @mutations = <$fh>; | |
| 172 | |
| 173 Other operations, such as read(), sysread(), write(), close(), and printf() | |
| 174 are not supported. | |
| 175 | |
| 176 =head1 OBJECT METHODS | |
| 177 | |
| 178 See below for more detailed summaries. The main methods are: | |
| 179 | |
| 180 =head2 $sequence = $seqIO-E<gt>next() | |
| 181 | |
| 182 Fetch the next sequence from the stream. | |
| 183 | |
| 184 =head2 $seqIO-E<gt>write($sequence [,$another_sequence,...]) | |
| 185 | |
| 186 Write the specified sequence(s) to the stream. | |
| 187 | |
| 188 =head2 TIEHANDLE(), READLINE(), PRINT() | |
| 189 | |
| 190 These provide the tie interface. See L<perltie> for more details. | |
| 191 | |
| 192 =head1 FEEDBACK | |
| 193 | |
| 194 =head2 Mailing Lists | |
| 195 | |
| 196 User feedback is an integral part of the evolution of this and other | |
| 197 Bioperl modules. Send your comments and suggestions preferably to the | |
| 198 Bioperl mailing lists. Your participation is much appreciated. | |
| 199 | |
| 200 bioperl-l@bioperl.org - General discussion | |
| 201 http://bio.perl.org/MailList.html - About the mailing lists | |
| 202 | |
| 203 =head2 Reporting Bugs | |
| 204 | |
| 205 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 206 of the bugs and their resolution. Bug reports can be submitted via | |
| 207 email or the web: | |
| 208 | |
| 209 bioperl-bugs@bio.perl.org | |
| 210 http://bugzilla.bioperl.org/ | |
| 211 | |
| 212 =head1 AUTHOR - Heikki Lehvaslaiho | |
| 213 | |
| 214 Email: heikki@ebi.ac.uk | |
| 215 Address: | |
| 216 | |
| 217 EMBL Outstation, European Bioinformatics Institute | |
| 218 Wellcome Trust Genome Campus, Hinxton | |
| 219 Cambs. CB10 1SD, United Kingdom | |
| 220 | |
| 221 | |
| 222 =head1 APPENDIX | |
| 223 | |
| 224 The rest of the documentation details each of the object | |
| 225 methods. Internal methods are usually preceded with a _ | |
| 226 | |
| 227 =cut | |
| 228 | |
| 229 # Let the code begin... | |
| 230 | |
package Bio::Variation::IO;

use strict;
use vars qw(@ISA $VERSION);

use Bio::SeqIO;

# $VERSION has to be a package variable: a file-scoped lexical ("my") is
# invisible to Bio::Variation::IO->VERSION and to version checks such as
# "use Bio::Variation::IO 1.0".
$VERSION = 1.0;

# All stream behaviour not defined in this file is inherited from Bio::SeqIO.
@ISA = 'Bio::SeqIO';
| 240 | |
| 241 =head2 new | |
| 242 | |
| 243 Title : new | |
| 244 Usage : $stream = Bio::Variation::IO->new(-file => $filename, -format => 'Format') | |
| 245 Function: Returns a new seqstream | |
| 246 Returns : A Bio::Variation::IO::Handler initialised with the appropriate format | |
| 247 Args : -file => $filename | |
| 248 -format => format | |
| 249 -fh => filehandle to attach to | |
| 250 | |
| 251 =cut | |
| 252 | |
| 253 | |
# Factory constructor: works out which format is wanted, loads the matching
# Bio::Variation::IO::<format> class and hands construction over to it.
#
# Args    : key/value pairs; '-format' and '-file' are read here (keys are
#           matched case-insensitively), everything is passed on unchanged
# Returns : whatever Bio::Variation::IO::<format>->new returns, or undef when
#           the format module could not be loaded
sub new {
    my ($class, %param) = @_;

    # Add a lower-cased copy of every key so '-FORMAT' works like '-format'.
    @param{ map { lc $_ } keys %param } = values %param; # lowercase keys

    # Explicit format first, then a guess from the file name (or the first
    # command-line argument), and finally the 'flat' default.
    my $format = $param{'-format'};
    $format ||= $class->_guess_format( $param{-file} || $ARGV[0] );
    $format ||= 'flat';
    $format = lc $format;    # format names are case insensitive

    if ( $class->_load_format_module($format) ) {
        my $handler_class = "Bio::Variation::IO::$format";
        return $handler_class->new(%param);
    }

    return undef;
}
| 267 | |
| 268 | |
# Load the class that implements one format.
#
# Args    : the format name (already lower case when called from new())
# Returns : the result of $class->_load_module for
#           "Bio::Variation::IO::<format>"; undef if loading raised an
#           exception, in which case a diagnostic is printed to STDERR
sub _load_format_module {
    my ($class, $format) = @_;
    my $module_name = join '::', 'Bio::Variation::IO', $format;

    my $loaded;
    eval { $loaded = $class->_load_module($module_name); };

    # Take a copy of the exception straight away, before anything can reset $@.
    my $failure = $@;
    return $loaded unless $failure;

    print STDERR <<"REPORT";
$class: $format cannot be found
Exception $failure
For more information about the IO system please see the IO docs.
This includes ways of checking for formats at compile time, not run time
REPORT
    return $loaded;
}
| 287 | |
| 288 =head2 next | |
| 289 | |
| 290 Title : next | |
| 291 Usage : $seqDiff = $stream->next | |
| 292 Function: reads the next $seqDiff object from the stream | |
| 293 Returns : a Bio::Variation::SeqDiff object | |
| 294 Args : | |
| 295 | |
| 296 =cut | |
| 297 | |
# Placeholder reader for the generic handler class: it has no format of its
# own, so every call reports an error through throw().
sub next {
    my $self = shift;
    return $self->throw("Sorry, you cannot read from a generic Bio::Variation::IO object.");
}
| 302 | |
# Compatibility alias for next(), for callers that use the Bio::SeqIO method
# name. Warns that next() is the preferred method and then delegates to it,
# the same way write_seq() does for write().
#
# Args    : optional argument, passed through to next() unchanged
# Returns : whatever next() returns
sub next_seq {
    my ($self, $seq) = @_;

    # warn(), not throw(): throwing here made the delegation below
    # unreachable, so this alias could never return an object.
    # NOTE(review): the tied-handle READLINE inherited from Bio::SeqIO
    # presumably calls next_seq(); if so, <$fh> depends on this -- confirm.
    $self->warn("These are not sequence objects. Use method 'next' instead of 'next_seq'.");
    return $self->next($seq);
}
| 308 | |
| 309 =head2 write | |
| 310 | |
| 311 Title : write | |
| 312 Usage : $stream->write($seq) | |
| 313 Function: writes the $seq object into the stream | |
| 314 Returns : 1 for success and 0 for error | |
| 315 Args : Bio::Variation::SeqDiff object | |
| 316 | |
| 317 =cut | |
| 318 | |
# Placeholder writer for the generic handler class: it has no format of its
# own, so every call reports an error through throw().
sub write {
    my $self = shift;
    return $self->throw("Sorry, you cannot write to a generic Bio::Variation::IO object.");
}
| 323 | |
# Compatibility alias for write(), for callers that use the Bio::SeqIO method
# name: warns that write() is the preferred method, then delegates to it.
#
# Args    : the object to write (only the first argument is passed on)
# Returns : whatever write() returns
sub write_seq {
    my $self = shift;
    my $seq  = shift;

    $self->warn("These are not sequence objects. Use method 'write' instead of 'write_seq'.");
    return $self->write($seq);
}
| 329 | |
| 330 =head2 _guess_format | |
| 331 | |
| 332 Title : _guess_format | |
| 333 Usage : $obj->_guess_format($filename) | |
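| 334 Function: guesses the format from the filename extension (.dat => 'flat', .xml => 'xml') | |
| 335 Example : $format = Bio::Variation::IO->_guess_format('variations.xml'); # 'xml' | |
| 336 Returns : guessed format of filename (lower case) | |
| 337 Args : filename | |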
| 338 | |
| 339 =cut | |
| 340 | |
# Guess the stream format from a filename extension.
#
# Args    : the filename to inspect (the invocant is not used)
# Returns : 'flat' for names ending in .dat, 'xml' for names ending in .xml
#           (both matched case-insensitively); undef / the empty list when no
#           filename is given or the extension is not recognised
sub _guess_format {
    my ($class, $filename) = @_;

    # Use a lexical instead of assigning to the global $_: that assignment
    # would overwrite the caller's $_, including an aliased for/map element.
    return unless $filename;

    return 'flat' if $filename =~ /\.dat$/i;
    return 'xml'  if $filename =~ /\.xml$/i;

    # Explicit "no guess", so the result never depends on the value of the
    # last evaluated expression.
    return;
}
| 347 | |
| 348 | |
| 349 1; |
