Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqIO/largefasta.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: largefasta.pm,v 1.18 2002/12/27 19:42:32 birney Exp $ | |
| 2 # BioPerl module for Bio::SeqIO::largefasta | |
| 3 # | |
| 4 # Cared for by Jason Stajich | |
| 5 # | |
| 6 # Copyright Jason Stajich | |
| 7 # | |
| 8 # You may distribute this module under the same terms as perl itself | |
| 9 # _history | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::SeqIO::largefasta - method i/o on very large fasta sequence files | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 Do not use this module directly. Use it via the Bio::SeqIO class. | |
| 20 | |
| 21 =head1 DESCRIPTION | |
| 22 | |
| 23 This object can transform Bio::Seq objects to and from fasta flat | |
| 24 file databases. | |
| 25 | |
| 26 This module handles very large sequence files by using the | |
| 27 Bio::Seq::LargePrimarySeq module to store all the sequence data in | |
| 28 a file. This can be a problem if you have limited disk space on your | |
| 29 computer because this will effectively cause 2 copies of the sequence | |
| 30 file to reside on disk for the life of the | |
| 31 Bio::Seq::LargePrimarySeq object. The default location for this is | |
| 32 specified by the L<File::Spec>-E<gt>tmpdir routine which is usually /tmp | |
| 33 on UNIX. If a sequence file is larger than the swap space (capacity | |
| 34 of the /tmp dir) this could cause problems for the machine. It is | |
| 35 possible to set the directory where the temporary file is located by | |
| 36 adding the following line to your code BEFORE calling next_seq. See | |
| 37 L<Bio::Seq::LargePrimarySeq> for more information. | |
| 38 | |
| 39 $Bio::Seq::LargePrimarySeq::DEFAULT_TEMP_DIR = 'newdir'; | |
| 40 | |
| 41 =head1 FEEDBACK | |
| 42 | |
| 43 =head2 Mailing Lists | |
| 44 | |
| 45 User feedback is an integral part of the evolution of this and other | |
| 46 Bioperl modules. Send your comments and suggestions preferably to one | |
| 47 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 48 | |
| 49 bioperl-l@bioperl.org - General discussion | |
| 50 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 51 | |
| 52 =head2 Reporting Bugs | |
| 53 | |
| 54 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 55 of the bugs and their resolution. Bug reports can be submitted via email | |
| 56 or the web: | |
| 57 | |
| 58 bioperl-bugs@bio.perl.org | |
| 59 http://bugzilla.bioperl.org/ | |
| 60 | |
| 61 =head1 AUTHORS - Jason Stajich | |
| 62 | |
| 63 Email: jason@bioperl.org | |
| 64 | |
| 65 =head1 APPENDIX | |
| 66 | |
| 67 The rest of the documentation details each of the object | |
| 68 methods. Internal methods are usually preceded with a _ | |
| 69 | |
| 70 =cut | |
| 71 | |
| 72 # Let the code begin... | |
| 73 | |
| 74 package Bio::SeqIO::largefasta; | |
| 75 use vars qw(@ISA $FASTALINELEN); | |
| 76 use strict; | |
| 77 # Object preamble - inherits from Bio::SeqIO | |
| 78 | |
| 79 use Bio::SeqIO; | |
| 80 use Bio::Seq::SeqFactory; | |
| 81 | |
| 82 $FASTALINELEN = 60; | |
| 83 @ISA = qw(Bio::SeqIO); | |
| 84 | |
# Initialise the stream through the parent class, then make sure a
# sequence factory is available for next_seq to build objects with.
#
# Args   : @args - constructor arguments, handed unchanged to the parent
#          class's _initialize.
# Effect : when no sequence factory is defined after the parent has run,
#          installs a Bio::Seq::SeqFactory of type
#          'Bio::Seq::LargePrimarySeq' that shares this stream's verbosity.
sub _initialize {
    my ($self, @args) = @_;
    $self->SUPER::_initialize(@args);

    # Only supply a default: a factory that is already set is left alone.
    if ( !defined $self->sequence_factory ) {
        # Arrow-style constructor call instead of the indirect-object form
        # ("new Class (...)"), whose parse depends on what is in scope.
        $self->sequence_factory(
            Bio::Seq::SeqFactory->new(
                -verbose => $self->verbose(),
                -type    => 'Bio::Seq::LargePrimarySeq',
            )
        );
    }
}
| 94 | |
| 95 =head2 next_seq | |
| 96 | |
| 97 Title : next_seq | |
| 98 Usage : $seq = $stream->next_seq() | |
| 99 Function: returns the next sequence in the stream | |
| 100 Returns : Bio::Seq object | |
| 101 Args : NONE | |
| 102 | |
| 103 =cut | |
| 104 | |
# Read the next FASTA record from the stream.
#
# A fresh object is obtained from the sequence factory, the header line
# supplies its display_id, primary_id and desc, and every following line
# (with all whitespace removed) is appended via add_sequence_as_string.
# Reading stops at the next header line, which is pushed back so the
# following call starts with it.
#
# Args    : none
# Returns : the object created by the sequence factory, or nothing when
#           input ran out before any header line was seen (undef in scalar
#           context, an empty list in list context).
sub next_seq {
    my ($self) = @_;
    my $largeseq = $self->sequence_factory->create();
    my ($id, $fulldesc, $entry);
    my $count = 0;    # lines processed, used only for progress output
    my $seen  = 0;    # true once this record's header has been read

    while ( defined( $entry = $self->_readline ) ) {

        # A header line after we already have one starts the NEXT record:
        # hand it back to the stream and finish the current record.
        if ( $seen == 1 && $entry =~ /^\s*>/ ) {
            $self->_pushback($entry);
            return $largeseq;
        }

        if ( $entry eq '>' ) {
            # A bare '>' with nothing after it: record start, no id.
            $seen = 1;
            next;
        }
        elsif ( $entry =~ /\s*>(.+?)$/ ) {
            $seen = 1;

            # First whitespace-delimited token is the id, the remainder
            # of the line is the description.
            ($id, $fulldesc) = ( $1 =~ /^\s*(\S+)\s*(.*)$/ )
                or $self->warn("Can't parse fasta header");
            $largeseq->display_id($id);
            $largeseq->primary_id($id);
            $largeseq->desc($fulldesc);
        }
        else {
            # Sequence data: drop every whitespace character, newline included.
            $entry =~ s/\s+//g;
            $largeseq->add_sequence_as_string($entry);
        }

        # Progress report every 1000 lines when running verbosely.
        ( ++$count % 1000 == 0 && $self->verbose() > 0 )
            && print "line $count\n";
    }

    # Bare return rather than "return undef": in list context the latter
    # yields a one-element list, so list-context loops never see the end
    # of the stream.
    return if !$seen;
    return $largeseq;
}
| 135 | |
| 136 =head2 write_seq | |
| 137 | |
| 138 Title : write_seq | |
| 139 Usage : $stream->write_seq(@seq) | |
| 140 Function: writes the $seq object into the stream | |
| 141 Returns : 1 for success and 0 for error | |
| 142 Args : one or more Bio::Seq objects | |
| 143 | |
| 144 | |
| 145 =cut | |
| 146 | |
# Write one or more sequence objects to the stream in FASTA format.
#
# Each record is a ">id description" header line followed by the residues
# wrapped at $FASTALINELEN characters per line. The sequence is fetched a
# line at a time with subseq, so the whole sequence is never requested in
# one call.
#
# Args    : @seq - the sequence objects to write; each must provide id,
#           length and subseq, and may provide desc.
# Returns : 1
sub write_seq {
    my ($self, @seq) = @_;

    foreach my $seq (@seq) {
        my $top = $seq->id();

        # Append the description, if there is a non-empty one, with any
        # embedded newlines removed so the header stays on a single line.
        if ( $seq->can('desc') and my $desc = $seq->desc() ) {
            $desc =~ s/\n//g;
            $top .= " $desc";
        }
        $self->_print( ">", $top, "\n" );

        my $end   = $seq->length();
        my $start = 1;

        # "<=" rather than "<": positions are 1-based and inclusive, so
        # when exactly one residue is left for the final line (length 1,
        # $FASTALINELEN + 1, ...) $start equals $end and that residue
        # must still be written.
        while ( $start <= $end ) {
            my $stop = $start + $FASTALINELEN - 1;
            $stop = $end if $stop > $end;
            $self->_print( $seq->subseq( $start, $stop ), "\n" );
            $start += $FASTALINELEN;
        }
    }

    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return 1;
}
| 169 | |
| 170 1; |
