comparison variant_effect_predictor/Bio/Seq/LargePrimarySeq.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: LargePrimarySeq.pm,v 1.27 2002/12/01 00:05:21 jason Exp $
2 #
3 # BioPerl module for Bio::Seq::LargePrimarySeq
4 #
5 # Cared for by Ewan Birney <birney@ebi.ac.uk>
6 #
7 # Copyright Ewan Birney
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # updated to utilize File::Temp - jason 2000-12-12
12 # POD documentation - main docs before the code
13
14 =head1 NAME
15
16 Bio::Seq::LargePrimarySeq - PrimarySeq object that stores sequence as
17 files in the tempdir (as found by File::Temp) or the default method in
18 Bio::Root::Root
19
20 =head1 SYNOPSIS
21
22 # normal primary seq usage
23
24 =head1 DESCRIPTION
25
26 This object stores a sequence as a series of files in a temporary
27 directory. The aim is to allow someone the ability to store very large
28 sequences (eg, E<gt> 100MBases) in a file system without running out of memory
29 (eg, on a 64 MB real memory machine!).
30
31 Of course, to actually make use of this functionality, the programs
32 which use this object B<must> not call $primary_seq-E<gt>seq otherwise the
33 entire sequence will come out into memory and probably paste your
34 machine. However, calls $primary_seq-E<gt>subseq(10,100) will cause only
35 90 characters to be brought into real memory.
36
37 =head1 FEEDBACK
38
39 =head2 Mailing Lists
40
41 User feedback is an integral part of the evolution of this
42 and other Bioperl modules. Send your comments and suggestions preferably
43 to one of the Bioperl mailing lists.
44 Your participation is much appreciated.
45
46 bioperl-l@bioperl.org - General discussion
47 http://bio.perl.org/MailList.html - About the mailing lists
48
49 =head2 Reporting Bugs
50
51 Report bugs to the Bioperl bug tracking system to help us keep track
52 the bugs and their resolution. Bug reports can be submitted via email
53 or the web:
54
55 bioperl-bugs@bio.perl.org
56 http://bugzilla.bioperl.org/
57
58 =head1 AUTHOR - Ewan Birney, Jason Stajich
59
60 Email birney@ebi.ac.uk
61 Email jason@bioperl.org
62
63 =head1 APPENDIX
64
65 The rest of the documentation details each of the object
66 methods. Internal methods are usually preceded with a _
67
68 =cut
69
70
71 # Let the code begin...
72
73
74 package Bio::Seq::LargePrimarySeq;
75 use vars qw($AUTOLOAD @ISA);
76 use strict;
77
78 use Bio::PrimarySeq;
79 use Bio::Root::IO;
80
81 @ISA = qw(Bio::PrimarySeq Bio::Root::IO);
82
83 sub new {
84 my ($class, %params) = @_;
85
86 # don't let PrimarySeq set seq until we have
87 # opened filehandle
88
89 my $seq = $params{'-seq'} || $params{'-SEQ'};
90 if($seq ) {
91 delete $params{'-seq'};
92 delete $params{'-SEQ'};
93 }
94 my $self = $class->SUPER::new(%params);
95 $self->_initialize_io(%params);
96 my $tempdir = $self->tempdir( CLEANUP => 1);
97 my ($tfh,$file) = $self->tempfile( DIR => $tempdir );
98
99 $tfh && $self->_fh($tfh);
100 $file && $self->_filename($file);
101 $self->length(0);
102 $seq && $self->seq($seq);
103
104 return $self;
105 }
106
107
108 sub length {
109 my ($obj,$value) = @_;
110 if( defined $value) {
111 $obj->{'length'} = $value;
112 }
113
114 return (defined $obj->{'length'}) ? $obj->{'length'} : 0;
115 }
116
117 =head2 seq
118
119 Title : seq
120 Usage :
121 Function:
122 Example :
123 Returns :
124 Args :
125
126
127 =cut
128
129 sub seq {
130 my ($self, $data) = @_;
131 if( defined $data ) {
132 if( $self->length() == 0) {
133 $self->add_sequence_as_string($data);
134 } else {
135 $self->warn("Trying to reset the seq string, cannot do this with a LargePrimarySeq - must allocate a new object");
136 }
137 }
138 return $self->subseq(1,$self->length);
139 }
140
141 =head2 subseq
142
143 Title : subseq
144 Usage :
145 Function:
146 Example :
147 Returns :
148 Args :
149
150
151 =cut
152
153 sub subseq{
154 my ($self,$start,$end) = @_;
155 my $string;
156 my $fh = $self->_fh();
157
158 if( ref($start) && $start->isa('Bio::LocationI') ) {
159 my $loc = $start;
160 if( $loc->length == 0 ) {
161 $self->warn("Expect location lengths to be > 0");
162 return '';
163 } elsif( $loc->end < $loc->start ) {
164 # what about circular seqs
165 $self->warn("Expect location start to come before location end");
166 }
167 my $seq = '';
168 if( $loc->isa('Bio::Location::SplitLocationI') ) {
169 foreach my $subloc ( $loc->sub_Location ) {
170 if(! seek($fh,$subloc->start() - 1,0)) {
171 $self->throw("Unable to seek on file $start:$end $!");
172 }
173 my $ret = read($fh, $string, $subloc->length());
174 if( !defined $ret ) {
175 $self->throw("Unable to read $start:$end $!");
176 }
177 if( $subloc->strand < 0 ) {
178 $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
179 }
180 $seq .= $string;
181 }
182 } else {
183 if(! seek($fh,$loc->start()-1,0)) {
184 $self->throw("Unable to seek on file ".$loc->start.":".
185 $loc->end ." $!");
186 }
187 my $ret = read($fh, $string, $loc->length());
188 if( !defined $ret ) {
189 $self->throw("Unable to read ".$loc->start.":".
190 $loc->end ." $!");
191 }
192 $seq = $string;
193 }
194 if( defined $loc->strand &&
195 $loc->strand < 0 ) {
196 $seq = Bio::PrimarySeq->new(-seq => $seq)->revcom()->seq();
197 }
198 return $seq;
199 }
200 if( $start <= 0 || $end > $self->length ) {
201 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
202 $self->length);
203 }
204 if( $end < $start ) {
205 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
206 }
207
208 if(! seek($fh,$start-1,0)) {
209 $self->throw("Unable to seek on file $start:$end $!");
210 }
211 my $ret = read($fh, $string, $end-$start+1);
212 if( !defined $ret ) {
213 $self->throw("Unable to read $start:$end $!");
214 }
215 return $string;
216 }
217
218 =head2 add_sequence_as_string
219
220 Title : add_sequence_as_string
221 Usage : $seq->add_sequence_as_string("CATGAT");
222 Function: Appends additional residues to an existing LargePrimarySeq object.
223 This allows one to build up a large sequence without storing
224 entire object in memory.
225 Returns : Current length of sequence
226 Args : string to append
227
228 =cut
229
230 sub add_sequence_as_string{
231 my ($self,$str) = @_;
232 my $len = $self->length + CORE::length($str);
233 my $fh = $self->_fh();
234 if(! seek($fh,0,2)) {
235 $self->throw("Unable to seek end of file: $!");
236 }
237 $self->_print($str);
238 $self->length($len);
239 }
240
241
242 =head2 _filename
243
244 Title : _filename
245 Usage : $obj->_filename($newval)
246 Function:
247 Example :
248 Returns : value of _filename
249 Args : newvalue (optional)
250
251
252 =cut
253
254 sub _filename{
255 my ($obj,$value) = @_;
256 if( defined $value) {
257 $obj->{'_filename'} = $value;
258 }
259 return $obj->{'_filename'};
260
261 }
262 =head2 alphabet
263
264 Title : alphabet
265 Usage : $obj->alphabet($newval)
266 Function:
267 Example :
268 Returns : value of alphabet
269 Args : newvalue (optional)
270
271
272 =cut
273
274 sub alphabet{
275 my ($self,$value) = @_;
276 if( defined $value) {
277 $self->SUPER::alphabet($value);
278 }
279 return $self->SUPER::alphabet() || 'dna';
280
281 }
282
283 sub DESTROY {
284 my $self = shift;
285 my $fh = $self->_fh();
286 close($fh) if( defined $fh );
287 # this should be handled by Tempfile removal, but we'll unlink anyways.
288 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename;
289 $self->SUPER::DESTROY();
290 }
291
292 1;