Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Seq/LargePrimarySeq.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: LargePrimarySeq.pm,v 1.27 2002/12/01 00:05:21 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::Seq::LargePrimarySeq | |
4 # | |
5 # Cared for by Ewan Birney <birney@ebi.ac.uk> | |
6 # | |
7 # Copyright Ewan Birney | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # updated to utilize File::Temp - jason 2000-12-12 | |
12 # POD documentation - main docs before the code | |
13 | |
14 =head1 NAME | |
15 | |
16 Bio::Seq::LargePrimarySeq - PrimarySeq object that stores sequence as | |
17 files in the tempdir (as found by File::Temp) or the default method in | |
18 Bio::Root::Root | |
19 | |
20 =head1 SYNOPSIS | |
21 | |
22 # normal primary seq usage | |
23 | |
24 =head1 DESCRIPTION | |
25 | |
26 This object stores a sequence as a series of files in a temporary | |
27 directory. The aim is to allow someone the ability to store very large | |
28 sequences (eg, E<gt> 100MBases) in a file system without running out of memory | |
29 (eg, on a 64 MB real memory machine!). | |
30 | |
31 Of course, to actually make use of this functionality, the programs | |
32 which use this object B<must> not call $primary_seq-E<gt>seq otherwise the | |
33 entire sequence will come out into memory and probably paste your | |
34 machine. However, calls $primary_seq-E<gt>subseq(10,100) will cause only | |
35 90 characters to be brought into real memory. | |
36 | |
37 =head1 FEEDBACK | |
38 | |
39 =head2 Mailing Lists | |
40 | |
41 User feedback is an integral part of the evolution of this | |
42 and other Bioperl modules. Send your comments and suggestions preferably | |
43 to one of the Bioperl mailing lists. | |
44 Your participation is much appreciated. | |
45 | |
46 bioperl-l@bioperl.org - General discussion | |
47 http://bio.perl.org/MailList.html - About the mailing lists | |
48 | |
49 =head2 Reporting Bugs | |
50 | |
51 Report bugs to the Bioperl bug tracking system to help us keep track | |
52 the bugs and their resolution. Bug reports can be submitted via email | |
53 or the web: | |
54 | |
55 bioperl-bugs@bio.perl.org | |
56 http://bugzilla.bioperl.org/ | |
57 | |
58 =head1 AUTHOR - Ewan Birney, Jason Stajich | |
59 | |
60 Email birney@ebi.ac.uk | |
61 Email jason@bioperl.org | |
62 | |
63 =head1 APPENDIX | |
64 | |
65 The rest of the documentation details each of the object | |
66 methods. Internal methods are usually preceded with a _ | |
67 | |
68 =cut | |
69 | |
70 | |
71 # Let the code begin... | |
72 | |
73 | |
74 package Bio::Seq::LargePrimarySeq; | |
75 use vars qw($AUTOLOAD @ISA); | |
76 use strict; | |
77 | |
78 use Bio::PrimarySeq; | |
79 use Bio::Root::IO; | |
80 | |
81 @ISA = qw(Bio::PrimarySeq Bio::Root::IO); | |
82 | |
83 sub new { | |
84 my ($class, %params) = @_; | |
85 | |
86 # don't let PrimarySeq set seq until we have | |
87 # opened filehandle | |
88 | |
89 my $seq = $params{'-seq'} || $params{'-SEQ'}; | |
90 if($seq ) { | |
91 delete $params{'-seq'}; | |
92 delete $params{'-SEQ'}; | |
93 } | |
94 my $self = $class->SUPER::new(%params); | |
95 $self->_initialize_io(%params); | |
96 my $tempdir = $self->tempdir( CLEANUP => 1); | |
97 my ($tfh,$file) = $self->tempfile( DIR => $tempdir ); | |
98 | |
99 $tfh && $self->_fh($tfh); | |
100 $file && $self->_filename($file); | |
101 $self->length(0); | |
102 $seq && $self->seq($seq); | |
103 | |
104 return $self; | |
105 } | |
106 | |
107 | |
108 sub length { | |
109 my ($obj,$value) = @_; | |
110 if( defined $value) { | |
111 $obj->{'length'} = $value; | |
112 } | |
113 | |
114 return (defined $obj->{'length'}) ? $obj->{'length'} : 0; | |
115 } | |
116 | |
117 =head2 seq | |
118 | |
119 Title : seq | |
120 Usage : | |
121 Function: | |
122 Example : | |
123 Returns : | |
124 Args : | |
125 | |
126 | |
127 =cut | |
128 | |
129 sub seq { | |
130 my ($self, $data) = @_; | |
131 if( defined $data ) { | |
132 if( $self->length() == 0) { | |
133 $self->add_sequence_as_string($data); | |
134 } else { | |
135 $self->warn("Trying to reset the seq string, cannot do this with a LargePrimarySeq - must allocate a new object"); | |
136 } | |
137 } | |
138 return $self->subseq(1,$self->length); | |
139 } | |
140 | |
141 =head2 subseq | |
142 | |
143 Title : subseq | |
144 Usage : | |
145 Function: | |
146 Example : | |
147 Returns : | |
148 Args : | |
149 | |
150 | |
151 =cut | |
152 | |
153 sub subseq{ | |
154 my ($self,$start,$end) = @_; | |
155 my $string; | |
156 my $fh = $self->_fh(); | |
157 | |
158 if( ref($start) && $start->isa('Bio::LocationI') ) { | |
159 my $loc = $start; | |
160 if( $loc->length == 0 ) { | |
161 $self->warn("Expect location lengths to be > 0"); | |
162 return ''; | |
163 } elsif( $loc->end < $loc->start ) { | |
164 # what about circular seqs | |
165 $self->warn("Expect location start to come before location end"); | |
166 } | |
167 my $seq = ''; | |
168 if( $loc->isa('Bio::Location::SplitLocationI') ) { | |
169 foreach my $subloc ( $loc->sub_Location ) { | |
170 if(! seek($fh,$subloc->start() - 1,0)) { | |
171 $self->throw("Unable to seek on file $start:$end $!"); | |
172 } | |
173 my $ret = read($fh, $string, $subloc->length()); | |
174 if( !defined $ret ) { | |
175 $self->throw("Unable to read $start:$end $!"); | |
176 } | |
177 if( $subloc->strand < 0 ) { | |
178 $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq(); | |
179 } | |
180 $seq .= $string; | |
181 } | |
182 } else { | |
183 if(! seek($fh,$loc->start()-1,0)) { | |
184 $self->throw("Unable to seek on file ".$loc->start.":". | |
185 $loc->end ." $!"); | |
186 } | |
187 my $ret = read($fh, $string, $loc->length()); | |
188 if( !defined $ret ) { | |
189 $self->throw("Unable to read ".$loc->start.":". | |
190 $loc->end ." $!"); | |
191 } | |
192 $seq = $string; | |
193 } | |
194 if( defined $loc->strand && | |
195 $loc->strand < 0 ) { | |
196 $seq = Bio::PrimarySeq->new(-seq => $seq)->revcom()->seq(); | |
197 } | |
198 return $seq; | |
199 } | |
200 if( $start <= 0 || $end > $self->length ) { | |
201 $self->throw("Attempting to get a subseq out of range $start:$end vs ". | |
202 $self->length); | |
203 } | |
204 if( $end < $start ) { | |
205 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc"); | |
206 } | |
207 | |
208 if(! seek($fh,$start-1,0)) { | |
209 $self->throw("Unable to seek on file $start:$end $!"); | |
210 } | |
211 my $ret = read($fh, $string, $end-$start+1); | |
212 if( !defined $ret ) { | |
213 $self->throw("Unable to read $start:$end $!"); | |
214 } | |
215 return $string; | |
216 } | |
217 | |
218 =head2 add_sequence_as_string | |
219 | |
220 Title : add_sequence_as_string | |
221 Usage : $seq->add_sequence_as_string("CATGAT"); | |
222 Function: Appends additional residues to an existing LargePrimarySeq object. | |
223 This allows one to build up a large sequence without storing | |
224 entire object in memory. | |
225 Returns : Current length of sequence | |
226 Args : string to append | |
227 | |
228 =cut | |
229 | |
230 sub add_sequence_as_string{ | |
231 my ($self,$str) = @_; | |
232 my $len = $self->length + CORE::length($str); | |
233 my $fh = $self->_fh(); | |
234 if(! seek($fh,0,2)) { | |
235 $self->throw("Unable to seek end of file: $!"); | |
236 } | |
237 $self->_print($str); | |
238 $self->length($len); | |
239 } | |
240 | |
241 | |
242 =head2 _filename | |
243 | |
244 Title : _filename | |
245 Usage : $obj->_filename($newval) | |
246 Function: | |
247 Example : | |
248 Returns : value of _filename | |
249 Args : newvalue (optional) | |
250 | |
251 | |
252 =cut | |
253 | |
254 sub _filename{ | |
255 my ($obj,$value) = @_; | |
256 if( defined $value) { | |
257 $obj->{'_filename'} = $value; | |
258 } | |
259 return $obj->{'_filename'}; | |
260 | |
261 } | |
262 =head2 alphabet | |
263 | |
264 Title : alphabet | |
265 Usage : $obj->alphabet($newval) | |
266 Function: | |
267 Example : | |
268 Returns : value of alphabet | |
269 Args : newvalue (optional) | |
270 | |
271 | |
272 =cut | |
273 | |
274 sub alphabet{ | |
275 my ($self,$value) = @_; | |
276 if( defined $value) { | |
277 $self->SUPER::alphabet($value); | |
278 } | |
279 return $self->SUPER::alphabet() || 'dna'; | |
280 | |
281 } | |
282 | |
283 sub DESTROY { | |
284 my $self = shift; | |
285 my $fh = $self->_fh(); | |
286 close($fh) if( defined $fh ); | |
287 # this should be handled by Tempfile removal, but we'll unlink anyways. | |
288 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename; | |
289 $self->SUPER::DESTROY(); | |
290 } | |
291 | |
292 1; |