Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqIO/scf.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: scf.pm,v 1.23 2002/11/01 11:16:25 heikki Exp $ | |
2 # | |
3 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved. | |
4 # This module is free software; you can redistribute it and/or | |
5 # modify it under the same terms as Perl itself. | |
6 # | |
7 # Copyright Chad Matsalla | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::SeqIO::scf - .scf file input/output stream | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 Do not use this module directly. Use it via the Bio::SeqIO class, see | |
20 L<Bio::SeqIO> for more information. | |
21 | |
22 =head1 DESCRIPTION | |
23 | |
24 This object can transform .scf files to and from | |
25 Bio::Seq::SeqWithQuality objects. Mechanisms are present to retrieve | |
26 trace data from scf files. | |
27 | |
28 =head1 FEEDBACK | |
29 | |
30 =head2 Mailing Lists | |
31 | |
32 | |
33 User feedback is an integral part of the evolution of this and other | |
34 Bioperl modules. Send your comments and suggestions preferably to one | |
35 of the Bioperl mailing lists. Your participation is much appreciated. | |
36 | |
37 bioperl-l@bioperl.org - General discussion | |
38 http://www.bioperl.org/MailList.shtml - About the mailing lists | |
39 | |
40 =head2 Reporting Bugs | |
41 | |
42 Report bugs to the Bioperl bug tracking system to help us keep track | |
43 the bugs and their resolution. Bug reports can be submitted via email | |
44 or the web: | |
45 | |
46 bioperl-bugs@bio.perl.org | |
47 http://bugzilla.bioperl.org/ | |
48 | |
49 =head1 AUTHOR Chad Matsalla | |
50 | |
51 Chad Matsalla | |
52 bioinformatics@dieselwurks.com | |
53 | |
54 =head1 CONTRIBUTORS | |
55 | |
56 Jason Stajich, jason@bioperl.org | |
57 Tony Cox, avc@sanger.ac.uk | |
58 Heikki Lehvaslaiho, heikki@ebi.ac.uk | |
59 | |
60 =head1 APPENDIX | |
61 | |
62 The rest of the documentation details each of the object | |
63 methods. Internal methods are usually preceded with a _ | |
64 | |
65 =cut | |
66 | |
67 # Let the code begin... | |
68 | |
69 package Bio::SeqIO::scf; | |
70 use vars qw(@ISA $DEFAULT_QUALITY); | |
71 use strict; | |
72 use Bio::SeqIO; | |
73 use Bio::Seq::SeqFactory; | |
74 require 'dumpvar.pl'; | |
75 | |
76 BEGIN { | |
77 $DEFAULT_QUALITY= 10; | |
78 } | |
79 | |
80 @ISA = qw(Bio::SeqIO); | |
81 | |
82 sub _initialize { | |
83 my($self,@args) = @_; | |
84 $self->SUPER::_initialize(@args); | |
85 if( ! defined $self->sequence_factory ) { | |
86 $self->sequence_factory(new Bio::Seq::SeqFactory | |
87 (-verbose => $self->verbose(), | |
88 -type => 'Bio::Seq::SeqWithQuality')); | |
89 } | |
90 } | |
91 | |
92 =head2 next_seq() | |
93 | |
94 Title : next_seq() | |
95 Usage : $scf = $stream->next_seq() | |
96 Function: returns the next scf sequence in the stream | |
97 Returns : Bio::Seq::SeqWithQuality object | |
98 Args : NONE | |
99 Notes : Fills the interface specification for SeqIO. | |
100 The SCF specification does not provide for having more then | |
101 one sequence in a given scf. So once the filehandle has been open | |
102 and passed to SeqIO don't expect to run this function more then | |
103 once on a given scf unless you embraced and extended the SCF | |
104 standard. (But that's just C R A Z Y talk, isn't it.) | |
105 | |
106 =cut | |
107 | |
108 #' | |
109 sub next_seq { | |
110 my ($self) = @_; | |
111 my ($seq, $seqc, $fh, $buffer, $offset, $length, $read_bytes, @read, | |
112 %names); | |
113 # set up a filehandle to read in the scf | |
114 $fh = $self->_filehandle(); | |
115 unless ($fh) { # simulate the <> function | |
116 if ( !fileno(ARGV) or eof(ARGV) ) { | |
117 return unless my $ARGV = shift; | |
118 open(ARGV,$ARGV) or | |
119 $self->throw("Could not open $ARGV for SCF stream reading $!"); | |
120 } | |
121 $fh = \*ARGV; | |
122 } | |
123 binmode $fh; # for the Win32/Mac crowds | |
124 return unless read $fh, $buffer, 128; # no exception; probably end of file | |
125 # the first thing to do is parse the header. This is common | |
126 # among all versions of scf. | |
127 $self->_set_header($buffer); | |
128 # the rest of the the information is different between the | |
129 # the different versions of scf. | |
130 my $byte = "n"; | |
131 if ($self->{'version'} lt "3.00") { | |
132 # first gather the trace information | |
133 $length = $self->{'samples'}*$self->{sample_size}*4; | |
134 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
135 if ($self->{sample_size} == 1) { | |
136 $byte = "c"; | |
137 } | |
138 @read = unpack "${byte}${length}",$buffer; | |
139 # these traces need to be split | |
140 $self->_set_v2_traces(\@read); | |
141 # now go and get the base information | |
142 $offset = $self->{bases_offset}; | |
143 $length = ($self->{bases} * 12); | |
144 seek $fh,$offset,0; | |
145 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
146 # now distill the information into its fractions. | |
147 $self->_set_v2_bases($buffer); | |
148 } else { | |
149 my $transformed_read; | |
150 foreach (qw(A C G T)) { | |
151 $length = $self->{'samples'}*$self->{sample_size}; | |
152 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
153 if ($self->{sample_size} == 1) { | |
154 $byte = "c"; | |
155 } | |
156 @read = unpack "${byte}${length}",$buffer; | |
157 # this little spurt of nonsense is because | |
158 # the trace values are given in the binary | |
159 # file as unsigned shorts but they really | |
160 # are signed. 30000 is an arbitrary number | |
161 # (will there be any traces with a given | |
162 # point greater then 30000? I hope not. | |
163 # once the read is read, it must be changed | |
164 # from relative | |
165 for (my $element=0; $element < scalar(@read); $element++) { | |
166 if ($read[$element] > 30000) { | |
167 $read[$element] = $read[$element] - 65536; | |
168 } | |
169 } | |
170 $transformed_read = $self->_delta(\@read,"backward"); | |
171 $self->{'traces'}->{$_} = join(' ',@{$transformed_read}); | |
172 } | |
173 # now go and get the peak index information | |
174 $offset = $self->{bases_offset}; | |
175 $length = ($self->{bases} * 4); | |
176 seek $fh,$offset,0; | |
177 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
178 $self->_set_v3_peak_indices($buffer); | |
179 # now go and get the accuracy information | |
180 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
181 $self->_set_v3_base_accuracies($buffer); | |
182 # OK, now go and get the base information. | |
183 $length = $self->{bases}; | |
184 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
185 $self->{'parsed'}->{'sequence'} = unpack("a$length",$buffer); | |
186 # now, finally, extract the calls from the accuracy information. | |
187 $self->_set_v3_quality($self); | |
188 } | |
189 # now go and get the comment information | |
190 $offset = $self->{comments_offset}; | |
191 seek $fh,$offset,0; | |
192 $length = $self->{comment_size}; | |
193 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
194 $self->_set_comments($buffer); | |
195 return $self->sequence_factory->create | |
196 (-seq => $self->{'parsed'}->{'sequence'}, | |
197 -qual => $self->{'parsed'}->{'qualities'}, | |
198 -id => $self->{'comments'}->{'NAME'} | |
199 ); | |
200 } | |
201 | |
202 | |
203 =head2 _set_v3_quality() | |
204 | |
205 Title : _set_v3_quality() | |
206 Usage : $self->_set_v3_quality() | |
207 Function: Set the base qualities from version3 scf's | |
208 Returns : Nothing. Alters $self. | |
209 Args : None. | |
210 Notes : | |
211 | |
212 =cut | |
213 | |
214 #' | |
215 sub _set_v3_quality { | |
216 my $self = shift; | |
217 my @bases = split//,$self->{'parsed'}->{'sequence'}; | |
218 my (@qualities,$currbase,$currqual,$counter); | |
219 for ($counter=0; $counter <= $#bases ; $counter++) { | |
220 $currbase = uc($bases[$counter]); | |
221 if ($currbase eq "A") { $currqual = $self->{'parsed'}->{'base_accuracies'}->{'A'}->[$counter]; } | |
222 elsif ($currbase eq "C") { $currqual = $self->{'parsed'}->{'base_accuracies'}->{'C'}->[$counter]; } | |
223 elsif ($currbase eq "G") { $currqual = $self->{'parsed'}->{'base_accuracies'}->{'G'}->[$counter]; } | |
224 elsif ($currbase eq "T") { $currqual = $self->{'parsed'}->{'base_accuracies'}->{'T'}->[$counter]; } | |
225 else { $currqual = "unknown"; } | |
226 push @qualities,$currqual; | |
227 } | |
228 $self->{'parsed'}->{'qualities'} = \@qualities; | |
229 } | |
230 | |
231 =head2 _set_v3_peak_indices($buffer) | |
232 | |
233 Title : _set_v3_peak_indices($buffer) | |
234 Usage : $self->_set_v3_peak_indices($buffer); | |
235 Function: Unpacks the base accuracies for version3 scf | |
236 Returns : Nothing. Alters $self | |
237 Args : A scalar containing binary data. | |
238 Notes : | |
239 | |
240 =cut | |
241 | |
242 sub _set_v3_peak_indices { | |
243 my ($self,$buffer) = @_; | |
244 my $length = length($buffer); | |
245 my ($offset,@read,@positions); | |
246 @read = unpack "N$length",$buffer; | |
247 $self->{'parsed'}->{'peak_indices'} = join(' ',@read); | |
248 } | |
249 | |
250 =head2 _set_v3_base_accuracies($buffer) | |
251 | |
252 Title : _set_v3_base_accuracies($buffer) | |
253 Usage : $self->_set_v3_base_accuracies($buffer) | |
254 Function: Set the base accuracies for version 3 scf's | |
255 Returns : Nothing. Alters $self. | |
256 Args : A scalar containing binary data. | |
257 Notes : | |
258 | |
259 =cut | |
260 | |
261 #' | |
262 sub _set_v3_base_accuracies { | |
263 my ($self,$buffer) = @_; | |
264 my $length = length($buffer); | |
265 my $qlength = $length/4; | |
266 my $offset = 0; | |
267 my (@qualities,@sorter,$counter,$round,$last_base); | |
268 foreach (qw(A C G T)) { | |
269 my @read; | |
270 $last_base = $offset + $qlength; | |
271 for (;$offset < $last_base; $offset += $qlength) { | |
272 @read = unpack "c$qlength", substr($buffer,$offset,$qlength); | |
273 $self->{'parsed'}->{'base_accuracies'}->{"$_"} = \@read; | |
274 } | |
275 } | |
276 } | |
277 | |
278 | |
279 =head2 _set_comments($buffer) | |
280 | |
281 Title : _set_comments($buffer) | |
282 Usage : $self->_set_comments($buffer); | |
283 Function: Gather the comments section from the scf and parse it into its | |
284 components. | |
285 Returns : Nothing. Modifies $self. | |
286 Args : The buffer. It is expected that the buffer contains a binary | |
287 string for the comments section of an scf file according to | |
288 the scf file specifications. | |
289 Notes : None. Works like Jello. | |
290 | |
291 =cut | |
292 | |
293 sub _set_comments { | |
294 my ($self,$buffer) = @_; | |
295 my $size = length($buffer); | |
296 my $comments_retrieved = unpack "a$size",$buffer; | |
297 $comments_retrieved =~ s/\0//; | |
298 my @comments_split = split/\n/,$comments_retrieved; | |
299 if (@comments_split) { | |
300 foreach (@comments_split) { | |
301 /(\w+)=(.*)/; | |
302 if ($1 && $2) { | |
303 $self->{'comments'}->{$1} = $2; | |
304 } | |
305 } | |
306 } | |
307 return; | |
308 } | |
309 | |
310 =head2 _set_header() | |
311 | |
312 Title : _set_header($buffer) | |
313 Usage : $self->_set_header($buffer); | |
314 Function: Gather the header section from the scf and parse it into its | |
315 components. | |
316 Returns : Nothing. Modifies $self. | |
317 Args : The buffer. It is expected that the buffer contains a binary | |
318 string for the header section of an scf file according to the | |
319 scf file specifications. | |
320 Notes : None. | |
321 | |
322 =cut | |
323 | |
324 sub _set_header { | |
325 my ($self,$buffer) = @_; | |
326 ($self->{'scf'}, | |
327 $self->{'samples'}, | |
328 $self->{'sample_offset'}, | |
329 $self->{'bases'}, | |
330 $self->{'bases_left_clip'}, | |
331 $self->{'bases_right_clip'}, | |
332 $self->{'bases_offset'}, | |
333 $self->{'comment_size'}, | |
334 $self->{'comments_offset'}, | |
335 $self->{'version'}, | |
336 $self->{'sample_size'}, | |
337 $self->{'code_set'}, | |
338 @{$self->{'header_spare'}} ) = unpack "a4 NNNNNNNN a4 NN N20", $buffer; | |
339 return; | |
340 | |
341 } | |
342 | |
343 =head2 _set_v2_bases($buffer) | |
344 | |
345 Title : _set_v2_bases($buffer) | |
346 Usage : $self->_set_v2_bases($buffer); | |
347 Function: Gather the bases section from the scf and parse it into its | |
348 components. | |
349 Returns : Nothing. Modifies $self. | |
350 Args : The buffer. It is expected that the buffer contains a binary | |
351 string for the bases section of an scf file according to the | |
352 scf file specifications. | |
353 Notes : None. | |
354 | |
355 =cut | |
356 | |
357 sub _set_v2_bases { | |
358 my ($self,$buffer) = @_; | |
359 my $length = length($buffer); | |
360 my ($offset2,$currbuff,$currbase,$currqual,$sequence,@qualities,@indices); | |
361 my @read; | |
362 for ($offset2=0;$offset2<$length;$offset2+=12) { | |
363 @read = unpack "N C C C C a C3", substr($buffer,$offset2,$length); | |
364 push @indices,$read[0]; | |
365 $currbase = uc($read[5]); | |
366 if ($currbase eq "A") { $currqual = $read[1]; } | |
367 elsif ($currbase eq "C") { $currqual = $read[2]; } | |
368 elsif ($currbase eq "G") { $currqual = $read[3]; } | |
369 elsif ($currbase eq "T") { $currqual = $read[4]; } | |
370 else { $currqual = "UNKNOWN"; } | |
371 $sequence .= $currbase; | |
372 push @qualities,$currqual; | |
373 } | |
374 unless (!@indices) { | |
375 $self->{'parsed'}->{'peak_indices'} = join(' ',@indices); | |
376 } | |
377 $self->{'parsed'}->{'sequence'} = $sequence; | |
378 unless (!@qualities) { | |
379 $self->{'parsed'}->{'qualities'} = join(' ',@qualities); | |
380 } | |
381 } | |
382 | |
383 =head2 _set_v2_traces(\@traces_array) | |
384 | |
385 Title : _set_v2_traces(\@traces_array) | |
386 Usage : $self->_set_v2_traces(\@traces_array); | |
387 Function: Parses an scf Version2 trace array into its base components. | |
388 Returns : Nothing. Modifies $self. | |
389 Args : A reference to an array of the unpacked traces section of an | |
390 scf version2 file. | |
391 | |
392 =cut | |
393 | |
394 sub _set_v2_traces { | |
395 my ($self,$rread) = @_; | |
396 my @read = @$rread; | |
397 my $array = 0; | |
398 for (my $offset2 = 0; $offset2< scalar(@read); $offset2+=4) { | |
399 if ($array) { | |
400 push @{$self->{'traces'}->{'A'}},$read[$offset2]; | |
401 push @{$self->{'traces'}->{'C'}},$read[$offset2+1]; | |
402 push @{$self->{'traces'}->{'G'}},$read[$offset2+3]; | |
403 push @{$self->{'traces'}->{'T'}},$read[$offset2+2]; | |
404 } else { | |
405 $self->{'traces'}->{'A'} .= " ".$read[$offset2]; | |
406 $self->{'traces'}->{'C'} .= " ".$read[$offset2+1]; | |
407 $self->{'traces'}->{'G'} .= " ".$read[$offset2+2]; | |
408 $self->{'traces'}->{'T'} .= " ".$read[$offset2+3]; | |
409 } | |
410 } | |
411 return; | |
412 } | |
413 | |
414 =head2 get_trace($base_channel) | |
415 | |
416 Title : get_trace($base_channel) | |
417 Usage : @a_trace = @{$obj->get_trace("A")}; | |
418 Function: Return the trace data for the given base. | |
419 Returns : A reference to an array containing the trace data for the | |
420 given base. | |
421 Args : A,C,G, or T. Any other input throws. | |
422 Notes : | |
423 | |
424 =cut | |
425 | |
426 sub get_trace { | |
427 my ($self,$base_channel) = @_; | |
428 $base_channel =~ tr/a-z/A-Z/; | |
429 if ($base_channel !~ /A|T|G|C/) { | |
430 $self->throw("You tried to ask for a base channel that wasn't A,T,G, or C. Ask for one of those next time."); | |
431 } elsif ($base_channel) { | |
432 my @temp = split(' ',$self->{'traces'}->{$base_channel}); | |
433 return \@temp; | |
434 } | |
435 } | |
436 | |
437 =head2 get_peak_indices() | |
438 | |
439 Title : get_peak_indices() | |
440 Usage : @a_trace = @{$obj->get_peak_indices()}; | |
441 Function: Return the peak indices for this scf. | |
442 Returns : A reference to an array containing the peak indices for this scf. | |
443 Args : None. | |
444 Notes : | |
445 | |
446 =cut | |
447 | |
448 sub get_peak_indices { | |
449 my ($self) = shift; | |
450 my @temp = split(' ',$self->{'parsed'}->{'peak_indices'}); | |
451 return \@temp; | |
452 } | |
453 | |
454 | |
455 =head2 get_header() | |
456 | |
457 Title : get_header() | |
458 Usage : %header = %{$obj->get_header()}; | |
459 Function: Return the header for this scf. | |
460 Returns : A reference to a hash containing the header for this scf. | |
461 Args : None. | |
462 Notes : | |
463 | |
464 =cut | |
465 | |
466 sub get_header { | |
467 my ($self) = shift; | |
468 my %header; | |
469 foreach (qw(scf samples sample_offset bases bases_left_clip | |
470 bases_right_clip bases_offset comment_size comments_offset | |
471 version sample_size code_set peak_indices)) { | |
472 $header{"$_"} = $self->{"$_"}; | |
473 } | |
474 return \%header; | |
475 } | |
476 | |
477 =head2 _dump_traces_incoming($transformed) | |
478 | |
479 Title : _dump_traces_incoming("transformed") | |
480 Usage : &_dump_traces($ra,$rc,$rg,$rt); | |
481 Function: Used in debugging. Prints all traces one beside each other. | |
482 Returns : Nothing. | |
483 Args : References to the arrays containing the traces for A,C,G,T. | |
484 Notes : Beats using dumpValue, I'll tell ya. Much better then using | |
485 join' ' too. | |
486 - if a scalar is included as an argument (any scalar), this | |
487 procedure will dump the _delta'd trace. If you don't know what | |
488 that means you should not be using this. | |
489 | |
490 =cut | |
491 | |
492 #' | |
493 sub _dump_traces_incoming { | |
494 my ($self) = @_; | |
495 my (@sA,@sT,@sG,@sC); | |
496 # @sA = @{$self->{'traces'}->{'A'}}; | |
497 # @sC = @{$self->{'traces'}->{'C'}}; | |
498 # @sG = @{$self->{'traces'}->{'G'}}; | |
499 # @sT = @{$self->{'traces'}->{'T'}}; | |
500 @sA = @{$self->get_trace('A')}; | |
501 @sC = @{$self->get_trace('C')}; | |
502 @sG = @{$self->get_trace('G')}; | |
503 @sT = @{$self->get_trace('t')}; | |
504 print ("Count\ta\tc\tg\tt\n"); | |
505 for (my $curr=0; $curr < scalar(@sG); $curr++) { | |
506 print("$curr\t$sA[$curr]\t$sC[$curr]\t$sG[$curr]\t$sT[$curr]\n"); | |
507 } | |
508 return; | |
509 } | |
510 | |
511 =head2 _dump_traces_outgoing($transformed) | |
512 | |
513 Title : _dump_traces_outgoing("transformed") | |
514 Usage : &_dump_traces_outgoing(($ra,$rc,$rg,$rt); | |
515 Function: Used in debugging. Prints all traces one beside each other. | |
516 Returns : Nothing. | |
517 Args : References to the arrays containing the traces for A,C,G,T. | |
518 Notes : Beats using dumpValue, I\'ll tell ya. Much better then using | |
519 join' ' too. | |
520 - if a scalar is included as an argument (any scalar), this | |
521 procedur will dump the _delta'd trace. If you don't know what | |
522 that means you should not be using this. | |
523 | |
524 =cut | |
525 | |
526 sub _dump_traces_outgoing { | |
527 my ($self,$transformed) = @_; | |
528 my (@sA,@sT,@sG,@sC); | |
529 if ($transformed) { | |
530 @sA = @{$self->{'text'}->{'t_samples_a'}}; | |
531 @sC = @{$self->{'text'}->{'t_samples_c'}}; | |
532 @sG = @{$self->{'text'}->{'t_samples_g'}}; | |
533 @sT = @{$self->{'text'}->{'t_samples_t'}}; | |
534 } | |
535 else { | |
536 @sA = @{$self->{'text'}->{'samples_a'}}; | |
537 @sC = @{$self->{'text'}->{'samples_c'}}; | |
538 @sG = @{$self->{'text'}->{'samples_g'}}; | |
539 @sT = @{$self->{'text'}->{'samples_t'}}; | |
540 } | |
541 print ("Count\ta\tc\tg\tt\n"); | |
542 for (my $curr=0; $curr < scalar(@sG); $curr++) { | |
543 print("$curr\t$sA[$curr]\t$sC[$curr]\t$sG[$curr]\t$sT[$curr]\n"); | |
544 } | |
545 return; | |
546 } | |
547 | |
548 =head2 write_seq | |
549 | |
550 Title : write_seq(-SeqWithQuality => $swq, <comments>) | |
551 Usage : $obj->write_seq( -SeqWithQuality => $swq, | |
552 -version => 2, | |
553 -CONV => "Bioperl-Chads Mighty SCF writer."); | |
554 Function: Write out an scf. | |
555 Returns : Nothing. | |
556 Args : Requires: a reference to a SeqWithQuality object to form the | |
557 basis for the scf. | |
558 if -version is provided, it should be "2" or "3". A SCF of that | |
559 version will be written. | |
560 Any other arguments are assumed to be comments and are put into | |
561 the comments section of the scf. Read the specifications for scf | |
562 to decide what might be good to put in here. | |
563 | |
564 Notes : | |
565 Someday: (All of this stuff is easy easy easy I just don't have | |
566 the requirement or the time.) | |
567 - Change the peak scaling factor? | |
568 - Change the width of the peak? | |
569 - Change the overlap between peaks? | |
570 | |
571 =cut | |
572 | |
573 #' | |
574 sub write_seq { | |
575 my ($self,%args) = @_; | |
576 my %comments; | |
577 my ($label,$arg); | |
578 | |
579 my ($swq) = $self->_rearrange([qw(SEQWITHQUALITY)], %args); | |
580 unless (ref($swq) eq "Bio::Seq::SeqWithQuality") { | |
581 $self->throw("You must pass a Bio::Seq::SeqWithQuality object to write_seq as a parameter named \"SeqWithQuality\""); | |
582 } | |
583 # verify that there is some sequence or some qualities | |
584 # If the $swq with quality has no qualities, set all qualities to 0. | |
585 # If the $swq has no sequence, set the sequence to N\'s. | |
586 $self->_fill_missing_data($swq); | |
587 | |
588 # all of the rest of the arguments are comments for the scf | |
589 foreach $arg (sort keys %args) { | |
590 next if ($arg =~ /SeqWithQuality/i); | |
591 ($label = $arg) =~ s/^\-//; | |
592 $comments{$label} = $args{$arg}; | |
593 } | |
594 if (!$comments{'NAME'}) { $comments{'NAME'} = $swq->id(); } | |
595 # HA! Bwahahahaha. | |
596 $comments{'CONV'} = "Bioperl-Chads Mighty SCF writer." unless defined $comments{'CONV'}; | |
597 # now deal with the version of scf they want to write | |
598 if ($comments{version}) { | |
599 if ($comments{version} != 2 && $comments{version} != 3) { | |
600 $self->warn("This module can only write version 2.0 or 3.0 scf's. Writing a version 2.0 scf by default."); | |
601 $comments{version} = "2.00"; | |
602 } | |
603 if ($comments{'version'} > 2) { | |
604 $comments{'version'} = "3.00"; | |
605 } | |
606 } | |
607 else { | |
608 $comments{'version'} = "2.00"; | |
609 } | |
610 | |
611 | |
612 | |
613 # set a few things in the header | |
614 $self->{'header'}->{'magic'} = ".scf"; | |
615 $self->{'header'}->{'sample_size'} = "2"; | |
616 $self->{'header'}->{'bases'} = length($swq->seq()); | |
617 $self->{'header'}->{'bases_left_clip'} = "0"; | |
618 $self->{'header'}->{'bases_right_clip'} = "0"; | |
619 $self->{'header'}->{'version'} = $comments{'version'}; | |
620 $self->{'header'}->{'sample_size'} = "2"; | |
621 $self->{'header'}->{'code_set'} = "9"; | |
622 @{$self->{'header'}->{'spare'}} = qw(0 0 0 0 0 0 0 0 0 0 | |
623 0 0 0 0 0 0 0 0 0 0); | |
624 | |
625 # create the binary for the comments and file it in $self->{'binaries'}->{'comments'} | |
626 $self->_set_binary_comments(\%comments); | |
627 # create the binary and the strings for the traces, bases, offsets (if necessary), and accuracies (if necessary) | |
628 $self->_set_binary_tracesbases($comments{'version'},$swq->seq(),$swq->qual()); | |
629 | |
630 # now set more things in the header | |
631 $self->{'header'}->{'samples_offset'} = "128"; | |
632 | |
633 my ($b_base_offsets,$b_base_accuracies,$samples_size,$bases_size); | |
634 # | |
635 # version 2 | |
636 # | |
637 if ($self->{'header'}->{'version'} == 2) { | |
638 $samples_size = $self->{'header'}->{'samples'} * 4 * | |
639 $self->{'header'}->{'sample_size'}; | |
640 $bases_size = length($swq->seq()) * 12; | |
641 $self->{'header'}->{'bases_offset'} = 128 + length($self->{'binaries'}->{'samples_all'}); | |
642 $self->{'header'}->{'comments_offset'} = 128 + length($self->{'binaries'}->{'samples_all'}) + length($self->{'binaries'}->{'v2_bases'}); | |
643 $self->{'header'}->{'comments_size'} = length($self->{'binaries'}->{'comments'}); | |
644 $self->{'header'}->{'private_size'} = "0"; | |
645 $self->{'header'}->{'private_offset'} = 128 + $samples_size + | |
646 $bases_size + $self->{'header'}->{'comments_size'}; | |
647 } | |
648 else { | |
649 $self->{'header'}->{'bases_offset'} = 128 + length($self->{'binaries'}->{'samples_all'}); | |
650 $self->{'header'}->{'comments_size'} = length($self->{'binaries'}->{'comments'}); | |
651 # this is: | |
652 # bases_offset + base_offsets + accuracies + called_bases + reserved | |
653 $self->{'header'}->{'comments_offset'} = $self->{'header'}->{'bases_offset'} + 4*$self->{header}->{'bases'} + 4*$self->{header}->{'bases'} + $self->{header}->{'bases'} + 3*$self->{header}->{'bases'}; | |
654 $self->{'header'}->{'private_size'} = "0"; | |
655 $self->{'header'}->{'private_offset'} = $self->{'header'}->{'comments_offset'} + $self->{'header'}->{'comments_size'}; | |
656 } | |
657 | |
658 $self->_set_binary_header(); | |
659 | |
660 # should something better be done rather then returning after | |
661 # writing? I don't do any exception trapping here | |
662 if ($comments{'version'} == 2) { | |
663 # print ("Lengths:\n"); | |
664 # print("Header : ".length($self->{'binaries'}->{'header'})."\n"); | |
665 # print("Traces : ".length($self->{'binaries'}->{'samples_all'})."\n"); | |
666 # print("Bases : ".length($self->{'binaries'}->{'v2_bases'})."\n"); | |
667 # print("Comments: ".length($self->{'binaries'}->{'comments'})."\n"); | |
668 $self->_print ($self->{'binaries'}->{'header'}) or return; | |
669 $self->_print ($self->{'binaries'}->{'samples_all'}) or return; | |
670 $self->_print ($self->{'binaries'}->{'v2_bases'}) or return; | |
671 $self->_print ($self->{'binaries'}->{'comments'}) or return; | |
672 } | |
673 elsif ($comments{'version'} ==3) { | |
674 # print ("Lengths:\n"); | |
675 # print("Header : ".length($self->{'binaries'}->{'header'})."\n"); | |
676 # print("Traces : ".length($self->{'binaries'}->{'samples_all'})."\n"); | |
677 # print("Offsets : ".length($self->{'binaries'}->{'v3_peak_offsets'})."\n"); | |
678 # print("Accuracy: ".length($self->{'binaries'}->{'v3_accuracies_all'})."\n"); | |
679 # print("Bases : ".length($self->{'binaries'}->{'v3_called_bases'})."\n"); | |
680 # print("Reserved: ".length($self->{'binaries'}->{'v3_reserved'})."\n"); | |
681 # print("Comments: ".length($self->{'binaries'}->{'comments'})."\n"); | |
682 $self->{'header'}->{'comments_offset'} = | |
683 128+length($self->{'binaries'}->{'samples_all'})+ | |
684 length($self->{'binaries'}->{'v3_peak_offsets'})+ | |
685 length($self->{'binaries'}->{'v3_accuracies_all'})+ | |
686 length($self->{'binaries'}->{'v3_called_bases'})+ | |
687 length($self->{'binaries'}->{'v3_reserved'}); | |
688 $self->{'header'}->{'spare'}->[1] = | |
689 $self->{'header'}->{'comments_offset'} + | |
690 length($self->{'binaries'}->{'comments'}); | |
691 $self->_set_binary_header(); | |
692 $self->_print ($self->{'binaries'}->{'header'}) or print("Couldn't write header\n"); | |
693 $self->_print ($self->{'binaries'}->{'samples_all'}) or print("Couldn't write samples\n"); | |
694 $self->_print ($self->{'binaries'}->{'v3_peak_offsets'}) or print("Couldn't write peak offsets\n"); | |
695 $self->_print ($self->{'binaries'}->{'v3_accuracies_all'}) or print("Couldn't write accuracies\n"); | |
696 $self->_print ($self->{'binaries'}->{'v3_called_bases'}) or print("Couldn't write called_bases\n"); | |
697 $self->_print ($self->{'binaries'}->{'v3_reserved'}) or print("Couldn't write reserved\n"); | |
698 $self->_print ($self->{'binaries'}->{'comments'}) or print ("Couldn't write comments\n"); | |
699 } | |
700 | |
701 # kinda unnecessary, given the close() below, but maybe that'll go | |
702 # away someday. | |
703 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
704 | |
705 $self->close(); | |
706 } | |
707 | |
708 =head2 _set_binary_header() | |
709 | |
710 Title : _set_binary_header(); | |
711 Usage : $self->_set_binary_header(); | |
712 Function: Provide the binary string that will be used as the header for | |
713 a scfv2 document. | |
714 Returns : A binary string. | |
715 Args : None. Uses the entries in the $self->{'header'} hash. These | |
716 are set on construction of the object (hopefully correctly!). | |
717 Notes : | |
718 | |
719 =cut | |
720 | |
721 sub _set_binary_header { | |
722 my ($self) = shift; | |
723 my $binary = pack "a4 NNNNNNNN a4 NN N20", | |
724 ( | |
725 $self->{'header'}->{'magic'}, | |
726 $self->{'header'}->{'samples'}, | |
727 $self->{'header'}->{'samples_offset'}, | |
728 $self->{'header'}->{'bases'}, | |
729 $self->{'header'}->{'bases_left_clip'}, | |
730 $self->{'header'}->{'bases_right_clip'}, | |
731 $self->{'header'}->{'bases_offset'}, | |
732 $self->{'header'}->{'comments_size'}, | |
733 $self->{'header'}->{'comments_offset'}, | |
734 $self->{'header'}->{'version'}, | |
735 $self->{'header'}->{'sample_size'}, | |
736 $self->{'header'}->{'code_set'}, | |
737 @{$self->{'header'}->{'spare'}}); | |
738 $self->{'binaries'}->{'header'} = $binary; | |
739 } | |
740 | |
741 =head2 _set_binary_tracesbases($version,$sequence,$ref_quality) | |
742 | |
743 Title : _set_binary_tracesbases($version,$sequence,$ref_quality) | |
744 Usage : $self->_set_binary_tracesbases($version,$sequence, | |
745 $ref_quality); | |
746 Function: Constructs the trace and base strings for all scfs | |
747 Returns : Nothing. Alters self. | |
748 Args : $version - "2" or "3" | |
749 $sequence - a scalar containing arbitrary sequence data | |
750 $ref_quality - a reference to an array containing quality | |
751 values | |
752 Notes : This is a really complicated thing. | |
753 | |
754 =cut | |
755 | |
756 sub _set_binary_tracesbases { | |
757 my ($self,$version,$sequence,$rqual) = @_; | |
758 $sequence =~ tr/a-z/A-Z/; | |
759 $self->{'info'}->{'sequence'} = $sequence; | |
760 $self->{'info'}->{'sequence_length'} = length($sequence); | |
761 my @quals = @$rqual; | |
762 # build the ramp for the first base. | |
763 # a ramp looks like this "1 4 13 29 51 71 80 71 51 29 13 4 1" times the quality score. | |
764 # REMEMBER: A C G T | |
765 # note to self-> smooth this thing out a bit later | |
766 @{$self->{'text'}->{'ramp'}} = qw( 1 4 13 29 51 75 80 75 51 29 13 4 1 ); | |
767 # the width of the ramp | |
768 $self->{'text'}->{'ramp_width'} = scalar(@{$self->{'text'}->{'ramp'}}); | |
769 # how far should the peaks overlap? | |
770 $self->{'text'}->{'ramp_overlap'} = 1; | |
771 # where should the peaks be located? | |
772 $self->{'text'}->{'peak_at'} = 7; | |
773 $self->{'text'}->{'ramp_total_length'} = | |
774 $self->{'info'}->{'sequence_length'} * $self->{'text'}->{'ramp_width'} | |
775 - $self->{'info'}->{'sequence_length'} * $self->{'text'}->{'ramp_overlap'}; | |
776 # create some empty arrays | |
777 # my (@sam_a,@sam_c,@sam_g,@sam_t,$pos); | |
778 my $pos; | |
779 my $total_length = $self->{'text'}->{ramp_total_length}; | |
780 for ($pos=0;$pos<=$total_length;$pos++) { | |
781 $self->{'text'}->{'samples_a'}[$pos] = $self->{'text'}->{'samples_c'}[$pos] | |
782 = $self->{'text'}->{'samples_g'}[$pos] = $self->{'text'}->{'samples_t'}[$pos] = "0"; | |
783 } | |
784 # $self->_dump_traces(); | |
785 # now populate them | |
786 my ($current_base,$place_base_at,$peak_quality,$ramp_counter,$current_ramp,$ramp_position); | |
787 my $sequence_length = $self->{'info'}->{'sequence_length'}; | |
788 my $half_ramp = int($self->{'text'}->{'ramp_width'}/2); | |
789 for ($pos = 0; $pos<$sequence_length;$pos++) { | |
790 $current_base = substr($self->{'info'}->{'sequence'},$pos,1); | |
791 # where should the peak for this base be placed? Modeled after a mktrace scf | |
792 $place_base_at = ($pos * $self->{'text'}->{'ramp_width'}) - | |
793 ($pos * $self->{'text'}->{'ramp_overlap'}) - | |
794 $half_ramp + $self->{'text'}->{'ramp_width'} - 1; | |
795 push @{$self->{'text'}->{'v3_peak_offsets'}},$place_base_at; | |
796 $peak_quality = $quals[$pos]; | |
797 if ($current_base eq "A") { | |
798 $ramp_position = $place_base_at - $half_ramp; | |
799 for ($current_ramp = 0; $current_ramp < $self->{'text'}->{'ramp_width'}; $current_ramp++) { | |
800 $self->{'text'}->{'samples_a'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
801 } | |
802 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,$peak_quality,0,0,0,$current_base,0,0,0); | |
803 push @{$self->{'text'}->{'v3_base_accuracy_a'}},$peak_quality; | |
804 foreach (qw(g c t)) { | |
805 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
806 } | |
807 } | |
808 elsif ($current_base eq "C") { | |
809 $ramp_position = $place_base_at - $half_ramp; | |
810 for ($current_ramp = 0; $current_ramp < $self->{'text'}->{'ramp_width'}; $current_ramp++) { | |
811 $self->{'text'}->{'samples_c'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
812 } | |
813 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,0,$peak_quality,0,0,$current_base,0,0,0); | |
814 push @{$self->{'text'}->{'v3_base_accuracy_c'}},$peak_quality; | |
815 foreach (qw(g a t)) { | |
816 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
817 } | |
818 } elsif ($current_base eq "G") { | |
819 $ramp_position = $place_base_at - $half_ramp; | |
820 for ($current_ramp = 0; | |
821 $current_ramp < $self->{'text'}->{'ramp_width'}; | |
822 $current_ramp++) { | |
823 $self->{'text'}->{'samples_g'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
824 } | |
825 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,0,0,$peak_quality,0,$current_base,0,0,0); | |
826 push @{$self->{'text'}->{"v3_base_accuracy_g"}},$peak_quality; | |
827 foreach (qw(a c t)) { | |
828 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
829 } | |
830 } | |
831 elsif( $current_base eq "T" ) { | |
832 $ramp_position = $place_base_at - $half_ramp; | |
833 for ($current_ramp = 0; $current_ramp < $self->{'text'}->{'ramp_width'}; $current_ramp++) { | |
834 $self->{'text'}->{'samples_t'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
835 } | |
836 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,0,0,0,$peak_quality,$current_base,0,0,0); | |
837 push @{$self->{'text'}->{'v3_base_accuracy_t'}},$peak_quality; | |
838 foreach (qw(g c a)) { | |
839 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
840 } | |
841 } elsif ($current_base eq "N") { | |
842 $ramp_position = $place_base_at - $half_ramp; | |
843 for ($current_ramp = 0; | |
844 $current_ramp < $self->{'text'}->{'ramp_width'}; | |
845 $current_ramp++) { | |
846 $self->{'text'}->{'samples_a'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
847 } | |
848 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,$peak_quality, | |
849 $peak_quality,$peak_quality,$peak_quality, | |
850 $current_base,0,0,0); | |
851 foreach (qw(a c g t)) { | |
852 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
853 } | |
854 } | |
855 else { | |
856 # don't print this. | |
857 # print ("The current base ($current_base) is not a base. Hmmm.\n"); | |
858 } | |
859 } | |
860 foreach (qw(a c g t)) { | |
861 pop @{$self->{'text'}->{"samples_$_"}}; | |
862 } | |
863 | |
864 # set the samples in the header | |
865 $self->{'header'}->{'samples'} = scalar(@{$self->{'text'}->{'samples_a'}}); | |
866 | |
867 # create the final trace string (this is version dependent) | |
868 $self->_make_trace_string($version); | |
869 # create the binary for v2 bases | |
870 if ($self->{'header'}->{'version'} == 2) { | |
871 my ($packstring,@pack_array,$pos2,$tester,@unpacked); | |
872 for ($pos = 0; $pos<$sequence_length;$pos++) { | |
873 my @pack_array = @{$self->{'text'}->{'v2_bases'}}[$pos*9..$pos*9+8]; | |
874 $self->{'binaries'}->{'v2_bases'} .= pack "N C C C C a C3",@pack_array; | |
875 } | |
876 # now create the binary for the traces | |
877 my $trace_pack_length = scalar(@{$self->{'text'}->{'samples_all'}}); | |
878 $self->{'binaries'}->{'samples_all'} .= pack "n$trace_pack_length",@{$self->{'text'}->{'samples_all'}}; | |
879 } | |
880 else { | |
881 # now for the version 3 stuff! | |
882 # delta the trace data | |
883 my @temp; | |
884 foreach (qw(a c g t)) { | |
885 $self->{'text'}->{"t_samples_$_"} = $self->_delta($self->{'text'}->{"samples_$_"},"forward"); | |
886 if ($_ eq 'a') { | |
887 @temp = @{$self->{'text'}->{"t_samples_a"}}; | |
888 @{$self->{'text'}->{'samples_all'}} = @{$self->{'text'}->{"t_samples_a"}}; | |
889 } | |
890 else { | |
891 push @{$self->{'text'}->{'samples_all'}},@{$self->{'text'}->{"t_samples_$_"}}; | |
892 } | |
893 } | |
894 # now create the binary for the traces | |
895 my $trace_pack_length = scalar(@{$self->{'text'}->{'samples_all'}}); | |
896 | |
897 $self->{'binaries'}->{'samples_all'} .= pack "n$trace_pack_length",@{$self->{'text'}->{'samples_all'}}; | |
898 | |
899 # peak offsets | |
900 my $length = scalar(@{$self->{'text'}->{'v3_peak_offsets'}}); | |
901 $self->{'binaries'}->{'v3_peak_offsets'} = pack "N$length",@{$self->{'text'}->{'v3_peak_offsets'}}; | |
902 # base accuracies | |
903 @{$self->{'text'}->{'v3_accuracies_all'}} = @{$self->{'text'}->{"v3_base_accuracy_a"}}; | |
904 foreach (qw(c g t)) { | |
905 @{$self->{'text'}->{'v3_accuracies_all'}} = (@{$self->{'text'}->{'v3_accuracies_all'}},@{$self->{'text'}->{"v3_base_accuracy_$_"}}); | |
906 } | |
907 $length = scalar(@{$self->{'text'}->{'v3_accuracies_all'}}); | |
908 | |
909 $self->{'binaries'}->{'v3_accuracies_all'} = pack "c$length",@{$self->{'text'}->{'v3_accuracies_all'}}; | |
910 # called bases | |
911 $length = length($self->{'info'}->{'sequence'}); | |
912 my @seq = split(//,$self->{'info'}->{'sequence'}); | |
913 # pack the string | |
914 $self->{'binaries'}->{'v3_called_bases'} = $self->{'info'}->{'sequence'}; | |
915 # finally, reserved for future use | |
916 $length = $self->{'info'}->{'sequence_length'}; | |
917 for (my $counter=0; $counter < $length; $counter++) { | |
918 push @temp,0; | |
919 } | |
920 $self->{'binaries'}->{'v3_reserved'} = pack "N$length",@temp; | |
921 } | |
922 } | |
923 | |
924 =head2 _make_trace_string($version) | |
925 | |
926 Title : _make_trace_string($version) | |
927 Usage : $self->_make_trace_string($version) | |
928 Function: Merges trace data for the four bases to produce an scf | |
929 trace string. _requires_ $version | |
930 Returns : Nothing. Alters $self. | |
931 Args : $version - a version number. "2" or "3" | |
932 Notes : | |
933 | |
934 =cut | |
935 | |
936 sub _make_trace_string { | |
937 my ($self,$version) = @_; | |
938 my @traces; | |
939 my @traces_view; | |
940 my @as = @{$self->{'text'}->{'samples_a'}}; | |
941 my @cs = @{$self->{'text'}->{'samples_c'}}; | |
942 my @gs = @{$self->{'text'}->{'samples_g'}}; | |
943 my @ts = @{$self->{'text'}->{'samples_t'}}; | |
944 if ($version == 2) { | |
945 for (my $curr=0; $curr < scalar(@as); $curr++) { | |
946 $as[$curr] = $DEFAULT_QUALITY unless defined $as[$curr]; | |
947 $cs[$curr] = $DEFAULT_QUALITY unless defined $cs[$curr]; | |
948 $gs[$curr] = $DEFAULT_QUALITY unless defined $gs[$curr]; | |
949 $ts[$curr] = $DEFAULT_QUALITY unless defined $ts[$curr]; | |
950 push @traces,($as[$curr],$cs[$curr],$gs[$curr],$ts[$curr]); | |
951 } | |
952 } | |
953 elsif ($version == 3) { | |
954 @traces = (@as,@cs,@gs,@ts); | |
955 } | |
956 else { | |
957 $self->throw("No idea what version required to make traces here. You gave #$version# Bailing."); | |
958 } | |
959 my $length = scalar(@traces); | |
960 $self->{'text'}->{'samples_all'} = \@traces; | |
961 | |
962 } | |
963 | |
964 =head2 _set_binary_comments(\@comments) | |
965 | |
966 Title : _set_binary_comments(\@comments) | |
967 Usage : $self->_set_binary_comments(\@comments); | |
968 Function: Provide a binary string that will be the comments section of | |
969 the scf file. See the scf specifications for detailed | |
970 specifications for the comments section of an scf file. Hint: | |
971 CODE=something\nBODE=something\n\0 | |
972 Returns : Nothing. Alters self. | |
973 Args : A reference to an array containing comments. | |
974 Notes : None. | |
975 | |
976 =cut | |
977 | |
978 sub _set_binary_comments { | |
979 my ($self,$rcomments) = @_; | |
980 my $comments_string = ''; | |
981 my %comments = %$rcomments; | |
982 foreach my $key (sort keys %comments) { | |
983 $comments{$key} ||= ''; | |
984 $comments_string .= "$key=$comments{$key}\n"; | |
985 } | |
986 $comments_string .= "\n\0"; | |
987 $self->{'header'}->{'comments'} = $comments_string; | |
988 my $length = length($comments_string); | |
989 $self->{'binaries'}->{'comments'} = pack "A$length",$comments_string; | |
990 $self->{'header'}->{'comments'} = $comments_string; | |
991 } | |
992 | |
993 =head2 _fill_missing_data($swq) | |
994 | |
995 Title : _fill_missing_data($swq) | |
996 Usage : $self->_fill_missing_data($swq); | |
997 Function: If the $swq with quality has no qualities, set all qualities | |
998 to 0. | |
999 If the $swq has no sequence, set the sequence to N's. | |
1000 Returns : Nothing. Modifies the SeqWithQuality that was passed as an | |
1001 argument. | |
1002 Args : A reference to a Bio::Seq::SeqWithQuality | |
1003 Notes : None. | |
1004 | |
1005 =cut | |
1006 | |
1007 #' | |
1008 sub _fill_missing_data { | |
1009 my ($self,$swq) = @_; | |
1010 my $qual_obj = $swq->qual_obj(); | |
1011 my $seq_obj = $swq->seq_obj(); | |
1012 if ($qual_obj->length() == 0 && $seq_obj->length() != 0) { | |
1013 my $fake_qualities = ("$DEFAULT_QUALITY ")x$seq_obj->length(); | |
1014 $swq->qual($fake_qualities); | |
1015 } | |
1016 if ($seq_obj->length() == 0 && $qual_obj->length != 0) { | |
1017 my $sequence = ("N")x$qual_obj->length(); | |
1018 $swq->seq($sequence); | |
1019 } | |
1020 } | |
1021 | |
1022 =head2 _delta(\@trace_data,$direction) | |
1023 | |
1024 Title : _delta(\@trace_data,$direction) | |
1025 Usage : $self->_delta(\@trace_data,$direction); | |
1026 Function: | |
1027 Returns : A reference to an array containing modified trace values. | |
1028 Args : A reference to an array containing trace data and a string | |
1029 indicating the direction of conversion. ("forward" or | |
1030 "backward"). | |
1031 Notes : This code is taken from the specification for SCF3.2. | |
1032 http://www.mrc-lmb.cam.ac.uk/pubseq/manual/formats_unix_4.html | |
1033 | |
1034 =cut | |
1035 | |
1036 | |
1037 sub _delta { | |
1038 my ($self,$rsamples,$direction) = @_; | |
1039 my @samples = @$rsamples; | |
1040 # /* If job == DELTA_IT: | |
1041 # * change a series of sample points to a series of delta delta values: | |
1042 # * ie change them in two steps: | |
1043 # * first: delta = current_value - previous_value | |
1044 # * then: delta_delta = delta - previous_delta | |
1045 # * else | |
1046 # * do the reverse | |
1047 # */ | |
1048 # int i; | |
1049 # uint_2 p_delta, p_sample; | |
1050 | |
1051 my ($i,$num_samples,$p_delta,$p_sample,@samples_converted); | |
1052 | |
1053 # c-programmers are funny people with their single-letter variables | |
1054 | |
1055 if ( $direction eq "forward" ) { | |
1056 $p_delta = 0; | |
1057 for ($i=0; $i < scalar(@samples); $i++) { | |
1058 $p_sample = $samples[$i]; | |
1059 $samples[$i] = $samples[$i] - $p_delta; | |
1060 $p_delta = $p_sample; | |
1061 } | |
1062 $p_delta = 0; | |
1063 for ($i=0; $i < scalar(@samples); $i++) { | |
1064 $p_sample = $samples[$i]; | |
1065 $samples[$i] = $samples[$i] - $p_delta; | |
1066 $p_delta = $p_sample; | |
1067 } | |
1068 } | |
1069 elsif ($direction eq "backward") { | |
1070 $p_sample = 0; | |
1071 for ($i=0; $i < scalar(@samples); $i++) { | |
1072 $samples[$i] = $samples[$i] + $p_sample; | |
1073 $p_sample = $samples[$i]; | |
1074 } | |
1075 $p_sample = 0; | |
1076 for ($i=0; $i < scalar(@samples); $i++) { | |
1077 $samples[$i] = $samples[$i] + $p_sample; | |
1078 $p_sample = $samples[$i]; | |
1079 } | |
1080 } | |
1081 else { | |
1082 $self->warn("Bad direction. Use \"forward\" or \"backward\"."); | |
1083 } | |
1084 return \@samples; | |
1085 } | |
1086 | |
1087 =head2 _unpack_magik($buffer) | |
1088 | |
1089 Title : _unpack_magik($buffer) | |
1090 Usage : $self->_unpack_magik($buffer) | |
1091 Function: What unpack specification should be used? Try them all. | |
1092 Returns : Nothing. | |
1093 Args : A buffer containing arbitrary binary data. | |
1094 Notes : Eliminate the ambiguity and the guesswork. Used in the | |
1095 adaptation of _delta(), mostly. | |
1096 | |
1097 =cut | |
1098 | |
1099 sub _unpack_magik { | |
1100 my ($self,$buffer) = @_; | |
1101 my $length = length($buffer); | |
1102 my (@read,$counter); | |
1103 foreach (qw(c C s S i I l L n N v V)) { | |
1104 @read = unpack "$_$length", $buffer; | |
1105 print ("----- Unpacked with $_\n"); | |
1106 for ($counter=0; $counter < 20; $counter++) { | |
1107 print("$read[$counter]\n"); | |
1108 } | |
1109 } | |
1110 } | |
1111 | |
1112 =head2 read_from_buffer($filehandle,$buffer,$length) | |
1113 | |
1114 Title : read_from_buffer($filehandle,$buffer,$length) | |
1115 Usage : $self->read_from_buffer($filehandle,$buffer,$length); | |
1116 Function: Read from the buffer. | |
1117 Returns : $buffer, containing a read of $length | |
1118 Args : a filehandle, a buffer, and a read length | |
1119 Notes : I just got tired of typing | |
1120 "unless (length($buffer) == $length)" so I put it here. | |
1121 | |
1122 =cut | |
1123 | |
1124 sub read_from_buffer { | |
1125 my ($self,$fh,$buffer,$length) = @_; | |
1126 read $fh, $buffer, $length; | |
1127 unless (length($buffer) == $length) { | |
1128 $self->warn("The read was incomplete! Trying harder."); | |
1129 my $missing_length = $length - length($buffer); | |
1130 my $buffer2; | |
1131 read $fh,$buffer2,$missing_length; | |
1132 $buffer .= $buffer2; | |
1133 if (length($buffer) != $length) { | |
1134 $self->throw("Unexpected end of file while reading from SCF file. I should have read $length but instead got ".length($buffer)."! Current file position is ".tell($fh)."."); | |
1135 } | |
1136 } | |
1137 | |
1138 return $buffer; | |
1139 } | |
1140 | |
1141 =head2 _dump_keys() | |
1142 | |
1143 Title : _dump_keys() | |
1144 Usage : &_dump_keys($a_reference_to_some_hash) | |
1145 Function: Dump out the keys in a hash. | |
1146 Returns : Nothing. | |
1147 Args : A reference to a hash. | |
1148 Notes : A debugging method. | |
1149 | |
1150 =cut | |
1151 | |
1152 sub _dump_keys { | |
1153 my $rhash = shift; | |
1154 if ($rhash !~ /HASH/) { | |
1155 print("_dump_keys: that was not a hash.\nIt was #$rhash# which was this reference:".ref($rhash)."\n"); | |
1156 return; | |
1157 } | |
1158 print("_dump_keys: The keys for $rhash are:\n"); | |
1159 foreach (sort keys %$rhash) { | |
1160 print("$_\n"); | |
1161 } | |
1162 } | |
1163 | |
1164 =head2 _dump_base_accuracies() | |
1165 | |
1166 Title : _dump_base_accuracies() | |
1167 Usage : $self->_dump_base_accuracies(); | |
1168 Function: Dump out the v3 base accuracies in an easy to read format. | |
1169 Returns : Nothing. | |
1170 Args : None. | |
1171 Notes : A debugging method. | |
1172 | |
1173 =cut | |
1174 | |
1175 sub _dump_base_accuracies { | |
1176 my $self = shift; | |
1177 print("Dumping base accuracies! for v3\n"); | |
1178 print("There are this many elements in a,c,g,t:\n"); | |
1179 print(scalar(@{$self->{'text'}->{'v3_base_accuracy_a'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_c'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_g'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_t'}})."\n"); | |
1180 my $number_traces = scalar(@{$self->{'text'}->{'v3_base_accuracy_a'}}); | |
1181 for (my $counter=0; $counter < $number_traces; $counter++ ) { | |
1182 print("$counter\t"); | |
1183 print $self->{'text'}->{'v3_base_accuracy_a'}->[$counter]."\t"; | |
1184 print $self->{'text'}->{'v3_base_accuracy_c'}->[$counter]."\t"; | |
1185 print $self->{'text'}->{'v3_base_accuracy_g'}->[$counter]."\t"; | |
1186 print $self->{'text'}->{'v3_base_accuracy_t'}->[$counter]."\t"; | |
1187 print("\n"); | |
1188 } | |
1189 } | |
1190 | |
1191 =head2 _dump_peak_indices_incoming() | |
1192 | |
1193 Title : _dump_peak_indices_incoming() | |
1194 Usage : $self->_dump_peak_indices_incoming(); | |
1195 Function: Dump out the v3 peak indices in an easy to read format. | |
1196 Returns : Nothing. | |
1197 Args : None. | |
1198 Notes : A debugging method. | |
1199 | |
1200 =cut | |
1201 | |
1202 sub _dump_peak_indices_incoming { | |
1203 my $self = shift; | |
1204 print("Dump peak indices incoming!\n"); | |
1205 my $length = $self->{'bases'}; | |
1206 print("The length is $length\n"); | |
1207 for (my $count=0; $count < $length; $count++) { | |
1208 print("$count\t$self->{parsed}->{peak_indices}->[$count]\n"); | |
1209 } | |
1210 } | |
1211 | |
1212 =head2 _dump_base_accuracies_incoming() | |
1213 | |
1214 Title : _dump_base_accuracies_incoming() | |
1215 Usage : $self->_dump_base_accuracies_incoming(); | |
1216 Function: Dump out the v3 base accuracies in an easy to read format. | |
1217 Returns : Nothing. | |
1218 Args : None. | |
1219 Notes : A debugging method. | |
1220 | |
1221 =cut | |
1222 | |
1223 sub _dump_base_accuracies_incoming { | |
1224 my $self = shift; | |
1225 print("Dumping base accuracies! for v3\n"); | |
1226 # print("There are this many elements in a,c,g,t:\n"); | |
1227 # print(scalar(@{$self->{'parsed'}->{'v3_base_accuracy_a'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_c'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_g'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_t'}})."\n"); | |
1228 my $number_traces = $self->{'bases'}; | |
1229 for (my $counter=0; $counter < $number_traces; $counter++ ) { | |
1230 print("$counter\t"); | |
1231 foreach (qw(A T G C)) { | |
1232 print $self->{'parsed'}->{'base_accuracies'}->{$_}->[$counter]."\t"; | |
1233 } | |
1234 print("\n"); | |
1235 } | |
1236 } | |
1237 | |
1238 | |
1239 =head2 _dump_comments() | |
1240 | |
1241 Title : _dump_comments() | |
1242 Usage : $self->_dump_comments(); | |
1243 Function: Debug dump the comments section from the scf. | |
1244 Returns : Nothing. | |
1245 Args : Nothing. | |
1246 Notes : None. | |
1247 | |
1248 =cut | |
1249 | |
1250 sub _dump_comments { | |
1251 my ($self) = @_; | |
1252 warn ("SCF comments:\n"); | |
1253 foreach my $k (keys %{$self->{'comments'}}) { | |
1254 warn ("\t {$k} ==> ", $self->{'comments'}->{$k}, "\n"); | |
1255 } | |
1256 } | |
1257 | |
1258 | |
1259 1; | |
1260 __END__ | |
1261 | |
1262 |