Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/Alignment/Trim.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # Bio::Tools::Alignment::Trim.pm | |
2 # | |
3 # Cared for by Chad Matsalla | |
4 # | |
5 # Copyright Chad Matsalla | |
6 # | |
7 # You may distribute this module under the same terms as perl itself | |
8 | |
9 # POD documentation - main docs before the code | |
10 | |
11 =head1 NAME | |
12 | |
13 Bio::Tools::Alignment::Trim - A kludge to do specialized trimming of | |
14 sequence based on quality. | |
15 | |
16 =head1 SYNOPSIS | |
17 | |
18 use Bio::Tools::Alignment::Trim; | |
19 $o_trim = new Bio::Tools::Alignment::Trim; | |
20 $o_trim->set_reverse_designator("R"); | |
21 $o_trim->set_forward_designator("F"); | |
22 | |
23 | |
24 =head1 DESCRIPTION | |
25 | |
26 This is a specialized module designed by Chad for Chad to trim sequences | |
27 based on a highly specialized list of requirements. In other words, write | |
28 something that will trim sequences 'just like the people in the lab would | |
29 do manually'. | |
30 | |
31 I settled on a sliding-window-average style of search which is ugly and | |
32 slow but does _exactly_ what I want it to do. | |
33 | |
34 Mental note: rewrite this. | |
35 | |
36 It is very important to keep in mind the context in which this module was | |
37 written: strictly to support the projects for which Consed.pm was | |
38 designed. | |
39 | |
40 =head1 FEEDBACK | |
41 | |
42 =head2 Mailing Lists | |
43 | |
44 User feedback is an integral part of the evolution of this and other | |
45 Bioperl modules. Send your comments and suggestions preferably to one | |
46 of the Bioperl mailing lists. Your participation is much appreciated. | |
47 | |
48 bioperl-l@bioperl.org - General discussion | |
49 http://bio.perl.org/MailList.html - About the mailing | |
50 lists | |
51 | |
52 =head2 Reporting Bugs | |
53 | |
54 Report bugs to the Bioperl bug tracking system to help us keep track | |
55 the bugs and their resolution. Bug reports can be submitted via | |
56 email or the web: | |
57 | |
58 bioperl-bugs@bio.perl.org | |
59 http://bugzilla.bioperl.org/ | |
60 | |
61 =head1 AUTHOR - Chad Matsalla | |
62 | |
63 Email bioinformatics@dieselwurks.com | |
64 | |
65 =head1 APPENDIX | |
66 | |
67 The rest of the documentation details each of the object methods. | |
68 Internal methods are usually preceded with a _ | |
69 | |
70 =cut | |
71 | |
72 package Bio::Tools::Alignment::Trim; | |
73 | |
74 use Bio::Root::Root; | |
75 use strict; | |
76 use Dumpvalue; | |
77 | |
78 | |
79 | |
80 use vars qw($VERSION @ISA %DEFAULTS); | |
81 | |
82 $VERSION = '0.01'; | |
83 | |
84 @ISA = qw(Bio::Root::Root); | |
85 | |
86 BEGIN { | |
87 %DEFAULTS = ( 'f_designator' => 'f', | |
88 'r_designator' => 'r', | |
89 'windowsize' => '10', | |
90 'phreds' => '20'); | |
91 } | |
92 | |
93 =head2 new() | |
94 | |
95 Title : new() | |
96 Usage : $o_trim = Bio::Tools::Alignment::Trim->new(); | |
97 Function: Construct the Bio::Tools::Alignment::Trim object. No parameters | |
98 are required to create this object. It is strictly a bundle of | |
99 functions, as far as I am concerned. | |
100 Returns : A reference to a Bio::Tools::Alignment::Trim object. | |
101 Args : (optional) | |
102 -windowsize (default 10) | |
103 -phreds (default 20) | |
104 | |
105 | |
106 =cut | |
107 | |
108 sub new { | |
109 my ($class,@args) = @_; | |
110 my $self = $class->SUPER::new(@args); | |
111 my($windowsize,$phreds) = | |
112 $self->_rearrange([qw( | |
113 WINDOWSIZE | |
114 PHREDS | |
115 )], | |
116 @args); | |
117 $self->{windowsize} = $windowsize || $DEFAULTS{'windowsize'}; | |
118 $self->{phreds} = $phreds || $DEFAULTS{'phreds'}; | |
119 # print("Constructor set phreds to ".$self->{phreds}."\n") if $self->verbose > 0; | |
120 $self->set_designators($DEFAULTS{'f_designator'}, | |
121 $DEFAULTS{'r_designator'}); | |
122 return $self; | |
123 } | |
124 | |
125 =head2 set_designators($forward_designator,$reverse_designator) | |
126 | |
127 Title : set_designators(<forward>,<reverse>) | |
128 Usage : $o_trim->set_designators("F","R") | |
129 Function: Set the string by which the system determines whether a given | |
130 sequence represents a forward or a reverse read. | |
131 Returns : Nothing. | |
132 Args : two scalars: one representing the forward designator and one | |
133 representing the reverse designator | |
134 | |
135 =cut | |
136 | |
137 sub set_designators { | |
138 my $self = shift; | |
139 ($self->{'f_designator'},$self->{'r_designator'}) = @_; | |
140 } | |
141 | |
142 =head2 set_forward_designator($designator) | |
143 | |
144 Title : set_forward_designator($designator) | |
145 Usage : $o_trim->set_forward_designator("F") | |
146 Function: Set the string by which the system determines if a given | |
147 sequence is a forward read. | |
148 Returns : Nothing. | |
149 Args : A string representing the forward designator of this project. | |
150 | |
151 =cut | |
152 | |
153 sub set_forward_designator { | |
154 my ($self,$desig) = @_; | |
155 $self->{'f_designator'} = $desig; | |
156 } | |
157 | |
158 =head2 set_reverse_designator($reverse_designator) | |
159 | |
160 Title : set_reverse_designator($reverse_designator) | |
161 Function: Set the string by which the system determines if a given | |
162 sequence is a reverse read. | |
163 Usage : $o_trim->set_reverse_designator("R") | |
164 Returns : Nothing. | |
165 Args : A string representing the forward designator of this project. | |
166 | |
167 =cut | |
168 | |
169 sub set_reverse_designator { | |
170 my ($self,$desig) = @_; | |
171 $self->{'r_designator'} = $desig; | |
172 } | |
173 | |
174 =head2 get_designators() | |
175 | |
176 Title : get_designators() | |
177 Usage : $o_trim->get_designators() | |
178 Returns : A string describing the current designators. | |
179 Args : None | |
180 Notes : Really for informational purposes only. Duh. | |
181 | |
182 =cut | |
183 | |
184 sub get_designators { | |
185 my $self = shift; | |
186 return("forward: ".$self->{'f_designator'}." reverse: ".$self->{'r_designator'}); | |
187 } | |
188 | |
189 =head2 trim_leading_polys() | |
190 | |
191 Title : trim_leading_polys() | |
192 Usage : $o_trim->trim_leading_polys() | |
193 Function: Not implemented. Does nothing. | |
194 Returns : Nothing. | |
195 Args : None. | |
196 Notes : This function is not implemented. Part of something I wanted to | |
197 do but never got around to doing. | |
198 | |
199 =cut | |
200 | |
201 sub trim_leading_polys { | |
202 my ($self, $sequence) = @_; | |
203 } | |
204 | |
205 =head2 dump_hash() | |
206 | |
207 Title : dump_hash() | |
208 Usage : $o_trim->dump_hash() | |
209 Function: Unimplemented. | |
210 Returns : Nothing. | |
211 Args : None. | |
212 Notes : Does nothing. | |
213 | |
214 =cut | |
215 | |
216 sub dump_hash { | |
217 my $self = shift; | |
218 my %hash = %{$self->{'qualities'}}; | |
219 } # end dump_hash | |
220 | |
221 =head2 trim_singlet($sequence,$quality,$name,$class) | |
222 | |
223 Title : trim_singlet($sequence,$quality,$name,$class) | |
224 Usage : ($r_trim_points,$trimmed_sequence) = | |
225 @{$o_trim->trim_singlet($sequence,$quality,$name,$class)}; | |
226 Function: Trim a singlet based on its quality. | |
227 Returns : a reference to an array containing the forward and reverse | |
228 trim points and the trimmed sequence. | |
229 Args : $sequence : A sequence (SCALAR, please) | |
230 $quality : A _scalar_ of space-delimited quality values. | |
231 $name : the name of the sequence | |
232 $class : The class of the sequence. One of qw(singlet | |
233 singleton doublet pair multiplet) | |
234 Notes : At the time this was written the bioperl objects SeqWithQuality | |
235 and PrimaryQual did not exist. This is what is with the clumsy | |
236 passing of references and so on. I will rewrite this next time I | |
237 have to work with it. I also wasn't sure whether this function | |
238 should return just the trim points or the points and the sequence. | |
239 I decided that I always wanted both so that's how I implemented | |
240 it. | |
241 - Note that the size of the sliding windows is set during construction of | |
242 the Bio::Tools::Alignment::Trim object. | |
243 | |
244 =cut | |
245 | |
246 sub trim_singlet { | |
247 my ($self,$sequence,$quality,$name,$class) = @_; | |
248 # this split is done because I normally store quality values in a | |
249 # space-delimited scalar rather then in an array. | |
250 # I do this because serialization of the arrays is tough. | |
251 my @qual = split(' ',$quality); | |
252 my @points; | |
253 my $sequence_length = length($sequence); | |
254 my ($returnstring,$processed_sequence); | |
255 # smooth out the qualities | |
256 my $r_windows = &_sliding_window(\@qual,$self->{windowsize}); | |
257 # find out the leading and trailing trimpoints | |
258 my $start_base = $self->_get_start($r_windows,$self->{windowsize},$self->{phreds}); | |
259 my (@new_points,$trimmed_sequence); | |
260 # do you think that any sequence shorter then 100 should be | |
261 # discarded? I don't think that this should be the decision of this | |
262 # module. | |
263 # removed, 020926 | |
264 $points[0] = $start_base; | |
265 # whew! now for the end base | |
266 # required parameters: reference_to_windows,windowsize,$phredvalue,start_base | |
267 my $end_base = &_get_end($r_windows,$self->{windowsize}, | |
268 $self->{phreds},$start_base); | |
269 $points[1] = $end_base; | |
270 # now do the actual trimming | |
271 # CHAD : I don't think that it is a good idea to call chop_sequence here | |
272 # because chop_sequence also removes X's and N's and things | |
273 # and that is not always what is wanted | |
274 return \@points; | |
275 } | |
276 | |
277 =head2 trim_doublet($sequence,$quality,$name,$class) | |
278 | |
279 Title : trim_doublet($sequence,$quality,$name,$class) | |
280 Usage : ($r_trim_points,$trimmed_sequence) = | |
281 @{$o_trim->trim_singlet($sequence,$quality,$name,$class)}; | |
282 Function: Trim a singlet based on its quality. | |
283 Returns : a reference to an array containing the forward and reverse | |
284 Args : $sequence : A sequence | |
285 $quality : A _scalar_ of space-delimited quality values. | |
286 $name : the name of the sequence | |
287 $class : The class of the sequence. One of qw(singlet | |
288 singleton doublet pair multiplet) | |
289 Notes : At the time this was written the bioperl objects SeqWithQuality | |
290 and PrimaryQual did not exist. This is what is with the clumsy | |
291 passing of references and so on. I will rewrite this next time I | |
292 have to work with it. I also wasn't sure whether this function | |
293 should return just the trim points or the points and the sequence. | |
294 I decided that I always wanted both so that's how I implemented | |
295 it. | |
296 | |
297 =cut | |
298 | |
299 #' | |
300 sub trim_doublet { | |
301 my ($self,$sequence,$quality,$name,$class) = @_; | |
302 my @qual = split(' ',$quality); | |
303 my @points; | |
304 my $sequence_length = length($sequence); | |
305 my ($returnstring,$processed_sequence); | |
306 # smooth out the qualities | |
307 my $r_windows = &_sliding_window(\@qual,$self->{windowsize}); | |
308 # determine where the consensus sequence starts | |
309 my $offset = 0; | |
310 for (my $current = 0; $current<$sequence_length;$current++) { | |
311 if ($qual[$current] != 0) { | |
312 $offset = $current; | |
313 last; | |
314 } | |
315 } | |
316 # start_base required: r_quality,$windowsize,$phredvalue | |
317 my $start_base = $self->_get_start($r_windows,$self->{windowsize},$self->{phreds},$offset); | |
318 if ($start_base > ($sequence_length - 100)) { | |
319 $points[0] = ("FAILED"); | |
320 $points[1] = ("FAILED"); | |
321 return @points; | |
322 } | |
323 $points[0] = $start_base; | |
324 # | |
325 # whew! now for the end base | |
326 # | |
327 # required parameters: reference_to_windows,windowsize,$phredvalue,start_base | |
328 # | | |
329 # 010420 NOTE: We will no longer get the end base to avoid the Q/--\___/-- syndrome | |
330 my $end_base = $sequence_length; | |
331 my $start_of_trailing_zeros = &count_doublet_trailing_zeros(\@qual); | |
332 $points[1] = $end_base; | |
333 # CHAD : I don't think that it is a good idea to call chop_sequence here | |
334 # because chop_sequence also removes X's and N's and things | |
335 # and that is not always what is wanted | |
336 return @points; | |
337 } # end trim_doublet | |
338 | |
339 =head2 chop_sequence($name,$class,$sequence,@points) | |
340 | |
341 Title : chop_sequence($name,$class,$sequence,@points) | |
342 Usage : ($start_point,$end_point,$chopped_sequence) = | |
343 $o_trim->chop_sequence($name,$class,$sequence,@points); | |
344 Function: Chop a sequence based on its name, class, and sequence. | |
345 Returns : an array containing three scalars: | |
346 1- the start trim point | |
347 2- the end trim point | |
348 3- the chopped sequence | |
349 Args : | |
350 $name : the name of the sequence | |
351 $class : The class of the sequence. One of qw(singlet | |
352 singleton doublet pair multiplet) | |
353 $sequence : A sequence | |
354 @points : An array containing two elements- the first contains | |
355 the start trim point and the second conatines the end trim | |
356 point. | |
357 | |
358 =cut | |
359 | |
360 sub chop_sequence { | |
361 my ($self,$name,$class,$sequence,@points) = @_; | |
362 print("Coming into chop_sequence, \@points are @points\n"); | |
363 my $fdesig = $self->{'f_designator'}; | |
364 my $rdesig = $self->{'r_designator'}; | |
365 if (!$points[0] && !$points[1]) { | |
366 $sequence = "junk"; | |
367 return $sequence; | |
368 } | |
369 if ($class eq "singlet" && $name =~ /$fdesig$/) { | |
370 $sequence = substr($sequence,$points[0],$points[1]-$points[0]); | |
371 } | |
372 elsif ($class eq "singlet" && $name =~ /$rdesig$/) { | |
373 $sequence = substr($sequence,$points[0],$points[1]-$points[0]); | |
374 } | |
375 elsif ($class eq "singleton" && $name =~ /$fdesig$/) { | |
376 $sequence = substr($sequence,$points[0],$points[1]-$points[0]); | |
377 } | |
378 elsif ($class eq "singleton" && $name =~ /$rdesig$/) { | |
379 $sequence = substr($sequence,$points[0],$points[1]-$points[0]); | |
380 } | |
381 elsif ($class eq "doublet") { | |
382 $sequence = substr($sequence,$points[0],$points[1]-$points[0]); | |
383 } | |
384 # this is a _terrible_ to do this! i couldn't seem to find a better way | |
385 # i thought something like s/(^.*[Xx]{5,})//g; might work, but no go | |
386 # no time to find a fix! | |
387 my $length_before_trimming = length($sequence); | |
388 my $subs_Xs = $sequence =~ s/^.*[Xx]{5,}//g; | |
389 if ($subs_Xs) { | |
390 my $length_after_trimming = length($sequence); | |
391 my $number_Xs_trimmed = $length_before_trimming - $length_after_trimming; | |
392 $points[0] += $number_Xs_trimmed; | |
393 } | |
394 $length_before_trimming = length($sequence); | |
395 my $subs_Ns = $sequence =~ s/[Nn]{1,}$//g; | |
396 if ($subs_Ns) { | |
397 my $length_after_trimming = length($sequence); | |
398 my $number_Ns_trimmed = $length_before_trimming - $length_after_trimming; | |
399 $points[1] -= $number_Ns_trimmed; | |
400 $points[1] -= 1; | |
401 } | |
402 push @points,$sequence; | |
403 print("chop_sequence \@points are @points\n"); | |
404 return @points; | |
405 } | |
406 | |
407 =head2 _get_start($r_quals,$windowsize,$phreds,$offset) | |
408 | |
409 Title : _get_start($r_quals,$windowsize,$phreds,$offset) | |
410 Usage : $start_base = $self->_get_start($r_windows,5,20); | |
411 Function: Provide the start trim point for this sequence. | |
412 Returns : a scalar representing the start of the sequence | |
413 Args : | |
414 $r_quals : A reference to an array containing quality values. In | |
415 context, this array of values has been smoothed by then | |
416 sliding window-look ahead algorithm. | |
417 $windowsize : The size of the window used when the sliding window | |
418 look-ahead average was calculated. | |
419 $phreds : <fill in what this does here> | |
420 $offset : <fill in what this does here> | |
421 | |
422 =cut | |
423 | |
424 sub _get_start { | |
425 my ($self,$r_quals,$windowsize,$phreds,$offset) = @_; | |
426 print("Using $phreds phreds\n") if $self->verbose > 0; | |
427 # this is to help determine whether the sequence is good at all | |
428 my @quals = @$r_quals; | |
429 my ($count,$count2,$qualsum); | |
430 if ($offset) { $count = $offset; } else { $count = 0; } | |
431 # search along the length of the sequence | |
432 for (; ($count+$windowsize) <= scalar(@quals); $count++) { | |
433 # sum all of the quality values in this window. | |
434 my $cumulative=0; | |
435 for($count2 = $count; $count2 < $count+$windowsize; $count2++) { | |
436 if (!$quals[$count2]) { | |
437 # print("Quals don't exist here!\n"); | |
438 } | |
439 else { | |
440 $qualsum += $quals[$count2]; | |
441 # print("Incremented qualsum to ($qualsum)\n"); | |
442 } | |
443 $cumulative++; | |
444 } | |
445 # print("The sum of this window (starting at $count) is $qualsum. I counted $cumulative bases.\n"); | |
446 # if the total of windowsize * phreds is | |
447 if ($qualsum && $qualsum >= $windowsize*$phreds) { return $count; } | |
448 $qualsum = 0; | |
449 } | |
450 # if ($count > scalar(@quals)-$windowsize) { return; } | |
451 return $count; | |
452 } | |
453 | |
454 =head2 _get_end($r_qual,$windowsize,$phreds,$count) | |
455 | |
456 Title : _get_end($r_qual,$windowsize,$phreds,$count) | |
457 Usage : my $end_base = &_get_end($r_windows,20,20,$start_base); | |
458 Function: Get the end trim point for this sequence. | |
459 Returns : A scalar representing the end trim point for this sequence. | |
460 Args : | |
461 $r_qual : A reference to an array containing quality values. In | |
462 context, this array of values has been smoothed by then | |
463 sliding window-look ahead algorithm. | |
464 $windowsize : The size of the window used when the sliding window | |
465 look-ahead average was calculated. | |
466 $phreds : <fill in what this does here> | |
467 $count : Start looking for the end of the sequence here. | |
468 | |
469 =cut | |
470 | |
471 sub _get_end { | |
472 my ($r_qual,$windowsize,$phreds,$count) = @_; | |
473 my @quals = @$r_qual; | |
474 my $total_bases = scalar(@quals); | |
475 my ($count2,$qualsum,$end_of_quals,$bases_counted); | |
476 if (!$count) { $count=0; } | |
477 BASE: for (; $count < $total_bases; $count++) { | |
478 $bases_counted = 0; | |
479 $qualsum = 0; | |
480 POSITION: for($count2 = $count; $count2 < $total_bases; $count2++) { | |
481 $bases_counted++; | |
482 | |
483 if ($count2 == $total_bases-1) { | |
484 $qualsum += $quals[$count2]; | |
485 $bases_counted++; | |
486 last BASE; | |
487 } | |
488 elsif ($bases_counted == $windowsize) { | |
489 $qualsum += $quals[$count2]; | |
490 if ($qualsum < $bases_counted*$phreds) { | |
491 return $count+$bases_counted+$windowsize; | |
492 } | |
493 next BASE; | |
494 } | |
495 else { | |
496 $qualsum += $quals[$count2]; | |
497 } | |
498 } | |
499 if ($qualsum < $bases_counted*$phreds) { | |
500 return $count+$bases_counted+$windowsize; | |
501 } | |
502 else { } | |
503 $qualsum = 0; | |
504 } # end for | |
505 if ($end_of_quals) { | |
506 my $bases_for_average = $total_bases-$count2; | |
507 return $count2; | |
508 } | |
509 else { } | |
510 if ($qualsum) { } # print ("$qualsum\n"); | |
511 return $total_bases; | |
512 } # end get_end | |
513 | |
514 =head2 count_doublet_trailing_zeros($r_qual) | |
515 | |
516 Title : count_doublet_trailing_zeros($r_qual) | |
517 Usage : my $start_of_trailing_zeros = &count_doublet_trailing_zeros(\@qual); | |
518 Function: Find out when the trailing zero qualities start. | |
519 Returns : A scalar representing where the zeros start. | |
520 Args : A reference to an array of quality values. | |
521 Notes : Again, this should be rewritten to use PrimaryQual objects. | |
522 A more detailed explanation of why phrap puts these zeros here should | |
523 be written and placed here. Please email and hassle the author. | |
524 | |
525 | |
526 =cut | |
527 | |
528 sub count_doublet_trailing_zeros { | |
529 my ($r_qual) = shift; | |
530 my $number_of_trailing_zeros = 0; | |
531 my @qualities = @$r_qual; | |
532 for (my $current=scalar(@qualities);$current>0;$current--) { | |
533 if ($qualities[$current] && $qualities[$current] != 0) { | |
534 $number_of_trailing_zeros = scalar(@qualities)-$current; | |
535 return $current+1; | |
536 } | |
537 } | |
538 return scalar(@qualities); | |
539 } # end count_doublet_trailing_zeros | |
540 | |
541 =head2 _sliding_window($r_quals,$windowsize) | |
542 | |
543 Title : _sliding_window($r_quals,$windowsize) | |
544 Usage : my $r_windows = &_sliding_window(\@qual,$windowsize); | |
545 Function: Create a sliding window, look-forward-average on an array | |
546 of quality values. Used to smooth out differences in qualities. | |
547 Returns : A reference to an array containing the smoothed values. | |
548 Args : $r_quals: A reference to an array containing quality values. | |
549 $windowsize : The size of the sliding window. | |
550 Notes : This was written before PrimaryQual objects existed. They | |
551 should use that object but I haven't rewritten this yet. | |
552 | |
553 =cut | |
554 | |
555 #' | |
556 sub _sliding_window { | |
557 my ($r_quals,$windowsize) = @_; | |
558 my (@window,@quals,$qualsum,$count,$count2,$average,@averages,$bases_counted); | |
559 @quals = @$r_quals; | |
560 my $size_of_quality = scalar(@quals); | |
561 # do this loop for all of the qualities | |
562 for ($count=0; $count <= $size_of_quality; $count++) { | |
563 $bases_counted = 0; | |
564 BASE: for($count2 = $count; $count2 < $size_of_quality; $count2++) { | |
565 $bases_counted++; | |
566 # if the search hits the end of the averages, stop | |
567 # this is for the case near the end where bases remaining < windowsize | |
568 if ($count2 == $size_of_quality) { | |
569 $qualsum += $quals[$count2]; | |
570 last BASE; | |
571 } | |
572 # if the search hits the size of the window | |
573 elsif ($bases_counted == $windowsize) { | |
574 $qualsum += $quals[$count2]; | |
575 last BASE; | |
576 } | |
577 # otherwise add the quality value | |
578 unless (!$quals[$count2]) { | |
579 $qualsum += $quals[$count2]; | |
580 } | |
581 } | |
582 unless (!$qualsum || !$windowsize) { | |
583 $average = $qualsum / $bases_counted; | |
584 if (!$average) { $average = "0"; } | |
585 push @averages,$average; | |
586 } | |
587 $qualsum = 0; | |
588 } | |
589 # 02101 Yes, I repaired the mismatching numbers between averages and windows. | |
590 # print("There are ".scalar(@$r_quals)." quality values. They are @$r_quals\n"); | |
591 # print("There are ".scalar(@averages)." average values. They are @averages\n"); | |
592 return \@averages; | |
593 | |
594 } | |
595 | |
596 =head2 _print_formatted_qualities | |
597 | |
598 Title : _print_formatted_qualities(\@quals) | |
599 Usage : &_print_formatted_qualities(\@quals); | |
600 Returns : Nothing. Prints. | |
601 Args : A reference to an array containing quality values. | |
602 Notes : An internal procedure used in debugging. Prints out an array nicely. | |
603 | |
604 =cut | |
605 | |
606 sub _print_formatted_qualities { | |
607 my $rquals = shift; | |
608 my @qual = @$rquals; | |
609 for (my $count=0; $count<scalar(@qual) ; $count++) { | |
610 if (($count%10)==0) { print("\n$count\t"); } | |
611 if ($qual[$count]) { print ("$qual[$count]\t");} | |
612 else { print("0\t"); } | |
613 } | |
614 print("\n"); | |
615 } | |
616 | |
617 =head2 _get_end_old($r_qual,$windowsize,$phreds,$count) | |
618 | |
619 Title : _get_end_old($r_qual,$windowsize,$phreds,$count) | |
620 Usage : Deprecated. Don't use this! | |
621 Returns : Deprecated. Don't use this! | |
622 Args : Deprecated. Don't use this! | |
623 | |
624 =cut | |
625 | |
626 #' | |
627 sub _get_end_old { | |
628 my ($r_qual,$windowsize,$phreds,$count) = @_; | |
629 warn("Do Not Use this function (_get_end_old)"); | |
630 my $target = $windowsize*$phreds; | |
631 my @quals = @$r_qual; | |
632 my $total_bases = scalar(@quals); | |
633 my ($count2,$qualsum,$end_of_quals); | |
634 if (!$count) { $count=0; } | |
635 BASE: for (; $count < $total_bases; $count++) { | |
636 for($count2 = $count; $count2 < $count+$windowsize; $count2++) { | |
637 if ($count2 == scalar(@quals)-1) { | |
638 $qualsum += $quals[$count2]; | |
639 $end_of_quals = 1; | |
640 last BASE; | |
641 | |
642 } | |
643 $qualsum += $quals[$count2]; | |
644 } | |
645 if ($qualsum < $windowsize*$phreds) { | |
646 return $count+$windowsize; | |
647 } | |
648 $qualsum = 0; | |
649 } # end for | |
650 } # end get_end_old | |
651 | |
652 | |
653 # Autoload methods go after =cut, and are processed by the autosplit program. | |
654 | |
655 1; | |
656 __END__ |