Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqIO/scf.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: scf.pm,v 1.23 2002/11/01 11:16:25 heikki Exp $ | |
| 2 # | |
| 3 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved. | |
| 4 # This module is free software; you can redistribute it and/or | |
| 5 # modify it under the same terms as Perl itself. | |
| 6 # | |
| 7 # Copyright Chad Matsalla | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::SeqIO::scf - .scf file input/output stream | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 Do not use this module directly. Use it via the Bio::SeqIO class, see | |
| 20 L<Bio::SeqIO> for more information. | |
| 21 | |
| 22 =head1 DESCRIPTION | |
| 23 | |
| 24 This object can transform .scf files to and from | |
| 25 Bio::Seq::SeqWithQuality objects. Mechanisms are present to retrieve | |
| 26 trace data from scf files. | |
| 27 | |
| 28 =head1 FEEDBACK | |
| 29 | |
| 30 =head2 Mailing Lists | |
| 31 | |
| 32 | |
| 33 User feedback is an integral part of the evolution of this and other | |
| 34 Bioperl modules. Send your comments and suggestions preferably to one | |
| 35 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 36 | |
| 37 bioperl-l@bioperl.org - General discussion | |
| 38 http://www.bioperl.org/MailList.shtml - About the mailing lists | |
| 39 | |
| 40 =head2 Reporting Bugs | |
| 41 | |
| 42 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 43 the bugs and their resolution. Bug reports can be submitted via email | |
| 44 or the web: | |
| 45 | |
| 46 bioperl-bugs@bio.perl.org | |
| 47 http://bugzilla.bioperl.org/ | |
| 48 | |
| 49 =head1 AUTHOR Chad Matsalla | |
| 50 | |
| 51 Chad Matsalla | |
| 52 bioinformatics@dieselwurks.com | |
| 53 | |
| 54 =head1 CONTRIBUTORS | |
| 55 | |
| 56 Jason Stajich, jason@bioperl.org | |
| 57 Tony Cox, avc@sanger.ac.uk | |
| 58 Heikki Lehvaslaiho, heikki@ebi.ac.uk | |
| 59 | |
| 60 =head1 APPENDIX | |
| 61 | |
| 62 The rest of the documentation details each of the object | |
| 63 methods. Internal methods are usually preceded with a _ | |
| 64 | |
| 65 =cut | |
| 66 | |
| 67 # Let the code begin... | |
| 68 | |
| 69 package Bio::SeqIO::scf; | |
| 70 use vars qw(@ISA $DEFAULT_QUALITY); | |
| 71 use strict; | |
| 72 use Bio::SeqIO; | |
| 73 use Bio::Seq::SeqFactory; | |
| 74 require 'dumpvar.pl'; | |
| 75 | |
| 76 BEGIN { | |
| 77 $DEFAULT_QUALITY= 10; | |
| 78 } | |
| 79 | |
| 80 @ISA = qw(Bio::SeqIO); | |
| 81 | |
| 82 sub _initialize { | |
| 83 my($self,@args) = @_; | |
| 84 $self->SUPER::_initialize(@args); | |
| 85 if( ! defined $self->sequence_factory ) { | |
| 86 $self->sequence_factory(new Bio::Seq::SeqFactory | |
| 87 (-verbose => $self->verbose(), | |
| 88 -type => 'Bio::Seq::SeqWithQuality')); | |
| 89 } | |
| 90 } | |
| 91 | |
| 92 =head2 next_seq() | |
| 93 | |
| 94 Title : next_seq() | |
| 95 Usage : $scf = $stream->next_seq() | |
| 96 Function: returns the next scf sequence in the stream | |
| 97 Returns : Bio::Seq::SeqWithQuality object | |
| 98 Args : NONE | |
| 99 Notes : Fills the interface specification for SeqIO. | |
| 100 The SCF specification does not provide for having more then | |
| 101 one sequence in a given scf. So once the filehandle has been open | |
| 102 and passed to SeqIO don't expect to run this function more then | |
| 103 once on a given scf unless you embraced and extended the SCF | |
| 104 standard. (But that's just C R A Z Y talk, isn't it.) | |
| 105 | |
| 106 =cut | |
| 107 | |
| 108 #' | |
| 109 sub next_seq { | |
| 110 my ($self) = @_; | |
| 111 my ($seq, $seqc, $fh, $buffer, $offset, $length, $read_bytes, @read, | |
| 112 %names); | |
| 113 # set up a filehandle to read in the scf | |
| 114 $fh = $self->_filehandle(); | |
| 115 unless ($fh) { # simulate the <> function | |
| 116 if ( !fileno(ARGV) or eof(ARGV) ) { | |
| 117 return unless my $ARGV = shift; | |
| 118 open(ARGV,$ARGV) or | |
| 119 $self->throw("Could not open $ARGV for SCF stream reading $!"); | |
| 120 } | |
| 121 $fh = \*ARGV; | |
| 122 } | |
| 123 binmode $fh; # for the Win32/Mac crowds | |
| 124 return unless read $fh, $buffer, 128; # no exception; probably end of file | |
| 125 # the first thing to do is parse the header. This is common | |
| 126 # among all versions of scf. | |
| 127 $self->_set_header($buffer); | |
| 128 # the rest of the the information is different between the | |
| 129 # the different versions of scf. | |
| 130 my $byte = "n"; | |
| 131 if ($self->{'version'} lt "3.00") { | |
| 132 # first gather the trace information | |
| 133 $length = $self->{'samples'}*$self->{sample_size}*4; | |
| 134 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
| 135 if ($self->{sample_size} == 1) { | |
| 136 $byte = "c"; | |
| 137 } | |
| 138 @read = unpack "${byte}${length}",$buffer; | |
| 139 # these traces need to be split | |
| 140 $self->_set_v2_traces(\@read); | |
| 141 # now go and get the base information | |
| 142 $offset = $self->{bases_offset}; | |
| 143 $length = ($self->{bases} * 12); | |
| 144 seek $fh,$offset,0; | |
| 145 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
| 146 # now distill the information into its fractions. | |
| 147 $self->_set_v2_bases($buffer); | |
| 148 } else { | |
| 149 my $transformed_read; | |
| 150 foreach (qw(A C G T)) { | |
| 151 $length = $self->{'samples'}*$self->{sample_size}; | |
| 152 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
| 153 if ($self->{sample_size} == 1) { | |
| 154 $byte = "c"; | |
| 155 } | |
| 156 @read = unpack "${byte}${length}",$buffer; | |
| 157 # this little spurt of nonsense is because | |
| 158 # the trace values are given in the binary | |
| 159 # file as unsigned shorts but they really | |
| 160 # are signed. 30000 is an arbitrary number | |
| 161 # (will there be any traces with a given | |
| 162 # point greater then 30000? I hope not. | |
| 163 # once the read is read, it must be changed | |
| 164 # from relative | |
| 165 for (my $element=0; $element < scalar(@read); $element++) { | |
| 166 if ($read[$element] > 30000) { | |
| 167 $read[$element] = $read[$element] - 65536; | |
| 168 } | |
| 169 } | |
| 170 $transformed_read = $self->_delta(\@read,"backward"); | |
| 171 $self->{'traces'}->{$_} = join(' ',@{$transformed_read}); | |
| 172 } | |
| 173 # now go and get the peak index information | |
| 174 $offset = $self->{bases_offset}; | |
| 175 $length = ($self->{bases} * 4); | |
| 176 seek $fh,$offset,0; | |
| 177 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
| 178 $self->_set_v3_peak_indices($buffer); | |
| 179 # now go and get the accuracy information | |
| 180 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
| 181 $self->_set_v3_base_accuracies($buffer); | |
| 182 # OK, now go and get the base information. | |
| 183 $length = $self->{bases}; | |
| 184 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
| 185 $self->{'parsed'}->{'sequence'} = unpack("a$length",$buffer); | |
| 186 # now, finally, extract the calls from the accuracy information. | |
| 187 $self->_set_v3_quality($self); | |
| 188 } | |
| 189 # now go and get the comment information | |
| 190 $offset = $self->{comments_offset}; | |
| 191 seek $fh,$offset,0; | |
| 192 $length = $self->{comment_size}; | |
| 193 $buffer = $self->read_from_buffer($fh,$buffer,$length); | |
| 194 $self->_set_comments($buffer); | |
| 195 return $self->sequence_factory->create | |
| 196 (-seq => $self->{'parsed'}->{'sequence'}, | |
| 197 -qual => $self->{'parsed'}->{'qualities'}, | |
| 198 -id => $self->{'comments'}->{'NAME'} | |
| 199 ); | |
| 200 } | |
| 201 | |
| 202 | |
| 203 =head2 _set_v3_quality() | |
| 204 | |
| 205 Title : _set_v3_quality() | |
| 206 Usage : $self->_set_v3_quality() | |
| 207 Function: Set the base qualities from version3 scf's | |
| 208 Returns : Nothing. Alters $self. | |
| 209 Args : None. | |
| 210 Notes : | |
| 211 | |
| 212 =cut | |
| 213 | |
| 214 #' | |
| 215 sub _set_v3_quality { | |
| 216 my $self = shift; | |
| 217 my @bases = split//,$self->{'parsed'}->{'sequence'}; | |
| 218 my (@qualities,$currbase,$currqual,$counter); | |
| 219 for ($counter=0; $counter <= $#bases ; $counter++) { | |
| 220 $currbase = uc($bases[$counter]); | |
| 221 if ($currbase eq "A") { $currqual = $self->{'parsed'}->{'base_accuracies'}->{'A'}->[$counter]; } | |
| 222 elsif ($currbase eq "C") { $currqual = $self->{'parsed'}->{'base_accuracies'}->{'C'}->[$counter]; } | |
| 223 elsif ($currbase eq "G") { $currqual = $self->{'parsed'}->{'base_accuracies'}->{'G'}->[$counter]; } | |
| 224 elsif ($currbase eq "T") { $currqual = $self->{'parsed'}->{'base_accuracies'}->{'T'}->[$counter]; } | |
| 225 else { $currqual = "unknown"; } | |
| 226 push @qualities,$currqual; | |
| 227 } | |
| 228 $self->{'parsed'}->{'qualities'} = \@qualities; | |
| 229 } | |
| 230 | |
| 231 =head2 _set_v3_peak_indices($buffer) | |
| 232 | |
| 233 Title : _set_v3_peak_indices($buffer) | |
| 234 Usage : $self->_set_v3_peak_indices($buffer); | |
| 235 Function: Unpacks the base accuracies for version3 scf | |
| 236 Returns : Nothing. Alters $self | |
| 237 Args : A scalar containing binary data. | |
| 238 Notes : | |
| 239 | |
| 240 =cut | |
| 241 | |
| 242 sub _set_v3_peak_indices { | |
| 243 my ($self,$buffer) = @_; | |
| 244 my $length = length($buffer); | |
| 245 my ($offset,@read,@positions); | |
| 246 @read = unpack "N$length",$buffer; | |
| 247 $self->{'parsed'}->{'peak_indices'} = join(' ',@read); | |
| 248 } | |
| 249 | |
| 250 =head2 _set_v3_base_accuracies($buffer) | |
| 251 | |
| 252 Title : _set_v3_base_accuracies($buffer) | |
| 253 Usage : $self->_set_v3_base_accuracies($buffer) | |
| 254 Function: Set the base accuracies for version 3 scf's | |
| 255 Returns : Nothing. Alters $self. | |
| 256 Args : A scalar containing binary data. | |
| 257 Notes : | |
| 258 | |
| 259 =cut | |
| 260 | |
| 261 #' | |
| 262 sub _set_v3_base_accuracies { | |
| 263 my ($self,$buffer) = @_; | |
| 264 my $length = length($buffer); | |
| 265 my $qlength = $length/4; | |
| 266 my $offset = 0; | |
| 267 my (@qualities,@sorter,$counter,$round,$last_base); | |
| 268 foreach (qw(A C G T)) { | |
| 269 my @read; | |
| 270 $last_base = $offset + $qlength; | |
| 271 for (;$offset < $last_base; $offset += $qlength) { | |
| 272 @read = unpack "c$qlength", substr($buffer,$offset,$qlength); | |
| 273 $self->{'parsed'}->{'base_accuracies'}->{"$_"} = \@read; | |
| 274 } | |
| 275 } | |
| 276 } | |
| 277 | |
| 278 | |
| 279 =head2 _set_comments($buffer) | |
| 280 | |
| 281 Title : _set_comments($buffer) | |
| 282 Usage : $self->_set_comments($buffer); | |
| 283 Function: Gather the comments section from the scf and parse it into its | |
| 284 components. | |
| 285 Returns : Nothing. Modifies $self. | |
| 286 Args : The buffer. It is expected that the buffer contains a binary | |
| 287 string for the comments section of an scf file according to | |
| 288 the scf file specifications. | |
| 289 Notes : None. Works like Jello. | |
| 290 | |
| 291 =cut | |
| 292 | |
| 293 sub _set_comments { | |
| 294 my ($self,$buffer) = @_; | |
| 295 my $size = length($buffer); | |
| 296 my $comments_retrieved = unpack "a$size",$buffer; | |
| 297 $comments_retrieved =~ s/\0//; | |
| 298 my @comments_split = split/\n/,$comments_retrieved; | |
| 299 if (@comments_split) { | |
| 300 foreach (@comments_split) { | |
| 301 /(\w+)=(.*)/; | |
| 302 if ($1 && $2) { | |
| 303 $self->{'comments'}->{$1} = $2; | |
| 304 } | |
| 305 } | |
| 306 } | |
| 307 return; | |
| 308 } | |
| 309 | |
| 310 =head2 _set_header() | |
| 311 | |
| 312 Title : _set_header($buffer) | |
| 313 Usage : $self->_set_header($buffer); | |
| 314 Function: Gather the header section from the scf and parse it into its | |
| 315 components. | |
| 316 Returns : Nothing. Modifies $self. | |
| 317 Args : The buffer. It is expected that the buffer contains a binary | |
| 318 string for the header section of an scf file according to the | |
| 319 scf file specifications. | |
| 320 Notes : None. | |
| 321 | |
| 322 =cut | |
| 323 | |
| 324 sub _set_header { | |
| 325 my ($self,$buffer) = @_; | |
| 326 ($self->{'scf'}, | |
| 327 $self->{'samples'}, | |
| 328 $self->{'sample_offset'}, | |
| 329 $self->{'bases'}, | |
| 330 $self->{'bases_left_clip'}, | |
| 331 $self->{'bases_right_clip'}, | |
| 332 $self->{'bases_offset'}, | |
| 333 $self->{'comment_size'}, | |
| 334 $self->{'comments_offset'}, | |
| 335 $self->{'version'}, | |
| 336 $self->{'sample_size'}, | |
| 337 $self->{'code_set'}, | |
| 338 @{$self->{'header_spare'}} ) = unpack "a4 NNNNNNNN a4 NN N20", $buffer; | |
| 339 return; | |
| 340 | |
| 341 } | |
| 342 | |
| 343 =head2 _set_v2_bases($buffer) | |
| 344 | |
| 345 Title : _set_v2_bases($buffer) | |
| 346 Usage : $self->_set_v2_bases($buffer); | |
| 347 Function: Gather the bases section from the scf and parse it into its | |
| 348 components. | |
| 349 Returns : Nothing. Modifies $self. | |
| 350 Args : The buffer. It is expected that the buffer contains a binary | |
| 351 string for the bases section of an scf file according to the | |
| 352 scf file specifications. | |
| 353 Notes : None. | |
| 354 | |
| 355 =cut | |
| 356 | |
| 357 sub _set_v2_bases { | |
| 358 my ($self,$buffer) = @_; | |
| 359 my $length = length($buffer); | |
| 360 my ($offset2,$currbuff,$currbase,$currqual,$sequence,@qualities,@indices); | |
| 361 my @read; | |
| 362 for ($offset2=0;$offset2<$length;$offset2+=12) { | |
| 363 @read = unpack "N C C C C a C3", substr($buffer,$offset2,$length); | |
| 364 push @indices,$read[0]; | |
| 365 $currbase = uc($read[5]); | |
| 366 if ($currbase eq "A") { $currqual = $read[1]; } | |
| 367 elsif ($currbase eq "C") { $currqual = $read[2]; } | |
| 368 elsif ($currbase eq "G") { $currqual = $read[3]; } | |
| 369 elsif ($currbase eq "T") { $currqual = $read[4]; } | |
| 370 else { $currqual = "UNKNOWN"; } | |
| 371 $sequence .= $currbase; | |
| 372 push @qualities,$currqual; | |
| 373 } | |
| 374 unless (!@indices) { | |
| 375 $self->{'parsed'}->{'peak_indices'} = join(' ',@indices); | |
| 376 } | |
| 377 $self->{'parsed'}->{'sequence'} = $sequence; | |
| 378 unless (!@qualities) { | |
| 379 $self->{'parsed'}->{'qualities'} = join(' ',@qualities); | |
| 380 } | |
| 381 } | |
| 382 | |
| 383 =head2 _set_v2_traces(\@traces_array) | |
| 384 | |
| 385 Title : _set_v2_traces(\@traces_array) | |
| 386 Usage : $self->_set_v2_traces(\@traces_array); | |
| 387 Function: Parses an scf Version2 trace array into its base components. | |
| 388 Returns : Nothing. Modifies $self. | |
| 389 Args : A reference to an array of the unpacked traces section of an | |
| 390 scf version2 file. | |
| 391 | |
| 392 =cut | |
| 393 | |
| 394 sub _set_v2_traces { | |
| 395 my ($self,$rread) = @_; | |
| 396 my @read = @$rread; | |
| 397 my $array = 0; | |
| 398 for (my $offset2 = 0; $offset2< scalar(@read); $offset2+=4) { | |
| 399 if ($array) { | |
| 400 push @{$self->{'traces'}->{'A'}},$read[$offset2]; | |
| 401 push @{$self->{'traces'}->{'C'}},$read[$offset2+1]; | |
| 402 push @{$self->{'traces'}->{'G'}},$read[$offset2+3]; | |
| 403 push @{$self->{'traces'}->{'T'}},$read[$offset2+2]; | |
| 404 } else { | |
| 405 $self->{'traces'}->{'A'} .= " ".$read[$offset2]; | |
| 406 $self->{'traces'}->{'C'} .= " ".$read[$offset2+1]; | |
| 407 $self->{'traces'}->{'G'} .= " ".$read[$offset2+2]; | |
| 408 $self->{'traces'}->{'T'} .= " ".$read[$offset2+3]; | |
| 409 } | |
| 410 } | |
| 411 return; | |
| 412 } | |
| 413 | |
| 414 =head2 get_trace($base_channel) | |
| 415 | |
| 416 Title : get_trace($base_channel) | |
| 417 Usage : @a_trace = @{$obj->get_trace("A")}; | |
| 418 Function: Return the trace data for the given base. | |
| 419 Returns : A reference to an array containing the trace data for the | |
| 420 given base. | |
| 421 Args : A,C,G, or T. Any other input throws. | |
| 422 Notes : | |
| 423 | |
| 424 =cut | |
| 425 | |
| 426 sub get_trace { | |
| 427 my ($self,$base_channel) = @_; | |
| 428 $base_channel =~ tr/a-z/A-Z/; | |
| 429 if ($base_channel !~ /A|T|G|C/) { | |
| 430 $self->throw("You tried to ask for a base channel that wasn't A,T,G, or C. Ask for one of those next time."); | |
| 431 } elsif ($base_channel) { | |
| 432 my @temp = split(' ',$self->{'traces'}->{$base_channel}); | |
| 433 return \@temp; | |
| 434 } | |
| 435 } | |
| 436 | |
| 437 =head2 get_peak_indices() | |
| 438 | |
| 439 Title : get_peak_indices() | |
| 440 Usage : @a_trace = @{$obj->get_peak_indices()}; | |
| 441 Function: Return the peak indices for this scf. | |
| 442 Returns : A reference to an array containing the peak indices for this scf. | |
| 443 Args : None. | |
| 444 Notes : | |
| 445 | |
| 446 =cut | |
| 447 | |
| 448 sub get_peak_indices { | |
| 449 my ($self) = shift; | |
| 450 my @temp = split(' ',$self->{'parsed'}->{'peak_indices'}); | |
| 451 return \@temp; | |
| 452 } | |
| 453 | |
| 454 | |
| 455 =head2 get_header() | |
| 456 | |
| 457 Title : get_header() | |
| 458 Usage : %header = %{$obj->get_header()}; | |
| 459 Function: Return the header for this scf. | |
| 460 Returns : A reference to a hash containing the header for this scf. | |
| 461 Args : None. | |
| 462 Notes : | |
| 463 | |
| 464 =cut | |
| 465 | |
| 466 sub get_header { | |
| 467 my ($self) = shift; | |
| 468 my %header; | |
| 469 foreach (qw(scf samples sample_offset bases bases_left_clip | |
| 470 bases_right_clip bases_offset comment_size comments_offset | |
| 471 version sample_size code_set peak_indices)) { | |
| 472 $header{"$_"} = $self->{"$_"}; | |
| 473 } | |
| 474 return \%header; | |
| 475 } | |
| 476 | |
| 477 =head2 _dump_traces_incoming($transformed) | |
| 478 | |
| 479 Title : _dump_traces_incoming("transformed") | |
| 480 Usage : &_dump_traces($ra,$rc,$rg,$rt); | |
| 481 Function: Used in debugging. Prints all traces one beside each other. | |
| 482 Returns : Nothing. | |
| 483 Args : References to the arrays containing the traces for A,C,G,T. | |
| 484 Notes : Beats using dumpValue, I'll tell ya. Much better then using | |
| 485 join' ' too. | |
| 486 - if a scalar is included as an argument (any scalar), this | |
| 487 procedure will dump the _delta'd trace. If you don't know what | |
| 488 that means you should not be using this. | |
| 489 | |
| 490 =cut | |
| 491 | |
| 492 #' | |
| 493 sub _dump_traces_incoming { | |
| 494 my ($self) = @_; | |
| 495 my (@sA,@sT,@sG,@sC); | |
| 496 # @sA = @{$self->{'traces'}->{'A'}}; | |
| 497 # @sC = @{$self->{'traces'}->{'C'}}; | |
| 498 # @sG = @{$self->{'traces'}->{'G'}}; | |
| 499 # @sT = @{$self->{'traces'}->{'T'}}; | |
| 500 @sA = @{$self->get_trace('A')}; | |
| 501 @sC = @{$self->get_trace('C')}; | |
| 502 @sG = @{$self->get_trace('G')}; | |
| 503 @sT = @{$self->get_trace('t')}; | |
| 504 print ("Count\ta\tc\tg\tt\n"); | |
| 505 for (my $curr=0; $curr < scalar(@sG); $curr++) { | |
| 506 print("$curr\t$sA[$curr]\t$sC[$curr]\t$sG[$curr]\t$sT[$curr]\n"); | |
| 507 } | |
| 508 return; | |
| 509 } | |
| 510 | |
| 511 =head2 _dump_traces_outgoing($transformed) | |
| 512 | |
| 513 Title : _dump_traces_outgoing("transformed") | |
| 514 Usage : &_dump_traces_outgoing(($ra,$rc,$rg,$rt); | |
| 515 Function: Used in debugging. Prints all traces one beside each other. | |
| 516 Returns : Nothing. | |
| 517 Args : References to the arrays containing the traces for A,C,G,T. | |
| 518 Notes : Beats using dumpValue, I\'ll tell ya. Much better then using | |
| 519 join' ' too. | |
| 520 - if a scalar is included as an argument (any scalar), this | |
| 521 procedur will dump the _delta'd trace. If you don't know what | |
| 522 that means you should not be using this. | |
| 523 | |
| 524 =cut | |
| 525 | |
| 526 sub _dump_traces_outgoing { | |
| 527 my ($self,$transformed) = @_; | |
| 528 my (@sA,@sT,@sG,@sC); | |
| 529 if ($transformed) { | |
| 530 @sA = @{$self->{'text'}->{'t_samples_a'}}; | |
| 531 @sC = @{$self->{'text'}->{'t_samples_c'}}; | |
| 532 @sG = @{$self->{'text'}->{'t_samples_g'}}; | |
| 533 @sT = @{$self->{'text'}->{'t_samples_t'}}; | |
| 534 } | |
| 535 else { | |
| 536 @sA = @{$self->{'text'}->{'samples_a'}}; | |
| 537 @sC = @{$self->{'text'}->{'samples_c'}}; | |
| 538 @sG = @{$self->{'text'}->{'samples_g'}}; | |
| 539 @sT = @{$self->{'text'}->{'samples_t'}}; | |
| 540 } | |
| 541 print ("Count\ta\tc\tg\tt\n"); | |
| 542 for (my $curr=0; $curr < scalar(@sG); $curr++) { | |
| 543 print("$curr\t$sA[$curr]\t$sC[$curr]\t$sG[$curr]\t$sT[$curr]\n"); | |
| 544 } | |
| 545 return; | |
| 546 } | |
| 547 | |
| 548 =head2 write_seq | |
| 549 | |
| 550 Title : write_seq(-SeqWithQuality => $swq, <comments>) | |
| 551 Usage : $obj->write_seq( -SeqWithQuality => $swq, | |
| 552 -version => 2, | |
| 553 -CONV => "Bioperl-Chads Mighty SCF writer."); | |
| 554 Function: Write out an scf. | |
| 555 Returns : Nothing. | |
| 556 Args : Requires: a reference to a SeqWithQuality object to form the | |
| 557 basis for the scf. | |
| 558 if -version is provided, it should be "2" or "3". A SCF of that | |
| 559 version will be written. | |
| 560 Any other arguments are assumed to be comments and are put into | |
| 561 the comments section of the scf. Read the specifications for scf | |
| 562 to decide what might be good to put in here. | |
| 563 | |
| 564 Notes : | |
| 565 Someday: (All of this stuff is easy easy easy I just don't have | |
| 566 the requirement or the time.) | |
| 567 - Change the peak scaling factor? | |
| 568 - Change the width of the peak? | |
| 569 - Change the overlap between peaks? | |
| 570 | |
| 571 =cut | |
| 572 | |
| 573 #' | |
| 574 sub write_seq { | |
| 575 my ($self,%args) = @_; | |
| 576 my %comments; | |
| 577 my ($label,$arg); | |
| 578 | |
| 579 my ($swq) = $self->_rearrange([qw(SEQWITHQUALITY)], %args); | |
| 580 unless (ref($swq) eq "Bio::Seq::SeqWithQuality") { | |
| 581 $self->throw("You must pass a Bio::Seq::SeqWithQuality object to write_seq as a parameter named \"SeqWithQuality\""); | |
| 582 } | |
| 583 # verify that there is some sequence or some qualities | |
| 584 # If the $swq with quality has no qualities, set all qualities to 0. | |
| 585 # If the $swq has no sequence, set the sequence to N\'s. | |
| 586 $self->_fill_missing_data($swq); | |
| 587 | |
| 588 # all of the rest of the arguments are comments for the scf | |
| 589 foreach $arg (sort keys %args) { | |
| 590 next if ($arg =~ /SeqWithQuality/i); | |
| 591 ($label = $arg) =~ s/^\-//; | |
| 592 $comments{$label} = $args{$arg}; | |
| 593 } | |
| 594 if (!$comments{'NAME'}) { $comments{'NAME'} = $swq->id(); } | |
| 595 # HA! Bwahahahaha. | |
| 596 $comments{'CONV'} = "Bioperl-Chads Mighty SCF writer." unless defined $comments{'CONV'}; | |
| 597 # now deal with the version of scf they want to write | |
| 598 if ($comments{version}) { | |
| 599 if ($comments{version} != 2 && $comments{version} != 3) { | |
| 600 $self->warn("This module can only write version 2.0 or 3.0 scf's. Writing a version 2.0 scf by default."); | |
| 601 $comments{version} = "2.00"; | |
| 602 } | |
| 603 if ($comments{'version'} > 2) { | |
| 604 $comments{'version'} = "3.00"; | |
| 605 } | |
| 606 } | |
| 607 else { | |
| 608 $comments{'version'} = "2.00"; | |
| 609 } | |
| 610 | |
| 611 | |
| 612 | |
| 613 # set a few things in the header | |
| 614 $self->{'header'}->{'magic'} = ".scf"; | |
| 615 $self->{'header'}->{'sample_size'} = "2"; | |
| 616 $self->{'header'}->{'bases'} = length($swq->seq()); | |
| 617 $self->{'header'}->{'bases_left_clip'} = "0"; | |
| 618 $self->{'header'}->{'bases_right_clip'} = "0"; | |
| 619 $self->{'header'}->{'version'} = $comments{'version'}; | |
| 620 $self->{'header'}->{'sample_size'} = "2"; | |
| 621 $self->{'header'}->{'code_set'} = "9"; | |
| 622 @{$self->{'header'}->{'spare'}} = qw(0 0 0 0 0 0 0 0 0 0 | |
| 623 0 0 0 0 0 0 0 0 0 0); | |
| 624 | |
| 625 # create the binary for the comments and file it in $self->{'binaries'}->{'comments'} | |
| 626 $self->_set_binary_comments(\%comments); | |
| 627 # create the binary and the strings for the traces, bases, offsets (if necessary), and accuracies (if necessary) | |
| 628 $self->_set_binary_tracesbases($comments{'version'},$swq->seq(),$swq->qual()); | |
| 629 | |
| 630 # now set more things in the header | |
| 631 $self->{'header'}->{'samples_offset'} = "128"; | |
| 632 | |
| 633 my ($b_base_offsets,$b_base_accuracies,$samples_size,$bases_size); | |
| 634 # | |
| 635 # version 2 | |
| 636 # | |
| 637 if ($self->{'header'}->{'version'} == 2) { | |
| 638 $samples_size = $self->{'header'}->{'samples'} * 4 * | |
| 639 $self->{'header'}->{'sample_size'}; | |
| 640 $bases_size = length($swq->seq()) * 12; | |
| 641 $self->{'header'}->{'bases_offset'} = 128 + length($self->{'binaries'}->{'samples_all'}); | |
| 642 $self->{'header'}->{'comments_offset'} = 128 + length($self->{'binaries'}->{'samples_all'}) + length($self->{'binaries'}->{'v2_bases'}); | |
| 643 $self->{'header'}->{'comments_size'} = length($self->{'binaries'}->{'comments'}); | |
| 644 $self->{'header'}->{'private_size'} = "0"; | |
| 645 $self->{'header'}->{'private_offset'} = 128 + $samples_size + | |
| 646 $bases_size + $self->{'header'}->{'comments_size'}; | |
| 647 } | |
| 648 else { | |
| 649 $self->{'header'}->{'bases_offset'} = 128 + length($self->{'binaries'}->{'samples_all'}); | |
| 650 $self->{'header'}->{'comments_size'} = length($self->{'binaries'}->{'comments'}); | |
| 651 # this is: | |
| 652 # bases_offset + base_offsets + accuracies + called_bases + reserved | |
| 653 $self->{'header'}->{'comments_offset'} = $self->{'header'}->{'bases_offset'} + 4*$self->{header}->{'bases'} + 4*$self->{header}->{'bases'} + $self->{header}->{'bases'} + 3*$self->{header}->{'bases'}; | |
| 654 $self->{'header'}->{'private_size'} = "0"; | |
| 655 $self->{'header'}->{'private_offset'} = $self->{'header'}->{'comments_offset'} + $self->{'header'}->{'comments_size'}; | |
| 656 } | |
| 657 | |
| 658 $self->_set_binary_header(); | |
| 659 | |
| 660 # should something better be done rather then returning after | |
| 661 # writing? I don't do any exception trapping here | |
| 662 if ($comments{'version'} == 2) { | |
| 663 # print ("Lengths:\n"); | |
| 664 # print("Header : ".length($self->{'binaries'}->{'header'})."\n"); | |
| 665 # print("Traces : ".length($self->{'binaries'}->{'samples_all'})."\n"); | |
| 666 # print("Bases : ".length($self->{'binaries'}->{'v2_bases'})."\n"); | |
| 667 # print("Comments: ".length($self->{'binaries'}->{'comments'})."\n"); | |
| 668 $self->_print ($self->{'binaries'}->{'header'}) or return; | |
| 669 $self->_print ($self->{'binaries'}->{'samples_all'}) or return; | |
| 670 $self->_print ($self->{'binaries'}->{'v2_bases'}) or return; | |
| 671 $self->_print ($self->{'binaries'}->{'comments'}) or return; | |
| 672 } | |
| 673 elsif ($comments{'version'} ==3) { | |
| 674 # print ("Lengths:\n"); | |
| 675 # print("Header : ".length($self->{'binaries'}->{'header'})."\n"); | |
| 676 # print("Traces : ".length($self->{'binaries'}->{'samples_all'})."\n"); | |
| 677 # print("Offsets : ".length($self->{'binaries'}->{'v3_peak_offsets'})."\n"); | |
| 678 # print("Accuracy: ".length($self->{'binaries'}->{'v3_accuracies_all'})."\n"); | |
| 679 # print("Bases : ".length($self->{'binaries'}->{'v3_called_bases'})."\n"); | |
| 680 # print("Reserved: ".length($self->{'binaries'}->{'v3_reserved'})."\n"); | |
| 681 # print("Comments: ".length($self->{'binaries'}->{'comments'})."\n"); | |
| 682 $self->{'header'}->{'comments_offset'} = | |
| 683 128+length($self->{'binaries'}->{'samples_all'})+ | |
| 684 length($self->{'binaries'}->{'v3_peak_offsets'})+ | |
| 685 length($self->{'binaries'}->{'v3_accuracies_all'})+ | |
| 686 length($self->{'binaries'}->{'v3_called_bases'})+ | |
| 687 length($self->{'binaries'}->{'v3_reserved'}); | |
| 688 $self->{'header'}->{'spare'}->[1] = | |
| 689 $self->{'header'}->{'comments_offset'} + | |
| 690 length($self->{'binaries'}->{'comments'}); | |
| 691 $self->_set_binary_header(); | |
| 692 $self->_print ($self->{'binaries'}->{'header'}) or print("Couldn't write header\n"); | |
| 693 $self->_print ($self->{'binaries'}->{'samples_all'}) or print("Couldn't write samples\n"); | |
| 694 $self->_print ($self->{'binaries'}->{'v3_peak_offsets'}) or print("Couldn't write peak offsets\n"); | |
| 695 $self->_print ($self->{'binaries'}->{'v3_accuracies_all'}) or print("Couldn't write accuracies\n"); | |
| 696 $self->_print ($self->{'binaries'}->{'v3_called_bases'}) or print("Couldn't write called_bases\n"); | |
| 697 $self->_print ($self->{'binaries'}->{'v3_reserved'}) or print("Couldn't write reserved\n"); | |
| 698 $self->_print ($self->{'binaries'}->{'comments'}) or print ("Couldn't write comments\n"); | |
| 699 } | |
| 700 | |
| 701 # kinda unnecessary, given the close() below, but maybe that'll go | |
| 702 # away someday. | |
| 703 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
| 704 | |
| 705 $self->close(); | |
| 706 } | |
| 707 | |
| 708 =head2 _set_binary_header() | |
| 709 | |
| 710 Title : _set_binary_header(); | |
| 711 Usage : $self->_set_binary_header(); | |
| 712 Function: Provide the binary string that will be used as the header for | |
| 713 a scfv2 document. | |
| 714 Returns : A binary string. | |
| 715 Args : None. Uses the entries in the $self->{'header'} hash. These | |
| 716 are set on construction of the object (hopefully correctly!). | |
| 717 Notes : | |
| 718 | |
| 719 =cut | |
| 720 | |
| 721 sub _set_binary_header { | |
| 722 my ($self) = shift; | |
| 723 my $binary = pack "a4 NNNNNNNN a4 NN N20", | |
| 724 ( | |
| 725 $self->{'header'}->{'magic'}, | |
| 726 $self->{'header'}->{'samples'}, | |
| 727 $self->{'header'}->{'samples_offset'}, | |
| 728 $self->{'header'}->{'bases'}, | |
| 729 $self->{'header'}->{'bases_left_clip'}, | |
| 730 $self->{'header'}->{'bases_right_clip'}, | |
| 731 $self->{'header'}->{'bases_offset'}, | |
| 732 $self->{'header'}->{'comments_size'}, | |
| 733 $self->{'header'}->{'comments_offset'}, | |
| 734 $self->{'header'}->{'version'}, | |
| 735 $self->{'header'}->{'sample_size'}, | |
| 736 $self->{'header'}->{'code_set'}, | |
| 737 @{$self->{'header'}->{'spare'}}); | |
| 738 $self->{'binaries'}->{'header'} = $binary; | |
| 739 } | |
| 740 | |
| 741 =head2 _set_binary_tracesbases($version,$sequence,$ref_quality) | |
| 742 | |
| 743 Title : _set_binary_tracesbases($version,$sequence,$ref_quality) | |
| 744 Usage : $self->_set_binary_tracesbases($version,$sequence, | |
| 745 $ref_quality); | |
| 746 Function: Constructs the trace and base strings for all scfs | |
| 747 Returns : Nothing. Alters self. | |
| 748 Args : $version - "2" or "3" | |
| 749 $sequence - a scalar containing arbitrary sequence data | |
| 750 $ref_quality - a reference to an array containing quality | |
| 751 values | |
| 752 Notes : This is a really complicated thing. | |
| 753 | |
| 754 =cut | |
| 755 | |
| 756 sub _set_binary_tracesbases { | |
| 757 my ($self,$version,$sequence,$rqual) = @_; | |
| 758 $sequence =~ tr/a-z/A-Z/; | |
| 759 $self->{'info'}->{'sequence'} = $sequence; | |
| 760 $self->{'info'}->{'sequence_length'} = length($sequence); | |
| 761 my @quals = @$rqual; | |
| 762 # build the ramp for the first base. | |
| 763 # a ramp looks like this "1 4 13 29 51 71 80 71 51 29 13 4 1" times the quality score. | |
| 764 # REMEMBER: A C G T | |
| 765 # note to self-> smooth this thing out a bit later | |
| 766 @{$self->{'text'}->{'ramp'}} = qw( 1 4 13 29 51 75 80 75 51 29 13 4 1 ); | |
| 767 # the width of the ramp | |
| 768 $self->{'text'}->{'ramp_width'} = scalar(@{$self->{'text'}->{'ramp'}}); | |
| 769 # how far should the peaks overlap? | |
| 770 $self->{'text'}->{'ramp_overlap'} = 1; | |
| 771 # where should the peaks be located? | |
| 772 $self->{'text'}->{'peak_at'} = 7; | |
| 773 $self->{'text'}->{'ramp_total_length'} = | |
| 774 $self->{'info'}->{'sequence_length'} * $self->{'text'}->{'ramp_width'} | |
| 775 - $self->{'info'}->{'sequence_length'} * $self->{'text'}->{'ramp_overlap'}; | |
| 776 # create some empty arrays | |
| 777 # my (@sam_a,@sam_c,@sam_g,@sam_t,$pos); | |
| 778 my $pos; | |
| 779 my $total_length = $self->{'text'}->{ramp_total_length}; | |
| 780 for ($pos=0;$pos<=$total_length;$pos++) { | |
| 781 $self->{'text'}->{'samples_a'}[$pos] = $self->{'text'}->{'samples_c'}[$pos] | |
| 782 = $self->{'text'}->{'samples_g'}[$pos] = $self->{'text'}->{'samples_t'}[$pos] = "0"; | |
| 783 } | |
| 784 # $self->_dump_traces(); | |
| 785 # now populate them | |
| 786 my ($current_base,$place_base_at,$peak_quality,$ramp_counter,$current_ramp,$ramp_position); | |
| 787 my $sequence_length = $self->{'info'}->{'sequence_length'}; | |
| 788 my $half_ramp = int($self->{'text'}->{'ramp_width'}/2); | |
| 789 for ($pos = 0; $pos<$sequence_length;$pos++) { | |
| 790 $current_base = substr($self->{'info'}->{'sequence'},$pos,1); | |
| 791 # where should the peak for this base be placed? Modeled after a mktrace scf | |
| 792 $place_base_at = ($pos * $self->{'text'}->{'ramp_width'}) - | |
| 793 ($pos * $self->{'text'}->{'ramp_overlap'}) - | |
| 794 $half_ramp + $self->{'text'}->{'ramp_width'} - 1; | |
| 795 push @{$self->{'text'}->{'v3_peak_offsets'}},$place_base_at; | |
| 796 $peak_quality = $quals[$pos]; | |
| 797 if ($current_base eq "A") { | |
| 798 $ramp_position = $place_base_at - $half_ramp; | |
| 799 for ($current_ramp = 0; $current_ramp < $self->{'text'}->{'ramp_width'}; $current_ramp++) { | |
| 800 $self->{'text'}->{'samples_a'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
| 801 } | |
| 802 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,$peak_quality,0,0,0,$current_base,0,0,0); | |
| 803 push @{$self->{'text'}->{'v3_base_accuracy_a'}},$peak_quality; | |
| 804 foreach (qw(g c t)) { | |
| 805 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
| 806 } | |
| 807 } | |
| 808 elsif ($current_base eq "C") { | |
| 809 $ramp_position = $place_base_at - $half_ramp; | |
| 810 for ($current_ramp = 0; $current_ramp < $self->{'text'}->{'ramp_width'}; $current_ramp++) { | |
| 811 $self->{'text'}->{'samples_c'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
| 812 } | |
| 813 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,0,$peak_quality,0,0,$current_base,0,0,0); | |
| 814 push @{$self->{'text'}->{'v3_base_accuracy_c'}},$peak_quality; | |
| 815 foreach (qw(g a t)) { | |
| 816 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
| 817 } | |
| 818 } elsif ($current_base eq "G") { | |
| 819 $ramp_position = $place_base_at - $half_ramp; | |
| 820 for ($current_ramp = 0; | |
| 821 $current_ramp < $self->{'text'}->{'ramp_width'}; | |
| 822 $current_ramp++) { | |
| 823 $self->{'text'}->{'samples_g'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
| 824 } | |
| 825 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,0,0,$peak_quality,0,$current_base,0,0,0); | |
| 826 push @{$self->{'text'}->{"v3_base_accuracy_g"}},$peak_quality; | |
| 827 foreach (qw(a c t)) { | |
| 828 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
| 829 } | |
| 830 } | |
| 831 elsif( $current_base eq "T" ) { | |
| 832 $ramp_position = $place_base_at - $half_ramp; | |
| 833 for ($current_ramp = 0; $current_ramp < $self->{'text'}->{'ramp_width'}; $current_ramp++) { | |
| 834 $self->{'text'}->{'samples_t'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
| 835 } | |
| 836 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,0,0,0,$peak_quality,$current_base,0,0,0); | |
| 837 push @{$self->{'text'}->{'v3_base_accuracy_t'}},$peak_quality; | |
| 838 foreach (qw(g c a)) { | |
| 839 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
| 840 } | |
| 841 } elsif ($current_base eq "N") { | |
| 842 $ramp_position = $place_base_at - $half_ramp; | |
| 843 for ($current_ramp = 0; | |
| 844 $current_ramp < $self->{'text'}->{'ramp_width'}; | |
| 845 $current_ramp++) { | |
| 846 $self->{'text'}->{'samples_a'}[$ramp_position+$current_ramp] = $peak_quality * $self->{'text'}->{'ramp'}[$current_ramp]; | |
| 847 } | |
| 848 push @{$self->{'text'}->{'v2_bases'}},($place_base_at+1,$peak_quality, | |
| 849 $peak_quality,$peak_quality,$peak_quality, | |
| 850 $current_base,0,0,0); | |
| 851 foreach (qw(a c g t)) { | |
| 852 push @{$self->{'text'}->{"v3_base_accuracy_$_"}},0; | |
| 853 } | |
| 854 } | |
| 855 else { | |
| 856 # don't print this. | |
| 857 # print ("The current base ($current_base) is not a base. Hmmm.\n"); | |
| 858 } | |
| 859 } | |
| 860 foreach (qw(a c g t)) { | |
| 861 pop @{$self->{'text'}->{"samples_$_"}}; | |
| 862 } | |
| 863 | |
| 864 # set the samples in the header | |
| 865 $self->{'header'}->{'samples'} = scalar(@{$self->{'text'}->{'samples_a'}}); | |
| 866 | |
| 867 # create the final trace string (this is version dependent) | |
| 868 $self->_make_trace_string($version); | |
| 869 # create the binary for v2 bases | |
| 870 if ($self->{'header'}->{'version'} == 2) { | |
| 871 my ($packstring,@pack_array,$pos2,$tester,@unpacked); | |
| 872 for ($pos = 0; $pos<$sequence_length;$pos++) { | |
| 873 my @pack_array = @{$self->{'text'}->{'v2_bases'}}[$pos*9..$pos*9+8]; | |
| 874 $self->{'binaries'}->{'v2_bases'} .= pack "N C C C C a C3",@pack_array; | |
| 875 } | |
| 876 # now create the binary for the traces | |
| 877 my $trace_pack_length = scalar(@{$self->{'text'}->{'samples_all'}}); | |
| 878 $self->{'binaries'}->{'samples_all'} .= pack "n$trace_pack_length",@{$self->{'text'}->{'samples_all'}}; | |
| 879 } | |
| 880 else { | |
| 881 # now for the version 3 stuff! | |
| 882 # delta the trace data | |
| 883 my @temp; | |
| 884 foreach (qw(a c g t)) { | |
| 885 $self->{'text'}->{"t_samples_$_"} = $self->_delta($self->{'text'}->{"samples_$_"},"forward"); | |
| 886 if ($_ eq 'a') { | |
| 887 @temp = @{$self->{'text'}->{"t_samples_a"}}; | |
| 888 @{$self->{'text'}->{'samples_all'}} = @{$self->{'text'}->{"t_samples_a"}}; | |
| 889 } | |
| 890 else { | |
| 891 push @{$self->{'text'}->{'samples_all'}},@{$self->{'text'}->{"t_samples_$_"}}; | |
| 892 } | |
| 893 } | |
| 894 # now create the binary for the traces | |
| 895 my $trace_pack_length = scalar(@{$self->{'text'}->{'samples_all'}}); | |
| 896 | |
| 897 $self->{'binaries'}->{'samples_all'} .= pack "n$trace_pack_length",@{$self->{'text'}->{'samples_all'}}; | |
| 898 | |
| 899 # peak offsets | |
| 900 my $length = scalar(@{$self->{'text'}->{'v3_peak_offsets'}}); | |
| 901 $self->{'binaries'}->{'v3_peak_offsets'} = pack "N$length",@{$self->{'text'}->{'v3_peak_offsets'}}; | |
| 902 # base accuracies | |
| 903 @{$self->{'text'}->{'v3_accuracies_all'}} = @{$self->{'text'}->{"v3_base_accuracy_a"}}; | |
| 904 foreach (qw(c g t)) { | |
| 905 @{$self->{'text'}->{'v3_accuracies_all'}} = (@{$self->{'text'}->{'v3_accuracies_all'}},@{$self->{'text'}->{"v3_base_accuracy_$_"}}); | |
| 906 } | |
| 907 $length = scalar(@{$self->{'text'}->{'v3_accuracies_all'}}); | |
| 908 | |
| 909 $self->{'binaries'}->{'v3_accuracies_all'} = pack "c$length",@{$self->{'text'}->{'v3_accuracies_all'}}; | |
| 910 # called bases | |
| 911 $length = length($self->{'info'}->{'sequence'}); | |
| 912 my @seq = split(//,$self->{'info'}->{'sequence'}); | |
| 913 # pack the string | |
| 914 $self->{'binaries'}->{'v3_called_bases'} = $self->{'info'}->{'sequence'}; | |
| 915 # finally, reserved for future use | |
| 916 $length = $self->{'info'}->{'sequence_length'}; | |
| 917 for (my $counter=0; $counter < $length; $counter++) { | |
| 918 push @temp,0; | |
| 919 } | |
| 920 $self->{'binaries'}->{'v3_reserved'} = pack "N$length",@temp; | |
| 921 } | |
| 922 } | |
| 923 | |
| 924 =head2 _make_trace_string($version) | |
| 925 | |
| 926 Title : _make_trace_string($version) | |
| 927 Usage : $self->_make_trace_string($version) | |
| 928 Function: Merges trace data for the four bases to produce an scf | |
| 929 trace string. _requires_ $version | |
| 930 Returns : Nothing. Alters $self. | |
| 931 Args : $version - a version number. "2" or "3" | |
| 932 Notes : | |
| 933 | |
| 934 =cut | |
| 935 | |
| 936 sub _make_trace_string { | |
| 937 my ($self,$version) = @_; | |
| 938 my @traces; | |
| 939 my @traces_view; | |
| 940 my @as = @{$self->{'text'}->{'samples_a'}}; | |
| 941 my @cs = @{$self->{'text'}->{'samples_c'}}; | |
| 942 my @gs = @{$self->{'text'}->{'samples_g'}}; | |
| 943 my @ts = @{$self->{'text'}->{'samples_t'}}; | |
| 944 if ($version == 2) { | |
| 945 for (my $curr=0; $curr < scalar(@as); $curr++) { | |
| 946 $as[$curr] = $DEFAULT_QUALITY unless defined $as[$curr]; | |
| 947 $cs[$curr] = $DEFAULT_QUALITY unless defined $cs[$curr]; | |
| 948 $gs[$curr] = $DEFAULT_QUALITY unless defined $gs[$curr]; | |
| 949 $ts[$curr] = $DEFAULT_QUALITY unless defined $ts[$curr]; | |
| 950 push @traces,($as[$curr],$cs[$curr],$gs[$curr],$ts[$curr]); | |
| 951 } | |
| 952 } | |
| 953 elsif ($version == 3) { | |
| 954 @traces = (@as,@cs,@gs,@ts); | |
| 955 } | |
| 956 else { | |
| 957 $self->throw("No idea what version required to make traces here. You gave #$version# Bailing."); | |
| 958 } | |
| 959 my $length = scalar(@traces); | |
| 960 $self->{'text'}->{'samples_all'} = \@traces; | |
| 961 | |
| 962 } | |
| 963 | |
| 964 =head2 _set_binary_comments(\@comments) | |
| 965 | |
| 966 Title : _set_binary_comments(\@comments) | |
| 967 Usage : $self->_set_binary_comments(\@comments); | |
| 968 Function: Provide a binary string that will be the comments section of | |
| 969 the scf file. See the scf specifications for detailed | |
| 970 specifications for the comments section of an scf file. Hint: | |
| 971 CODE=something\nBODE=something\n\0 | |
| 972 Returns : Nothing. Alters self. | |
| 973 Args : A reference to an array containing comments. | |
| 974 Notes : None. | |
| 975 | |
| 976 =cut | |
| 977 | |
| 978 sub _set_binary_comments { | |
| 979 my ($self,$rcomments) = @_; | |
| 980 my $comments_string = ''; | |
| 981 my %comments = %$rcomments; | |
| 982 foreach my $key (sort keys %comments) { | |
| 983 $comments{$key} ||= ''; | |
| 984 $comments_string .= "$key=$comments{$key}\n"; | |
| 985 } | |
| 986 $comments_string .= "\n\0"; | |
| 987 $self->{'header'}->{'comments'} = $comments_string; | |
| 988 my $length = length($comments_string); | |
| 989 $self->{'binaries'}->{'comments'} = pack "A$length",$comments_string; | |
| 990 $self->{'header'}->{'comments'} = $comments_string; | |
| 991 } | |
| 992 | |
| 993 =head2 _fill_missing_data($swq) | |
| 994 | |
| 995 Title : _fill_missing_data($swq) | |
| 996 Usage : $self->_fill_missing_data($swq); | |
| 997 Function: If the $swq with quality has no qualities, set all qualities | |
| 998 to 0. | |
| 999 If the $swq has no sequence, set the sequence to N's. | |
| 1000 Returns : Nothing. Modifies the SeqWithQuality that was passed as an | |
| 1001 argument. | |
| 1002 Args : A reference to a Bio::Seq::SeqWithQuality | |
| 1003 Notes : None. | |
| 1004 | |
| 1005 =cut | |
| 1006 | |
| 1007 #' | |
| 1008 sub _fill_missing_data { | |
| 1009 my ($self,$swq) = @_; | |
| 1010 my $qual_obj = $swq->qual_obj(); | |
| 1011 my $seq_obj = $swq->seq_obj(); | |
| 1012 if ($qual_obj->length() == 0 && $seq_obj->length() != 0) { | |
| 1013 my $fake_qualities = ("$DEFAULT_QUALITY ")x$seq_obj->length(); | |
| 1014 $swq->qual($fake_qualities); | |
| 1015 } | |
| 1016 if ($seq_obj->length() == 0 && $qual_obj->length != 0) { | |
| 1017 my $sequence = ("N")x$qual_obj->length(); | |
| 1018 $swq->seq($sequence); | |
| 1019 } | |
| 1020 } | |
| 1021 | |
| 1022 =head2 _delta(\@trace_data,$direction) | |
| 1023 | |
| 1024 Title : _delta(\@trace_data,$direction) | |
| 1025 Usage : $self->_delta(\@trace_data,$direction); | |
| 1026 Function: | |
| 1027 Returns : A reference to an array containing modified trace values. | |
| 1028 Args : A reference to an array containing trace data and a string | |
| 1029 indicating the direction of conversion. ("forward" or | |
| 1030 "backward"). | |
| 1031 Notes : This code is taken from the specification for SCF3.2. | |
| 1032 http://www.mrc-lmb.cam.ac.uk/pubseq/manual/formats_unix_4.html | |
| 1033 | |
| 1034 =cut | |
| 1035 | |
| 1036 | |
| 1037 sub _delta { | |
| 1038 my ($self,$rsamples,$direction) = @_; | |
| 1039 my @samples = @$rsamples; | |
| 1040 # /* If job == DELTA_IT: | |
| 1041 # * change a series of sample points to a series of delta delta values: | |
| 1042 # * ie change them in two steps: | |
| 1043 # * first: delta = current_value - previous_value | |
| 1044 # * then: delta_delta = delta - previous_delta | |
| 1045 # * else | |
| 1046 # * do the reverse | |
| 1047 # */ | |
| 1048 # int i; | |
| 1049 # uint_2 p_delta, p_sample; | |
| 1050 | |
| 1051 my ($i,$num_samples,$p_delta,$p_sample,@samples_converted); | |
| 1052 | |
| 1053 # c-programmers are funny people with their single-letter variables | |
| 1054 | |
| 1055 if ( $direction eq "forward" ) { | |
| 1056 $p_delta = 0; | |
| 1057 for ($i=0; $i < scalar(@samples); $i++) { | |
| 1058 $p_sample = $samples[$i]; | |
| 1059 $samples[$i] = $samples[$i] - $p_delta; | |
| 1060 $p_delta = $p_sample; | |
| 1061 } | |
| 1062 $p_delta = 0; | |
| 1063 for ($i=0; $i < scalar(@samples); $i++) { | |
| 1064 $p_sample = $samples[$i]; | |
| 1065 $samples[$i] = $samples[$i] - $p_delta; | |
| 1066 $p_delta = $p_sample; | |
| 1067 } | |
| 1068 } | |
| 1069 elsif ($direction eq "backward") { | |
| 1070 $p_sample = 0; | |
| 1071 for ($i=0; $i < scalar(@samples); $i++) { | |
| 1072 $samples[$i] = $samples[$i] + $p_sample; | |
| 1073 $p_sample = $samples[$i]; | |
| 1074 } | |
| 1075 $p_sample = 0; | |
| 1076 for ($i=0; $i < scalar(@samples); $i++) { | |
| 1077 $samples[$i] = $samples[$i] + $p_sample; | |
| 1078 $p_sample = $samples[$i]; | |
| 1079 } | |
| 1080 } | |
| 1081 else { | |
| 1082 $self->warn("Bad direction. Use \"forward\" or \"backward\"."); | |
| 1083 } | |
| 1084 return \@samples; | |
| 1085 } | |
| 1086 | |
| 1087 =head2 _unpack_magik($buffer) | |
| 1088 | |
| 1089 Title : _unpack_magik($buffer) | |
| 1090 Usage : $self->_unpack_magik($buffer) | |
| 1091 Function: What unpack specification should be used? Try them all. | |
| 1092 Returns : Nothing. | |
| 1093 Args : A buffer containing arbitrary binary data. | |
| 1094 Notes : Eliminate the ambiguity and the guesswork. Used in the | |
| 1095 adaptation of _delta(), mostly. | |
| 1096 | |
| 1097 =cut | |
| 1098 | |
| 1099 sub _unpack_magik { | |
| 1100 my ($self,$buffer) = @_; | |
| 1101 my $length = length($buffer); | |
| 1102 my (@read,$counter); | |
| 1103 foreach (qw(c C s S i I l L n N v V)) { | |
| 1104 @read = unpack "$_$length", $buffer; | |
| 1105 print ("----- Unpacked with $_\n"); | |
| 1106 for ($counter=0; $counter < 20; $counter++) { | |
| 1107 print("$read[$counter]\n"); | |
| 1108 } | |
| 1109 } | |
| 1110 } | |
| 1111 | |
| 1112 =head2 read_from_buffer($filehandle,$buffer,$length) | |
| 1113 | |
| 1114 Title : read_from_buffer($filehandle,$buffer,$length) | |
| 1115 Usage : $self->read_from_buffer($filehandle,$buffer,$length); | |
| 1116 Function: Read from the buffer. | |
| 1117 Returns : $buffer, containing a read of $length | |
| 1118 Args : a filehandle, a buffer, and a read length | |
| 1119 Notes : I just got tired of typing | |
| 1120 "unless (length($buffer) == $length)" so I put it here. | |
| 1121 | |
| 1122 =cut | |
| 1123 | |
| 1124 sub read_from_buffer { | |
| 1125 my ($self,$fh,$buffer,$length) = @_; | |
| 1126 read $fh, $buffer, $length; | |
| 1127 unless (length($buffer) == $length) { | |
| 1128 $self->warn("The read was incomplete! Trying harder."); | |
| 1129 my $missing_length = $length - length($buffer); | |
| 1130 my $buffer2; | |
| 1131 read $fh,$buffer2,$missing_length; | |
| 1132 $buffer .= $buffer2; | |
| 1133 if (length($buffer) != $length) { | |
| 1134 $self->throw("Unexpected end of file while reading from SCF file. I should have read $length but instead got ".length($buffer)."! Current file position is ".tell($fh)."."); | |
| 1135 } | |
| 1136 } | |
| 1137 | |
| 1138 return $buffer; | |
| 1139 } | |
| 1140 | |
| 1141 =head2 _dump_keys() | |
| 1142 | |
| 1143 Title : _dump_keys() | |
| 1144 Usage : &_dump_keys($a_reference_to_some_hash) | |
| 1145 Function: Dump out the keys in a hash. | |
| 1146 Returns : Nothing. | |
| 1147 Args : A reference to a hash. | |
| 1148 Notes : A debugging method. | |
| 1149 | |
| 1150 =cut | |
| 1151 | |
| 1152 sub _dump_keys { | |
| 1153 my $rhash = shift; | |
| 1154 if ($rhash !~ /HASH/) { | |
| 1155 print("_dump_keys: that was not a hash.\nIt was #$rhash# which was this reference:".ref($rhash)."\n"); | |
| 1156 return; | |
| 1157 } | |
| 1158 print("_dump_keys: The keys for $rhash are:\n"); | |
| 1159 foreach (sort keys %$rhash) { | |
| 1160 print("$_\n"); | |
| 1161 } | |
| 1162 } | |
| 1163 | |
| 1164 =head2 _dump_base_accuracies() | |
| 1165 | |
| 1166 Title : _dump_base_accuracies() | |
| 1167 Usage : $self->_dump_base_accuracies(); | |
| 1168 Function: Dump out the v3 base accuracies in an easy to read format. | |
| 1169 Returns : Nothing. | |
| 1170 Args : None. | |
| 1171 Notes : A debugging method. | |
| 1172 | |
| 1173 =cut | |
| 1174 | |
| 1175 sub _dump_base_accuracies { | |
| 1176 my $self = shift; | |
| 1177 print("Dumping base accuracies! for v3\n"); | |
| 1178 print("There are this many elements in a,c,g,t:\n"); | |
| 1179 print(scalar(@{$self->{'text'}->{'v3_base_accuracy_a'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_c'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_g'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_t'}})."\n"); | |
| 1180 my $number_traces = scalar(@{$self->{'text'}->{'v3_base_accuracy_a'}}); | |
| 1181 for (my $counter=0; $counter < $number_traces; $counter++ ) { | |
| 1182 print("$counter\t"); | |
| 1183 print $self->{'text'}->{'v3_base_accuracy_a'}->[$counter]."\t"; | |
| 1184 print $self->{'text'}->{'v3_base_accuracy_c'}->[$counter]."\t"; | |
| 1185 print $self->{'text'}->{'v3_base_accuracy_g'}->[$counter]."\t"; | |
| 1186 print $self->{'text'}->{'v3_base_accuracy_t'}->[$counter]."\t"; | |
| 1187 print("\n"); | |
| 1188 } | |
| 1189 } | |
| 1190 | |
| 1191 =head2 _dump_peak_indices_incoming() | |
| 1192 | |
| 1193 Title : _dump_peak_indices_incoming() | |
| 1194 Usage : $self->_dump_peak_indices_incoming(); | |
| 1195 Function: Dump out the v3 peak indices in an easy to read format. | |
| 1196 Returns : Nothing. | |
| 1197 Args : None. | |
| 1198 Notes : A debugging method. | |
| 1199 | |
| 1200 =cut | |
| 1201 | |
| 1202 sub _dump_peak_indices_incoming { | |
| 1203 my $self = shift; | |
| 1204 print("Dump peak indices incoming!\n"); | |
| 1205 my $length = $self->{'bases'}; | |
| 1206 print("The length is $length\n"); | |
| 1207 for (my $count=0; $count < $length; $count++) { | |
| 1208 print("$count\t$self->{parsed}->{peak_indices}->[$count]\n"); | |
| 1209 } | |
| 1210 } | |
| 1211 | |
| 1212 =head2 _dump_base_accuracies_incoming() | |
| 1213 | |
| 1214 Title : _dump_base_accuracies_incoming() | |
| 1215 Usage : $self->_dump_base_accuracies_incoming(); | |
| 1216 Function: Dump out the v3 base accuracies in an easy to read format. | |
| 1217 Returns : Nothing. | |
| 1218 Args : None. | |
| 1219 Notes : A debugging method. | |
| 1220 | |
| 1221 =cut | |
| 1222 | |
| 1223 sub _dump_base_accuracies_incoming { | |
| 1224 my $self = shift; | |
| 1225 print("Dumping base accuracies! for v3\n"); | |
| 1226 # print("There are this many elements in a,c,g,t:\n"); | |
| 1227 # print(scalar(@{$self->{'parsed'}->{'v3_base_accuracy_a'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_c'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_g'}}).",".scalar(@{$self->{'text'}->{'v3_base_accuracy_t'}})."\n"); | |
| 1228 my $number_traces = $self->{'bases'}; | |
| 1229 for (my $counter=0; $counter < $number_traces; $counter++ ) { | |
| 1230 print("$counter\t"); | |
| 1231 foreach (qw(A T G C)) { | |
| 1232 print $self->{'parsed'}->{'base_accuracies'}->{$_}->[$counter]."\t"; | |
| 1233 } | |
| 1234 print("\n"); | |
| 1235 } | |
| 1236 } | |
| 1237 | |
| 1238 | |
| 1239 =head2 _dump_comments() | |
| 1240 | |
| 1241 Title : _dump_comments() | |
| 1242 Usage : $self->_dump_comments(); | |
| 1243 Function: Debug dump the comments section from the scf. | |
| 1244 Returns : Nothing. | |
| 1245 Args : Nothing. | |
| 1246 Notes : None. | |
| 1247 | |
| 1248 =cut | |
| 1249 | |
| 1250 sub _dump_comments { | |
| 1251 my ($self) = @_; | |
| 1252 warn ("SCF comments:\n"); | |
| 1253 foreach my $k (keys %{$self->{'comments'}}) { | |
| 1254 warn ("\t {$k} ==> ", $self->{'comments'}->{$k}, "\n"); | |
| 1255 } | |
| 1256 } | |
| 1257 | |
| 1258 | |
| 1259 1; | |
| 1260 __END__ | |
| 1261 | |
| 1262 |
