Mercurial > repos > jdv > b2b_summarize_run
comparison bam2consensus @ 1:10c319d654df draft default tip
"planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/b2b_utils commit 9bf8a0462bd44f170c0371b6cae67dd0c3b3da9f-dirty"
| author | jdv |
|---|---|
| date | Tue, 28 Sep 2021 06:13:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:bd599efae04d | 1:10c319d654df |
|---|---|
| 1 #!/usr/bin/env perl | |
| 2 | |
| 3 use strict; | |
| 4 use warnings; | |
| 5 use 5.012; | |
| 6 | |
| 7 use BioX::Seq; | |
| 8 use BioX::Seq::Stream; | |
| 9 use BioX::Seq::Fetch; | |
| 10 use File::Temp qw/tempfile/; | |
| 11 use IPC::Cmd qw/can_run/; | |
| 12 use Getopt::Long; | |
| 13 use List::Util qw/sum max first/; | |
| 14 use Pod::Usage; | |
| 15 use POSIX qw/floor ceil/; | |
| 16 | |
| 17 #-inputs---------------------------------------------------------------------# | |
| 18 my $fn_bam; | |
| 19 my $fn_ref; | |
| 20 #-outputs--------------------------------------------------------------------# | |
| 21 my $fn_table; | |
| 22 my $fn_consensus; | |
| 23 my $fn_bedgraph; | |
| 24 #-knobs----------------------------------------------------------------------# | |
| 25 my $min_qual = 10; | |
| 26 my $min_depth = 3; | |
| 27 my $trim_fraction = 0.2; | |
| 28 my $sliding_window = 30; | |
| 29 my $bg_bin_figs = 0; | |
| 30 my $verbose = 0; | |
| 31 | |
| 32 my $PROGRAM = 'bam2consensus'; | |
| 33 my $VERSION = 0.004; | |
| 34 | |
| 35 GetOptions( | |
| 36 | |
| 37 #-inputs-----------------------------------------------------------------# | |
| 38 'bam=s' => \$fn_bam, | |
| 39 'ref=s' => \$fn_ref, | |
| 40 #-outputs----------------------------------------------------------------# | |
| 41 'table=s' => \$fn_table, | |
| 42 'consensus=s' => \$fn_consensus, | |
| 43 'bedgraph=s' => \$fn_bedgraph, | |
| 44 #-knobs------------------------------------------------------------------# | |
| 45 'min_qual=i' => \$min_qual, | |
| 46 'min_depth=i' => \$min_depth, | |
| 47 'trim=f' => \$trim_fraction, | |
| 48 'window=i' => \$sliding_window, | |
| 49 'bg_bin_figs=i' => \$bg_bin_figs, | |
| 50 'verbose' => \$verbose, | |
| 51 'help' => sub{ pod2usage(-verbose => 2); }, | |
| 52 'version' => sub{ print "This is $PROGRAM v$VERSION\n";exit; }, | |
| 53 | |
| 54 ) or pod2usage( -verbose => 1); | |
| 55 | |
| 56 # check for recent version of samtools | |
| 57 my $SAMTOOLS = can_run('samtools') | |
| 58 // die "Samtools is required but not found\n"; | |
| 59 my $v_string = `$SAMTOOLS --version`; | |
| 60 if ($v_string =~ /^samtools (\d+)\.(\d+)/m) { | |
| 61 die "Requires samtools >= 1.3.0\n" if ($1 < 1 || $2 < 3); | |
| 62 } else { | |
| 63 die "Error parsing samtools version string\n"; | |
| 64 } | |
| 65 | |
| 66 # check for mafft | |
| 67 my $MAFFT = can_run('mafft') | |
| 68 // die "MAFFT is required but not found\n"; | |
| 69 | |
| 70 | |
| 71 # misc param checking | |
| 72 die "Error: must specify at least one output target" if (! ( | |
| 73 defined $fn_table | |
| 74 || defined $fn_consensus | |
| 75 || defined $fn_bedgraph | |
| 76 )); | |
| 77 die "Error: missing reference parameter" | |
| 78 if (! defined $fn_ref); | |
| 79 die "Error reading reference" | |
| 80 if (! -r $fn_ref); | |
| 81 | |
| 82 | |
| 83 # globals | |
| 84 my @errors; | |
| 85 my @lines = () if (defined $fn_table); | |
| 86 | |
| 87 my %iupac = ( | |
| 88 A => 'A', | |
| 89 C => 'C', | |
| 90 G => 'G', | |
| 91 T => 'T', | |
| 92 AG => 'R', | |
| 93 CT => 'Y', | |
| 94 CG => 'S', | |
| 95 AT => 'W', | |
| 96 GT => 'K', | |
| 97 AC => 'M', | |
| 98 CGT => 'B', | |
| 99 AGT => 'D', | |
| 100 ACT => 'H', | |
| 101 ACG => 'V', | |
| 102 ACGT => 'N', | |
| 103 ); | |
| 104 | |
| 105 my @consensi; | |
| 106 my $bg = ''; | |
| 107 | |
| 108 my @curr_lines; | |
| 109 my $last_chr; | |
| 110 | |
| 111 my $last_depth = undef; | |
| 112 my $last_loc = 0; | |
| 113 my $bg_start = 0; | |
| 114 my $bg_loc = 0; | |
| 115 | |
| 116 | |
| 117 # initialize random-access sequence collection | |
| 118 my $seqs = BioX::Seq::Fetch->new($fn_ref) or die "Error loading reference"; | |
| 119 | |
| 120 | |
| 121 # pipe from samtools mpileup command | |
| 122 # (note: this is much faster in testing than using Perl bindings, e.g. | |
| 123 # Bio::DB::HTS or the like) | |
| 124 | |
| 125 $fn_bam //= '-'; # use stdin if BAM file not given | |
| 126 | |
| 127 open my $fh, '-|', $SAMTOOLS, | |
| 128 'mpileup', | |
| 129 '-d' => 1000000, | |
| 130 '-B', | |
| 131 '-f' => $fn_ref, | |
| 132 $fn_bam ; | |
| 133 | |
| 134 | |
| 135 | |
| 136 LINE: | |
| 137 while (my $line = <$fh>) { | |
| 138 | |
| 139 chomp $line; | |
| 140 my ($chr, @other) = split "\t", $line; | |
| 141 $last_chr //= $chr; | |
| 142 | |
| 143 if ($chr ne $last_chr) { | |
| 144 process(\@curr_lines); | |
| 145 @curr_lines = (); | |
| 146 $last_chr = $chr; | |
| 147 } | |
| 148 | |
| 149 push @curr_lines, $line; | |
| 150 } | |
| 151 | |
| 152 process(\@curr_lines); # don't forget last call | |
| 153 | |
| 154 # output bedgraph if asked | |
| 155 if (defined $fn_bedgraph) { | |
| 156 open my $fh_bedgraph, '>', $fn_bedgraph; | |
| 157 print {$fh_bedgraph} | |
| 158 "track type=bedGraph name=read_coverage maxHeightPixels=1000:80:20\n"; | |
| 159 print {$fh_bedgraph} $bg; | |
| 160 close $fh_bedgraph; | |
| 161 | |
| 162 } | |
| 163 | |
| 164 # output fasta if asked | |
| 165 if (defined $fn_consensus) { | |
| 166 | |
| 167 open my $out, '>', $fn_consensus; | |
| 168 print {$out} $_->as_fasta for (@consensi); | |
| 169 close $out; | |
| 170 | |
| 171 } | |
| 172 | |
| 173 # build and process table if asked | |
| 174 if (defined $fn_table) { | |
| 175 | |
| 176 # calculate sliding errors | |
| 177 my @avg_errors; | |
| 178 my $l = scalar(@errors); | |
| 179 $sliding_window = $l if ($l < $sliding_window); | |
| 180 my $left = floor(($sliding_window-1)/2); | |
| 181 my $right = ceil(($sliding_window-1)/2); | |
| 182 my $lower = $left; | |
| 183 my $upper = $l - $right; | |
| 184 for my $i (0..$#errors) { | |
| 185 my @pool; | |
| 186 if ($i < $lower) { | |
| 187 @pool = (@errors[0..$i-1] ,@errors[$i+1..$sliding_window-1]); | |
| 188 } | |
| 189 elsif ($i >= $upper) { | |
| 190 @pool = (@errors[$l-$sliding_window..$i-1], @errors[$i+1..$l-1]); | |
| 191 } | |
| 192 else { | |
| 193 @pool = (@errors[$i-$left..$i-1], @errors[$i+1..$i+$right]); | |
| 194 } | |
| 195 die "bad pool size @pool at $i" if (scalar(@pool)+1 != $sliding_window); | |
| 196 | |
| 197 # calc trimmed mean | |
| 198 @pool = sort {$a <=> $b} @pool; | |
| 199 my $l = @pool; | |
| 200 my @trimmed | |
| 201 = @pool[ int($l*$trim_fraction), int($l*(1-$trim_fraction))+0.5 ]; | |
| 202 my $tm = scalar(@trimmed) > 0 ? sum(@trimmed)/scalar(@trimmed) : 'NA'; | |
| 203 push @avg_errors, $tm; | |
| 204 } | |
| 205 | |
| 206 open my $fh_table, '>', $fn_table; | |
| 207 | |
| 208 # print table header | |
| 209 print {$fh_table} join( "\t", ( | |
| 210 'id', | |
| 211 'loc', | |
| 212 'ref', | |
| 213 'called', | |
| 214 'total_depth', | |
| 215 'counted_depth', | |
| 216 'mm_rate', | |
| 217 'A_count', | |
| 218 'T_count', | |
| 219 'G_count', | |
| 220 'C_count', | |
| 221 'N_count', | |
| 222 'gap_count', | |
| 223 'A_freq', | |
| 224 'T_freq', | |
| 225 'G_freq', | |
| 226 'C_freq', | |
| 227 'N_freq', | |
| 228 'gap_freq', | |
| 229 'A_sb', | |
| 230 'T_sb', | |
| 231 'G_sb', | |
| 232 'C_sb', | |
| 233 'bgnd_err', | |
| 234 'insertions' | |
| 235 ) ) . "\n"; | |
| 236 | |
| 237 my $iter = 0; | |
| 238 POS: | |
| 239 for (0..$#lines) { | |
| 240 my @parts = @{ $lines[$_] }; | |
| 241 @parts[23] = sprintf '%.3f', $avg_errors[$_]; | |
| 242 print {$fh_table} join( "\t",@parts), "\n"; | |
| 243 } | |
| 244 close $fh_table; | |
| 245 } | |
| 246 | |
| 247 sub process { | |
| 248 | |
| 249 my $ln_ref = shift; | |
| 250 | |
| 251 my $last_chr; | |
| 252 $last_depth = undef; | |
| 253 $last_loc = 0; | |
| 254 $bg_start = 0; | |
| 255 $bg_loc = 0; | |
| 256 | |
| 257 LINE: | |
| 258 for my $line (@$ln_ref) { | |
| 259 chomp $line; | |
| 260 my @parts = split "\t", $line; | |
| 261 my $chr = $parts[0]; | |
| 262 my $loc = $parts[1]; | |
| 263 my $ref = uc $parts[2]; | |
| 264 my $depth = $parts[3]; | |
| 265 my $read_string = $parts[4]; | |
| 266 my $qual_string = $parts[5]; | |
| 267 | |
| 268 # check that chr hasn't changed (don't supported multiple refs) | |
| 269 $last_chr = $last_chr // $chr; | |
| 270 if ($chr ne $last_chr) { | |
| 271 #process current, start new | |
| 272 } | |
| 273 | |
| 274 # simulate missing rows | |
| 275 my $t = $last_loc + 1; | |
| 276 while ($t < $loc) { | |
| 277 handle_entry( | |
| 278 $chr, | |
| 279 $t, | |
| 280 $seqs->fetch_seq($chr, $t, $t), | |
| 281 #substr($ref_seq, $t-1, 1), | |
| 282 0, | |
| 283 '', | |
| 284 '', | |
| 285 ); | |
| 286 ++$t; | |
| 287 } | |
| 288 | |
| 289 # send entry for handling | |
| 290 handle_entry( | |
| 291 $chr, | |
| 292 $loc, | |
| 293 $ref, | |
| 294 $depth, | |
| 295 $read_string, | |
| 296 $qual_string, | |
| 297 ); | |
| 298 | |
| 299 $last_loc = $loc; | |
| 300 | |
| 301 } | |
| 302 | |
| 303 # simulate missing rows at end | |
| 304 my $t = $last_loc + 1; | |
| 305 my $l = $seqs->length($last_chr); | |
| 306 while ($t <= $l) { | |
| 307 handle_entry( | |
| 308 $last_chr, | |
| 309 $t, | |
| 310 $seqs->fetch_seq($last_chr, $t, $t), | |
| 311 #substr($ref_seq, $t-1, 1), | |
| 312 0, | |
| 313 '', | |
| 314 '', | |
| 315 ); | |
| 316 ++$t; | |
| 317 } | |
| 318 | |
| 319 if (defined $fn_bedgraph) { | |
| 320 | |
| 321 $bg .= join("\t", $last_chr, $bg_start, $bg_loc, $last_depth) . "\n"; | |
| 322 } | |
| 323 | |
| 324 } | |
| 325 | |
| 326 | |
| 327 sub handle_entry { | |
| 328 | |
| 329 my ($chr, $loc, $ref, $depth, $read_string, $qual_string) = @_; | |
| 330 | |
| 331 my $called = ''; | |
| 332 | |
| 333 # handle zero-depth positions separately | |
| 334 if ($depth < 1) { | |
| 335 $called = 'N'; | |
| 336 print "Missing coverage at $chr pos $loc\n" if ($verbose); | |
| 337 if (defined $fn_table) { | |
| 338 push @lines, [ | |
| 339 $chr, | |
| 340 $loc, | |
| 341 $ref, | |
| 342 'N', | |
| 343 (0) x 19, | |
| 344 undef, | |
| 345 '', | |
| 346 ]; | |
| 347 } | |
| 348 push @errors, 0; | |
| 349 } | |
| 350 | |
| 351 # everything else | |
| 352 else { | |
| 353 | |
| 354 # handle insertions | |
| 355 my %inserts; | |
| 356 my $insert_count = 0; | |
| 357 while ($read_string =~ /\+(\d+)((??{"[ATGCNatgcnRYSWKMBDHVryswkmbdhv-]{$^N}"}))/g) { | |
| 358 $inserts{$2} += 1; | |
| 359 ++$insert_count; | |
| 360 } | |
| 361 | |
| 362 # ...and strip extra characters | |
| 363 $read_string =~ s/\^.//g; | |
| 364 $read_string =~ s/[\+\-](\d+)(??{"[ATGCNatgcnRYSWKMBDHVryswkmbdhv-]{$^N}"})//g; | |
| 365 $read_string =~ s/[^\.\,\w\*]//g; | |
| 366 | |
| 367 # simple parse check | |
| 368 my $l1 = length($read_string); | |
| 369 my $l2 = length($qual_string); | |
| 370 die "read/qual mismatch ($l1 v $l2)" if ($l1 != $l2); | |
| 371 | |
| 372 die "unexpected char at $chr pos $loc\n" | |
| 373 if ($read_string =~ /[^.,ATGCNatgcn*]/); | |
| 374 | |
| 375 my $lc = lc $ref; | |
| 376 $read_string =~ s/\./$ref/g; | |
| 377 $read_string =~ s/\,/$lc/g; | |
| 378 $read_string =~ s/n/N/g; | |
| 379 | |
| 380 # split into arrays | |
| 381 my %counts = map {$_ => 0} qw/A T G C N a t g c */; | |
| 382 my %cons_counts = map {$_ => 0} qw/A T G C N a t g c */; | |
| 383 my @chars = split '', $read_string; | |
| 384 my @quals = map {ord($_) - 33} split('', $qual_string); | |
| 385 | |
| 386 READ: | |
| 387 for my $i (0..$#chars) { | |
| 388 ++$cons_counts{ uc($chars[$i]) }; | |
| 389 ++$counts{ $chars[$i] } if ($quals[$i] >= $min_qual); | |
| 390 } | |
| 391 | |
| 392 # calculate strand bias and collapse counts | |
| 393 my %sb; | |
| 394 for my $b (qw/A T G C/) { | |
| 395 my $n = $counts{$b} + $counts{lc($b)}; | |
| 396 $sb{$b} = $n > 0 | |
| 397 ? ($n-1)/$n*(2*max($counts{$b}/$n, ($n-$counts{$b})/$n)-1) | |
| 398 : 0; | |
| 399 $counts{$b} += $counts{lc($b)}; | |
| 400 delete $counts{lc($b)}; | |
| 401 } | |
| 402 | |
| 403 $counts{$ref} = $counts{$ref} // 0; # some IUPAC codes not defined above | |
| 404 $cons_counts{$ref} = $cons_counts{$ref} // 0; # some IUPAC codes not defined above | |
| 405 my $mismatches = sum map {$counts{$_}} grep {$_ ne $ref} keys %counts; | |
| 406 my $counted_depth = $counts{$ref} + $mismatches; | |
| 407 my $cons_depth = sum map {$cons_counts{$_}} keys %counts; | |
| 408 my $error_rate = $counted_depth == 0 | |
| 409 ? 0 | |
| 410 : sprintf '%.4f', $mismatches/$counted_depth; | |
| 411 push @errors, $error_rate; | |
| 412 | |
| 413 my @insert_strings = (); | |
| 414 my $consensus_insert = ''; | |
| 415 | |
| 416 #create case-insensitive insert hash | |
| 417 my %combined_inserts; | |
| 418 for (keys %inserts) { | |
| 419 $combined_inserts{uc($_)} += $inserts{$_}; | |
| 420 } | |
| 421 | |
| 422 if (scalar(keys %combined_inserts) > 0) { | |
| 423 my @sorted_inserts = sort { | |
| 424 $combined_inserts{$b} <=> $combined_inserts{$a} | |
| 425 || $a cmp $b | |
| 426 } keys %combined_inserts; | |
| 427 for (@sorted_inserts) { | |
| 428 my $f_count = $inserts{$_} // 0; | |
| 429 my $r_count = $inserts{lc($_)} // 0; | |
| 430 my $n = $combined_inserts{$_}; | |
| 431 my $sb = sprintf '%.3f', ($n-1)/$n*max($f_count/$n, ($n-$f_count)/$n); | |
| 432 push @insert_strings, "$_($f_count,$r_count:$sb)"; | |
| 433 } | |
| 434 | |
| 435 # decide whether to include insert in consensus | |
| 436 if ($insert_count/$l1 > 0.5) { | |
| 437 my @realigned = realign(\%combined_inserts); | |
| 438 for my $i (0..$#realigned) { | |
| 439 my @b = sort { | |
| 440 $realigned[$i]->{$b} <=> $realigned[$i]->{$a} | |
| 441 } keys %{ $realigned[$i] }; | |
| 442 if ($realigned[$i]->{$b[0]}/$l1 > 0.5) { | |
| 443 $consensus_insert .= uc $b[0]; | |
| 444 } | |
| 445 } | |
| 446 } | |
| 447 | |
| 448 } | |
| 449 if ($cons_depth < $min_depth) { | |
| 450 $called = 'N'; | |
| 451 } | |
| 452 else { | |
| 453 my @sorted | |
| 454 = sort {$cons_counts{$b} <=> $cons_counts{$a}} keys %cons_counts; | |
| 455 | |
| 456 # get all top hits that aren't gaps | |
| 457 my @equal_hits = grep { | |
| 458 $cons_counts{$_} eq $cons_counts{$sorted[0]} && $_ ne '*' | |
| 459 } @sorted; | |
| 460 | |
| 461 if (scalar(@equal_hits)) { | |
| 462 my $tag = join('',sort {$a cmp $b} @equal_hits); | |
| 463 die "bad tag $tag" if (! defined $iupac{$tag}); | |
| 464 $called = $iupac{$tag}; | |
| 465 } | |
| 466 } | |
| 467 $called .= $consensus_insert; | |
| 468 | |
| 469 print "consensus/reference difference at $chr pos $loc (ref: $ref cons: $called)\n" | |
| 470 if ($verbose && $called ne $ref); | |
| 471 | |
| 472 if (defined $fn_table) { | |
| 473 push @lines, [ | |
| 474 $chr, | |
| 475 $loc, | |
| 476 $ref, | |
| 477 $called eq '' ? '-' : $called, | |
| 478 $depth, | |
| 479 $counted_depth, | |
| 480 sprintf('%.3f',$error_rate), | |
| 481 $counts{A}, | |
| 482 $counts{T}, | |
| 483 $counts{G}, | |
| 484 $counts{C}, | |
| 485 $counts{N}, | |
| 486 $counts{'*'}, | |
| 487 sprintf('%.3f',$counts{A}/$counted_depth), | |
| 488 sprintf('%.3f',$counts{T}/$counted_depth), | |
| 489 sprintf('%.3f',$counts{G}/$counted_depth), | |
| 490 sprintf('%.3f',$counts{C}/$counted_depth), | |
| 491 sprintf('%.3f',$counts{N}/$counted_depth), | |
| 492 sprintf('%.3f',$counts{'*'}/$counted_depth), | |
| 493 sprintf('%.3f',$sb{A}), | |
| 494 sprintf('%.3f',$sb{T}), | |
| 495 sprintf('%.3f',$sb{G}), | |
| 496 sprintf('%.3f',$sb{C}), | |
| 497 undef, | |
| 498 join(':',@insert_strings) | |
| 499 ]; | |
| 500 } | |
| 501 } | |
| 502 | |
| 503 my $consensus = first {$_->id eq $chr} @consensi; | |
| 504 if (! defined $consensus) { | |
| 505 $consensus = BioX::Seq->new('',$chr); | |
| 506 push @consensi, $consensus; | |
| 507 } | |
| 508 $consensus->seq .= $called; | |
| 509 | |
| 510 my $cons_len = length($called); | |
| 511 | |
| 512 # Generate bedgraph output | |
| 513 if (defined $fn_bedgraph && $cons_len > 0) { | |
| 514 | |
| 515 # bin depth if requested | |
| 516 if ($bg_bin_figs > 0) { | |
| 517 my $divisor = 10**max(0, length($depth)-$bg_bin_figs); | |
| 518 $depth = int($depth/$divisor) * $divisor; | |
| 519 } | |
| 520 | |
| 521 # output on depth change | |
| 522 if (! defined $last_depth || $depth != $last_depth) { | |
| 523 $bg .= join("\t",$last_chr, $bg_start, $bg_loc, $last_depth) . "\n" | |
| 524 if (defined $last_depth); | |
| 525 $last_depth = $depth; | |
| 526 $bg_start = $bg_loc; | |
| 527 } | |
| 528 | |
| 529 $bg_loc += $cons_len; | |
| 530 | |
| 531 } | |
| 532 | |
| 533 } | |
| 534 | |
| 535 | |
| 536 sub realign { | |
| 537 | |
| 538 # calculate a local realignment at indel using MAFFT | |
| 539 # TODO: reimplement using native code | |
| 540 | |
| 541 my ($hash) = @_; | |
| 542 | |
| 543 my @seqs = keys %{ $hash }; | |
| 544 my @weights = map {$hash->{$_}} @seqs; | |
| 545 my @scores; | |
| 546 if (scalar(@seqs) > 1) { | |
| 547 my ($fh, $fn) = tempfile; | |
| 548 for (0..$#seqs) { | |
| 549 my $n = $_ + 1; | |
| 550 print {$fh} ">$n\n$seqs[$_]\n"; | |
| 551 } | |
| 552 close $fh; | |
| 553 open my $stream, '-|', $MAFFT, | |
| 554 '--auto', | |
| 555 '--quiet', | |
| 556 '--op' => 0, | |
| 557 '--lop' => 0, | |
| 558 $fn; | |
| 559 my $p = BioX::Seq::Stream->new($stream); | |
| 560 while (my $seq = $p->next_seq) { | |
| 561 my $w = shift @weights; | |
| 562 for (0..length($seq)-1) { | |
| 563 my $base = substr $seq, $_, 1; | |
| 564 next if ($base eq '-'); | |
| 565 $scores[$_] = {} if (! defined $scores[$_]); | |
| 566 $scores[$_]->{$base} += $w; | |
| 567 } | |
| 568 } | |
| 569 } | |
| 570 else { | |
| 571 my $seq = $seqs[0]; | |
| 572 my $w = $weights[0]; | |
| 573 for (0..length($seq)-1) { | |
| 574 my $base = substr $seq, $_, 1; | |
| 575 next if ($base eq '-'); | |
| 576 $scores[$_] = {} if (! defined $scores[$_]); | |
| 577 $scores[$_]->{$base} += $w; | |
| 578 } | |
| 579 } | |
| 580 return @scores; | |
| 581 | |
| 582 } | |
| 583 | |
| 584 | |
| 585 __END__ | |
| 586 | |
| 587 =head1 NAME | |
| 588 | |
| 589 bam2consensus - consensus calling (etc) from BAM alignment | |
| 590 | |
| 591 =head1 SYNOPSIS | |
| 592 | |
| 593 bam2consensus --bam <in.bam> --ref <in.fasta> [options] --consensus <out.fasta> | |
| 594 | |
| 595 =head1 DESCRIPTION | |
| 596 | |
| 597 Re-calls a consensus sequence based on a BAM alignment to reference, with | |
| 598 various knobs and optional output formats | |
| 599 | |
| 600 =head1 PREREQUISITES | |
| 601 | |
| 602 Requires the following non-core Perl libraries: | |
| 603 | |
| 604 =over 1 | |
| 605 | |
| 606 =item * BioX::Seq | |
| 607 | |
| 608 =back | |
| 609 | |
| 610 as well as the following binaries: | |
| 611 | |
| 612 =over 1 | |
| 613 | |
| 614 =item * samtools (>= 1.3.1) | |
| 615 | |
| 616 =item * mafft | |
| 617 | |
| 618 =back | |
| 619 | |
| 620 =head1 OPTIONS | |
| 621 | |
| 622 =head2 Input (required) | |
| 623 | |
| 624 =over 4 | |
| 625 | |
| 626 =item B<--bam> I<filename> | |
| 627 | |
| 628 Path to input BAM alignments | |
| 629 | |
| 630 =item B<--ref> I<filename> | |
| 631 | |
| 632 Path to reference sequence used to generate BAM alignments | |
| 633 | |
| 634 =back | |
| 635 | |
| 636 =head2 Output (at least one is required, can specify more than one) | |
| 637 | |
| 638 =over 4 | |
| 639 | |
| 640 =item B<--consensus> | |
| 641 | |
| 642 Path to write consensus sequence to (as FASTA) | |
| 643 | |
| 644 =item B<--bedgraph> | |
| 645 | |
| 646 Path to write coverage file to (as bedgraph) | |
| 647 | |
| 648 =item B<--table> | |
| 649 | |
| 650 Path to write coverage file to (as tab-separated table) | |
| 651 | |
| 652 =back | |
| 653 | |
| 654 =head2 Configuration | |
| 655 | |
| 656 =over 4 | |
| 657 | |
| 658 =item B<--min_qual> | |
| 659 | |
| 660 Minimum quality for a base to be considered in consensus calling. Default: 10. | |
| 661 | |
| 662 =item B<--min_depth> | |
| 663 | |
| 664 Minimum read depth for consensus to be called (otherwise called as "N"). Default: 3. | |
| 665 | |
| 666 =item B<--trim> | |
| 667 | |
| 668 Fraction to trim from each end when calculating trimmed mean of error window. | |
| 669 Default: 0.2. | |
| 670 | |
| 671 =item B<--window> | |
| 672 | |
| 673 Size of sliding window used to calculate local error rates. Default: 30. | |
| 674 | |
| 675 =item B<--bg_bin_figs> <integer> | |
| 676 | |
| 677 If greater than zero, the number of significant figures used to bin depths in | |
| 678 bedgraph output. If zero, no binning is applied. This option is useful to | |
| 679 reduce the size of bedgraph output by binning similar depth values when high | |
| 680 resolution is not important. Default: 0 (disabled). | |
| 681 | |
| 682 =back | |
| 683 | |
| 684 =head1 CAVEATS AND BUGS | |
| 685 | |
| 686 Please submit bug reports to the issue tracker in the distribution repository. | |
| 687 | |
| 688 =head1 AUTHOR | |
| 689 | |
| 690 Jeremy Volkening (jdv@base2bio.com) | |
| 691 | |
| 692 =head1 LICENSE AND COPYRIGHT | |
| 693 | |
| 694 Copyright 2014-17 Jeremy Volkening | |
| 695 | |
| 696 This program is free software: you can redistribute it and/or modify | |
| 697 it under the terms of the GNU General Public License as published by | |
| 698 the Free Software Foundation, either version 3 of the License, or | |
| 699 (at your option) any later version. | |
| 700 | |
| 701 This program is distributed in the hope that it will be useful, | |
| 702 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 703 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 704 GNU General Public License for more details. | |
| 705 | |
| 706 You should have received a copy of the GNU General Public License | |
| 707 along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| 708 | |
| 709 =cut | |
| 710 |
