Mercurial > repos > jdv > albacore
comparison denoise.pl @ 1:0a4f83207e53 draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/albacore commit 4aa7a76a7b29c425dd89a020979e835d785d3c95-dirty
| author | jdv |
|---|---|
| date | Wed, 06 Sep 2017 12:12:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:f8e25d69167d | 1:0a4f83207e53 |
|---|---|
| 1 #!/usr/bin/env perl | |
| 2 | |
| 3 use strict; | |
| 4 use warnings; | |
| 5 use 5.012; | |
| 6 | |
| 7 use File::Basename qw/basename/; | |
| 8 use File::Copy qw/copy/; | |
| 9 use Getopt::Long; | |
| 10 use List::Util qw/sum/; | |
| 11 | |
| 12 my $fn_table; | |
| 13 my @reads; | |
| 14 my @names; | |
| 15 my $min_score = 80; | |
| 16 my $min_frac = 0.05; | |
| 17 my $rm_unclass = 0; | |
| 18 my $n_keep; | |
| 19 my $fn_summary; | |
| 20 | |
| 21 use constant BARCODE => 14; | |
| 22 use constant SCORE => 15; | |
| 23 | |
| 24 my $unclass_tag = 'unclassified'; | |
| 25 | |
| 26 GetOptions( | |
| 27 'input=s' => \@reads, | |
| 28 'name=s' => \@names, | |
| 29 'table=s' => \$fn_table, | |
| 30 'min_score=f' => \$min_score, | |
| 31 'min_frac=f' => \$min_frac, | |
| 32 'remove_unclassified' => \$rm_unclass, | |
| 33 'n_keep=i' => \$n_keep, | |
| 34 'summary=s' => \$fn_summary, | |
| 35 ); | |
| 36 | |
| 37 die "Table not found\n" | |
| 38 if (! -r $fn_table); | |
| 39 | |
| 40 open my $tsv, '<', $fn_table; | |
| 41 my $head = <$tsv>; | |
| 42 chomp $head; | |
| 43 my @fields = split "\t", $head; | |
| 44 | |
| 45 die "unexpected field order" | |
| 46 if ($fields[BARCODE] ne 'barcode_arrangement'); | |
| 47 die "unexpected field order" | |
| 48 if ($fields[SCORE] ne 'barcode_score'); | |
| 49 | |
| 50 my %counts; | |
| 51 my %sums; | |
| 52 | |
| 53 while (my $line = <$tsv>) { | |
| 54 chomp $line; | |
| 55 my @fields = split "\t", $line; | |
| 56 my $bin = $fields[BARCODE]; | |
| 57 my $score = $fields[SCORE]; | |
| 58 $counts{$bin} += 1; | |
| 59 $sums{$bin} += $score; | |
| 60 } | |
| 61 | |
| 62 if ($rm_unclass) { | |
| 63 delete $counts{$unclass_tag}; | |
| 64 delete $sums{$unclass_tag}; | |
| 65 } | |
| 66 | |
| 67 my @keys = sort {$counts{$b} <=> $counts{$a}} keys %counts; | |
| 68 | |
| 69 my %scores; | |
| 70 | |
| 71 my %status; | |
| 72 | |
| 73 @status{ @keys } = ('discarded') x scalar(@keys); | |
| 74 | |
| 75 for (@keys) { | |
| 76 $scores{$_} = $sums{$_}/$counts{$_}; | |
| 77 } | |
| 78 my $sum_count = sum(values %counts); | |
| 79 my $mean_count = $sum_count / scalar(values %counts); | |
| 80 | |
| 81 if (defined $n_keep) { | |
| 82 | |
| 83 my %rank_scores; | |
| 84 @rank_scores{ @keys } = map {$scores{$_} * $counts{$_}/$sum_count} @keys; | |
| 85 | |
| 86 @keys = sort {$rank_scores{$b} <=> $rank_scores{$a}} keys %rank_scores; | |
| 87 | |
| 88 @keys = @keys[0..$n_keep-1]; | |
| 89 | |
| 90 } | |
| 91 | |
| 92 else { | |
| 93 | |
| 94 @keys = grep {$scores{$_} >= $min_score} @keys; | |
| 95 @keys = grep {$counts{$_} >= $mean_count*$min_frac} @keys; | |
| 96 | |
| 97 } | |
| 98 | |
| 99 @status{ @keys } = ('kept') x scalar(@keys); | |
| 100 | |
| 101 @keys = sort {$counts{$b} <=> $counts{$a}} keys %status; | |
| 102 | |
| 103 # print summary table | |
| 104 open my $summary, '>', $fn_summary | |
| 105 or die "Failed to open summary: $!\n"; | |
| 106 say {$summary} join "\t", | |
| 107 '#bin', | |
| 108 'n_reads', | |
| 109 'avg_score', | |
| 110 'status', | |
| 111 ; | |
| 112 for (@keys) { | |
| 113 say {$summary} join "\t", | |
| 114 $_, | |
| 115 $counts{$_}, | |
| 116 sprintf("%0.3f",$scores{$_}), | |
| 117 $status{$_}, | |
| 118 ; | |
| 119 } | |
| 120 close $summary; | |
| 121 | |
| 122 mkdir('outputs'); | |
| 123 | |
| 124 for my $i (0..$#reads) { | |
| 125 | |
| 126 my $fn = $reads[$i]; | |
| 127 my $name = $names[$i]; | |
| 128 | |
| 129 my $base = basename($name); | |
| 130 my $bin = $base; | |
| 131 $bin =~ s/\.(fastq|fq|fast5\.tar\.gz)$//i; | |
| 132 if (defined $status{$bin} && $status{$bin} eq 'kept') { | |
| 133 copy( $fn, "outputs/$base" ); | |
| 134 } | |
| 135 | |
| 136 } |
