Mercurial > repos > charles_s_test > seqsero2
annotate libs/sratoolkit.2.8.0-centos_linux64/example/perl/quality-stats.pl @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
rev | line source |
---|---|
3
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
1 #!/usr/bin/env perl |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
2 # =========================================================================== |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
3 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
4 # PUBLIC DOMAIN NOTICE |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
5 # National Center for Biotechnology Information |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
6 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
7 # This software/database is a "United States Government Work" under the |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
8 # terms of the United States Copyright Act. It was written as part of |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
9 # the author's official duties as a United States Government employee and |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
10 # thus cannot be copyrighted. This software/database is freely available |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
11 # to the public for use. The National Library of Medicine and the U.S. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
12 # Government have not placed any restriction on its use or reproduction. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
13 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
14 # Although all reasonable efforts have been taken to ensure the accuracy |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
15 # and reliability of the software and data, the NLM and the U.S. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
16 # Government do not and cannot warrant the performance or results that |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
17 # may be obtained by using this software or data. The NLM and the U.S. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
18 # Government disclaim all warranties, express or implied, including |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
19 # warranties of performance, merchantability or fitness for any particular |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
20 # purpose. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
21 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
22 # Please cite the author in any work or product based on this material. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
23 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
24 # =========================================================================== |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
25 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
26 use warnings; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
27 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
28 use constant MAX_SCORE_COUNT => 64; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
29 use constant MAX_READ_COUNT => 2; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
30 use Data::Dumper; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
31 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Command-line options and their default values.
# 'row-range' is the row range handed to vdb-dump's -R switch; the usage
# text describes the default as "first 1000000 spots".
my %opts = ( 'row-range' => '-1000000' );
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
35 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# usage()
#
# Writes the help text to standard output and terminates the script with
# exit status 0.  It never returns to the caller.
sub usage()
{
    # $0 is interpolated so the text shows the name the script was run as.
    my $help_text = <<"HELP";
report QUALITY statistics for an SRA

Usage:
$0 [<options>...] <accession>...
options are
-h | -? | --help
--row-range <row-range-spec> default is first 1000000 spots

Example:
$0 --row-range "5,7,11-15,25-37" SRR797646

HELP

    print $help_text;
    exit 0;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
53 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Show the help text (and exit) when the script is run without arguments
# or when any argument is one of the help switches.
usage() unless @ARGV;
for my $arg (@ARGV) {
    usage() if $arg =~ /^-h$/ or $arg =~ /^-\?$/ or $arg =~ /^--help$/;
}

# Locate vdb-dump through `which`; empty output means it is not in PATH.
my $VDB_DUMP = `which vdb-dump`;
die "Please put path to vdb-dump in PATH" unless $VDB_DUMP;
chomp $VDB_DUMP;

# Accumulators shared by the subs below.
# @stats is a flat histogram addressed as
#   (position * MAX_SCORE_COUNT + score) * MAX_READ_COUNT + read
my @stats = ();
my ($max_read, $max_len, $max_score) = (0, 0, 0);
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
66 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# split_record(\$line, \@first, \@second, \@third)
#
# Splits one tab-separated record into its first four columns.  Each
# column is itself a list separated by a comma and optional whitespace.
#
#   $_[0]        reference to the record text
#   $_[1..3]     array references that receive the items of columns 1-3
#                (their previous contents are replaced)
#
# Returns a reference to a new array holding the items of column 4.
# A column that is missing from the record yields an empty list.
sub split_record($$$$)
{
    my ($line_ref, $first, $second, $third) = @_;

    # Missing columns become '' so a short record produces empty lists
    # instead of dereferencing an undefined value.
    my @fld = map { defined $_ ? $_ : '' }
              (split(/\t/, ${$line_ref}))[0 .. 3];

    @{$first}  = split(/,\s*/, $fld[0]);
    @{$second} = split(/,\s*/, $fld[1]);
    @{$third}  = split(/,\s*/, $fld[2]);

    return [ split(/,\s*/, $fld[3]) ];
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
76 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# resize_stats(\@values)
#
# Makes sure the file-level @stats array holds at least
#   (number of items in @values) * MAX_SCORE_COUNT * MAX_READ_COUNT
# entries, appending zeros as needed.  @stats is never shrunk.
sub resize_stats($)
{
    my ($values) = @_;
    my $wanted  = scalar(@{$values}) * MAX_SCORE_COUNT * MAX_READ_COUNT;
    my $missing = $wanted - scalar(@stats);

    push @stats, (0) x $missing if $missing > 0;
    return;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
85 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# expand_and_summarize(\$record)
#
# Tallies the quality scores of one record into the file-level @stats
# histogram and updates $max_read, $max_len and $max_score.
#
# The record is split by split_record() into per-read start offsets,
# lengths and types plus the list of quality values.  Only reads whose
# type matches SRA_READ_TYPE_BIOLOGICAL are counted.  Scores of
# MAX_SCORE_COUNT or more are counted in the last bucket.
#
# Dies with "inconsistent data" when the per-read lists differ in length.
sub expand_and_summarize($)
{
    my ($record_ref) = @_;
    my (@rs, @rl, @rt);
    my $qv    = split_record($record_ref, \@rs, \@rl, \@rt);
    my $reads = scalar @rs;

    die "inconsistent data" unless scalar(@rl) == $reads;
    die "inconsistent data" unless scalar(@rt) == $reads;

    resize_stats($qv);

    # Each (position, score) cell of @stats has room for MAX_READ_COUNT
    # reads only.  A larger read index would land in a neighbouring cell
    # and corrupt the counts, so extra reads are left out.  The package
    # variable keeps the warning to one line per run.
    our $warned_extra_reads;
    if ($reads > MAX_READ_COUNT) {
        warn "record has $reads reads; only the first " . MAX_READ_COUNT
           . " are tallied\n" unless $warned_extra_reads++;
        $reads = MAX_READ_COUNT;
    }
    $max_read = $reads if $max_read < $reads;

    for my $read (0 .. $reads - 1) {
        next unless $rt[$read] =~ /SRA_READ_TYPE_BIOLOGICAL/;
        my $pos = $rs[$read];
        my $len = $rl[$read];

        $max_len = $len if ($max_len < $len);

        # A range loop simply does nothing for a zero or negative length.
        for my $i (0 .. $len - 1) {
            my $score = $qv->[$pos + $i] + 0;
            $score = MAX_SCORE_COUNT - 1 if $score >= MAX_SCORE_COUNT;
            my $idx = ($i * MAX_SCORE_COUNT + $score) * MAX_READ_COUNT + $read;

            $max_score = $score if $max_score < $score;
            ++$stats[$idx];
        }
    }
    return;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
115 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# warn_if_aligned($accession)
#
# Prints a notice on standard output when the accession looks like an
# aligned database: "vdb-dump -y" must report something matching
# /Database/i, and "vdb-dump -E" must list a line containing
# PRIMARY_ALIGNMENT.
#
# vdb-dump is started with list-form pipe opens, so the accession is
# passed as a single argument and never interpreted by a shell.
# Dies with the system error when the second vdb-dump cannot be started.
sub warn_if_aligned($)
{
    my ($accession) = @_;

    # Best effort: if the type query cannot run, assume "not a database".
    open(my $type_fh, '-|', $VDB_DUMP, '-y', $accession) or return;
    my $type = do { local $/; <$type_fh> };
    close $type_fh;
    return unless defined $type && $type =~ /Database/i;

    open(my $list_fh, '-|', $VDB_DUMP, '-E', $accession) or die "$!";
    while (defined(my $line = <$list_fh>)) {
        if ($line =~ /PRIMARY_ALIGNMENT/) {
            print "$accession is aligned; results may be biased to the reference\n";
            last;
        }
    }
    close $list_fh;
    return;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
130 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# process($accession)
#
# Runs vdb-dump on the accession, asking for the READ_START, READ_LEN,
# READ_TYPE and QUALITY columns in tab format for the rows selected by
# $opts{'row-range'}, and feeds every output line to
# expand_and_summarize().
#
# The command is started with a list-form pipe open, so neither the
# accession nor the row range is interpreted by a shell.
# Dies with the system error when vdb-dump cannot be started; warns on
# standard error when closing the pipe reports a failure.
sub process($)
{
    my ($accession) = @_;
    my @cmd = (
        $VDB_DUMP,
        '-f', 'tab',
        '-C', 'READ_START,READ_LEN,READ_TYPE,QUALITY',
        '-R', $opts{'row-range'},
        $accession,
    );

    open(my $dump_fh, '-|', @cmd) or die "$!";

    # A lexical line variable keeps the caller's $_ untouched.
    while (defined(my $line = <$dump_fh>)) {
        chomp $line;
        expand_and_summarize(\$line);
    }
    close($dump_fh)
        or warn "vdb-dump failed for $accession (wait status $?)\n";
    return;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
142 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Walk the command line from left to right.  "--row-range" consumes the
# following argument as the new row range; any other argument starting
# with "-" shows the help text and exits; everything else is treated as
# an accession and processed immediately with the options seen so far.
my @pending = @ARGV;
while (@pending) {
    my $arg = shift @pending;

    if ($arg =~ /^-/) {
        usage() unless $arg =~ /^--row-range$/;
        $opts{'row-range'} = shift @pending;
        next;
    }
    warn_if_aligned($arg);
    process($arg);
}

# Column headings of the report.
my @columns = ('Read', 'Pos', 'Min', '25%', '50%', '75%', 'Max', 'Avg', 'St.Dev.', 'Mode');
print join("\t", @columns), "\n";
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
158 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# partition(\@counts, $cnt, $le)
#
# Finds a quantile in a histogram.
#
#   \@counts  histogram: element i is the number of observations of value i
#   $cnt      total number of observations taken into account
#   $le       wanted fraction (e.g. 0.25, 0.5, 0.75)
#
# Returns the first index i with a non-zero count for which at least
# $le of the observations are <= i and at least 1 - $le are >= i.
# Index 2 is left out, matching the report loop, which also excludes
# score 2 from $cnt.  Returns undef when no index qualifies or when
# $cnt is zero.
sub partition($$$)
{
    my ($array, $cnt, $le) = @_;

    # Nothing to partition; also avoids dividing by zero below.
    return undef unless $cnt;

    my $ge   = 1.0 - $le;
    my $rsum = 0;           # observations with an index below the current one

    for my $i (0 .. $#{$array}) {
        next if $i == 2;
        my $n = $array->[$i];

        next unless $n;

        # $rsum does not include the current bucket yet, so this is the
        # number of observations >= $i.
        my $n_ge = $cnt - $rsum;

        # Advance the running sum; it is now the number of observations <= $i.
        $rsum += $n;

        return $i if ($rsum/$cnt >= $le && $n_ge/$cnt >= $ge);
    }

    # Explicit undef (not an empty list) so callers always get one value.
    return undef;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
181 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Report: one tab-separated line per (read, position) that has data.
# Columns: read number, position (both 1-based), minimum, 25%, 50%, 75%,
# maximum, average, standard deviation, and the mode.  When several
# scores tie for the highest count, each is written as its own column.
# Score 2 is excluded from every statistic except the mode.
for my $read (0 .. $max_read - 1) {
    for my $pos (0 .. $max_len - 1) {
        my $sum  = 0;       # sum of scores
        my $ssum = 0;       # sum of squared scores
        my $cnt  = 0;       # number of scores counted
        my $most = -1;      # highest count of any single score
        my @part = ( MAX_SCORE_COUNT, 0, 0, 0, 0 );
        my @mode = ();

        # Histogram of scores 0..$max_score for this read and position.
        my $array = [ map { ($stats[($pos * MAX_SCORE_COUNT + $_) * MAX_READ_COUNT + $read] || 0) } (0..$max_score) ];

        for my $score (0 .. $max_score) {
            my $n = $array->[$score];

            $most = $n if ($most < $n);
            next if $score == 2;

            my $x = $n * $score;

            $cnt  += $n;
            $sum  += $x;
            $ssum += $score * $x;
        }
        next unless $cnt;

        for my $score (0 .. $max_score) {
            my $n = $array->[$score];

            next unless $n;
            push @mode, $score if ($n == $most);
            next if $score == 2;

            $part[0] = $score if $part[0] > $score;
            $part[4] = $score if $part[4] < $score;
        }
        $part[1] = partition($array, $cnt, 0.25);
        $part[2] = partition($array, $cnt, 0.5);
        $part[3] = partition($array, $cnt, 0.75);

        # Rounding can leave a tiny negative variance, which sqrt rejects.
        my $variance = ($ssum - $sum*$sum/$cnt)/$cnt;
        $variance = 0 if $variance < 0;

        # print, not printf: the joined data is not a format string.
        print join("\t", ($read + 1, $pos + 1, @part, $sum/$cnt, sqrt($variance), @mode))."\n";
    }
}