Mercurial > repos > charles_s_test > seqsero2
annotate libs/sratoolkit.2.8.0-centos_linux64/example/perl/base-stats.pl @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
rev | line source |
---|---|
3
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
1 #!/usr/bin/env perl |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
2 # ============================================================================= |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
3 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
4 # PUBLIC DOMAIN NOTICE |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
5 # National Center for Biotechnology Information |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
6 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
7 # This software/database is a "United States Government Work" under the |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
8 # terms of the United States Copyright Act. It was written as part of |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
9 # the author's official duties as a United States Government employee and |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
10 # thus cannot be copyrighted. This software/database is freely available |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
11 # to the public for use. The National Library of Medicine and the U.S. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
12 # Government have not placed any restriction on its use or reproduction. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
13 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
14 # Although all reasonable efforts have been taken to ensure the accuracy |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
15 # and reliability of the software and data, the NLM and the U.S. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
16 # Government do not and cannot warrant the performance or results that |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
17 # may be obtained by using this software or data. The NLM and the U.S. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
18 # Government disclaim all warranties, express or implied, including |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
19 # warranties of performance, merchantability or fitness for any particular |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
20 # purpose. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
21 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
22 # Please cite the author in any work or product based on this material. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
23 # |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
24 # ============================================================================= |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
25 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
26 =head1 NAME |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
27 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
28 base-stats - report base statistics for an SRA run |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
29 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
30 =head1 DESCRIPTION |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
31 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
32 This script computes base and GC content statistics for an SRA run. The output |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
33 is summarized by mate and position within the mate. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
34 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
35 =head1 PURPOSE |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
36 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
37 This script demonstrates driving the SRA Toolkit tool 'vdb-dump' from Perl for |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
38 accessing data in the SRA. It requires that the toolkit be properly configured. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
39 'vdb-dump' is the Jack-of-all-trades of the SRA Toolkit. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
40 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
41 =head1 EXAMPLE |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
42 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
43 base-stats.pl SRR797646 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
44 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
45 =head1 SEE ALSO |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
46 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
47 base-stats.pl --help |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
48 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
49 vdb-dump --help |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
50 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
51 =cut |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
52 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
53 use strict; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
54 use warnings; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
55 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Print the command-line help text to standard output and terminate the
# script with exit status 0. This sub never returns to its caller.
#
# The text is an interpolating here-document, so $0 expands to the name the
# script was invoked with.
sub usage()
{
    print <<"HELP";
report base statistics for an SRA run

Usage:
  $0 [<options>...] <accession>...
    options are
      -h | -? | --help                  shows this message
      --row-range <row-range-spec>      default is spots 1-1000000
        row-range-spec                  a comma separated list of row numbers and/or
                                        ranges; a range is <first>-<last>

      --output-type <output-spec>       set the type of data output; proportion is
                                        the default
        output-spec                     one of 'count', 'percent', or 'proportion'

    accession                           an SRA accession or path to an SRA file

Example:
  $0 --row-range "5,7,11-15,25-37" --output-type count SRR797646

HELP
    exit 0;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
81 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Show the help text (and exit) when there is nothing on the command line or
# when any argument is one of the help flags.
usage() unless @ARGV;
usage() if grep { /^-h$/ || /^-\?$/ || /^--help$/ } @ARGV;

use constant {
    # stride used for the read number when indexing the statistics array
    MAX_READ_COUNT => 2,

    # the columns to be read from the input
    COLUMN_LIST    => 'READ_START,READ_LEN,READ_TYPE,READ',
};

# need vdb-dump to read input files; locate it through PATH
my $VDB_DUMP = `which vdb-dump`;
die "Please put path to vdb-dump in PATH" unless $VDB_DUMP;
chomp $VDB_DUMP;

# option defaults; the argument loop may overwrite them
my %opts;
$opts{'row-range'}   = '1-1000000';    # first million spots
$opts{'output-type'} = 'proportion';
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
100 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Run vdb-dump on one accession/file and hand every output line to a callback.
#
#   $source   - accession or path, passed to vdb-dump as its final argument
#   $callback - code ref; called once per output line with a list of scalar
#               references, one per tab-separated field of that line
#
# The rows come from $opts{'row-range'} and the columns from COLUMN_LIST.
# Dies if the vdb-dump process cannot be started.
sub foreach_row($$)
{
    my ($source, $callback) = @_;

    # List-form pipe open: every argument is handed to vdb-dump directly,
    # without a shell in between. Quotes, '$', backticks or spaces in the
    # accession/path, the row range or the vdb-dump path can therefore
    # neither break the command line nor be interpreted by a shell.
    my @cmd = (
        $VDB_DUMP,
        '--rows',    $opts{'row-range'},    # set the row range
        '--columns', COLUMN_LIST,           # set the columns to be read
        '--format',  'tab',                 # set output format to tab delimited
        $source,                            # the file/accession to be read
    );
    open(my $pipe, '-|', @cmd) or die "failed to exec vdb-dump: $!";

    # $_ holds the current line while the callback runs, and is restored
    # afterwards because it is localized.
    while (defined(local $_ = <$pipe>)) {
        # print; # uncomment to print raw output from vdb-dump (e.g. for debugging)
        chomp;

        # \split(...) produces one reference per field, not one array ref
        $callback->(\split(/\t/, $_));
    }
    close $pipe;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
119 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Ask vdb-dump for the object type of an accession or file.
#
#   $_[0] - accession or path, passed to 'vdb-dump --obj_type'
#
# Returns the first line vdb-dump prints, without its trailing newline, or
# undef when vdb-dump prints nothing. Dies if vdb-dump cannot be started.
sub sra_object_type($)
{
    my ($source) = @_;

    # List-form pipe open: no shell is involved, so special characters in
    # the accession/path are passed through literally.
    open(my $pipe, '-|', $VDB_DUMP, '--obj_type', $source)
        or die "failed to exec vdb-dump: $!";
    my $type = <$pipe>;
    close $pipe;

    # vdb-dump may produce no output at all; do not chomp undef
    chomp $type if defined $type;
    return $type;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
129 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# List the table names of an SRA object.
#
#   $_[0] - accession or path
#
# Returns the names found in the "tbl #N: NAME" lines printed by
# 'vdb-dump --table_enum'. The list is empty when sra_object_type() does not
# report 'Database' for the object. Dies if vdb-dump cannot be started.
sub sra_list_tables($)
{
    my ($source) = @_;
    my @tables = ();

    # sra_object_type() can come back undefined when vdb-dump printed nothing
    my $type = sra_object_type($source);
    if (defined $type && $type eq 'Database') {
        # List-form pipe open: no shell is involved, so special characters
        # in the accession/path are passed through literally.
        open(my $pipe, '-|', $VDB_DUMP, '--table_enum', $source)
            or die "failed to exec vdb-dump: $!";
        while (defined(my $line = <$pipe>)) {
            push @tables, $1 if $line =~ /^tbl #\d+:\s*(\w+)/;
        }
        close $pipe;
    }
    return @tables;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
143 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# forward declarations for subs that are defined further down but called here
sub warn_if_aligned($);
sub print_result();
sub process;

# Flat array of counters; get_stat/inc_stat index it as
# (pos * 5 + base) * MAX_READ_COUNT + read_number.
my @stats = ();
# NOTE(review): presumably the largest read number and position seen so far;
# their updates are outside this section - confirm against process().
my $max_read = 0;
my $max_pos = 0;

# Walk the command line from left to right. Options take effect for the
# accessions that follow them; every non-option argument is processed
# immediately as an accession or file.
for (my $i = 0; $i < scalar @ARGV; ++$i) {
    $_ = $ARGV[$i];

    if (/^-/) {
        if (/^--row-range$/) {
            # an option that is the last argument has no value: show help
            # instead of silently storing undef
            usage() if $i + 1 >= scalar @ARGV;
            $opts{'row-range'} = $ARGV[++$i];
            next;
        }
        if (/^--output-type$/) {
            my $type = $ARGV[++$i];

            # a missing value falls through to usage() like an invalid one
            if (defined $type
                && (   $type eq 'count'
                    || $type eq 'proportion'
                    || $type eq 'percent'))
            {
                $opts{'output-type'} = $type;
                next;
            }
        }
        # unknown option, or missing/invalid option value
        usage();
    }
    warn_if_aligned $_;
    foreach_row($_, \&process);
}

print_result();
exit 0;
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
179 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Print a one-line notice on standard output when the given accession/file
# has a table whose name contains PRIMARY_ALIGNMENT; otherwise print nothing.
sub warn_if_aligned($)
{
    my ($source) = @_;

    # one matching table is enough; the notice is printed at most once
    my $aligned = grep { /PRIMARY_ALIGNMENT/ } sra_list_tables($source);
    return unless $aligned;

    print "$source is aligned and is likely to be mostly ordered; results may be biased to the reference\n";
    return;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
191 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Fetch one counter from @stats.
# Arguments, in order: read number, position, base code.
sub get_stat($$$)
{
    my ($read_number, $pos, $base) = @_;

    # slot layout: (pos * 5 + base) * MAX_READ_COUNT + read_number
    my $slot = ($pos * 5 + $base) * MAX_READ_COUNT + $read_number;
    return $stats[$slot];
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
197 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Add one to a counter in @stats.
# Arguments, in order: read number, position, base code.
# Hot path: works on @_ directly and avoids extra lexicals to stay fast.
sub inc_stat($$$)
{
    # slot = read_number + MAX_READ_COUNT * (base + 5 * pos)
    ++$stats[$_[0] + MAX_READ_COUNT * ($_[2] + 5 * $_[1])];
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
203 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Make sure @stats has a zeroed slot for every (position, base, read)
# combination up to the given number of positions. Never shrinks the array.
sub resize_stats($)
{
    my ($positions) = @_;

    # 5 base codes and MAX_READ_COUNT reads per position
    my $shortfall = $positions * 5 * MAX_READ_COUNT - scalar(@stats);
    return if $shortfall <= 0;

    # append the missing slots, all starting at zero
    push @stats, (0) x $shortfall;
    return;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
213 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Translate a string of base letters into single-digit codes:
#   A -> 0, C -> 1, G -> 2, T -> 3, N -> 4
# Lower-case a/c/g/t are treated like their upper-case forms.  Every other
# character is coded as 4 (N); left untranslated it would later be used as a
# number, evaluate to 0, and be counted as an 'A'.
#   $_[0] - reference to the base string; the referenced string is not modified
# Returns the translated copy.
sub convert_bases($)
{
    my $codes = ${$_[0]};

    # first collapse everything that is not a recognised base into N ...
    $codes =~ tr/ACGTacgt/N/c;
    # ... then map letters to digit codes
    $codes =~ tr/ACGTNacgt/012340123/;
    return $codes;
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
221 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Accumulate statistics for one spot.
#
# Arguments are scalar references, in this order (this matches COLUMN_LIST
# above):
#   $READ_START - comma-delimited start offset of each read within READ
#   $READ_LEN   - comma-delimited length of each read
#   $READ_TYPE  - comma-delimited type of each read
#   $READ       - the bases for the whole spot (i.e. all the mates concatenated)
#
# For every read whose type matches SRA_READ_TYPE_BIOLOGICAL, each base is
# counted through inc_stat().  $max_read and $max_pos are raised as needed.
# Dies when the spot has more than MAX_READ_COUNT reads.
sub process
{
    my ($READ_START, $READ_LEN, $READ_TYPE, $READ) = @_;

    # the data in these fields is comma delimited
    # and describe how to split READ up into its pieces
    my @start  = split(/,\s*/, $$READ_START);
    my @length = split(/,\s*/, $$READ_LEN);
    my @type   = split(/,\s*/, $$READ_TYPE);

    my @bases = split(//, convert_bases($READ));
    my $reads = scalar(@start);

    die "too many reads: $reads reads; adjust MAX_READ_COUNT and rerun" unless $reads <= MAX_READ_COUNT;

    $max_read = $reads if ($max_read < $reads);
    for (my $read = 0; $read < $reads; ++$read) {
        # only count biological bases (i.e. no adapters, linkers, etc.)
        next unless defined $type[$read] && $type[$read] =~ /SRA_READ_TYPE_BIOLOGICAL/;
        next unless defined $length[$read];

        my $pos = $start[$read];
        my $len = $length[$read];

        # never walk past the end of READ: a missing base is undef, which
        # would be used as base code 0 and silently counted as an 'A'
        my $available = scalar(@bases) - $pos;
        $len = $available if $len > $available;
        next unless $len > 0;

        resize_stats($len);
        $max_pos = $len if ($max_pos < $len);

        # '<' rather than '!=' so a fractional length cannot loop forever
        for (my $i = 0; $i < $len; ++$i) {
            inc_stat($read, $i, $bases[$pos + $i]);
        }
    }
}
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
254 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
# Print the accumulated statistics as a tab-separated table on STDOUT.
#
# Output: a header line, then one line per (read, position) with the columns
#   Read, Pos, A, C, G, T, N, GC, Suspect
# Read and Pos are printed 1-based.  The base columns are rendered according
# to $opts{'output-type'}:
#   'count'       - the raw count
#   'percent'     - rounded percentage of the total (including N), with '%'
#   anything else - fraction of the total (including N)
# Suspect holds one '*' per whole standard deviation by which the GC fraction
# of that position (N excluded) differs from the mean GC fraction.
#
# Positions at which no A/C/G/T was counted (e.g. reads that were never
# accumulated, or positions past the end of a shorter read) are left out of
# the mean/deviation and get an empty Suspect column; dividing by their zero
# total would otherwise abort with "Illegal division by zero".
sub print_result() {
    my $output_type = defined $opts{'output-type'} ? $opts{'output-type'} : '';

    # formatter for one cell: ($count, $total) -> text; a zero total gives 0
    my $format =
        $output_type eq 'count'   ? sub { $_[0] } :
        $output_type eq 'percent' ? sub { $_[1] ? int($_[0] * 100.0/$_[1] + 0.5).'%' : '0%' } :
                                    sub { $_[1] ? $_[0]/$_[1] : 0 };

    # pass 1: mean and standard deviation of the per-position GC fraction
    my $sum     = 0;
    my $ssum    = 0;
    my $samples = 0;
    for my $read (0 .. $max_read - 1) {
        for my $pos (0 .. $max_pos - 1) {
            my $gc = get_stat($read, $pos, 1) + get_stat($read, $pos, 2);
            my $n  = $gc + get_stat($read, $pos, 0) + get_stat($read, $pos, 3);

            next unless $n > 0;     # nothing but N (or nothing at all) here

            my $x = $gc/$n;
            $sum  += $x;
            $ssum += $x * $x;
            ++$samples;
        }
    }

    my $GCp_mean = $samples ? $sum/$samples : 0;
    my $variance = $samples ? ($ssum - $sum*$GCp_mean)/$samples : 0;
    $variance = 0 if $variance < 0;     # rounding can leave a tiny negative value
    my $GCp_stdev = sqrt($variance);

    # pass 2: the table itself
    print join("\t", ('Read', 'Pos', 'A', 'C', 'G', 'T', 'N', 'GC', 'Suspect'))."\n";
    for my $read (0 .. $max_read - 1) {
        for my $pos (0 .. $max_pos - 1) {
            my $cntA = get_stat($read, $pos, 0);
            my $cntC = get_stat($read, $pos, 1);
            my $cntG = get_stat($read, $pos, 2);
            my $cntT = get_stat($read, $pos, 3);
            my $cntN = get_stat($read, $pos, 4);
            my $GC = $cntC + $cntG;
            my $tot = $cntA + $GC + $cntT;

            # keep N out for GC; no score without data or without any spread
            my $GC_score = ($tot > 0 && $GCp_stdev > 0)
                         ? ($GC/$tot - $GCp_mean)/$GCp_stdev
                         : 0;

            # add N in
            $tot += $cntN;
            print join("\t", ($read + 1, $pos + 1,
                              $format->($cntA, $tot),
                              $format->($cntC, $tot),
                              $format->($cntG, $tot),
                              $format->($cntT, $tot),
                              $format->($cntN, $tot),
                              $format->($GC, $tot),
                              '*'x(int(abs($GC_score)))))."\n";
        }
    }
}