annotate libs/sratoolkit.2.8.0-centos_linux64/example/perl/dump-reference.pl @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
1 #!/usr/bin/env perl
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
2 # ===========================================================================
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
3 #
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
4 # PUBLIC DOMAIN NOTICE
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
5 # National Center for Biotechnology Information
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
6 #
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
7 # This software/database is a "United States Government Work" under the
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
8 # terms of the United States Copyright Act. It was written as part of
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
9 # the author's official duties as a United States Government employee and
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
10 # thus cannot be copyrighted. This software/database is freely available
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
11 # to the public for use. The National Library of Medicine and the U.S.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
12 # Government have not placed any restriction on its use or reproduction.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
13 #
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
14 # Although all reasonable efforts have been taken to ensure the accuracy
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
15 # and reliability of the software and data, the NLM and the U.S.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
16 # Government do not and cannot warrant the performance or results that
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
17 # may be obtained by using this software or data. The NLM and the U.S.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
18 # Government disclaim all warranties, express or implied, including
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
19 # warranties of performance, merchantability or fitness for any particular
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
20 # purpose.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
21 #
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
22 # Please cite the author in any work or product based on this material.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
23 #
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
24 # ===========================================================================
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
25
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
26 use warnings;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
27
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
28 my %opts = (
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
29 'local-name' => 0,
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
30 'line-length' => 70
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
31 );
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
32
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
33 sub usage()
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
34 {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
35 print <<"HELP";
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
36 extracts the reference sequence as FASTA from an aligned SRA
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
37
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
38 Usage:
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
39 $0 [<options>...] <accession>...
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
40 options are
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
41 -h | -? | --help this message
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
42 --use-local toggle def-line between reference accession and
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
43 original (local to run) name of reference
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
44 --line-length N sequence line length (use 0 for unlimited)
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
45 (default is $opts{'line-length'})
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
46
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
47 Example:
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
48 $0 SRR341548 > ref.fasta
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
49
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
50 HELP
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
51 exit 0;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
52 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
53
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
54 usage if scalar @ARGV == 0;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
55 foreach (@ARGV) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
56 usage() if (/^-h$/ || /^-\?$/ || /^--help$/);
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
57 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
58
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
59 my $VDB_DUMP = `which vdb-dump` or die "Please put path to vdb-dump in PATH";
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
60 chomp $VDB_DUMP;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
61
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
62 sub process($)
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
63 {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
64 my $defline = '';
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
65 my $ref = '';
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
66
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
67 open IN, '-|', "$VDB_DUMP -f tab -T REFERENCE -C \"NAME,SEQ_ID,READ\" \"$_[0]\"" or die "$!";
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
68
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
69 while (defined($_ = <IN>)) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
70 chomp;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
71 my ($name, $seqid, $seq) = \split /\t/;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
72 my $new_defline;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
73
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
74 if ($opts{'local-name'}) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
75 $new_defline = ">$$name $$seqid";
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
76 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
77 else {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
78 $new_defline = ">$$seqid $$name";
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
79 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
80 if ($defline ne $new_defline) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
81 print "$ref\n" if $ref;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
82 print "$new_defline\n";
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
83 $defline = $new_defline;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
84 $ref = '';
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
85 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
86 $ref .= $$seq;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
87 if ($opts{'line-length'} != 0) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
88 while (length($ref) >= $opts{'line-length'}) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
89 print substr($ref, 0, $opts{'line-length'})."\n";
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
90 $ref = substr($ref, $opts{'line-length'});
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
91 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
92 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
93 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
94 print "$ref\n" if $ref;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
95 close IN;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
96 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
97
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
98 for (my $i = 0; $i < scalar @ARGV; ++$i) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
99 $_ = $ARGV[$i];
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
100
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
101 if (/^-/) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
102 if (/^--use-local$/) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
103 $opts{'local-name'} = 1 - $opts{'local-name'};
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
104 next;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
105 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
106 if (/^--line-length$/) {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
107 $_ = $ARGV[++$i];
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
108 /^(\d+)$/ or usage();
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
109 $opts{'line-length'} = $1;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
110 next;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
111 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
112 usage();
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
113 }
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
114 process $_;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
115 }