comparison libs/sratoolkit.2.8.0-centos_linux64/example/perl/dump-reference.pl @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
comparison
equal deleted inserted replaced
2:0d65b71ff8df 3:38ad1130d077
1 #!/usr/bin/env perl
2 # ===========================================================================
3 #
4 # PUBLIC DOMAIN NOTICE
5 # National Center for Biotechnology Information
6 #
7 # This software/database is a "United States Government Work" under the
8 # terms of the United States Copyright Act. It was written as part of
9 # the author's official duties as a United States Government employee and
10 # thus cannot be copyrighted. This software/database is freely available
11 # to the public for use. The National Library of Medicine and the U.S.
12 # Government have not placed any restriction on its use or reproduction.
13 #
14 # Although all reasonable efforts have been taken to ensure the accuracy
15 # and reliability of the software and data, the NLM and the U.S.
16 # Government do not and cannot warrant the performance or results that
17 # may be obtained by using this software or data. The NLM and the U.S.
18 # Government disclaim all warranties, express or implied, including
19 # warranties of performance, merchantability or fitness for any particular
20 # purpose.
21 #
22 # Please cite the author in any work or product based on this material.
23 #
24 # ===========================================================================
25
26 use warnings;
27
28 my %opts = (
29 'local-name' => 0,
30 'line-length' => 70
31 );
32
33 sub usage()
34 {
35 print <<"HELP";
36 extracts the reference sequence as FASTA from an aligned SRA
37
38 Usage:
39 $0 [<options>...] <accession>...
40 options are
41 -h | -? | --help this message
42 --use-local toggle def-line between reference accession and
43 original (local to run) name of reference
44 --line-length N sequence line length (use 0 for unlimited)
45 (default is $opts{'line-length'})
46
47 Example:
48 $0 SRR341548 > ref.fasta
49
50 HELP
51 exit 0;
52 }
53
54 usage if scalar @ARGV == 0;
55 foreach (@ARGV) {
56 usage() if (/^-h$/ || /^-\?$/ || /^--help$/);
57 }
58
59 my $VDB_DUMP = `which vdb-dump` or die "Please put path to vdb-dump in PATH";
60 chomp $VDB_DUMP;
61
62 sub process($)
63 {
64 my $defline = '';
65 my $ref = '';
66
67 open IN, '-|', "$VDB_DUMP -f tab -T REFERENCE -C \"NAME,SEQ_ID,READ\" \"$_[0]\"" or die "$!";
68
69 while (defined($_ = <IN>)) {
70 chomp;
71 my ($name, $seqid, $seq) = \split /\t/;
72 my $new_defline;
73
74 if ($opts{'local-name'}) {
75 $new_defline = ">$$name $$seqid";
76 }
77 else {
78 $new_defline = ">$$seqid $$name";
79 }
80 if ($defline ne $new_defline) {
81 print "$ref\n" if $ref;
82 print "$new_defline\n";
83 $defline = $new_defline;
84 $ref = '';
85 }
86 $ref .= $$seq;
87 if ($opts{'line-length'} != 0) {
88 while (length($ref) >= $opts{'line-length'}) {
89 print substr($ref, 0, $opts{'line-length'})."\n";
90 $ref = substr($ref, $opts{'line-length'});
91 }
92 }
93 }
94 print "$ref\n" if $ref;
95 close IN;
96 }
97
98 for (my $i = 0; $i < scalar @ARGV; ++$i) {
99 $_ = $ARGV[$i];
100
101 if (/^-/) {
102 if (/^--use-local$/) {
103 $opts{'local-name'} = 1 - $opts{'local-name'};
104 next;
105 }
106 if (/^--line-length$/) {
107 $_ = $ARGV[++$i];
108 /^(\d+)$/ or usage();
109 $opts{'line-length'} = $1;
110 next;
111 }
112 usage();
113 }
114 process $_;
115 }