Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/example/perl/dump-reference.pl @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:0d65b71ff8df | 3:38ad1130d077 |
---|---|
1 #!/usr/bin/env perl | |
2 # =========================================================================== | |
3 # | |
4 # PUBLIC DOMAIN NOTICE | |
5 # National Center for Biotechnology Information | |
6 # | |
7 # This software/database is a "United States Government Work" under the | |
8 # terms of the United States Copyright Act. It was written as part of | |
9 # the author's official duties as a United States Government employee and | |
10 # thus cannot be copyrighted. This software/database is freely available | |
11 # to the public for use. The National Library of Medicine and the U.S. | |
12 # Government have not placed any restriction on its use or reproduction. | |
13 # | |
14 # Although all reasonable efforts have been taken to ensure the accuracy | |
15 # and reliability of the software and data, the NLM and the U.S. | |
16 # Government do not and cannot warrant the performance or results that | |
17 # may be obtained by using this software or data. The NLM and the U.S. | |
18 # Government disclaim all warranties, express or implied, including | |
19 # warranties of performance, merchantability or fitness for any particular | |
20 # purpose. | |
21 # | |
22 # Please cite the author in any work or product based on this material. | |
23 # | |
24 # =========================================================================== | |
25 | |
26 use warnings; | |
27 | |
28 my %opts = ( | |
29 'local-name' => 0, | |
30 'line-length' => 70 | |
31 ); | |
32 | |
33 sub usage() | |
34 { | |
35 print <<"HELP"; | |
36 extracts the reference sequence as FASTA from an aligned SRA | |
37 | |
38 Usage: | |
39 $0 [<options>...] <accession>... | |
40 options are | |
41 -h | -? | --help this message | |
42 --use-local toggle def-line between reference accession and | |
43 original (local to run) name of reference | |
44 --line-length N sequence line length (use 0 for unlimited) | |
45 (default is $opts{'line-length'}) | |
46 | |
47 Example: | |
48 $0 SRR341548 > ref.fasta | |
49 | |
50 HELP | |
51 exit 0; | |
52 } | |
53 | |
54 usage if scalar @ARGV == 0; | |
55 foreach (@ARGV) { | |
56 usage() if (/^-h$/ || /^-\?$/ || /^--help$/); | |
57 } | |
58 | |
59 my $VDB_DUMP = `which vdb-dump` or die "Please put path to vdb-dump in PATH"; | |
60 chomp $VDB_DUMP; | |
61 | |
62 sub process($) | |
63 { | |
64 my $defline = ''; | |
65 my $ref = ''; | |
66 | |
67 open IN, '-|', "$VDB_DUMP -f tab -T REFERENCE -C \"NAME,SEQ_ID,READ\" \"$_[0]\"" or die "$!"; | |
68 | |
69 while (defined($_ = <IN>)) { | |
70 chomp; | |
71 my ($name, $seqid, $seq) = \split /\t/; | |
72 my $new_defline; | |
73 | |
74 if ($opts{'local-name'}) { | |
75 $new_defline = ">$$name $$seqid"; | |
76 } | |
77 else { | |
78 $new_defline = ">$$seqid $$name"; | |
79 } | |
80 if ($defline ne $new_defline) { | |
81 print "$ref\n" if $ref; | |
82 print "$new_defline\n"; | |
83 $defline = $new_defline; | |
84 $ref = ''; | |
85 } | |
86 $ref .= $$seq; | |
87 if ($opts{'line-length'} != 0) { | |
88 while (length($ref) >= $opts{'line-length'}) { | |
89 print substr($ref, 0, $opts{'line-length'})."\n"; | |
90 $ref = substr($ref, $opts{'line-length'}); | |
91 } | |
92 } | |
93 } | |
94 print "$ref\n" if $ref; | |
95 close IN; | |
96 } | |
97 | |
98 for (my $i = 0; $i < scalar @ARGV; ++$i) { | |
99 $_ = $ARGV[$i]; | |
100 | |
101 if (/^-/) { | |
102 if (/^--use-local$/) { | |
103 $opts{'local-name'} = 1 - $opts{'local-name'}; | |
104 next; | |
105 } | |
106 if (/^--line-length$/) { | |
107 $_ = $ARGV[++$i]; | |
108 /^(\d+)$/ or usage(); | |
109 $opts{'line-length'} = $1; | |
110 next; | |
111 } | |
112 usage(); | |
113 } | |
114 process $_; | |
115 } |