annotate spades.pl @ 0:a003e54b84f7 draft

planemo upload
author nml
date Thu, 02 Mar 2017 15:58:00 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a003e54b84f7 planemo upload
nml
parents:
diff changeset
1 #!/usr/bin/env perl
a003e54b84f7 planemo upload
nml
parents:
diff changeset
2 ## A wrapper script to call spades.py and collect its output
a003e54b84f7 planemo upload
nml
parents:
diff changeset
3 use strict;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
4 use warnings;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
5 use File::Temp qw/ tempfile tempdir /;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
6 use File::Copy;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
7 use Getopt::Long;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
8
a003e54b84f7 planemo upload
nml
parents:
diff changeset
9 # Parse arguments
a003e54b84f7 planemo upload
nml
parents:
diff changeset
10 my ($out_contigs_file,
a003e54b84f7 planemo upload
nml
parents:
diff changeset
11 $out_contigs_stats,
a003e54b84f7 planemo upload
nml
parents:
diff changeset
12 $out_scaffolds_file,
a003e54b84f7 planemo upload
nml
parents:
diff changeset
13 $out_scaffolds_stats,
a003e54b84f7 planemo upload
nml
parents:
diff changeset
14 $out_log_file,
a003e54b84f7 planemo upload
nml
parents:
diff changeset
15 $new_name,
a003e54b84f7 planemo upload
nml
parents:
diff changeset
16 @sysargs) = @ARGV;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
17
a003e54b84f7 planemo upload
nml
parents:
diff changeset
18
a003e54b84f7 planemo upload
nml
parents:
diff changeset
19 my $output_dir = 'output_dir';
a003e54b84f7 planemo upload
nml
parents:
diff changeset
20
a003e54b84f7 planemo upload
nml
parents:
diff changeset
21 # Create log handle
a003e54b84f7 planemo upload
nml
parents:
diff changeset
22 open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
23
a003e54b84f7 planemo upload
nml
parents:
diff changeset
24 # Run program
a003e54b84f7 planemo upload
nml
parents:
diff changeset
25 runSpades(@sysargs);
a003e54b84f7 planemo upload
nml
parents:
diff changeset
26 collectOutput($new_name);
a003e54b84f7 planemo upload
nml
parents:
diff changeset
27 extractCoverageLength($out_contigs_file, $out_contigs_stats);
a003e54b84f7 planemo upload
nml
parents:
diff changeset
28 extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
a003e54b84f7 planemo upload
nml
parents:
diff changeset
29 print $log "Done\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
30 close $log;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
31 exit 0;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
32
a003e54b84f7 planemo upload
nml
parents:
diff changeset
33 # Run spades
a003e54b84f7 planemo upload
nml
parents:
diff changeset
34 sub runSpades {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
35 my $cmd = join(" ", @_) . " -o $output_dir";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
36 my $return_code = system($cmd);
a003e54b84f7 planemo upload
nml
parents:
diff changeset
37 if ($return_code) {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
38 print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
39 die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
40 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
41 return 0;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
42 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
43
a003e54b84f7 planemo upload
nml
parents:
diff changeset
44 # Collect output
a003e54b84f7 planemo upload
nml
parents:
diff changeset
45 sub collectOutput{
a003e54b84f7 planemo upload
nml
parents:
diff changeset
46 my ($new_name) = @_;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
47
a003e54b84f7 planemo upload
nml
parents:
diff changeset
48 # To do: check that the files are there
a003e54b84f7 planemo upload
nml
parents:
diff changeset
49 # Collects output
a003e54b84f7 planemo upload
nml
parents:
diff changeset
50 if ( not -e "$output_dir/contigs.fasta") {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
51 die "Could not find contigs.fasta file\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
52 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
53 if ( not -e "$output_dir/scaffolds.fasta") {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
54 die "Could not find scaffolds.fasta file\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
55 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
56
a003e54b84f7 planemo upload
nml
parents:
diff changeset
57 #if a new name is given for the contigs and scaffolds, change them before moving them
a003e54b84f7 planemo upload
nml
parents:
diff changeset
58 if ( $new_name ne 'NODE') {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
59 renameContigs($new_name);
a003e54b84f7 planemo upload
nml
parents:
diff changeset
60 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
61 else {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
62 move "$output_dir/contigs.fasta", $out_contigs_file;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
63 move "$output_dir/scaffolds.fasta", $out_scaffolds_file;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
64 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
65
a003e54b84f7 planemo upload
nml
parents:
diff changeset
66
a003e54b84f7 planemo upload
nml
parents:
diff changeset
67
a003e54b84f7 planemo upload
nml
parents:
diff changeset
68 open LOG, '<', "$output_dir/spades.log"
a003e54b84f7 planemo upload
nml
parents:
diff changeset
69 or die "Cannot open log file $output_dir/spades.log: $?";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
70 print $log $_ while (<LOG>);
a003e54b84f7 planemo upload
nml
parents:
diff changeset
71 return 0;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
72 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
73
a003e54b84f7 planemo upload
nml
parents:
diff changeset
74 #Change name in contig and scaffolds file
a003e54b84f7 planemo upload
nml
parents:
diff changeset
75 sub renameContigs{
a003e54b84f7 planemo upload
nml
parents:
diff changeset
76 my ($name) = @_;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
77
a003e54b84f7 planemo upload
nml
parents:
diff changeset
78 open my $in, '<',"$output_dir/contigs.fasta" or die $!;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
79 open my $out,'>', $out_contigs_file;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
80
a003e54b84f7 planemo upload
nml
parents:
diff changeset
81 while ( my $line = <$in>) {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
82 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
a003e54b84f7 planemo upload
nml
parents:
diff changeset
83 #also move the remainder of the length
a003e54b84f7 planemo upload
nml
parents:
diff changeset
84 if ( $line =~ />NODE_(\d+)_(.+)/) {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
85 $line = ">$name" . "_$1 $2\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
86 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
87 print $out $line;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
88 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
89 close $in;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
90 close $out;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
91
a003e54b84f7 planemo upload
nml
parents:
diff changeset
92
a003e54b84f7 planemo upload
nml
parents:
diff changeset
93 open $in, '<',"$output_dir/scaffolds.fasta" or die $!;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
94 open $out,'>', $out_scaffolds_file;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
95
a003e54b84f7 planemo upload
nml
parents:
diff changeset
96 while ( my $line = <$in>) {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
97 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
a003e54b84f7 planemo upload
nml
parents:
diff changeset
98 #also move the remainder of the length
a003e54b84f7 planemo upload
nml
parents:
diff changeset
99 if ( $line =~ />NODE_(\d+)_(.+)/) {
a003e54b84f7 planemo upload
nml
parents:
diff changeset
100 $line = ">$name" . "_$1 $2\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
101 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
102 print $out $line;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
103 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
104 close $in;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
105 close $out;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
106
a003e54b84f7 planemo upload
nml
parents:
diff changeset
107 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
108
a003e54b84f7 planemo upload
nml
parents:
diff changeset
109
a003e54b84f7 planemo upload
nml
parents:
diff changeset
110 # Extract
a003e54b84f7 planemo upload
nml
parents:
diff changeset
111 sub extractCoverageLength{
a003e54b84f7 planemo upload
nml
parents:
diff changeset
112 my ($in, $out) = @_;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
113 open FASTA, '<', $in or die $!;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
114 open TAB, '>', $out or die $!;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
115 print TAB "#name\tlength\tcoverage\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
116 while (<FASTA>){
a003e54b84f7 planemo upload
nml
parents:
diff changeset
117 next unless /^>/;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
118 chomp;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
119 die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/);
a003e54b84f7 planemo upload
nml
parents:
diff changeset
120 my ($name,$n, $l, $cov) = ($1,$2, $3, $4);
a003e54b84f7 planemo upload
nml
parents:
diff changeset
121 print TAB "$name" . "_$n\t$l\t$cov\n";
a003e54b84f7 planemo upload
nml
parents:
diff changeset
122 }
a003e54b84f7 planemo upload
nml
parents:
diff changeset
123 close TAB;
a003e54b84f7 planemo upload
nml
parents:
diff changeset
124 }