|
0
|
1 #!/usr/bin/env perl
|
|
|
2 ## A wrapper script to call spades.py and collect its output
|
|
|
3 use strict;
|
|
|
4 use warnings;
|
|
|
5 use File::Temp qw/ tempfile tempdir /;
|
|
|
6 use File::Copy;
|
|
|
7 use Getopt::Long;
|
|
|
8
|
|
|
9 # Parse arguments
|
|
|
10 my ($out_contigs_file,
|
|
|
11 $out_contigs_stats,
|
|
|
12 $out_scaffolds_file,
|
|
|
13 $out_scaffolds_stats,
|
|
|
14 $out_log_file,
|
|
|
15 $new_name,
|
|
|
16 @sysargs) = @ARGV;
|
|
|
17
|
|
|
18
|
|
|
19 my $output_dir = 'output_dir';
|
|
|
20
|
|
|
21 # Create log handle
|
|
|
22 open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n";
|
|
|
23
|
|
|
24 # Run program
|
|
|
25 runSpades(@sysargs);
|
|
|
26 collectOutput($new_name);
|
|
|
27 extractCoverageLength($out_contigs_file, $out_contigs_stats);
|
|
|
28 extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
|
|
|
29 print $log "Done\n";
|
|
|
30 close $log;
|
|
|
31 exit 0;
|
|
|
32
|
|
|
33 # Run spades
|
|
|
34 sub runSpades {
|
|
|
35 my $cmd = join(" ", @_) . " -o $output_dir";
|
|
|
36 my $return_code = system($cmd);
|
|
|
37 if ($return_code) {
|
|
|
38 print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
|
|
|
39 die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
|
|
|
40 }
|
|
|
41 return 0;
|
|
|
42 }
|
|
|
43
|
|
|
44 # Collect output
|
|
|
45 sub collectOutput{
|
|
|
46 my ($new_name) = @_;
|
|
|
47
|
|
|
48 # To do: check that the files are there
|
|
|
49 # Collects output
|
|
|
50 if ( not -e "$output_dir/contigs.fasta") {
|
|
|
51 die "Could not find contigs.fasta file\n";
|
|
|
52 }
|
|
|
53 if ( not -e "$output_dir/scaffolds.fasta") {
|
|
|
54 die "Could not find scaffolds.fasta file\n";
|
|
|
55 }
|
|
|
56
|
|
|
57 #if a new name is given for the contigs and scaffolds, change them before moving them
|
|
|
58 if ( $new_name ne 'NODE') {
|
|
|
59 renameContigs($new_name);
|
|
|
60 }
|
|
|
61 else {
|
|
|
62 move "$output_dir/contigs.fasta", $out_contigs_file;
|
|
|
63 move "$output_dir/scaffolds.fasta", $out_scaffolds_file;
|
|
|
64 }
|
|
|
65
|
|
|
66
|
|
|
67
|
|
|
68 open LOG, '<', "$output_dir/spades.log"
|
|
|
69 or die "Cannot open log file $output_dir/spades.log: $?";
|
|
|
70 print $log $_ while (<LOG>);
|
|
|
71 return 0;
|
|
|
72 }
|
|
|
73
|
|
|
74 #Change name in contig and scaffolds file
|
|
|
75 sub renameContigs{
|
|
|
76 my ($name) = @_;
|
|
|
77
|
|
|
78 open my $in, '<',"$output_dir/contigs.fasta" or die $!;
|
|
|
79 open my $out,'>', $out_contigs_file;
|
|
|
80
|
|
|
81 while ( my $line = <$in>) {
|
|
|
82 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
|
|
|
83 #also move the remainder of the length
|
|
|
84 if ( $line =~ />NODE_(\d+)_(.+)/) {
|
|
|
85 $line = ">$name" . "_$1 $2\n";
|
|
|
86 }
|
|
|
87 print $out $line;
|
|
|
88 }
|
|
|
89 close $in;
|
|
|
90 close $out;
|
|
|
91
|
|
|
92
|
|
|
93 open $in, '<',"$output_dir/scaffolds.fasta" or die $!;
|
|
|
94 open $out,'>', $out_scaffolds_file;
|
|
|
95
|
|
|
96 while ( my $line = <$in>) {
|
|
|
97 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
|
|
|
98 #also move the remainder of the length
|
|
|
99 if ( $line =~ />NODE_(\d+)_(.+)/) {
|
|
|
100 $line = ">$name" . "_$1 $2\n";
|
|
|
101 }
|
|
|
102 print $out $line;
|
|
|
103 }
|
|
|
104 close $in;
|
|
|
105 close $out;
|
|
|
106
|
|
|
107 }
|
|
|
108
|
|
|
109
|
|
|
110 # Extract
|
|
|
111 sub extractCoverageLength{
|
|
|
112 my ($in, $out) = @_;
|
|
|
113 open FASTA, '<', $in or die $!;
|
|
|
114 open TAB, '>', $out or die $!;
|
|
|
115 print TAB "#name\tlength\tcoverage\n";
|
|
|
116 while (<FASTA>){
|
|
|
117 next unless /^>/;
|
|
|
118 chomp;
|
|
|
119 die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/);
|
|
|
120 my ($name,$n, $l, $cov) = ($1,$2, $3, $4);
|
|
|
121 print TAB "$name" . "_$n\t$l\t$cov\n";
|
|
|
122 }
|
|
|
123 close TAB;
|
|
|
124 }
|