changeset 13:1ae36afad28d draft

Added a new output, a tab file providing length and coverage information for each contig.
author lionelguy
date Mon, 19 Aug 2013 08:55:21 -0400
parents 8877df82f1d7
children 94978eb0f1be
files tools/spades_2_5/spades.pl tools/spades_2_5/spades.xml
diffstat 2 files changed, 48 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/tools/spades_2_5/spades.pl	Tue Aug 06 12:21:27 2013 -0500
+++ b/tools/spades_2_5/spades.pl	Mon Aug 19 08:55:21 2013 -0400
@@ -4,9 +4,27 @@
 use warnings;
 use File::Temp qw/ tempfile tempdir /;
 use File::Copy;
+use Getopt::Long;
 
 # Parse arguments
-my ($out_contigs_file, $out_scaffolds_file, $out_log_file, @sysargs) = @ARGV;
+my ($out_contigs_file,
+    $out_contigs_stats,
+    $out_scaffolds_file,
+    $out_scaffolds_stats,
+    $out_log_file,
+    @sysargs) = @ARGV;
+
+## GetOptions is not compatible with collecting the rest of the arguments in an array,
+## so keep the not-so-nice parse-in-one-go method, without named arguments.
+# GetOptions(
+#     'contigs-file=s'    => \$out_contigs_file,
+#     'contigs-stats=s'   => \$out_contigs_stats,
+#     'scaffolds-file=s'  => \$out_scaffolds_file,
+#     'scaffolds-stats=s' => \$out_scaffolds_stats,
+#     'out_log_file=s'    => \$out_log_file,
+# );
+
+# my @sysargs = @ARGV;
 
 # Create temporary folder to store files, delete after use
 #my $output_dir = tempdir( CLEANUP => 0 );
@@ -20,6 +38,8 @@
 # To do: record time
 &runSpades(@sysargs);
 &collectOutput();
+&extractCoverageLength($out_contigs_file, $out_contigs_stats);
+&extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
 print $log "Done\n";
 close $log;
 exit 0;
@@ -34,6 +54,7 @@
     }
     return 0;
 }
+
 # Collect output
 sub collectOutput{
     # To do: check that the files are there
@@ -45,3 +66,19 @@
     print $log $_ while (<LOG>);
     return 0;
 }
+
+# Extract the name, length and coverage of every sequence in a FASTA
+# file from its header lines and write them to a tab-separated file.
+#
+# Arguments:
+#   $in  - path of the FASTA file to read
+#   $out - path of the tabular file to write (overwritten if present)
+#
+# The ID of each header (the text before the first whitespace) is split
+# on "_" and fields 2, 4 and 6 are written as name, length and coverage,
+# e.g. ">NODE_1_length_1000_cov_12.5" gives "1\t1000\t12.5" (presumably
+# the SPAdes naming scheme; verify against the assembler output).
+#
+# Dies if a file cannot be opened, if the output cannot be flushed on
+# close, or if a header lacks any of the three fields.
+sub extractCoverageLength{
+    my ($in, $out) = @_;
+    # Lexical handles avoid clashing with other global bareword handles.
+    open my $fasta, '<', $in or die "Cannot open $in for reading: $!";
+    open my $tab, '>', $out or die "Cannot open $out for writing: $!";
+    while (my $line = <$fasta>){
+        next unless $line =~ /^>/;
+        chomp $line;
+        # Only the ID (text before the first whitespace) carries the fields.
+        my ($id) = split(/\s/, $line);
+        my (undef, $n, undef, $l, undef, $cov) = split(/_/, $id);
+        # Test for presence, not truth: a value of "0" is legitimate.
+        for my $field ($n, $l, $cov){
+            die "Not all elements found in $line\n"
+                unless defined $field && length $field;
+        }
+        print $tab "$n\t$l\t$cov\n";
+    }
+    close $fasta;
+    # Buffered write errors only surface at close, so check it.
+    close $tab or die "Cannot close $out: $!";
+    return 1;
+}
--- a/tools/spades_2_5/spades.xml	Tue Aug 06 12:21:27 2013 -0500
+++ b/tools/spades_2_5/spades.xml	Mon Aug 19 08:55:21 2013 -0400
@@ -1,9 +1,14 @@
-<tool id="spades" name="spades" version="0.3">
+<tool id="spades" name="spades" version="0.4">
   <description>SPAdes genome assembler for regular and single-cell projects</description>
   <requirements>
     <requirement type="package" version="2.5.0">spades</requirement>
   </requirements>
-  <command interpreter="perl">spades.pl $out_contigs $out_scaffolds $out_log 
+  <command interpreter="perl">spades.pl 
+     $out_contigs 
+     $out_contig_stats 
+     $out_scaffolds 
+     $out_scaffold_stats 
+     $out_log 
     ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output
     spades.py
     ## TODO: kmers, threads, other options (-sc for single-cell)
@@ -74,7 +79,9 @@
   </inputs>
   <outputs>
     <data name="out_contigs" format="fasta" label="SPAdes contigs (fasta)" />
+    <data name="out_contig_stats" format="tabular" label="SPAdes contig stats" />
     <data name="out_scaffolds" format="fasta" label="SPAdes scaffolds (fasta)" />
+    <data name="out_scaffold_stats" format="tabular" label="SPAdes scaffold stats" />
     <data name="out_log" format="txt" label="SPAdes log" />
   </outputs>
   <tests>
@@ -94,7 +101,7 @@
   <help>
 **What it does**
 
-Runs SPAdes 2.5.0, collects the output, and throws away all the temporary files.
+Runs SPAdes 2.5.0, collects the output, and throws away all the temporary files. It also produces tabular files listing the names, lengths and coverage of the contigs and of the scaffolds.
 
 **Citation**