Mercurial > repos > lionelguy > spades
changeset 13:1ae36afad28d draft
Added new outputs: tab-delimited files providing the name, length and coverage of each contig and each scaffold.
author | lionelguy |
---|---|
date | Mon, 19 Aug 2013 08:55:21 -0400 |
parents | 8877df82f1d7 |
children | 94978eb0f1be |
files | tools/spades_2_5/spades.pl tools/spades_2_5/spades.xml |
diffstat | 2 files changed, 48 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/spades_2_5/spades.pl Tue Aug 06 12:21:27 2013 -0500 +++ b/tools/spades_2_5/spades.pl Mon Aug 19 08:55:21 2013 -0400 @@ -4,9 +4,27 @@ use warnings; use File::Temp qw/ tempfile tempdir /; use File::Copy; +use Getopt::Long; # Parse arguments -my ($out_contigs_file, $out_scaffolds_file, $out_log_file, @sysargs) = @ARGV; +my ($out_contigs_file, + $out_contigs_stats, + $out_scaffolds_file, + $out_scaffolds_stats, + $out_log_file, + @sysargs) = @ARGV; + +## GetOptions not compatible with parsing the rest of the arguments in an array. +## Keeping the not-so-nice parse-in-one-go method, without named arguments. +# GetOptions( +# 'contigs-file=s' => \$out_contigs_file, +# 'contigs-stats=s' => \$out_contigs_stats, +# 'scaffolds-file=s' => \$out_scaffolds_file, +# 'scaffolds-stats=s' => \$out_scaffolds_stats, +# 'out_log_file=s' => \$out_log_file, +# ); + +# my @sysargs = @ARGV; # Create temporary folder to store files, delete after use #my $output_dir = tempdir( CLEANUP => 0 ); @@ -20,6 +38,8 @@ # To do: record time &runSpades(@sysargs); &collectOutput(); +&extractCoverageLength($out_contigs_file, $out_contigs_stats); +&extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats); print $log "Done\n"; close $log; exit 0; @@ -34,6 +54,7 @@ } return 0; } + # Collect output sub collectOutput{ # To do: check that the files are there @@ -45,3 +66,19 @@ print $log $_ while (<LOG>); return 0; } + +# Extract +sub extractCoverageLength{ + my ($in, $out) = @_; + open FASTA, '<', $in or die $!; + open TAB, '>', $out or die $!; + while (<FASTA>){ + next unless /^>/; + chomp; + my @a = split(/\s/, $_); + my ($NODE, $n, $LENGTH, $l, $COV, $cov) = split(/_/, $a[0]); + die "Not all elements found in $_\n" unless ($n && $l && $cov); + print TAB "$n\t$l\t$cov\n"; + } + close TAB; +}
--- a/tools/spades_2_5/spades.xml Tue Aug 06 12:21:27 2013 -0500 +++ b/tools/spades_2_5/spades.xml Mon Aug 19 08:55:21 2013 -0400 @@ -1,9 +1,14 @@ -<tool id="spades" name="spades" version="0.3"> +<tool id="spades" name="spades" version="0.4"> <description>SPAdes genome assembler for regular and single-cell projects</description> <requirements> <requirement type="package" version="2.5.0">spades</requirement> </requirements> - <command interpreter="perl">spades.pl $out_contigs $out_scaffolds $out_log + <command interpreter="perl">spades.pl + $out_contigs + $out_contig_stats + $out_scaffolds + $out_scaffold_stats + $out_log ## A real command looks like: spades.py -k 21,33,55,77,99,127 --careful -1 Y.fastq.gz -2 X.fastq.gz -t 24 -o output spades.py ## TODO: kmers, threads, other options (-sc for single-cell) @@ -74,7 +79,9 @@ </inputs> <outputs> <data name="out_contigs" format="fasta" label="SPAdes contigs (fasta)" /> + <data name="out_contig_stats" format="tabular" label="SPAdes contig stats" /> <data name="out_scaffolds" format="fasta" label="SPAdes scaffolds (fasta)" /> + <data name="out_scaffold_stats" format="tabular" label="SPAdes scaffold stats" /> <data name="out_log" format="txt" label="SPAdes log" /> </outputs> <tests> @@ -94,7 +101,7 @@ <help> **What it does** -Runs SPAdes 2.5.0, collects the output, and throws away all the temporary files. +Runs SPAdes 2.5.0, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. **Citation**