Mercurial > repos > geert-vandeweyer > clusterfast

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ClusterFast.xml	Mon Jul 28 05:56:04 2014 -0400
@@ -0,0 +1,84 @@
+<tool id="ClusterFast" name="ClusterFast" version="0.0.1">
+  <description></description>
+  <command interpreter="perl">
+     run_cf0611.pl
+
+      ## input files
+      	-b $input1
+      	-t $input2
+
+      ## references
+	-p $indexes
+
+      ## output files
+      	-o $__new_file_path__  ## use galaxy temp dir for automatic cleanup.
+	-c $contigs
+	-y $bp1
+	-z $bp2
+
+
+      ## run parameters (m1 and m2 are the names of the integer params declared under inputs)
+      	-r 0 ## clean up not needed, as this is in galaxy tmp.
+      	-m1 $m1
+	-m2 $m2
+	-d $distance
+     > $runlog
+  </command>
+  <requirements>
+    <requirement type="package" version="0.1.18">samtools</requirement>
+    <requirement type="package" version="latest">novoalign</requirement>
+    <requirement type="package" version="latest">velvet</requirement>
+    <requirement type="package" version="latest">blat_server</requirement>
+  </requirements>
+  <inputs>
+        <param name="input1" type="data" format="bam" label="BAM file" help="BAM file of mapped reads." />
+        <param name="input2" type="data" format="bed" label="Target Regions BED" help="BED file containing regions of interest. See below for format" />
+        <param name="m1" type="integer" value="2" label="Supporting Discordant Read Pairs" help="Default: 2"  />
+	<param name="m2" type="integer" value="1" label="Supporting Split Reads" help="Default: 1" />
+	<param name="distance" type="integer" value="50000" label="Max Distance From Target To Search" help="Default: 50000" />
+	<param name="indexes" type="select" label="Reference Genome" help="Select the correct genome build" >
+		<options from_data_table="clusterfast" />
+	</param>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="bp1" label="${tool.name} on ${on_string}: Primary Breakpoints"/>
+    <data format="tabular" name="bp2" label="${tool.name} on ${on_string}: Final Breakpoints"/>
+    <data format="txt"	name="contigs" label="${tool.name} on ${on_string}: Contigs"/>
+    <data format="txt" name="runlog" label="${tool.name} on ${on_string}: Runtime output"/>
+  </outputs>
+  <help>
+
+**What it does**
+
+ClusterFAST is a tool for finding translocations in next generation sequencing data developed by Haley Abel and Eric Duncavage at Washington University in St. Louis, MO. A manuscript detailing the use of ClusterFAST can be found at: http://www.ncbi.nlm.nih.gov/pubmed/?term=24813172. For questions or comments please email eduncavage@path.wustl.edu
+
+ClusterFAST is a pipeline for highly specific detection of translocations from high-coverage targeted capture sequence data. It detects translocation breakpoints with single base accuracy and provides assembled contigs for PCR validation. ClusterFAST is implemented in Java for improved interoperability and can be run from the command line via a perl script. ClusterFAST is meant for translocation detection from targeted regions and requires a picard-style interval file containing the targets (e.g., ALK_MLL.txt). Numerous output files are created during the workflow (see below) to allow for error checking. The current version of ClusterFAST uses velvet to assemble contigs during the final step; however, other assemblers can be substituted.
+
+
+------
+
+**Targets BED**
+
+The BED file containing targets of interest. I would recommend to use one entry per gene of interest, instead of one entry per exon for this tool::
+
+  Column 1: Chromosome : Use the same syntax as the references used by Galaxy. Check your sam-headers for the correct format. ('chr1' vs '1')
+  Column 2: Start Position
+  Column 3: End Position
+  Column 4: Target Name.
+  Column 5: Score : ignored, use '0'
+  Column 6: Strand : '+' or '-'
+
+
+------
+
+**Input formats**
+
+BAM file for reads, BED file for targets.
+
+------
+
+
+  </help>
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cf_scripts/filter_contigs.0403.pl	Mon Jul 28 05:56:04 2014 -0400
@@ -0,0 +1,98 @@
+#!/usr/bin/perl
+
+# Usage: filter_contigs.0403.pl blat.psl breakpoints.txt contigs.fa assembler_name
+#
+# Reports the assembled contigs that support a breakpoint: a contig qualifies when it has blat
+# hits within $tol bases of both sides of the same breakpoint line. For each such contig the
+# supporting hits, the breakpoint line and the contig sequence are printed to STDOUT.
+
+use strict;
+use warnings;
+
+my $tol=300;
+die "Usage: $0 blat.psl breakpoints.txt contigs.fa [assembler]\n" if @ARGV<3;
+my ($psl_file, $cf_file, $contig_file, $assembler)=@ARGV;
+$assembler='' unless defined $assembler;
+
+# Read the .psl file (blat output, no header) and hash the hits by query name. Every hit is
+# parsed once here rather than once per breakpoint line.
+my %blat_results;
+open(my $psl, '<', $psl_file) or die "Can't open .psl.\n";
+while(my $line=<$psl>) {
+    chomp($line);
+    my @field=split(/\s+/, $line);
+    next if @field<17;    # blank or truncated line: no complete target coordinates
+    push(@{$blat_results{$field[9]}}, {line=>$line, tname=>$field[13], tstart=>$field[15], tend=>$field[16]});
+}
+close($psl);
+
+my %good_contigs;    # contig name => last breakpoint line supported on both sides
+my %good_blats;      # contig name => blat lines supporting that breakpoint
+
+# Read the cf output (breakpoints). This open used to be unchecked, so a missing file was
+# indistinguishable from a run without supported contigs.
+open(my $cf, '<', $cf_file) or die "Can't open breakpoints file $cf_file: $!\n";
+while(my $line=<$cf>) {
+    chomp($line);
+    my ($x, $chr1, $pos1, $chr2, $pos2)=split(/\s+/, $line);
+    next unless defined $pos2;    # fewer than five fields can never match on both sides
+    no warnings 'numeric';        # positions are compared numerically as they come, without noise on STDERR
+
+    foreach my $key (keys %blat_results) {
+        my @score=(0,0);
+        my @blat_out;
+        foreach my $hit (@{$blat_results{$key}}) {
+            if(($hit->{tname} eq $chr1) && ($pos1>$hit->{tstart}-$tol) && ($pos1<$hit->{tend}+$tol)) {
+                $score[0]++;
+                push(@blat_out, $hit->{line});
+            }
+            if(($hit->{tname} eq $chr2) && ($pos2>$hit->{tstart}-$tol) && ($pos2<$hit->{tend}+$tol)) {
+                $score[1]++;
+                push(@blat_out, $hit->{line});
+            }
+        }
+
+        # keep the contig when it has blat results corresponding to both sides of the cf result
+        if($score[0]>0 && $score[1]>0) {
+            $good_contigs{$key}=$line;
+            $good_blats{$key}=[@blat_out];
+        }
+    }
+}
+close($cf);
+
+print STDOUT "*********************************************\n$assembler results\n\n";
+
+if(%good_contigs) {
+    # read in fasta file containing contigs; each contig's sequence lines go into its own array
+    my %all_ctgs;
+    my $curr_ctg="";
+    open(my $fasta, '<', $contig_file) or die "Can't open contigs.\n";
+    while(my $line=<$fasta>) {
+        chomp($line);
+        if(substr($line, 0, 1) eq ">") {
+            $curr_ctg=substr($line,1);
+            $all_ctgs{$curr_ctg}=[];
+        }
+        else {
+            push(@{$all_ctgs{$curr_ctg}}, $line);
+        }
+    }
+    close($fasta);
+
+    # sorted so that the report is identical between runs (hash order is not)
+    foreach my $key (sort keys %all_ctgs) {
+        next unless exists $good_contigs{$key};
+        foreach my $hit_line (@{$good_blats{$key}}) {
+            # printed psl columns: qname, qstart, qend, tname, tstart, tend, strand
+            my @field=split(/\s+/, $hit_line);
+            print STDOUT join("\t", @field[9,11,12,13,15,16,8]), "\n";
+        }
+        print STDOUT "$key $good_contigs{$key}\n", join('', @{$all_ctgs{$key}}), "\n";
+    }
+}
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cf_scripts/split_for_velvet.pl	Mon Jul 28 05:56:04 2014 -0400
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+
+# Usage: samtools view in.bam | split_for_velvet.pl pair1.fq pair2.fq single.fq
+#
+# Writes the SAM records read from STDIN as fastq: names seen both with and without the
+# first-in-pair flag (64) go to pair1.fq / pair2.fq in the same order, everything else to
+# single.fq. Records with flag 16 (reverse strand) are reverse-complemented first.
+
+use strict;
+use warnings;
+
+die "Usage: $0 pair1.fq pair2.fq single.fq < reads.sam\n" if @ARGV<3;
+
+my (%read1, %read2, %qual1, %qual2);
+
+while (my $line=<STDIN>) {
+	chomp($line);		# was chomp(), which works on $_ and left $line untouched
+	my ($name, $flag, $chr1, $pos1, $mq, $cigar, $chr2, $pos2, $dist, $seq, $quals)=split(/\s+/, $line);
+	next if $line=~/^\@/ || !defined $quals;	# header line, blank line or truncated record
+	if($flag & 16) {	#reverse-complement it (lowercase bases included)
+		$seq=reverse($seq);
+		$seq=~tr/ACGTacgt/TGCAtgca/;
+		$quals=reverse($quals);
+	}
+	if($flag & 64) {	#first in pair
+		$read1{$name}=$seq;
+		$qual1{$name}=$quals;
+	}
+	else {
+		$read2{$name}=$seq;
+		$qual2{$name}=$quals;
+	}
+}
+
+open(my $fq1, '>', $ARGV[0]) or die "Can't open $ARGV[0]";
+open(my $fq2, '>', $ARGV[1]) or die "Can't open $ARGV[1]";
+open(my $fq3, '>', $ARGV[2]) or die "Can't open $ARGV[2]";
+
+# sorted keys: the read order in the output no longer depends on Perl's hash order
+foreach my $key (sort keys %read1) {
+	if(exists $read2{$key}) {
+		print $fq1 "\@$key\n$read1{$key}\n+\n$qual1{$key}\n";
+		print $fq2 "\@$key\n$read2{$key}\n+\n$qual2{$key}\n";
+	}
+	else {
+		print $fq3 "\@$key\n$read1{$key}\n+\n$qual1{$key}\n";
+	}
+}
+foreach my $key (sort keys %read2) {
+	next if exists $read1{$key};
+	print $fq3 "\@$key\n$read2{$key}\n+\n$qual2{$key}\n";
+}
+foreach my $fh ($fq1, $fq2, $fq3) { close($fh) or die "Can't close fastq output: $!"; }	# buffered write errors surface here
\ No newline at end of file
Binary file cf_v1.1.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/clusterfast.loc.sample	Mon Jul 28 05:56:04 2014 -0400
@@ -0,0 +1,32 @@
+#This is a sample file distributed with Galaxy that enables the clusterfast
+#tool to locate genome files for use with blat, novoalign and fasta. You will
+#need to supply these files and then create a clusterfast.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The clusterfast.loc
+#file has this format (white space characters are TAB characters):
+#
+#<unique_build_id>	<display_name>		<NOVOALIGN.idx_path;2bit_path;fasta_path>
+#
+#for example:
+#
+#hg19	Human (Homo sapiens): hg19		/depot/data2/galaxy/novoalign/hg19.novoidx;/depot/data2/galaxy/twobit/hg19.2bit;/depot/data2/galaxy/fasta/hg19.fa
+#mm9	Mouse (Mus musculus): mm9		/depot/data2/galaxy/novoalign/mm9.novoidx;/depot/data2/galaxy/twobit/mm9.2bit;/depot/data2/galaxy/fasta/mm9.fa
+#
+#then your /depot/data2/galaxy/twobit/ directory
+#would need to contain the following 2bit files:
+#
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg19.2bit
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 mm9.2bit
+#
+# /depot/data2/galaxy/novoalign/ directory
+#would need to contain the following novoalign index files:
+#
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg19.novoidx
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 mm9.novoidx
+#
+# your fasta folder needs the following files:
+#
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg19.fa
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 mm9.fa
+#
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Mon Jul 28 05:56:04 2014 -0400
@@ -0,0 +1,25 @@
+BACKGROUND:
+ClusterFAST is a tool for finding translocations in next generation sequencing data developed by Haley Abel and Eric Duncavage at Washington University in St. Louis, MO. A manuscript detailing the use of ClusterFAST can be found at: http://www.ncbi.nlm.nih.gov/pubmed/?term=24813172. For questions or comments please email eduncavage@path.wustl.edu
+
+ClusterFAST is a pipeline for highly specific detection of translocations from high-coverage targeted capture sequence data. It detects translocation breakpoints with single base accuracy and provides assembled contigs for PCR validation. ClusterFAST is implemented in Java for improved interoperability and can be run from the command line via a perl script. ClusterFAST is meant for translocation detection from targeted regions and requires a picard-style interval file containing the targets (e.g., ALK_MLL.txt). Numerous output files are created during the workflow (see below) to allow for error checking. The current version of ClusterFAST uses velvet to assemble contigs during the final step; however, other assemblers can be substituted.
+
+GALAXY WRAPPER:
+This galaxy wrapper was created by Geert Vandeweyer. It represents a static snapshot of ClusterFast version 0611 (received from the authors on 2014-07-24). For questions regarding its functionality, contact me by geert.vandeweyer@uantwerpen.be
+
+LICENSE AND DISCLAIMER:
+ClusterFAST is protected by copyright, is distributed under the GNU GPL public license, and may be used freely for research purposes.   The use of ClusterFAST for the purpose of financial profit or the inclusion/distribution with for-profit software not covered by GNU public license is prohibited.  Please email eduncavage@path.wustl.edu for information on commercial license.  The performance of ClusterFAST will vary depending on many sequencing parameters and must be validated by individual laboratories.
+
+INSTALLATION:
+Galaxy should be able to install all dependencies automatically. These include
+- samtools
+- blat-server
+- velvet
+- novoalign
+
+After installation, you need to update several environment settings for the dependencies to work correctly.
+
+- NOVOALIGN path: (novoalign binaries are NOT automatically installed, see package_novoalign for details ) : <tool_dependency_dir>/environment_settings/NOVOALIGN_PATH/geert-vandeweyer/package_novoalign/<hash_string>/env.sh
+- index files (clusterfast.loc holds info on 2bit/fasta/novoalign references) :  <path>/galaxy-dist/tool-data/<toolshed>/repos/geert-vandeweyer/clusterfast/<hash>/clusterfast.loc. Read comments for the exact syntax.
+- number of velvet threads : <tool_dependency_dir>/environment_settings/OMP_THREAD_LIMIT/geert-vandeweyer/package_velvet/<hash_string>/env.sh
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/run_cf0611.pl	Mon Jul 28 05:56:04 2014 -0400
@@ -0,0 +1,235 @@
+#!/usr/bin/perl
+
+#Copyright 2012-2013 Haley Abel
+#
+#This file is part of ClusterFAST.
+#
+#ClusterFAST is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+#
+#ClusterFAST is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License along with ClusterFAST.  If not, see <http://www.gnu.org/licenses/>.
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+use File::Path qw(make_path);
+use Cwd ;
+use Cwd 'abs_path';
+
+my ($inbam, $outdir, $targets, $minct1, $minct2, $dist, $novoindex, $rmtmp, $FASTA,$twobit,$refpaths,$bp1,$bp2,$contigs);
+
+$minct1=2;
+$minct2=1;
+$dist=50000;
+$rmtmp=1;
+my $known_partners=0;
+my $partnerfile='';
+
+# The helper scripts and the jar are looked up next to this script, not in the caller's working directory.
+use FindBin qw($RealBin);
+my $scriptspath="$RealBin/cf_scripts";
+my $cffile="$RealBin/cf_v1.1.jar";
+my $help='';
+
+#YOU MUST ADD LOCAL FILE PATHS HERE FOR STANDALONE USE (including trailing slashes!). Galaxy adds the binaries to the path, so these can be left empty.
+my $SAMTOOLS='';	##INSERT PATH TO SAMTOOLS; http://sourceforge.net/projects/samtools/files/samtools/
+my $NOVOALIGN='';	##INSERT PATH TO NOVOALIGN; http://www.novocraft.com/main/downloadpage.php
+my $VELVET='';	##INSERT PATH TO VELVET; http://www.molecularevolution.org/software/genomics/velvet
+my $BLAT='';	##INSERT PATH TO BLAT gfServer/gfClient tools (each statement must end with ';' BEFORE the comment)
+
+
+
+
+# One usage line for option errors, -h and missing arguments. The 2bit file is given with -2; -t is the targets file.
+my $usage = "Usage:  run_cf_0611.pl -b inbam -o outdir -f fasta -t targets -d distance -i novoindex -r rmtmp -m1 min_pairs -m2 min_splits -2 genome.2bit\n";
+GetOptions(
+    "b|inbam=s"=> \$inbam,
+    "o|outdir=s"=> \$outdir,
+    "t|targets=s"=> \$targets,
+    "f|fasta=s"=> \$FASTA,
+    "m1|minct1=i"=> \$minct1,
+    "m2|minct2=i"=> \$minct2,
+    "d|distance=i"=>\$dist,
+    "i|novoindex=s"=> \$novoindex,
+    "r|rmtmp=i"=> \$rmtmp,
+    "h|help"=>\$help,
+    "2|twobit=s"=>\$twobit,
+    "p|refpaths=s"=>\$refpaths,
+    "c|contigs=s"=>\$contigs,
+    "y|bp1=s"=>\$bp1,
+    "z|bp2=s"=>\$bp2,
+    ) or die $usage;
+
+if($help) {
+	print $usage;
+	print "targets=BED style interval file containing the regions to search for SV\n", "fasta=reference fasta file\nnovoindex=reference index for novoalign\n";
+	print "distance=max distance from target to search (default=50000)\n";
+	print "min_pairs=minimum number of read pairs supporting breakpoint (default=2)\nmin_splits=minimum number of split reads supporting breakpoint (default=1)\n";
+	print "rmtmp=remove temp directory (default=1, true)\n";
+	print "genome.2bit = 2bit index fasta file of reference genome, used by the blat server (which is launched on the localhost)\n";
+	print "refpaths = semicolon seperated list of reference genomes paths (used by galaxy). The order is mandatory to be novoalign;2bit;fasta\n";
+	print "contigs = (Optional) output file for the contigs (default: outdir/contigs.txt)\n", "bp1 = (Optional) output file for the breakpoints.1.txt file (default: outdir/breakpoints.1.txt)\n", "bp2 = (Optional) output file for the breakpoints.2.txt file (default: outdir/breakpoints.2.txt)\n";
+	exit(1);
+}
+
+# if refpaths is defined => split into separate variables (order: novoalign index;2bit;fasta).
+if (defined($refpaths) && $refpaths ne '') {
+	($novoindex, $twobit, $FASTA) = split(/;/,$refpaths);
+}
+
+if(! ( defined $inbam && defined $targets && defined $FASTA && defined $outdir && defined $novoindex && defined $twobit )) {
+	print STDERR $usage;
+	exit(1);
+}
+my @missing = grep { ! -e $_ } ($inbam, $targets, $FASTA, $novoindex, $twobit);
+if (@missing) {	# name the missing input(s) so the failure can be diagnosed from the message
+	print STDERR "Critical file missing: @missing\n";
+	exit(1);
+}
+
+make_empty_output();
+$outdir = abs_path($outdir);
+
+# Echo a shell command to STDOUT (the run log), run it, and log a failure there instead of ignoring it.
+sub run_logged {
+	my ($command) = @_;
+	print STDOUT "$command\n";
+	system($command) == 0 or print STDOUT "WARNING: command failed (wait status $?)\n";
+	return;
+}
+
+## convert Targets.bed to targets.picard
+run_logged("${SAMTOOLS}samtools view -H $inbam > $outdir/Targets.picard");
+run_logged("awk '{OFS=\"\\t\"; print \$1,\$2,\$3,\$6,\$4 }' $targets >> $outdir/Targets.picard");
+$targets = "$outdir/Targets.picard";
+
+## check all reads are paired. Combination of paired and single end libraries crashes the tool.
+my @fs = `${SAMTOOLS}samtools flagstat $inbam`;
+my $total = 0;
+my $paired = 0;
+foreach(@fs) {
+	chomp;
+	if ($_ =~ m/^(\d+).*in total/) {
+		$total = $1;
+	}
+	elsif ($_ =~ m/^(\d+).*paired in sequencing/) {
+		$paired = $1;
+	}
+}
+if ($total != $paired) {
+	print "Removing single-read data from the input bam file\n";
+	system("${SAMTOOLS}samtools view -f 1 -h -b -o $outdir/Paired.only.bam $inbam");
+	$inbam = "$outdir/Paired.only.bam";
+}
+
+## process.
+run_logged("java -Xmx8g -Xms6g -jar $cffile $inbam $targets $minct1 $dist $outdir 1");
+
+# Without the temp directory there is nothing to remap; never look for toRemap1.fq in some other directory.
+my $in_temp = chdir("${outdir}/temp");
+print STDOUT "Can't chdir to ${outdir}/temp: $!\n" unless $in_temp;
+
+if(!($in_temp && -s "toRemap1.fq")) {
+    print STDOUT "No breakpoints found.\n";
+    chdir("$outdir") or die "Can't chdir to $outdir: $!\n";	# clean_up works relative to $outdir
+    clean_up();
+    exit(1);
+}
+
+run_logged("${NOVOALIGN}novoalign -o SAM -i 230 140 -r all -e 999 -c2 -d $novoindex -F STDFQ -f toRemap1.fq toRemap2.fq > novoout.2.sam");
+
+run_logged("java -Xmx6g -Xms4g -jar $cffile novoout.2.sam $targets $minct2 $dist $outdir 2");
+
+
+chdir("${outdir}/temp") or die "Can't chdir to ${outdir}/temp: $!\n";
+print STDOUT "Assembling contigs...\n";
+assemble();
+print STDOUT "Done\n";
+
+
+system("mv  contigs.txt $outdir");
+chdir("$outdir") or die "Can't chdir to $outdir: $!\n";
+clean_up();
+
+
+
+
+#########################
+
+sub make_empty_output {
+    # Create $outdir (with parents) and create/truncate the three result files as empty files.
+    make_path($outdir);
+    foreach my $result (qw(breakpoints.1.txt breakpoints.2.txt contigs.txt)) {
+        open(my $fh, '>', "${outdir}/$result") or die "Can't open ${outdir}/$result: $!"; close($fh);
+    }
+}
+
+sub assemble {
+    # Assemble the reads with velvet, BLAT the contigs against $twobit through a temporary local gfServer and write the filtered contigs to contigs.txt (all file names are relative to the current directory).
+    system("${SAMTOOLS}samtools view merged.sorted.final.temp.bam | sort -k 1,1 -k 2,2n -u | tee merged.sorted.final.unique.sam  | perl ${scriptspath}/split_for_velvet.pl pair1.fq pair2.fq single.fq");
+    if(!(-s "pair1.fq")) {
+	print STDERR "No breakpoints supported by split reads.\n";
+	# Hand over the results like the "No breakpoints found" exit does, instead of leaving without calling clean_up.
+	chdir("$outdir") or die "Can't chdir to $outdir: $!\n";
+	clean_up();
+	exit(1);
+    }
+	system("${VELVET}velveth velvet_out 31 -fastq -shortPaired -separate pair1.fq pair2.fq -short single.fq > /dev/null");
+	system("${VELVET}velvetg velvet_out -cov_cutoff 3 -ins_length 300 -exp_cov 100  -ins_length_sd 60 > /dev/null");
+	# launch blat server at random unused port; a status reply containing "Error" is taken to mean nothing answers there
+	my ($port, $status, $tries) = (0, '', 0);
+	do {
+		die "No free port found for gfServer; is gfServer installed?\n" if ++$tries > 100;
+		$port = 10000 + int(rand(5000));
+		$status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1` ;
+	} while ($status !~ m/Error/);
+	print "Waiting for BLAT server to come online.\n";
+	system("(${BLAT}gfServer start 127.0.0.1 $port -canStop $twobit >/dev/null 2>&1) &");
+	# wait for startup, at most one hour (240 x 15 s), so a server that cannot start no longer hangs the job
+	for ($tries = 0; ($status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1`) =~ m/Error/; $tries++) {
+		die "BLAT server did not come online on port $port\n" if $tries >= 240;
+		sleep 15;
+	}
+	# run query, then take blat server down.
+	system("${BLAT}gfClient 127.0.0.1 $port / velvet_out/contigs.fa velvet.psl -minScore=2 -minIdentity=90 -nohead");
+	system("${BLAT}gfServer stop 127.0.0.1 $port");
+	my $command = "perl ${scriptspath}/filter_contigs.0403.pl velvet.psl ${outdir}/breakpoints.2.txt velvet_out/contigs.fa velvet > velvet.txt";
+	print STDOUT "$command\n";
+	system($command);
+	system("mv velvet.txt contigs.txt");
+}
+
+sub clean_up {
+    # Copy the result files from $outdir to the optional -c/-y/-z destinations, then remove $outdir/temp when $rmtmp is set.
+    my @deliveries = (['contigs.txt', \$contigs], ['breakpoints.1.txt', \$bp1], ['breakpoints.2.txt', \$bp2]);
+    foreach my $delivery (@deliveries) {
+        my ($result, $target) = @$delivery;
+        next unless defined $$target;
+        # abs_path can return undef for a path it cannot resolve; keep the path as given in that case.
+        my $resolved = abs_path($$target);
+        $$target = $resolved if defined $resolved;
+        # List-form system bypasses the shell, so the paths need no quoting.
+        system('cp', "$outdir/$result", $$target);
+    }
+    if ($rmtmp) {
+        # Absolute path instead of "rm -R temp": the removal must not depend on the current directory.
+        File::Path::remove_tree("$outdir/temp");
+    }
+    return;
+}
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Mon Jul 28 05:56:04 2014 -0400
@@ -0,0 +1,9 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of all index files under genome directory -->
+    <table name="clusterfast" comment_char="#">
+        <columns>name,value, path</columns>
+        <file path="clusterfast.loc" />
+    </table>
+
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Jul 28 05:56:04 2014 -0400
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="novoalign" version="latest">
+        <repository changeset_revision="003907882a0c" name="package_novoalign" owner="geert-vandeweyer" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="velvet" version="latest">
+        <repository changeset_revision="688d46996f1e" name="package_velvet" owner="geert-vandeweyer" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="samtools" version="0.1.18">
+        <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="blat_server" version="latest">
+        <repository changeset_revision="455a4bd49c52" name="package_blat_server" owner="jjohnson" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+
+</tool_dependency>