# HG changeset patch # User geert-vandeweyer # Date 1406541235 14400 # Node ID 4a3afa90ff7ff640962ff63a932f6c3af2ab91bf # Parent 1b008b4b05f364d5545b42bcdad8839afe903cae Uploaded diff -r 1b008b4b05f3 -r 4a3afa90ff7f ClusterFast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ClusterFast.xml Mon Jul 28 05:53:55 2014 -0400 @@ -0,0 +1,84 @@ + + + + run_cf0611.pl + + ## input files + -b $input1 + -t $input2 + + ## references + -p $indexes + + ## output files + -o $__new_file_path__ # use galaxy temp dir for automatic cleanup. + -c $contigs + -y $bp1 + -z $bp2 + + + ## run parameters + -r O ## clean up not needed, as this is in galaxy tmp. + -m1 $discordant + -m2 $split + -d $distance + > $runlog + + + samtools + novoalign + velvet + blat_server + + + + + + + + + + + + + + + + + + + +**What it does** + +ClusterFAST is a tool for finding translocation in next generation sequencing data developed by Haley Abel and Eric Duncavage at Washington University in St. Louis, MO. A manuscript detailing the use of ClusterFAST ican be found at: http://www.ncbi.nlm.nih.gov/pubmed/?term=24813172. For questions or comments please email eduncavage@path.wustl.edu + +ClusterFAST is a pipeline for highly specific detection of translocations from high-coverage targeted capture sequence data. It detects translocation breakpoints with single base accuracy and provides assembled contigs for PCR validation. ClustFAST is implemented in Java for improved interoperability and can be run from the command line via a perl script. ClusterFAST is meant for translocation detection from targeted regions and requires a picard-style interval file containing the targets (e.g., ALK_MLL.txt). Numerous output files are created during the workflow (see below) to allow for error checking. The current version of ClusterFAST uses velvet to assemble contigs during the final step, however other assemblers can be substituted. + + +------ + +**Targets BED** + +The BED file containing targets of interest. 
I would recommend to use one entry per gene of interest, instead of one entry per exon for this tool:: + + Column 1: Chromosome : Use the same syntax as the references used by Galaxy. Check your sam-headers for the correct format. ('chr1' vs '1') + Column 2: Start Position + Column 3: End Position + Column 4: Target Name. + Column 5: Score : ignored, use '0' + Column 6: Strand: ,'+' or '-' + + +------ + +**Input formats** + +BAM file for reads, BED file for targets. + +------ + + + + + + diff -r 1b008b4b05f3 -r 4a3afa90ff7f cf_scripts/filter_contigs.0403.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cf_scripts/filter_contigs.0403.pl Mon Jul 28 05:53:55 2014 -0400 @@ -0,0 +1,98 @@ +#!/usr/bin/perl + +use strict; +my $tol=300; +my $assembler=$ARGV[3]; + + +#read in .psl file (blat output), +open(PSL, "$ARGV[0]") or die "Can't open .psl.\n"; +my %blat_results; +#hash blat results by query name +while(my $line=) { + chomp($line); + my ($match, $mm, $rm, $nn, $qgpct, $qgpb, $tgpct, $tgpb, $strand, $qname, $qsize, $qstart, $qend, $tname, $tsize, $tstart, $tend, $stuff)=split(/\s+/, $line); + if( ! 
exists $blat_results{$qname}) { + $blat_results{$qname}=(); + } + push (@{$blat_results{$qname}}, $line); +} +close(PSL); + +my %good_contigs; +my %good_blats; + +#read in cf output + +open(QP, "$ARGV[1]"); +while(my $line=) { + chomp($line); + my $pf="pass"; + my ($x, $chr1, $pos1, $chr2, $pos2, $or, $ct, $splct)=split(/\s+/, $line); + if($pf eq "pass") { + + foreach my $key(keys %blat_results) { + my @score=(0,0); + my @blat_out=(); + + for(my $j=0; $j<@{$blat_results{$key}}; $j++) { + my ($match, $mm, $rm, $nn, $qgpct, $qgpb, $tgpct, $tgpb, $strand, $qname, $qsize, $qstart, $qend, $tname, $tsize, $tstart, $tend, $stuff)=split(/\s+/, $blat_results{$key}[$j]); + if(($tname eq $chr1) && ($pos1>$tstart-$tol)&&($pos1<$tend+$tol)) { + $score[0]++; + push(@blat_out, $blat_results{$key}[$j]); + } + if(($tname eq $chr2) && ($pos2>$tstart-$tol)&&($pos2<$tend+$tol)) { + $score[1]++; + push(@blat_out, $blat_results{$key}[$j]); + } + } + + if($score[0]>0 && $score[1]>0) { # if there is a contig with blat results corresponding to both sides of the cf result + $good_contigs{$key}=$line; + @{$good_blats{$key}}=@blat_out; + } + } + } +} +close(QP); + +my %all_ctgs; +print STDOUT "*********************************************\n$assembler results\n\n"; + +if(keys(%good_contigs)>0) { + # read in fasta file containing contigs + open(CT, "$ARGV[2]") or die "Can't open contigs.\n"; + my $curr_ctg=""; + while (my $line=) { + chomp($line); + if(substr($line, 0, 1) eq ">") { + $curr_ctg=substr($line,1); + $all_ctgs{$curr_ctg}=(); #read each contig into its own array + } + else { + push(@{$all_ctgs{$curr_ctg}}, $line); + } + } + close(CT); + + foreach my $key(keys %all_ctgs) { + if(exists $good_contigs{$key}) { + my $ctg=""; + for(my $k=0; $k<@{$all_ctgs{$key}}; $k++) { + $ctg.=$all_ctgs{$key}[$k]; + } + foreach(@{$good_blats{$key}}) { + my ($match, $mm, $rm, $nn, $qgpct, $qgpb, $tgpct, $tgpb, $strand, $qname, $qsize, $qstart, $qend, $tname, $size, $tstart, $tend, $stuff)=split(/\s+/, 
$_); + print STDOUT "$qname\t$qstart\t$qend\t$tname\t$tstart\t$tend\t$strand\n"; + } + print STDOUT "$key $good_contigs{$key}\n$ctg\n"; + + } + } +} + + + + + + diff -r 1b008b4b05f3 -r 4a3afa90ff7f cf_scripts/split_for_velvet.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cf_scripts/split_for_velvet.pl Mon Jul 28 05:53:55 2014 -0400 @@ -0,0 +1,53 @@ +#!/usr/bin/perl + +use strict; +my %read1; +my %read2; +my %qual1; +my %qual2; + +while (my $line=) { + chomp(); + my ($name, $flag, $chr1, $pos1, $mq, $cigar, $chr2, $pos2, $dist, $seq, $quals, $stuff)=split(/\s+/, $line); + if($flag & 16) { #reverse-complement it + $seq=reverse($seq); + $seq=~tr/ACGT/TGCA/; + $quals=reverse($quals); + } + if($flag & 64) { #first in pair + $read1{$name}=$seq; + $qual1{$name}=$quals; + } + else { + $read2{$name}=$seq; + $qual2{$name}=$quals; + } +} + +open(FQ1, ">$ARGV[0]") or die "Can't open $ARGV[0]"; +open(FQ2, ">$ARGV[1]") or die "Can't open $ARGV[1]"; +open(FQ3, ">$ARGV[2]") or die "Can't open $ARGV[2]"; + +foreach my $key(keys %read1) { + + if(exists $read2{$key}) { + print FQ1 "\@$key\n$read1{$key}\n+\n$qual1{$key}\n"; + print FQ2 "\@$key\n$read2{$key}\n+\n$qual2{$key}\n"; + } + else { + print FQ3 "\@$key\n$read1{$key}\n+\n$qual1{$key}\n"; + } +} +foreach my $key(keys %read2) { + + if(!(exists $read1{$key})) { + print FQ3 "\@$key\n$read2{$key}\n+\n$qual2{$key}\n"; + } +} +close(FQ1); +close(FQ2); +close(FQ3); + + + + \ No newline at end of file diff -r 1b008b4b05f3 -r 4a3afa90ff7f cf_v1.1.jar Binary file cf_v1.1.jar has changed diff -r 1b008b4b05f3 -r 4a3afa90ff7f clusterfast.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clusterfast.loc.sample Mon Jul 28 05:53:55 2014 -0400 @@ -0,0 +1,32 @@ +#This is a sample file distributed with Galaxy that enables the clusterfast +#tool to locate genome files for use with blat, novoalign and fasta. 
You will +#need to supply these files and then create a clusterfast.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The clusterfast.loc +#file has this format (white space characters are TAB characters): +# +#<unique_build_id> <display_name> <novoalign_index>;<2bit_file>;<fasta_file> +# +#for example: +# +#hg19 Human (Homo sapiens): hg19 /depot/data2/galaxy/novoalign/hg19.novoidx;/depot/data2/galaxy/twobit/hg19.2bit;/depot/data2/galaxy/fasta/hg19.fa +#mm9 Mouse (Mus musculus): mm9 /depot/data2/galaxy/novoalign/mm9.novoidx;/depot/data2/galaxy/twobit/mm9.2bit;/depot/data2/galaxy/fasta/mm9.fa +# +#then your /depot/data2/galaxy/twobit/ directory +#would need to contain the following 2bit files: +# +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg19.2bit +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 mm9.2bit +# +# your /depot/data2/galaxy/novoalign/ directory +#would need to contain the following novoalign index files: +# +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg19.novoidx +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 mm9.novoidx +# +# your fasta folder needs the following files: +# +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg19.fa +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 mm9.fa +# + diff -r 1b008b4b05f3 -r 4a3afa90ff7f readme.rst --- a/readme.rst Mon Jul 28 05:51:33 2014 -0400 +++ b/readme.rst Mon Jul 28 05:53:55 2014 -0400 @@ -1,16 +1,25 @@ -Galaxy tool_dependency for NovoAlign -==================================== +BACKGROUND: +ClusterFAST is a tool for finding translocations in next generation sequencing data developed by Haley Abel and Eric Duncavage at Washington University in St. Louis, MO. A manuscript detailing the use of ClusterFAST can be found at: http://www.ncbi.nlm.nih.gov/pubmed/?term=24813172. For questions or comments please email eduncavage@path.wustl.edu + +ClusterFAST is a pipeline for highly specific detection of translocations from high-coverage targeted capture sequence data.
It detects translocation breakpoints with single base accuracy and provides assembled contigs for PCR validation. ClusterFAST is implemented in Java for improved interoperability and can be run from the command line via a perl script. ClusterFAST is meant for translocation detection from targeted regions and requires a picard-style interval file containing the targets (e.g., ALK_MLL.txt). Numerous output files are created during the workflow (see below) to allow for error checking. The current version of ClusterFAST uses velvet to assemble contigs during the final step; however, other assemblers can be substituted. -This tool_dependency is written by Geert Vandeweyer. NovoAlign is a product of Novocraft, and is available only for open non-profit use without license. +GALAXY WRAPPER: +This galaxy wrapper was created by Geert Vandeweyer. It represents a static snapshot of ClusterFAST version 0611 (received from the authors on 2014-07-24). For questions regarding its functionality, contact me at geert.vandeweyer@uantwerpen.be -To obtain Novoalign, go to http://www.novocraft.com/main/page.php?id=968 +LICENSE AND DISCLAIMER: +ClusterFAST is protected by copyright, is distributed under the GNU GPL public license, and may be used freely for research purposes. The use of ClusterFAST for the purpose of financial profit or the inclusion/distribution with for-profit software not covered by GNU public license is prohibited. Please email eduncavage@path.wustl.edu for information on commercial licensing. The performance of ClusterFAST will vary depending on many sequencing parameters and must be validated by individual laboratories. -Download the approriate package, unpack the folder and edit the NOVOALIGN_PATH environment variable in the file: - -/environment_settings/NOVOALIGN_PATH/geert-vandeweyer/package_novoalign//env.sh +INSTALLATION: +Galaxy should be able to install all dependencies automatically.
These include +- samtools +- blat-server +- velvet +- novoalign -to point to the folder where you have installed NovoAlign. +After installation, you need to update several environment settings for the dependencies to work correctly. -To prepare the index files, use: novoindex +- NOVOALIGN path: (novoalign binaries are NOT automatically installed, see package_novoalign for details) : <tool_dependency_dir>/environment_settings/NOVOALIGN_PATH/geert-vandeweyer/package_novoalign/<hash_string>/env.sh +- index files (clusterfast.loc holds info on 2bit/fasta/novoalign references) : /galaxy-dist/tool-data//repos/geert-vandeweyer/clusterfast//clusterfast.loc. Read comments for the exact syntax. +- number of velvet threads : <tool_dependency_dir>/environment_settings/OMP_THREAD_LIMIT/geert-vandeweyer/package_velvet/<hash_string>/env.sh -Finally, you need to set up the /galaxy-dist/tool-data//repos/geert-vandeweyer/package_novoalign//novoalign.loc file for the correct path of your index files. + diff -r 1b008b4b05f3 -r 4a3afa90ff7f run_cf0611.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/run_cf0611.pl Mon Jul 28 05:53:55 2014 -0400 @@ -0,0 +1,235 @@ +#!/usr/bin/perl + +#Copyright 2012-2013 Haley Abel +# +#This file is part of ClusterFAST. +# +#ClusterFAST is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. +# +#ClusterFAST is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License along with ClusterFAST. If not, see <http://www.gnu.org/licenses/>.
+ + +use warnings; +use strict; + +use Getopt::Long; +use Pod::Usage; +use File::Path qw(make_path); +use Cwd ; +use Cwd 'abs_path'; + +my ($inbam, $outdir, $targets, $minct1, $minct2, $dist, $novoindex, $rmtmp, $FASTA,$twobit,$refpaths,$bp1,$bp2,$contigs); + +$minct1=2; +$minct2=1; +$dist=50000; +$rmtmp=1; +my $known_partners=0; +my $partnerfile=''; + +my $cwd = getcwd(); +my $scriptspath="$cwd/cf_scripts"; +my $cffile="$cwd/cf_v1.1.jar"; +my $help=''; + + +#YOU MUST ADD LOCAL FILE PATHS HERE FOR STANDALONE USE (including trailing slashes!). Galaxy adds the binaries to the path, so these can be left empty. +my $SAMTOOLS=''##INSERT PATH TO SAMTOOLS; http://sourceforge.net/projects/samtools/files/samtools/; +my $NOVOALIGN=''##INSERT PATH TO NOVOALIGN; http://www.novocraft.com/main/downloadpage.php; +my $VELVET=''##INSERT PATH TO VELVET;http://www.molecularevolution.org/software/genomics/velvet; +my $BLAT=''##INSET PATH TO BLAT gfServer/gfClient tools; + + + + +GetOptions( + "b|inbam=s"=> \$inbam, + "o|outdir=s"=> \$outdir, + "t|targets=s"=> \$targets, + "f|fasta=s"=> \$FASTA, + "m1|minct1=i"=> \$minct1, + "m2|minct2=i"=> \$minct2, + "d|distance=i"=>\$dist, + "i|novoindex=s"=> \$novoindex, + "r|rmtmp=i"=> \$rmtmp, + "h|help"=>\$help, + "2|twobit=s"=>\$twobit, + "p|refpaths=s"=>\$refpaths, + "c|contigs=s"=>\$contigs, + "y|bp1=s"=>\$bp1, + "z|bp2=s"=>\$bp2, + ) or die "Usage: run_cf_0611.pl -b inbam -o outdir -f fasta -t targets -d distance -i novoindex -r rmtmp -m1 min_pairs -m2 min_splits -t genome.2bit\n"; + +if($help) { + print "Usage: run_cf_0611.pl -b inbam -o outdir -f fasta -t targets -d distance -i novoindex -r rmtmp -m1 min_pairs -m2 min_splits -t genome.2bit\n"; + print "targets=BED style interval file containing the regions to search for SV\n"; + print "fasta=reference fasta file\nnovoindex=reference index for novoalign\n"; + print "distance=max distance from target to search (default=50000)\n"; + print "min_pairs=minimum number of read pairs supporting 
breakpoint (default=2)\nmin_splits=minimum number of split reads supporting breakpoint (default=1)\n"; + print "rmtmp=remove temp directory (default=1, true)\n"; + print "genome.2bit = 2bit index fasta file of reference genome, used by the blat server (which is launched on the localhost)\n"; + print "refpaths = semicolon seperated list of reference genomes paths (used by galaxy). The order is mandatory to be novoalign;2bit;fasta\n"; + print "contigs = (Optional) output file for the contigs (default: outdir/contigs.txt)\n"; + print "bp1 = (Optional) output file for the breakpoints.1.txt file (default: outdir/breakpoints.1.txt)\n"; + print "bp2 = (Optional) output file for the breakpoints.2.txt file (default: outdir/breakpoints.2.txt)\n"; + exit(1); +} + +# if refpaths is defined => split into seperate variables. +if (defined($refpaths) && $refpaths ne '') { + ($novoindex, $twobit, $FASTA) = split(/;/,$refpaths); +} + +if(! ( defined $inbam && defined $targets && defined $FASTA && defined $outdir && defined $novoindex && defined $twobit )) { + print STDERR "Usage: run_cf_0611.pl -b inbam -o outdir -f fasta -t targets -d distance -i novoindex -r rmtmp -m1 min_pairs -m2 min_splits -t genome.2bit\n"; + exit(1); +} +elsif (! (-e $inbam && -e $targets && -e $FASTA && -e $novoindex && -e $twobit)) { + print STDERR "Critical file missing.\n"; + exit(1); +} + +make_empty_output(); +$outdir = abs_path($outdir); + +## convert Targets.bed to targets.picard +my $cmd="${SAMTOOLS}samtools view -H $inbam > $outdir/Targets.picard"; +print STDOUT "$cmd\n"; +system($cmd); +$cmd = "awk '{OFS=\"\\t\"; print \$1,\$2,\$3,\$6,\$4 }' $targets >> $outdir/Targets.picard"; +print STDOUT "$cmd\n"; +system($cmd); +$targets = "$outdir/Targets.picard"; + +## check all reads are paired. Combination of paired and single end libraries crashes the tool. 
+my @fs = `${SAMTOOLS}samtools flagstat $inbam`; +my $total = 0; +my $paired = 0; +foreach(@fs) { + chomp; + if ($_ =~ m/^(\d+).*in total/) { + $total = $1; + } + elsif ($_ =~ m/^(\d+).*paired in sequencing/) { + $paired = $1; + } +} +if ($total != $paired) { + print "Removing single-read data from the input bam file\n"; + system("${SAMTOOLS}samtools view -f 1 -h -b -o $outdir/Paired.only.bam $inbam"); + $inbam = "$outdir/Paired.only.bam"; +} + +## process. +$cmd="java -Xmx8g -Xms6g -jar $cffile $inbam $targets $minct1 $dist $outdir 1"; +print STDOUT "$cmd\n"; +system($cmd); + +chdir("${outdir}/temp"); + +if(!(-s "toRemap1.fq")) { + print STDOUT "No breakpoints found.\n"; + chdir("$outdir"); + clean_up(); + exit(1); +} + +$cmd="${NOVOALIGN}novoalign -o SAM -i 230 140 -r all -e 999 -c2 -d $novoindex -F STDFQ -f toRemap1.fq toRemap2.fq > novoout.2.sam"; +print STDOUT "$cmd\n"; +system($cmd); + +$cmd="java -Xmx6g -Xms4g -jar $cffile novoout.2.sam $targets $minct2 $dist $outdir 2"; +print STDOUT "$cmd\n"; +system($cmd); + + +chdir("${outdir}/temp"); +print STDOUT "Assembling contigs...\n"; +&assemble(); +print STDOUT "Done\n"; + + +system("mv contigs.txt $outdir"); +chdir("$outdir"); +clean_up(); + + + + +######################### + +sub make_empty_output { + make_path($outdir); + open(FH, ">${outdir}/breakpoints.1.txt") or die "Can't open ${outdir}/breakpoints.1.txt"; close(FH); + open(FH, ">${outdir}/breakpoints.2.txt") or die "Can't open ${outdir}/breakpoints.2.txt"; close(FH); + open(FH, ">${outdir}/contigs.txt") or die "Can't open ${outdir}/contigs.txt"; close(FH); + +} + +sub assemble { + system("${SAMTOOLS}samtools view merged.sorted.final.temp.bam | sort -k 1,1 -k 2,2n -u | tee merged.sorted.final.unique.sam | perl ${scriptspath}/split_for_velvet.pl pair1.fq pair2.fq single.fq"); + if(!(-s "pair1.fq")) { + print STDERR "No breakpoints supported by split reads.\n"; + exit(1); + } + system("${VELVET}velveth velvet_out 31 -fastq -shortPaired -separate pair1.fq 
pair2.fq -short single.fq > /dev/null"); + system("${VELVET}velvetg velvet_out -cov_cutoff 3 -ins_length 300 -exp_cov 100 -ins_length_sd 60 > /dev/null"); + # launch blat server at random unused port + my $port = 10000 + int(rand(5000)); + my $status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1` ; + while ($status !~ m/Error/) { + $port = 10000 + int(rand(5000)); + $status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1` ; + } + print "Waiting for BLAT server to come online.\n"; + system("(${BLAT}gfServer start 127.0.0.1 $port -canStop $twobit >/dev/null 2>&1) &"); + # wait for startup + $status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1` ; + while ($status =~ m/Error/) { + sleep 15; + $status = `${BLAT}gfServer status 127.0.0.1 $port 2>&1` ; + } + + # run query + system("${BLAT}gfClient 127.0.0.1 $port / velvet_out/contigs.fa velvet.psl -minScore=2 -minIdentity=90 -nohead"); + # take blat server down. + system("${BLAT}gfServer stop 127.0.0.1 $port"); + # continue with analysis. + my $command = "perl ${scriptspath}/filter_contigs.0403.pl velvet.psl ${outdir}/breakpoints.2.txt velvet_out/contigs.fa velvet > velvet.txt"; + print STDOUT "$command\n"; + system($command); + system("mv velvet.txt contigs.txt"); +} + +sub clean_up { + ## move files to output locations. + if (defined $contigs) { + $contigs = abs_path($contigs); + system("cp '$outdir/contigs.txt' '$contigs'"); + } + if (defined $bp1) { + $bp1 = abs_path($bp1); + system("cp '$outdir/breakpoints.1.txt' '$bp1'"); + } + if (defined $bp2) { + $bp2 = abs_path($bp2); + system("cp '$outdir/breakpoints.2.txt' '$bp2'"); + } + if ($rmtmp) { + system("rm -R temp"); + } +} + + + + + + + + + + + + diff -r 1b008b4b05f3 -r 4a3afa90ff7f tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Mon Jul 28 05:51:33 2014 -0400 +++ b/tool_data_table_conf.xml.sample Mon Jul 28 05:53:55 2014 -0400 @@ -1,8 +1,9 @@ - - value, dbkey, name, path - +

+ name,value, path +

+ diff -r 1b008b4b05f3 -r 4a3afa90ff7f tool_dependencies.xml --- a/tool_dependencies.xml Mon Jul 28 05:51:33 2014 -0400 +++ b/tool_dependencies.xml Mon Jul 28 05:53:55 2014 -0400 @@ -1,28 +1,16 @@ - + - - - - - /Set/Path/to/NovoAlign/Installation/folder - - - - -Galaxy tool_dependency for NovoAlign -==================================== - -This tool_dependency is written by Geert Vandeweyer. NovoAlign is a product of Novocraft, and is available only for open non-profit use without license. - -To obtain Novoalign, go to http://www.novocraft.com/main/page.php?id=968 - -Download the approriate package, unpack the folder and edit the NOVOALIGN_PATH environment variable in the file: - -<tool_dependency_dir>/environment_settings/NOVOALIGN_PATH/geert-vandeweyer/package_novoalign/<hash_string>/env.sh - -to point to the folder where you have installed NovoAlign. - - - + + + + + + + + + + + +