Mercurial > repos > iuc > fastq_screen

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bowtie2_indices.loc.sample	Fri May 16 07:57:33 2014 -0400
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie2 indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie2_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie2/hg18/,
+#then the bowtie2_indices.loc entry would look like this:
+#
+#hg18	hg18	hg18	/depot/data2/galaxy/bowtie2/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie2/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie2_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie2/hg18/hg18canon
+#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie2/hg18/hg18full
+#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie2/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_screen	Fri May 16 07:57:33 2014 -0400
@@ -0,0 +1,1073 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+use Getopt::Long;
+use FindBin qw($RealBin);
+use File::Copy;
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
+use File::Temp qw/ tempfile tempdir /;
+
+our $VERSION = "0.4.2";
+
+###########################################################################
+###########################################################################
+##                                                                       ##
+## Copyright 2013, Simon Andrews  (simon.andrews@babraham.ac.uk)         ##
+##                 Mark Fiers     (Plant & Food Research, NZ)            ##
+##                 Steven Wingett (steven.wingett@babraham.ac.uk)        ##
+##                                                                       ##
+## This program is free software: you can redistribute it and/or modify  ##
+## it under the terms of the GNU General Public License as published by  ##
+## the Free Software Foundation, either version 3 of the License, or     ##
+## (at your option) any later version.                                   ##
+##                                                                       ##
+## This program is distributed in the hope that it will be useful,       ##
+## but WITHOUT ANY WARRANTY; without even the implied warranty of        ##
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         ##
+## GNU General Public License for more details.                          ##
+##                                                                       ##
+## You should have received a copy of the GNU General Public License     ##
+## along with this program.  If not, see <http://www.gnu.org/licenses/>. ##
+###########################################################################
+###########################################################################
+
+
+unless(@ARGV){    #Display a help message if no arguments specified
+  print while (<DATA>);
+  exit;
+}
+
+
+## Option variables
+my $subset_count;
+my $outdir;
+my $illumina;
+my $quiet;
+my $help;
+my $version;
+my $paired;
+my $threads;
+my $color;
+my $conf;
+my $bowtie_opts;
+my $bowtie2_opts;
+my $nohits;
+my $aligner;
+
+my $config_result = GetOptions("subset=i" => \$subset_count,
+                   "outdir=s" => \$outdir,
+                   "illumina1_3" => \$illumina,
+                   "quiet" => \$quiet,
+                   "help" => \$help,
+                   "version" => \$version,
+                   "paired" => \$paired,
+                   "conf=s" => \$conf,
+                   "color" => \$color,
+                   "bowtie=s" => \$bowtie_opts,
+                   "bowtie2=s" => \$bowtie2_opts,
+                   "threads=i" => \$threads,
+                   "nohits" => \$nohits,
+                   "aligner=s" => \$aligner
+                  );
+
+die "Could not parse options" unless ($config_result);
+
+if ($help) {
+  print while (<DATA>);
+  exit;
+}
+
+if ($version) {
+  print "fastq_screen v$VERSION\n";
+  exit;
+}
+
+if ($color) {
+  $color = '-C';
+}
+else {
+  $color = '';
+}
+
+if(defined $aligner){
+  if( ($aligner ne 'bowtie') and ($aligner ne 'bowtie2') ){
+    die "Valid options for --aligner are 'bowtie' or 'bowtie2' only.\n";
+  }elsif( ($aligner eq 'bowtie') and (defined $bowtie2_opts) ){
+    die "Bowtie selected as the aligner yet bowtie2 options specified.\n";
+  }elsif( ($aligner eq 'bowtie2') and (defined $bowtie_opts) ){
+    die "Bowtie 2 selected as the aligner yet bowtie options specified.\n";
+  }
+}
+
+$bowtie_opts = '' unless ($bowtie_opts); # Get undef warning otherwise
+$bowtie2_opts = '' unless ($bowtie2_opts); # Get undef warning otherwise
+
+# Configuration
+my $number_of_threads = 1;
+my $path_to_bowtie = 'bowtie';
+my $path_to_bowtie2 = 'bowtie2';
+my @libraries;
+my %library_bowtie_version;   # Records which library should be used with which version of bowtie (i.e bowtie1 or bowtie2)
+
+load_configuration($conf);
+
+# Override the configuration default if they've
+# manually specified a number of threads to use
+$number_of_threads = $threads if ($threads);
+
+unless (@libraries) {
+  die "No search libraries were configured";
+}
+
+
+my @files = @ARGV;
+
+die "No files to process" unless (@files);
+
+my $index = 0;
+while ($index <= $#files) {
+  if ($paired) {
+    process_file($files[$index],$files[$index+1]);
+    $index+=2;
+  }
+  else {
+    process_file($files[$index]);
+    $index++;
+  }
+}
+
+
+
+sub process_file {
+
+  my ($file,$file2) = @_;
+
+  my %seq_genomes; # Stores the hits to each genome
+  my $readsprocessed = 0; # Stores the total number of sequences searched
+
+
+  if ($file2) {
+    warn "Processing $file and $file2\n" unless ($quiet);
+  }
+  else {
+    warn "Processing $file\n" unless ($quiet);
+  }
+
+  # Check that we can actually find the files we're working with
+  unless (-e $file) {
+    warn "Couldn't locate file $file - skipping\n";
+    return;
+  }
+  if ($file2) {
+    unless (-e $file2) {
+      warn "Couldn't locate file $file2 - skipping\n";
+      return;
+    }
+  }
+
+  # Work out the output file name we're going to use
+  my $outfile = $file;
+  $outfile =~ s/\.(txt|seq|fastq)$//i;
+
+  # We can try to remove the end specific part of the name
+  # if they're using standard Illumina naming. It doesn't
+  # really matter if this fails
+  if ($paired) {
+    $outfile =~ s/s_(\d)_1_sequence/s_${1}_sequence/;
+  }
+
+  $outfile .= "_screen.txt";
+
+  if ($outdir) {
+    $outfile = (split(/\//,$outfile))[-1];
+    $outfile = $outdir."/".$outfile;
+  }
+
+  if (-e $outfile) {
+    warn "Output file $outfile already exists - skipping\n";
+    return;
+  }
+
+  open (OUT,'>',$outfile) or do {
+    warn "Couldn't write to $outfile: $!";
+    return;
+  };
+
+  #Print out the Version and other information to the output file
+
+  if($subset_count){
+    print OUT "#Fastq_screen version: $VERSION\t#Reads in subset: $subset_count\n";
+  }else{
+    print OUT "#Fastq_screen version: $VERSION\n";
+  }
+
+  #Print the headers to the output file
+  print OUT join ("\t",('Library','#Reads_processed','#Unmapped','%Unmapped','#One_hit_one_library','%One_hit_one_library','#Multiple_hits_one_library','%Multiple_hits_one_library','#One_hit_multiple_libraries','%One_hit_multiple_libraries','Multiple_hits_multiple_libraries','%Multiple_hits_multiple_libraries')),"\n";
+
+  my $temp_file;
+  my $temp_file2;
+
+  my $read_length = get_read_length($file);
+
+  if ($read_length < 0) {
+    warn "Failed to calculate read length from $file";
+    return;
+  }
+
+  if ($read_length < 20) {
+    warn "Ignoring reads shorter than 20bp\n";
+    $read_length = 20;
+  }
+
+  # We don't use a seed of >40 even if the reads are that long
+  $read_length = 40 if ($read_length > 40);
+
+
+  # Count the sequences in the file.
+  # We need to make a subset of these sequences
+
+  # First we need to count how many sequences are in the original file
+  my $seqcount = 0;
+
+  warn "Counting sequences in $file\n" unless ($quiet);
+
+  #Check if the read file is compressed and open accordingly
+  if($file =~ /\.gz$/){
+    open (IN,"zcat \'$file\' |") or do {
+      warn "Can't read $file: $!";
+      return;
+    };
+  }else{
+    open (IN,$file) or do {
+      warn "Can't read $file: $!";
+      return;
+    };
+  }
+
+  ++$seqcount while (<IN>);
+
+  $seqcount = int($seqcount/4);
+
+  if ($subset_count and $seqcount > $subset_count*2) {    # We actually need to do the reduction
+
+    $temp_file = $file;
+    $temp_file .= "_temp_subset.txt";
+
+    # Since we're writing we need to do it in
+    # the directory we know we're allowed to write
+    # to.
+
+    if ($outdir) {
+      $temp_file = (split(/\//,$temp_file))[-1];
+      $temp_file = $outdir."/".$temp_file;
+    }
+
+    open (TEMP,'>',$temp_file) or do {
+      warn "Can't write temp subset file: $!";
+    return;
+    };
+
+    my $interval = sprintf("%.0f",($seqcount/$subset_count));
+
+    warn "Making reduced seq file with ratio $interval:1\n" unless ($quiet);
+
+    #Move back to the start of the file.  The Seek function will not work if reading the file using zcat, so
+    #close and open the file to move back to the start.
+    close IN;
+    if($file =~ /\.gz$/){
+      open (IN,"zcat \'$file\' |") or do {
+    warn "Can't read $file: $!";
+    return;
+      };
+    }else{
+      open (IN,$file) or do {
+    warn "Can't read $file: $!";
+    return;
+      };
+    }
+
+    my $current_count = 0;
+    while (<IN>) {
+      if (/^@/) {
+    my $record = $_;
+    $record .= scalar <IN>;
+    $record .= scalar <IN>;
+    $record .= scalar <IN>;
+
+    if ($current_count % $interval == 0) {
+      print TEMP $record;
+      ++$readsprocessed;
+    }
+    ++$current_count;
+      }
+    }
+
+    close (TEMP) or do {
+      warn "Can't write temp subset file: $!";
+      return;
+    };
+
+    $file = $temp_file;
+
+    if ($file2) {    # We need to do the reduction in file 2 as well
+
+      $temp_file2 = $file2;
+      $temp_file2 .= "_temp_subset.txt";
+
+      # Since we're writing we need to do it in
+      # the directory we know we're allowed to write
+      # to.
+
+      if ($outdir) {
+    $temp_file2 = (split(/\//,$temp_file2))[-1];
+    $temp_file2 = $outdir."/".$temp_file2;
+      }
+
+      open (TEMP,'>',$temp_file2) or do {
+    warn "Can't write temp2 subset file: $!";
+    return;
+      };
+
+      warn "Making reduced seq file 2 with ratio $interval:1\n" unless ($quiet);
+
+      #Check if the read file is compressed and open accordingly
+      if($file2 =~ /\.gz$/){
+    open (IN,"zcat \'$file2\' |") or do {
+      warn "Failed to read $file2: $!";
+      unlink ($temp_file) or warn "Couldn't delete temp file $temp_file: $!";
+      return;
+    };
+      }else{
+    open (IN,$file2) or do {
+      warn "Failed to read $file2: $!";
+      unlink ($temp_file) or warn "Couldn't delete temp file $temp_file: $!";
+      return;
+    };
+      }
+
+      my $current_count = 0;
+      while (<IN>) {
+    if (/^@/) {
+      my $record = $_;
+      $record .= scalar <IN>;
+      $record .= scalar <IN>;
+      $record .= scalar <IN>;
+
+      print TEMP $record if ($current_count % $interval ==0);
+      ++$current_count;
+    }
+      }
+
+      close (TEMP) or do {
+    warn "Can't write temp2 subset file: $!";
+    return;
+      };
+
+      $file2 = $temp_file2;
+    }
+  }
+  else {
+    $readsprocessed = $seqcount;
+    if ($subset_count) {
+      warn "Not making subset of $subset_count since $seqcount actual seqs is too low or close enough\n" unless ($quiet);
+    }
+    #If no file reduction was made and the original sequence file(s) were zipped, then they need to be unzipped before
+    #sending to Bowtie.
+    if ($file =~ /\.gz$/) {
+
+      my $temp_copied_file = $file;
+      $temp_copied_file .= "_temp_copied.gz";
+
+      # Since we're writing we need to do it in
+      # the directory we know we're allowed to write
+      # to.
+      if ($outdir) {
+    $temp_copied_file = (split(/\//,$temp_copied_file))[-1];
+    $temp_copied_file = $outdir."/".$temp_copied_file;
+      }
+
+      #Copy the file to the output directory then unzip
+      #Copying the file first avoids unzipping problems if the filename is a symbolic link
+      copy ("$file", "$temp_copied_file");
+      gunzip $temp_copied_file => "$temp_copied_file".".txt" or die "gunzip failed: $GunzipError\n";
+      $file = $temp_copied_file;
+      unlink $temp_copied_file;
+      $file = $temp_copied_file.".txt";
+      $temp_file = $file;    #Removes the unzipped file after Bowtie is finished mapping
+    }
+
+    if(($file2) and ($file2 =~ /\.gz$/)) {
+
+      my $temp_copied_file2 = $file2;
+      $temp_copied_file2 .= "_temp_copied.gz";
+
+      if ($outdir) {
+    $temp_copied_file2 = (split(/\//,$temp_copied_file2))[-1];
+    $temp_copied_file2 = $outdir."/".$temp_copied_file2;
+      }
+      copy ("$file2", "$temp_copied_file2");
+      gunzip $temp_copied_file2  => "$temp_copied_file2".".txt" or die "gunzip failed: $GunzipError\n";
+      unlink  $temp_copied_file2;
+      $file2 = $temp_copied_file2.".txt";
+      $temp_file2 = $file2;    #Removes the unzipped file after Bowtie is finished mapping
+    }
+  }
+
+
+  my $library_index = -1; # Make lists in the same order as @libraries
+
+  foreach my $library (@libraries) {
+    #Write Bowtie/Bowtie2 Standard Error to a temporary output file
+    #Generate a random filename and place in $outdir (if specified)
+    my $error_fh;
+    my $error_filename;
+    if($outdir){
+      ($error_fh, $error_filename) = tempfile( 'aligner_standard_error.XXXXXXXX', SUFFIX => '.txt', DIR => $outdir);
+    }else{
+      ($error_fh, $error_filename) = tempfile( 'aligner_standard_error.XXXXXXXX', SUFFIX => '.txt');
+    }
+
+    if($file2){
+      warn "Searching $file and $file2 against $library->[0]\n" unless ($quiet);
+    }else{
+      warn "Searching $file against $library->[0]\n" unless ($quiet);
+    }
+
+    my $illumina_flag = '';
+    $illumina_flag = "--phred64-quals" if ($illumina);
+
+    my $bowtie_command;
+
+
+    #Count the index of the library being used
+    $library_index++;
+
+
+    #Determine whether to execute bowtie1 or bowtie2, whether this is a paired-end or single-end, and execute accordingly
+    if($library_bowtie_version{$library->[1]} == 1 ){
+      if ($file2) {
+    $bowtie_command = "$path_to_bowtie $bowtie_opts $illumina_flag $color -l $read_length -k 2 --chunkmbs 512 --maxins 1000 -p $number_of_threads $library->[1] -1 \'$file\' -2 \'$file2\' 2>$error_filename |";
+      }
+      else {
+    $bowtie_command = "$path_to_bowtie $bowtie_opts $illumina_flag $color -l $read_length -k 2 --chunkmbs 512 -p $number_of_threads $library->[1] \'$file\' 2>$error_filename |";
+      }
+    }else{
+      if ($file2) {
+    $bowtie_command = "$path_to_bowtie2 $bowtie2_opts $illumina_flag -k 2 --very-fast-local --no-hd --no-unal --maxins 1000 -p $number_of_threads $library->[1] -1 \'$file\' -2 \'$file2\' 2>$error_filename|";
+      }
+      else {
+    $bowtie_command = "$path_to_bowtie2 $bowtie2_opts $illumina_flag -k 2 --very-fast-local --no-hd --no-unal -p $number_of_threads $library->[1] \'$file\' 2>$error_filename |";
+      }
+    }
+
+    open (BOWTIE,$bowtie_command) or die "Failed to launch bowtie command '$bowtie_command': $!";
+
+    my $seqname_suffix = 1;    #Explained below
+
+    while (<BOWTIE>) {
+
+      my $line = $_;
+      chomp $line;
+
+      #Check output is in native original bowtie format or SAM format
+      unless( ($line =~ /^.+\t[\+\-]\t.+\t.+\t[TAGCN]+\t.+/) or  ($line =~ /^.+\t\d+\t\S+\t\d+\t\d+\t\S+\t.+\t\d+\t\S+\t[ATCGN]+\t.+/) ){
+    # This is a progress message coming from stderr
+    warn "Bowtie/Bowtie2 output not in correct format: $_\n";
+    next;
+      }
+
+      my ($seqname) = split(/\t/, $line);
+
+      #Unlike the original bowtie native output format, bowtie2 removes the '/1' or '/2' from the end of a pair-end read (in fact bowtie adds the '/1' or '/2'
+      #if it is missing in the FASTQ file). This will cause counting problems. To remedy this add '/1' or '/2' or alternating alignments
+      if( ($library_bowtie_version{$library->[1]} == 2) and $file2 ){   #Bowtie2 used as aligner and a paired-end read
+    $seqname = $seqname.'/'.$seqname_suffix;
+    $seqname_suffix = abs($seqname_suffix - 3);    #seqname_suffix alternates between 1 and 2
+    }
+
+
+    # Initialise an array of zero values the same length as @libraries
+      unless (exists $seq_genomes{$seqname}){
+    for my $position(0..$#libraries){
+      $seq_genomes{$seqname}->[$position] = 0;
+    }
+      }
+
+
+      # If there is already a hit for this read against this library, set the hit score to 2
+      # If this is the first hit, set the score to 1
+      if($seq_genomes{$seqname}->[$library_index] == 1){
+    $seq_genomes{$seqname}->[$library_index] = 2;
+      }else{
+    $seq_genomes{$seqname}->[$library_index] = 1;
+      }
+    }
+
+
+    #Check the Standard Error output file and report any errors
+    #Bowtie reports the alignment summary to standard error, so ignore the alignment summary
+    while(<$error_fh>){
+      unless (/^\#/ or /^Reported/ or /^No alignments/ or /reads; of these/ or /were unpaired; of these/ or /aligned/ or /overall alignment rate/ or /mates make up the pairs; of these/ or /were paired; of these/ or /----/) {
+    warn "Bowtie/Bowtie2 warning: $_";
+      }
+    }
+    close $error_fh;
+    unlink $error_filename or die "Could not delete temporary Standard Error file '$error_filename' : $!";
+  }
+
+
+  # Collate the hit results from the Bowtie searches.
+  #Each read/read pair can be classified as 1) maps to no libraries;
+  #2) maps uniquely to a specified library; 3) maps multiple times to a specified library, but to no others;
+  #4) maps once to a specified library and to at least one other library; 5) maps multiple
+  #times to a specified library and to at least one other library.
+
+  my @one_hit_one_library;
+  my @one_hit_multiple_libraries;
+  my @multiple_hits_one_library;
+  my @multiple_hits_multiple_libraries;
+
+  # Initialise thoses arrays with zero values, making them the same length as @libraries
+  for my $position(0..$#libraries){
+    $one_hit_one_library[$position] = 0;
+    $one_hit_multiple_libraries[$position] = 0;
+    $multiple_hits_one_library[$position] = 0;
+    $multiple_hits_multiple_libraries[$position] = 0;
+  }
+
+  foreach my $seq_id(keys %seq_genomes){
+    my $numlib = 0;
+
+    # Count up the number of libraries where this sequence
+    # had a hit
+    foreach (@{$seq_genomes{$seq_id}}) {
+      $numlib++ if ($_);
+    }
+
+    #Identify the hits
+    # Classify the type of hit
+    if($numlib == 1){
+      foreach my $index (0..$#{$seq_genomes{$seq_id}}){
+    if($seq_genomes{$seq_id}->[$index] == 2){
+      $multiple_hits_one_library[$index]++;    #Maps multiple times to a specified library
+    }else{
+      $one_hit_one_library[$index] += $seq_genomes{$seq_id}->[$index];    #Maps uniquely / does not map (i.e. add 0)
+    }
+      }
+    }else{
+      foreach my $index (0..$#{$seq_genomes{$seq_id}}){
+    if($seq_genomes{$seq_id}->[$index] == 2){
+      $multiple_hits_multiple_libraries[$index]++;    #Maps multiple times to a specified library, but to no others
+    }else{
+      $one_hit_multiple_libraries[$index] += $seq_genomes{$seq_id}->[$index];    #Maps multiple times / does not map (i.e. add 0)
+    }
+      }
+    }
+  }
+
+  # If we're doing a paired end search then all our counts will
+  # be double what they should be since we'll get hits for both
+  # of the sequences in the pair with different ids (bowtie1 will
+  # make them different even if they weren't originally).  We
+  # therefore need to halve the counts before calculating percentages.
+  # The easiest way to do this is to double the expected total
+  # count
+
+  $readsprocessed *= 2 if ($file2);
+
+  # Summarise the counts and write the text report
+  foreach my $index (0..$#libraries){
+    my $library = $libraries[$index];
+
+    my $percent_one_hit_one_library = ($one_hit_one_library[$index] / $readsprocessed) * 100;
+    $percent_one_hit_one_library = sprintf("%.2f",$percent_one_hit_one_library);
+
+    my $percent_one_hit_multiple_libraries = ($one_hit_multiple_libraries[$index] / $readsprocessed) * 100;
+    $percent_one_hit_multiple_libraries  = sprintf("%.2f",$percent_one_hit_multiple_libraries);
+
+    my $percent_multiple_hits_one_library = ($multiple_hits_one_library[$index] / $readsprocessed) * 100;
+    $percent_multiple_hits_one_library = sprintf("%.2f", $percent_multiple_hits_one_library);
+
+    my $percent_multiple_hits_multiple_libraries = ($multiple_hits_multiple_libraries[$index] / $readsprocessed) * 100;
+    $percent_multiple_hits_multiple_libraries = sprintf("%.2f", $percent_multiple_hits_multiple_libraries);
+
+    my $percent_unmapped = 100 - $percent_one_hit_one_library - $percent_one_hit_multiple_libraries - $percent_multiple_hits_one_library - $percent_multiple_hits_multiple_libraries;
+    $percent_unmapped = sprintf("%.2f", $percent_unmapped);
+
+    my $reads_unmapped = $readsprocessed - $one_hit_one_library[$index] - $one_hit_multiple_libraries[$index] - $multiple_hits_one_library[$index] - $multiple_hits_multiple_libraries[$index];
+
+    print OUT join("\t",($library->[0], $readsprocessed, $reads_unmapped, $percent_unmapped, $one_hit_one_library[$index], $percent_one_hit_one_library, $multiple_hits_one_library[$index], $percent_multiple_hits_one_library, $one_hit_multiple_libraries[$index], $percent_one_hit_multiple_libraries, $multiple_hits_multiple_libraries[$index], $percent_multiple_hits_multiple_libraries)),"\n";
+  }
+
+  #Calculate the number of reads that mapped to none of the libraries
+  my $percent_hit_no_libraries = (($readsprocessed - (scalar keys %seq_genomes)) / $readsprocessed) * 100;
+  $percent_hit_no_libraries = sprintf("%.2f",$percent_hit_no_libraries);
+  print OUT "\n\%Hit_no_libraries: $percent_hit_no_libraries\n";
+
+  close IN;
+  close OUT or die "Coudn't write to $outfile: $!";
+
+
+  if($nohits){
+    if($file2){
+      find_no_hits(\%seq_genomes, $file, $file2, $outfile);
+    }else{
+      find_no_hits(\%seq_genomes, $file, $outfile);
+    }
+  }
+
+  unlink($temp_file) or warn "Unable to delete temp file '$temp_file'" if ($temp_file);
+
+  unlink($temp_file2) or warn "Unable to delete temp file '$temp_file'" if ($temp_file2);
+
+
+  #Check whether the module GD::Graph is installed
+  eval{
+    require GD::Graph::bars;
+    GD::Graph::pie->import();
+  };
+
+  unless($@) {
+    make_graph($outfile);
+  }else{
+    warn "Perl module GD::Graph::bars not installed, skipping charts\n";
+  }
+}
+
+print "Processing complete\n";
+
+
+
+#####################################################################################################
+#Subroutines
+#####################################################################################################
+
+
+sub load_configuration {
+
+  # Find the config file
+
+  my ($conf_file) = @_;
+
+  # If they haven't specified a conf file then look
+  # in the directory containing the program.
+  $conf_file = "$RealBin/fastq_screen.conf" unless ($conf_file);
+
+  unless (-e $conf_file) {
+    die "Couldn't find fastq_screen.conf at '$conf_file'";
+  }
+
+  warn "Reading configuration from '$conf_file'\n" unless ($quiet);
+
+  open (CONF,$conf_file) or die "Can't read $conf_file : $!";
+
+  #Determine aligner to use from 1) paths specified and 2) databases if no aligner specified
+  if(!defined $aligner){
+    my $bowtie1_path_exists = 0;
+    my $bowtie2_path_exists = 0;
+    my $bowtie1_database_exists = 0;
+    my $bowtie2_database_exists = 0;
+
+    while (<CONF>) {
+      chomp;
+      s/^\s+//;
+      s/\s+$//;
+
+      next if (/^\#/);
+      next unless ($_);
+
+      my ($name) = split(/\s+/);
+
+      if ($name eq 'BOWTIE'){
+    if( defined ((split(/\s+/,$_,2))[1]) ){
+      $bowtie1_path_exists = 1;
+    }
+      }elsif($name eq 'BOWTIE2'){
+    if( defined ((split(/\s+/,$_,2))[1]) ){
+      $bowtie2_path_exists = 1;
+    }
+      }elsif ($name eq 'DATABASE') {
+    my (undef, undef, undef, $bowtie_version) = split(/\s+/,$_,4);
+
+    if(!defined $bowtie_version){    #Determine whether an index should be used with bowtie1 or bowtie2
+      $bowtie1_database_exists = 1;
+    }elsif($bowtie_version =~ /bowtie2/i){
+      $bowtie2_database_exists = 1;
+
+    }
+      }
+    }
+
+    #Is bowtie or bowtie2 in path?
+    unless(system "which bowtie >/dev/null 2>&1"){
+      $bowtie1_path_exists = 1;
+    }
+    unless(system "which bowtie2 >/dev/null 2>&1"){
+      $bowtie2_path_exists = 1;
+    }
+
+    #Decide whether to proceed and display message
+    if($bowtie1_path_exists and $bowtie2_path_exists and $bowtie1_database_exists and $bowtie2_database_exists){
+      $aligner = 'bowtie';
+      warn "Aligner not specified (--aligner). Path to bowtie and bowtie2 both specified, bowtie and bowtie2 database both present: defaulting to bowtie\n";
+    }elsif($bowtie1_path_exists and $bowtie1_database_exists){
+      $aligner = 'bowtie';
+      warn "Aligner not specified (--aligner). Path to bowtie specified and bowtie database(s) present: mapping with bowtie\n";
+    }elsif($bowtie2_path_exists and $bowtie2_database_exists){
+      $aligner = 'bowtie2';
+      warn "Aligner not specified (--aligner). Path to bowtie2 specified and bowtie2 database(s) present: mapping with bowtie2\n";
+    }else{
+      die "Aligner not specified (--aligner). No path to bowtie or bowtie2, or, no bowtie or bowtie2 databases specified: lease adjust configuration file.\n";
+    }
+    seek (CONF,0,0);    #Return to start of conf file
+  }
+
+
+  while (<CONF>) {
+    chomp;
+    s/^\s+//;
+    s/\s+$//;
+
+    next if (/^\#/);
+    next unless ($_);
+
+    my ($name) = split(/\s+/);
+
+    if ( ($name eq 'BOWTIE') and ($aligner eq 'bowtie') ) {
+
+      $path_to_bowtie = (split(/\s+/,$_,2))[1];
+
+      die "Empty value set for BOWTIE config parameter" unless ($path_to_bowtie);
+
+      warn "Using '$path_to_bowtie' as bowtie path\n" unless ($quiet);
+    }
+
+    elsif( ($name eq 'BOWTIE2') and ($aligner eq 'bowtie2') ) {
+      $path_to_bowtie2 = (split(/\s+/,$_,2))[1];
+
+      die "Empty value set for BOWTIE2 config parameter" unless ($path_to_bowtie2);
+
+      warn "Using '$path_to_bowtie2' as bowtie2 path\n" unless ($quiet);
+    }
+
+    elsif ($name eq 'THREADS') {
+      $number_of_threads = (split(/\s+/))[1];
+      unless ($number_of_threads =~ /^\d+$/) {
+    die "Invalid number of threads '$number_of_threads set in conf file";
+      }
+      warn "Using $number_of_threads threads for searches\n" unless ($quiet);
+    }
+
+    elsif ($name eq 'DATABASE') {
+      my (undef,$db_name,$db_path,$bowtie_version) = split(/\s+/,$_,4);
+
+      if(!defined $bowtie_version){    #Determine whether an index should be used with bowtie1 or bowtie2
+    $bowtie_version = 1;
+      }elsif($bowtie_version =~ /bowtie2/i){
+    $bowtie_version = 2;
+    #Bowtie2 cannot process colorspace reads
+    if($color){
+      die "Bowtie2 cannot process colorspace reads, in the configuration file please specify original Bowtie indices for '$db_name'\n";
+    }
+      }elsif($bowtie_version =~ /bowtie/i){
+    $bowtie_version = 1;
+      }else{
+    die "Could not determine which bowtie version to use for: '$db_name\t$db_path\t$bowtie_version'\nPlease modify the configuraion file by entering after the path to the desired sequence aligner either i)'Bowtie2' or ii)leave blank to default to the original Bowtie\n";
+      }
+
+      if( ($bowtie_version == 1) and ($aligner eq 'bowtie2') ){    #Check the index is for the aligner to be used
+    next;
+      }elsif( ($bowtie_version == 2) and ($aligner eq 'bowtie') ){
+    next;
+      }
+
+      $library_bowtie_version{$db_path} = $bowtie_version;
+
+      # Check to see that there's a bowtie or bowtie2 index at that location
+      if($bowtie_version == 1){
+    unless (-e "$db_path.1.ebwt") {
+      warn "Skipping DATABASE '$db_name' since no bowtie index was found at '$db_path'\n";
+      next;
+    }
+      }else{
+    unless (-e "$db_path.1.bt2") {
+      warn "Skipping DATABASE '$db_name' since no bowtie index was found at '$db_path'\n";
+      next;
+    }
+      }
+
+      warn "Adding database $db_name\n" unless ($quiet);
+      push @libraries, [$db_name,$db_path];
+    }
+  }
+  close CONF;
+}
+
+
+
+sub get_read_length {
+
+  my ($file) = @_;
+
+  #Check if the read file is compressed and open accordingly
+  if($file =~ /\.gz$/){
+    open (IN,"zcat \'$file\' |") or do {
+      warn "Failed to read $file: $!";
+      return 0;
+    };
+  }else{
+    open (IN,$file) or do {
+      warn "Failed to read $file: $!";
+      return 0;
+    };
+  }
+
+  my $shortest_length = -1;
+
+  my $current_count = 0;
+  while (<IN>) {
+    if (/^@/) {
+      my $seq = scalar <IN>;
+      chomp $seq;
+      if ($shortest_length < 0 or length $seq < $shortest_length) {
+    $shortest_length = length $seq;
+      }
+
+      # Skip the rest of the record
+      $_ = scalar <IN>;
+      $_ = scalar <IN>;
+
+      ++$current_count;
+      last if ($current_count > 1000);
+    }
+  }
+  return $shortest_length;
+}
+
+
+
+sub make_graph {
+
+  my ($file) = @_;
+
+  my $outfile = $file;
+  $outfile =~ s/\.txt$//;
+
+  open (IN,$file) or die "Can't open data file '$file' to create graph";
+
+  my @data;
+  my $percent_no_hits;
+
+  $_ = <IN>;    #Header
+  $_ = <IN>;    #Header
+
+  while (<IN>) {
+    chomp;
+
+    if(/^\s*$/){
+      next;
+    }
+
+    if(/^\%Hit_no_libraries: (\S+)/){
+      $percent_no_hits = $1;
+      last;
+    }
+
+    my ($name, undef, undef, $unmapped, undef, $percent_one_hit_one_library, undef, $percent_multiple_hits_one_library, undef, $percent_one_hit_multiple_libraries, undef, $percent_multiple_hits_multiple_libraries) = split(/\t/);
+
+    push @{$data[0]},$name;
+    push @{$data[1]},$percent_one_hit_one_library;
+    push @{$data[2]},$percent_multiple_hits_one_library;
+    push @{$data[3]},$percent_one_hit_multiple_libraries;
+    push @{$data[4]},$percent_multiple_hits_multiple_libraries;
+  }
+
+  close IN;
+
+  #Add a separate bar for the reads that hit none of the libraries
+  push @{$data[0]},'No hits';
+  push @{$data[1]},$percent_no_hits;
+  push @{$data[2]},'0';
+  push @{$data[3]},'0';
+  push @{$data[4]},'0';
+
+  # my $graph = GD::Graph::bars->new(75* @{$data[0]},350);
+  # too small - ross fix may 12 2014
+  my $graph = GD::Graph::bars->new(1024,768);
+
+
+  $graph->set_legend('One hit\one library', 'Multiple hits\one library', 'One hit\multiple libraries','Multiple hits\multiple libraries');
+
+  $graph -> set (
+         y_label => '% Mapped',
+         title => ((split(/\//,$outfile))[-1]),
+         y_max_value => 100,
+         bar_spacing => 15,
+         transparent => 0,
+         overwrite => 1,
+         cumulate => 1,
+         dclrs => [qw(lblue dblue lred dred)],
+        );
+
+  my $gd = $graph -> plot (\@data);
+
+
+
+  $outfile .= ".png";
+
+  open (IMG,'>',$outfile) or die "Can't write graph to $outfile: $!";
+  binmode IMG;
+  print IMG $gd->png();
+  close IMG or die "Can't write graph to $outfile: $!";
+}
+
+
+
+sub find_no_hits{
+  my $seq_genomes;
+  my $file;
+  my $file2;
+  my $no_hits_filename;
+  my @filenames = ();
+  my $count = 0;
+
+  #Determine if we have a paired read
+  if(scalar @_ == 4){
+    ($seq_genomes, $file, $file2, $no_hits_filename) = @_;
+    push(@filenames, ($file, $file2));
+  }else{
+    ($seq_genomes, $file, $no_hits_filename) = @_;
+     push(@filenames, $file);
+  }
+
+  $no_hits_filename .= "_no_hits.txt";
+
+  foreach my $filename (@filenames){
+    $count++;
+
+    open(IN, $filename) or die "Can't read $filename : $!";
+
+    open(OUT, ">>$no_hits_filename") or die "Could not write to $no_hits_filename";
+
+    while(<IN>){
+      if (/^@/) {
+    my $seq_id = $_;
+    chomp $seq_id;
+    $seq_id = substr($seq_id,1); #Remove the '@' symbol to match Bowtie output
+    my $rest_of_record = scalar <IN>;
+    $rest_of_record .= scalar <IN>;
+    $rest_of_record .= scalar <IN>;
+
+    #Read files containing paired-end reads will sometimes name the reads according to whether they are the #1 or #2 mates
+    # by appending a /1 or /2 suffix to the read name. If no such suffix is present in Bowtie's input, the suffix will be added
+    #when Bowtie prints read names in alignments.
+    #Paired files without the /1, /2 convention will cause problems for fastq_screen, since the Bowte output read name will not match
+    #that in the input file.  The code below corrects for this.
+    if($file2){
+      my $mate = substr($seq_id, -2);
+      unless(($mate eq '/1') or ($mate eq '/2')){
+        $seq_id .= '/'."$count";
+      }
+    }
+
+    unless(exists $$seq_genomes{$seq_id}){
+      print OUT "@"."$seq_id"."\n"."$rest_of_record";
+    }
+      }
+    }
+    close IN;
+    close OUT or die "Coudn't write to $no_hits_filename: $!";
+  }
+}
+
+
+
+__DATA__
+
+Fastq Screen - Screen sequences against a panel of databases
+
+Synopsis
+
+  fastq_screen [OPTION]... [FastQ FILE]...
+
+Function
+
+  Fastq Screen is intended to be used as part of a QC pipeline.
+  It allows you to take a sequence dataset and search it
+  against a set of bowtie databases.  It will then generate
+  both a text and a graphical summary of the results to see if
+  the sequence dataset contains the kind of sequences you expect
+  or not.
+
+Options
+
+  --help -h      Print program help and exit
+
+  --subset       Don't use the whole sequence file to search, but
+                 create a temporary dataset of this size. The
+                 dataset created will be of approximately (within
+                 a factor of 2) of this size. If the real dataset
+                 is smaller than twice the specified size then the
+                 whole dataset will be used. Subsets will be taken
+                 evenly from throughout the whole original dataset
+
+  --paired       Files are paired end. Files must be specified in
+                 the correct order with pairs of files coming
+                 immediately after one another. Results files will
+                 be named after the first file in the pair if the
+                 names differ between the two files.
+
+  --outdir       Specify a directory in which to save output files.
+                 If no directory is specified then output files
+                 are saved into the same directory as the input
+                 file.
+
+  --illumina1_3  Assume that the quality values are in encoded in
+                 Illumina v1.3 format. Defaults to Sanger format
+                 if this flag is not specified
+
+  --quiet        Supress all progress reports on stderr and only
+                 report errors
+
+  --version      Print the program version and exit
+
+  --threads      Specify across how many threads bowtie will be
+                 allowed to run. Overrides the default value set
+                 in the conf file
+
+  --conf         Manually specify a location for the configuration
+                 file to be used for this run. If not specified
+                 then the file will be taken from the same directory
+                 as the fastq_screen program
+
+  --color        FastQ files are in colorspace. This requires that
+                 the libraries configures in the config file are
+                 colorspace indices.
+
+  --bowtie       Specify extra parameters to be passed to bowtie.
+                 These parameters should be quoted to clearly
+                 delimit bowtie parameters from fastq_screen
+                 parameters. You should not try to use this option
+                 to override the normal search or reporting options
+                 for bowtie which are set automatically but it might
+                 be useful to allow reads to be trimmed before
+                 alignment etc.
+
+  --bowtie2      Specify extra parameters to be passed to bowtie 2.
+                 These parameters should be quoted to clearly
+                 delimit bowtie2 parameters from fastq_screen
+                 parameters. You should not try to use this option
+                 to override the normal search or reporting options
+                 for bowtie which are set automatically but it might
+                 be useful to allow reads to be trimmed before
+                 alignment etc.
+
+  --nohits       Writes to a file the sequences that did not map to
+                 any of the specified genome libraries. If the
+                 subset option is also specified, only reads from
+                 the temporary dataset that failed to align to the
+                 reference genomes will be written to the output file.
+
+  --aligner     Specify the aligner to use for the mapping. Valid
+                arguments are 'bowtie' or 'bowtie2'.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_screen.xml	Fri May 16 07:57:33 2014 -0400
@@ -0,0 +1,189 @@
+<tool id="fastq_screen" name="fastq_screen" version="0.4.2">
+    <description>Screen for contamination</description>
+    <requirements>
+        <requirement type="package" version="0.4.2">fastq_screen</requirement>
+        <requirement type="package" version="2.1.0">bowtie2</requirement>
+    </requirements>
+    <command>
+    fastq_screen --aligner="bowtie2" --outdir="." --conf="$fastqrunconf"
+    #if $sampN &gt; 0:
+    --subset "$sampN"
+    #end if
+    "$input1"
+    #if $singlePaired.sPaired == "paired":
+    "$input2"
+    #end if
+    ; mv *_screen.png ${outpng} ; mv *_screen.txt ${outtext}
+    </command>
+
+    <stdio>
+        <regex match=".*" source="both" level="warning" description="fastqc_screen perl script output"/>
+    </stdio>
+
+    <inputs>
+        <param name="jobName" type="text" size="120" value="fastq_screen" label="Job narrative (included in output names as a reminder)"
+          help="Only letters, numbers and underscores _ will be retained in this field">
+           <sanitizer invalid_char="">
+              <valid initial="string.letters,string.digits"><add value="_" /> </valid>
+           </sanitizer>
+        </param>
+        <param name="sampN" type="integer" size="20" value="500000" label="Sample this number of reads. Set to 0 or less to use all"
+        help="Time/precision trade off - fewer reads takes a little less time trading off precision of the estimates."/>
+        <conditional name="singlePaired">
+            <param name="sPaired" type="select" label="Single ended or mate-pair ended reads in this library?">
+              <option value="single" selected="true">Single-end</option>
+              <option value="paired">Paired-end</option>
+            </param>
+            <when value="single">
+                <param format="fastqsanger,fastq" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
+            </when>
+            <when value="paired">
+                <param format="fastqsanger,fastq" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
+                <param format="fastqsanger,fastq" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
+            </when>
+        </conditional>
+
+        <!-- Genome source. -->
+        <repeat name="refGenomes" title="Installed organism reference sequences to check for alignment to your fastq" min="1"
+        help="For checking cell culture sequence for contamination, Mycoplasma Genitalium might be a good choice eg">
+            <param name="ref" type="select" label="Bowtie2 reference genome">
+                <options from_data_table="bowtie2_indexes">
+                <filter type="sort_by" column="3"/>
+                <validator type="no_options" message="No indexes are available for bowtie2"/>
+                </options>
+            </param>
+        </repeat>
+    </inputs>
+
+    <outputs>
+       <data format="tabular" name="outtext" label="${jobName}.xls"/>
+       <data format="png" name="outpng" label="${jobName}.png"/>
+    </outputs>
+    <configfiles>
+         <configfile name="fastqrunconf">
+###### autogenerated by fastq_screen.xml for fastq_screen run
+BOWTIE2 /data/app/bin/bowtie2
+#for $refs in $refGenomes:
+DATABASE    $refs.ref.fields.value $refs.ref.fields.path    BOWTIE2
+#end for
+         </configfile>
+    </configfiles>
+
+<help>
+
+**What it does**
+This is a Galaxy wrapper exposing software from Babraham -fastq_screen_
+Designed to search sequence data in fastq files for matches to contaminants or to check the likely
+species.
+In QC checking, you can use it to look for (eg) sequence from contaminating mycoplasmae in cell cultures - it may be non-differential but it will be pro-inflammatory and, well, less than ideal.
+
+Here's the help from the perl script used by this wrapper:
+
+Fastq Screen - Screen sequences against a panel of databases
+
+Synopsis
+
+  fastq_screen [OPTION]... [FastQ FILE]...
+
+Function
+
+  Fastq Screen is intended to be used as part of a QC pipeline.
+  It allows you to take a sequence dataset and search it
+  against a set of bowtie databases.  It will then generate
+  both a text and a graphical summary of the results to see if
+  the sequence dataset contains the kind of sequences you expect
+  or not.
+
+Options
+
+  --help -h      Print program help and exit
+
+  --subset       Don't use the whole sequence file to search, but
+                 create a temporary dataset of this size. The
+                 dataset created will be of approximately (within
+                 a factor of 2) of this size. If the real dataset
+                 is smaller than twice the specified size then the
+                 whole dataset will be used. Subsets will be taken
+                 evenly from throughout the whole original dataset
+
+  --paired       Files are paired end. Files must be specified in
+                 the correct order with pairs of files coming
+                 immediately after one another. Results files will
+                 be named after the first file in the pair if the
+                 names differ between the two files.
+
+  --outdir       Specify a directory in which to save output files.
+                 If no directory is specified then output files
+                 are saved into the same directory as the input
+                 file.
+
+  --illumina1_3  Assume that the quality values are in encoded in
+                 Illumina v1.3 format. Defaults to Sanger format
+                 if this flag is not specified
+
+  --quiet        Supress all progress reports on stderr and only
+                 report errors
+
+  --version      Print the program version and exit
+
+  --threads      Specify across how many threads bowtie will be
+                 allowed to run. Overrides the default value set
+                 in the conf file
+
+  --conf         Manually specify a location for the configuration
+                 file to be used for this run. If not specified
+                 then the file will be taken from the same directory
+                 as the fastq_screen program
+
+  --color        FastQ files are in colorspace. This requires that
+                 the libraries configures in the config file are
+                 colorspace indices.
+
+  --bowtie       Specify extra parameters to be passed to bowtie.
+                 These parameters should be quoted to clearly
+                 delimit bowtie parameters from fastq_screen
+                 parameters. You should not try to use this option
+                 to override the normal search or reporting options
+                 for bowtie which are set automatically but it might
+                 be useful to allow reads to be trimmed before
+                 alignment etc.
+
+  --bowtie2      Specify extra parameters to be passed to bowtie 2.
+                 These parameters should be quoted to clearly
+                 delimit bowtie2 parameters from fastq_screen
+                 parameters. You should not try to use this option
+                 to override the normal search or reporting options
+                 for bowtie which are set automatically but it might
+                 be useful to allow reads to be trimmed before
+                 alignment etc.
+
+  --nohits       Writes to a file the sequences that did not map to
+                 any of the specified genome libraries. If the
+                 subset option is also specified, only reads from
+                 the temporary dataset that failed to align to the
+                 reference genomes will be written to the output file.
+
+  --aligner     Specify the aligner to use for the mapping. Valid
+                arguments are 'bowtie' or 'bowtie2'.
+
+
+**Attributions**
+
+Note that each component has its own license.
+Good luck with figuring out your obligations.
+
+fastq_screen - see the web site at Fastq_screen_
+
+Galaxy_ (that's what you are using right now!) for gluing everything together
+
+
+Code and documentation comprising this tool was written by Ross Lazarus and that part is Licensed_ the same way as other rgenetics artefacts
+
+.. _Fastq_screen: http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen
+
+.. _Galaxy: http://getgalaxy.org
+
+.. _Licensed: https://www.gnu.org/licenses/lgpl.html
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri May 16 07:57:33 2014 -0400
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of indexes in the Bowtie2 mapper format -->
+    <table name="bowtie2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie2_indices.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri May 16 07:57:33 2014 -0400
@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="bowtie2" version="2.1.0">
+        <repository changeset_revision="606d435a57a4" name="package_bowtie2_2_1_0" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="fastq_screen" version="0.4.2">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.4.2.tar.gz</action>
+                <action type="move_directory_files">
+                    <source_directory>.</source_directory>
+                    <destination_directory>$INSTALL_DIR</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+Galaxy wrapper for http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/ from Simon Andrews.
+Useful if you want to (eg) check your RNA sequence from a cell culture for contamination from (eg) Mycoplasma Genitalium, or if you have sequence from an unknown organism :(
+Permanently set to use your Galaxy bowtie2 indices - expects Bowtie2 to be installed so the Galaxy package_bowtie2 is installed if it's not already there.
+
+###WARNING### The fastq_screen perl script requires that the GD::Graph http://search.cpan.org/~bwarfield/GDGraph/Graph.pm module be installed.
+Otherwise, NO plot will be created. This wrapper relies on the system perl - a proper Galaxy perl dependency is just too icky to think about.
+
+Send code if you think otherwise.
+        </readme>
+    </package>
+
+</tool_dependency>