Galaxy |

Changeset 0:8a8adbf98ecc (2014-05-16)

Next changeset 1:3480daf4ed27 (2014-05-16)

Commit message:
First upload

added:
bowtie2_indices.loc.sample
fastq_screen
fastq_screen.xml
tool_data_table_conf.xml.sample
tool_dependencies.xml

diff -r 000000000000 -r 8a8adbf98ecc bowtie2_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bowtie2_indices.loc.sample Fri May 16 07:57:33 2014 -0400

@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie2 indexed sequences data files. You will
+#need to create these data files and then create a bowtie2_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie2_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexes stored in
+#/depot/data2/galaxy/bowtie2/hg18/,
+#then the bowtie2_indices.loc entry would look like this:
+#
+#hg18 hg18 hg18 /depot/data2/galaxy/bowtie2/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie2/hg18/ directory
+#would contain hg18.*.bt2 files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.bt2
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.bt2
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.bt2
+#...etc...
+#
+#Your bowtie2_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie2/hg18/hg18canon
+#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie2/hg18/hg18full
+#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie2/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#

diff -r 000000000000 -r 8a8adbf98ecc fastq_screen
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_screen Fri May 16 07:57:33 2014 -0400

[

b'@@ -0,0 +1,1073 @@\n+#!/usr/bin/perl\n+use warnings;\n+use strict;\n+use Getopt::Long;\n+use FindBin qw($RealBin);\n+use File::Copy;\n+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);\n+use File::Temp qw/ tempfile tempdir /;\n+\n+our $VERSION = "0.4.2";\n+\n+###########################################################################\n+###########################################################################\n+## ##\n+## Copyright 2013, Simon Andrews (simon.andrews@babraham.ac.uk) ##\n+## Mark Fiers (Plant & Food Research, NZ) ##\n+## Steven Wingett (steven.wingett@babraham.ac.uk) ##\n+## ##\n+## This program is free software: you can redistribute it and/or modify ##\n+## it under the terms of the GNU General Public License as published by ##\n+## the Free Software Foundation, either version 3 of the License, or ##\n+## (at your option) any later version. ##\n+## ##\n+## This program is distributed in the hope that it will be useful, ##\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of ##\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ##\n+## GNU General Public License for more details. ##\n+## ##\n+## You should have received a copy of the GNU General Public License ##\n+## along with this program. If not, see <http://www.gnu.org/licenses/>. 
##\n+###########################################################################\n+###########################################################################\n+\n+\n+unless(@ARGV){ #Display a help message if no arguments specified\n+ print while (<DATA>);\n+ exit;\n+}\n+\n+\n+## Option variables\n+my $subset_count;\n+my $outdir;\n+my $illumina;\n+my $quiet;\n+my $help;\n+my $version;\n+my $paired;\n+my $threads;\n+my $color;\n+my $conf;\n+my $bowtie_opts;\n+my $bowtie2_opts;\n+my $nohits;\n+my $aligner;\n+\n+my $config_result = GetOptions("subset=i" => \\$subset_count,\n+ "outdir=s" => \\$outdir,\n+ "illumina1_3" => \\$illumina,\n+ "quiet" => \\$quiet,\n+ "help" => \\$help,\n+ "version" => \\$version,\n+ "paired" => \\$paired,\n+ "conf=s" => \\$conf,\n+ "color" => \\$color,\n+ "bowtie=s" => \\$bowtie_opts,\n+ "bowtie2=s" => \\$bowtie2_opts,\n+ "threads=i" => \\$threads,\n+ "nohits" => \\$nohits,\n+ "aligner=s" => \\$aligner\n+ );\n+\n+die "Could not parse options" unless ($config_result);\n+\n+if ($help) {\n+ print while (<DATA>);\n+ exit;\n+}\n+\n+if ($version) {\n+ print "fastq_screen v$VERSION\\n";\n+ exit;\n+}\n+\n+if ($color) {\n+ $color = \'-C\';\n+}\n+else {\n+ $color = \'\';\n+}\n+\n+if(defined $aligner){\n+ if( ($aligner ne \'bowtie\') and ($aligner ne \'bowtie2\') ){\n+ die "Valid options for --aligner are \'bowtie\' or \'bowtie2\' only.\\n";\n+ }elsif( ($aligner eq \'bowtie\') and (defined $bowtie2_opts) ){\n+ die "Bowtie selected as the aligner yet bowtie2 options specified.\\n";\n+ }elsif( ($aligner eq \'bowtie2\') and (defined $bowtie_opts) ){\n+ die "Bowtie 2 selected as the aligner yet bowtie options specified.\\n";\n+ }\n+}\n+\n+$bowtie_opts = \'\' unless ($bowtie_opts); # Get undef warning otherwise\n+$bowtie2_opts = \'\' unless ($bowtie2_opts); # Get undef warning otherwise\n+\n+# Configuration\n+my $number_of_threads = 1;\n+my $path_to_bowtie = \'bowtie\';\n+my $path_to_bowtie2 = \'bowtie2\';\n+my @libraries;\n+my %library_bowtie_version; # 
Records which library should be used with which version of bowtie (i.e bowtie1 or bowtie2)\n+\n+load_configuration($conf);\n+\n+# Override the '..b'$!";\n+ }\n+}\n+\n+\n+\n+__DATA__\n+\n+Fastq Screen - Screen sequences against a panel of databases\n+\n+Synopsis\n+\n+ fastq_screen [OPTION]... [FastQ FILE]...\n+\n+Function\n+\n+ Fastq Screen is intended to be used as part of a QC pipeline.\n+ It allows you to take a sequence dataset and search it\n+ against a set of bowtie databases. It will then generate\n+ both a text and a graphical summary of the results to see if\n+ the sequence dataset contains the kind of sequences you expect\n+ or not.\n+\n+Options\n+\n+ --help -h Print program help and exit\n+\n+ --subset Don\'t use the whole sequence file to search, but\n+ create a temporary dataset of this size. The\n+ dataset created will be of approximately (within\n+ a factor of 2) of this size. If the real dataset\n+ is smaller than twice the specified size then the\n+ whole dataset will be used. Subsets will be taken\n+ evenly from throughout the whole original dataset\n+\n+ --paired Files are paired end. Files must be specified in\n+ the correct order with pairs of files coming\n+ immediately after one another. Results files will\n+ be named after the first file in the pair if the\n+ names differ between the two files.\n+\n+ --outdir Specify a directory in which to save output files.\n+ If no directory is specified then output files\n+ are saved into the same directory as the input\n+ file.\n+\n+ --illumina1_3 Assume that the quality values are in encoded in\n+ Illumina v1.3 format. Defaults to Sanger format\n+ if this flag is not specified\n+\n+ --quiet Supress all progress reports on stderr and only\n+ report errors\n+\n+ --version Print the program version and exit\n+\n+ --threads Specify across how many threads bowtie will be\n+ allowed to run. 
Overrides the default value set\n+ in the conf file\n+\n+ --conf Manually specify a location for the configuration\n+ file to be used for this run. If not specified \n+ then the file will be taken from the same directory \n+ as the fastq_screen program\n+\n+ --color FastQ files are in colorspace. This requires that \n+ the libraries configures in the config file are \n+ colorspace indices.\n+\n+ --bowtie Specify extra parameters to be passed to bowtie. \n+ These parameters should be quoted to clearly \n+ delimit bowtie parameters from fastq_screen \n+ parameters. You should not try to use this option \n+ to override the normal search or reporting options \n+ for bowtie which are set automatically but it might \n+ be useful to allow reads to be trimmed before\n+ alignment etc.\n+\n+ --bowtie2 Specify extra parameters to be passed to bowtie 2. \n+ These parameters should be quoted to clearly \n+ delimit bowtie2 parameters from fastq_screen \n+ parameters. You should not try to use this option \n+ to override the normal search or reporting options \n+ for bowtie which are set automatically but it might \n+ be useful to allow reads to be trimmed before\n+ alignment etc.\n+\n+ --nohits Writes to a file the sequences that did not map to \n+ any of the specified genome libraries. If the \n+ subset option is also specified, only reads from \n+ the temporary dataset that failed to align to the\n+ reference genomes will be written to the output file.\n+\n+ --aligner Specify the aligner to use for the mapping. Valid \n+ arguments are \'bowtie\' or \'bowtie2\'.\n+ \n'

diff -r 000000000000 -r 8a8adbf98ecc fastq_screen.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_screen.xml Fri May 16 07:57:33 2014 -0400

b'@@ -0,0 +1,189 @@\n+<tool id="fastq_screen" name="fastq_screen" version="0.4.2">\n+ <description>Screen for contamination</description>\n+ <requirements>\n+ <requirement type="package" version="0.4.2">fastq_screen</requirement>\n+ <requirement type="package" version="2.1.0">bowtie2</requirement>\n+ </requirements>\n+ <command> \n+ fastq_screen --aligner="bowtie2" --outdir="." --conf="$fastqrunconf"\n+ #if $sampN > 0:\n+ --subset "$sampN"\n+ #end if\n+ "$input1"\n+ #if $singlePaired.sPaired == "paired":\n+ "$input2" \n+ #end if\n+ ; mv *_screen.png ${outpng} ; mv *_screen.txt ${outtext}\n+ </command>\n+\n+ <stdio>\n+ <regex match=".*" source="both" level="warning" description="fastqc_screen perl script output"/>\n+ </stdio>\n+\n+ <inputs>\n+ <param name="jobName" type="text" size="120" value="fastq_screen" label="Job narrative (included in output names as a reminder)" \n+ help="Only letters, numbers and underscores _ will be retained in this field">\n+ <sanitizer invalid_char="">\n+ <valid initial="string.letters,string.digits"><add value="_" /> </valid>\n+ </sanitizer>\n+ </param>\n+ <param name="sampN" type="integer" size="20" value="500000" label="Sample this number of reads. 
Set to 0 or less to use all"\n+ help="Time/precision trade off - fewer reads takes a little less time trading off precision of the estimates."/>\n+ <conditional name="singlePaired">\n+ <param name="sPaired" type="select" label="Single ended or mate-pair ended reads in this library?">\n+ <option value="single" selected="true">Single-end</option>\n+ <option value="paired">Paired-end</option>\n+ </param>\n+ <when value="single">\n+ <param format="fastqsanger,fastq" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>\n+ </when>\n+ <when value="paired">\n+ <param format="fastqsanger,fastq" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />\n+ <param format="fastqsanger,fastq" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />\n+ </when>\n+ </conditional>\n+\n+ \n+ <repeat name="refGenomes" title="Installed organism reference sequences to check for alignment to your fastq" min="1"\n+ help="For checking cell culture sequence for contamination, Mycoplasma Genitalium might be a good choice eg">\n+ <param name="ref" type="select" label="Bowtie2 reference genome">\n+ <options from_data_table="bowtie2_indexes">\n+ <filter type="sort_by" column="3"/>\n+ <validator type="no_options" message="No indexes are available for bowtie2"/>\n+ </options>\n+ </param>\n+ </repeat>\n+ </inputs>\n+\n+ <outputs>\n+ <data format="tabular" name="outtext" label="${jobName}.xls"/>\n+ <data format="png" name="outpng" label="${jobName}.png"/>\n+ </outputs>\n+ <configfiles>\n+ <configfile name="fastqrunconf">\n+###### autogenerated by fastq_screen.xml for fastq_screen run\n+BOWTIE2 /data/app/bin/bowtie2\n+#for $refs in $refGenomes:\n+DATABASE $refs.ref.fields.value $refs.ref.fields.path BOWTIE2\n+#end for\n+ 
</configfile>\n+ </configfiles>\n+ \n+<help>\n+\n+**What it does**\n+This is a Galaxy wrapper exposing software from Babraham -fastq_screen_\n+Designed to search sequence data in fastq files for matches to contaminants or to check the likely\n+species.\n+In QC checking, you can use it to look for (eg) sequence from contaminating mycoplasmae in cell cultures - it may be non-differential but it wil'..b"e whole sequence file to search, but\n+ create a temporary dataset of this size. The\n+ dataset created will be of approximately (within\n+ a factor of 2) of this size. If the real dataset\n+ is smaller than twice the specified size then the\n+ whole dataset will be used. Subsets will be taken\n+ evenly from throughout the whole original dataset\n+\n+ --paired Files are paired end. Files must be specified in\n+ the correct order with pairs of files coming\n+ immediately after one another. Results files will\n+ be named after the first file in the pair if the\n+ names differ between the two files.\n+\n+ --outdir Specify a directory in which to save output files.\n+ If no directory is specified then output files\n+ are saved into the same directory as the input\n+ file.\n+\n+ --illumina1_3 Assume that the quality values are in encoded in\n+ Illumina v1.3 format. Defaults to Sanger format\n+ if this flag is not specified\n+\n+ --quiet Supress all progress reports on stderr and only\n+ report errors\n+\n+ --version Print the program version and exit\n+\n+ --threads Specify across how many threads bowtie will be\n+ allowed to run. Overrides the default value set\n+ in the conf file\n+\n+ --conf Manually specify a location for the configuration\n+ file to be used for this run. If not specified \n+ then the file will be taken from the same directory \n+ as the fastq_screen program\n+\n+ --color FastQ files are in colorspace. 
This requires that \n+ the libraries configures in the config file are \n+ colorspace indices.\n+\n+ --bowtie Specify extra parameters to be passed to bowtie. \n+ These parameters should be quoted to clearly \n+ delimit bowtie parameters from fastq_screen \n+ parameters. You should not try to use this option \n+ to override the normal search or reporting options \n+ for bowtie which are set automatically but it might \n+ be useful to allow reads to be trimmed before\n+ alignment etc.\n+\n+ --bowtie2 Specify extra parameters to be passed to bowtie 2. \n+ These parameters should be quoted to clearly \n+ delimit bowtie2 parameters from fastq_screen \n+ parameters. You should not try to use this option \n+ to override the normal search or reporting options \n+ for bowtie which are set automatically but it might \n+ be useful to allow reads to be trimmed before\n+ alignment etc.\n+\n+ --nohits Writes to a file the sequences that did not map to \n+ any of the specified genome libraries. If the \n+ subset option is also specified, only reads from \n+ the temporary dataset that failed to align to the\n+ reference genomes will be written to the output file.\n+\n+ --aligner Specify the aligner to use for the mapping. Valid \n+ arguments are 'bowtie' or 'bowtie2'.\n+ \n+ \n+**Attributions**\n+\n+Note that each component has its own license.\n+Good luck with figuring out your obligations.\n+\n+fastq_screen - see the web site at Fastq_screen_\n+\n+Galaxy_ (that's what you are using right now!) for gluing everything together \n+\n+\n+Code and documentation comprising this tool was written by Ross Lazarus and that part is Licensed_ the same way as other rgenetics artefacts\n+\n+.. _Fastq_screen: http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen\n+\n+.. _Galaxy: http://getgalaxy.org\n+\n+.. _Licensed: https://www.gnu.org/licenses/lgpl.html\n+\n+</help>\n+</tool>\n"

diff -r 000000000000 -r 8a8adbf98ecc tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri May 16 07:57:33 2014 -0400

@@ -0,0 +1,8 @@
+
+<tables>
+    
+    <table name="bowtie2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie2_indices.loc" />
+    </table>
+</tables>

diff -r 000000000000 -r 8a8adbf98ecc tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Fri May 16 07:57:33 2014 -0400

@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="bowtie2" version="2.1.0">
+        <repository changeset_revision="606d435a57a4" name="package_bowtie2_2_1_0" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="fastq_screen" version="0.4.2">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.4.2.tar.gz</action>
+                <action type="move_directory_files">
+                    <source_directory>.</source_directory>
+                    <destination_directory>$INSTALL_DIR</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+Galaxy wrapper for http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/ from Simon Andrews.
+Useful if you want to (eg) check your RNA sequence from a cell culture for contamination from (eg) Mycoplasma Genitalium, or if you have sequence from an unknown organism :(
+Permanently set to use your Galaxy bowtie2 indices - expects Bowtie2 to be installed so the Galaxy package_bowtie2 is installed if it's not already there.
+
+###WARNING### The fastq_screen perl script requires that the GD::Graph http://search.cpan.org/~bwarfield/GDGraph/Graph.pm module be installed.
+Otherwise, NO plot will be created. This wrapper relies on the system perl - a proper Galaxy perl dependency is just too icky to think about.
+
+Send code if you think otherwise.
+        </readme>
+    </package>
+
+</tool_dependency>