| Next changeset 1:3480daf4ed27 (2014-05-16) |
|
Commit message:
First upload |
|
added:
bowtie2_indices.loc.sample fastq_screen fastq_screen.xml tool_data_table_conf.xml.sample tool_dependencies.xml |
| b |
| diff -r 000000000000 -r 8a8adbf98ecc bowtie2_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bowtie2_indices.loc.sample Fri May 16 07:57:33 2014 -0400 |
| b |
| @@ -0,0 +1,37 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Bowtie2 indexed sequences data files. You will +#need to create these data files and then create a bowtie2_indices.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bowtie2_indices.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg18 indexes stored in +#/depot/data2/galaxy/bowtie2/hg18/, +#then the bowtie2_indices.loc entry would look like this: +# +#hg18 hg18 hg18 /depot/data2/galaxy/bowtie2/hg18/hg18 +# +#and your /depot/data2/galaxy/bowtie2/hg18/ directory +#would contain hg18.*.bt2 files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.bt2 +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.bt2 +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.bt2 +#...etc... +# +#Your bowtie2_indices.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie2/hg18/hg18canon +#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie2/hg18/hg18full +#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie2/hg19/hg19 +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# |
| b |
| diff -r 000000000000 -r 8a8adbf98ecc fastq_screen --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_screen Fri May 16 07:57:33 2014 -0400 |
| [ |
| b'@@ -0,0 +1,1073 @@\n+#!/usr/bin/perl\n+use warnings;\n+use strict;\n+use Getopt::Long;\n+use FindBin qw($RealBin);\n+use File::Copy;\n+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);\n+use File::Temp qw/ tempfile tempdir /;\n+\n+our $VERSION = "0.4.2";\n+\n+###########################################################################\n+###########################################################################\n+## ##\n+## Copyright 2013, Simon Andrews (simon.andrews@babraham.ac.uk) ##\n+## Mark Fiers (Plant & Food Research, NZ) ##\n+## Steven Wingett (steven.wingett@babraham.ac.uk) ##\n+## ##\n+## This program is free software: you can redistribute it and/or modify ##\n+## it under the terms of the GNU General Public License as published by ##\n+## the Free Software Foundation, either version 3 of the License, or ##\n+## (at your option) any later version. ##\n+## ##\n+## This program is distributed in the hope that it will be useful, ##\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of ##\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ##\n+## GNU General Public License for more details. ##\n+## ##\n+## You should have received a copy of the GNU General Public License ##\n+## along with this program. If not, see <http://www.gnu.org/licenses/>. 
##\n+###########################################################################\n+###########################################################################\n+\n+\n+unless(@ARGV){ #Display a help message if no arguments specified\n+ print while (<DATA>);\n+ exit;\n+}\n+\n+\n+## Option variables\n+my $subset_count;\n+my $outdir;\n+my $illumina;\n+my $quiet;\n+my $help;\n+my $version;\n+my $paired;\n+my $threads;\n+my $color;\n+my $conf;\n+my $bowtie_opts;\n+my $bowtie2_opts;\n+my $nohits;\n+my $aligner;\n+\n+my $config_result = GetOptions("subset=i" => \\$subset_count,\n+ "outdir=s" => \\$outdir,\n+ "illumina1_3" => \\$illumina,\n+ "quiet" => \\$quiet,\n+ "help" => \\$help,\n+ "version" => \\$version,\n+ "paired" => \\$paired,\n+ "conf=s" => \\$conf,\n+ "color" => \\$color,\n+ "bowtie=s" => \\$bowtie_opts,\n+ "bowtie2=s" => \\$bowtie2_opts,\n+ "threads=i" => \\$threads,\n+ "nohits" => \\$nohits,\n+ "aligner=s" => \\$aligner\n+ );\n+\n+die "Could not parse options" unless ($config_result);\n+\n+if ($help) {\n+ print while (<DATA>);\n+ exit;\n+}\n+\n+if ($version) {\n+ print "fastq_screen v$VERSION\\n";\n+ exit;\n+}\n+\n+if ($color) {\n+ $color = \'-C\';\n+}\n+else {\n+ $color = \'\';\n+}\n+\n+if(defined $aligner){\n+ if( ($aligner ne \'bowtie\') and ($aligner ne \'bowtie2\') ){\n+ die "Valid options for --aligner are \'bowtie\' or \'bowtie2\' only.\\n";\n+ }elsif( ($aligner eq \'bowtie\') and (defined $bowtie2_opts) ){\n+ die "Bowtie selected as the aligner yet bowtie2 options specified.\\n";\n+ }elsif( ($aligner eq \'bowtie2\') and (defined $bowtie_opts) ){\n+ die "Bowtie 2 selected as the aligner yet bowtie options specified.\\n";\n+ }\n+}\n+\n+$bowtie_opts = \'\' unless ($bowtie_opts); # Get undef warning otherwise\n+$bowtie2_opts = \'\' unless ($bowtie2_opts); # Get undef warning otherwise\n+\n+# Configuration\n+my $number_of_threads = 1;\n+my $path_to_bowtie = \'bowtie\';\n+my $path_to_bowtie2 = \'bowtie2\';\n+my @libraries;\n+my %library_bowtie_version; # 
Records which library should be used with which version of bowtie (i.e bowtie1 or bowtie2)\n+\n+load_configuration($conf);\n+\n+# Override the '..b'$!";\n+ }\n+}\n+\n+\n+\n+__DATA__\n+\n+Fastq Screen - Screen sequences against a panel of databases\n+\n+Synopsis\n+\n+ fastq_screen [OPTION]... [FastQ FILE]...\n+\n+Function\n+\n+ Fastq Screen is intended to be used as part of a QC pipeline.\n+ It allows you to take a sequence dataset and search it\n+ against a set of bowtie databases. It will then generate\n+ both a text and a graphical summary of the results to see if\n+ the sequence dataset contains the kind of sequences you expect\n+ or not.\n+\n+Options\n+\n+ --help -h Print program help and exit\n+\n+ --subset Don\'t use the whole sequence file to search, but\n+ create a temporary dataset of this size. The\n+ dataset created will be of approximately (within\n+ a factor of 2) of this size. If the real dataset\n+ is smaller than twice the specified size then the\n+ whole dataset will be used. Subsets will be taken\n+ evenly from throughout the whole original dataset\n+\n+ --paired Files are paired end. Files must be specified in\n+ the correct order with pairs of files coming\n+ immediately after one another. Results files will\n+ be named after the first file in the pair if the\n+ names differ between the two files.\n+\n+ --outdir Specify a directory in which to save output files.\n+ If no directory is specified then output files\n+ are saved into the same directory as the input\n+ file.\n+\n+ --illumina1_3 Assume that the quality values are in encoded in\n+ Illumina v1.3 format. Defaults to Sanger format\n+ if this flag is not specified\n+\n+ --quiet Supress all progress reports on stderr and only\n+ report errors\n+\n+ --version Print the program version and exit\n+\n+ --threads Specify across how many threads bowtie will be\n+ allowed to run. 
Overrides the default value set\n+ in the conf file\n+\n+ --conf Manually specify a location for the configuration\n+ file to be used for this run. If not specified \n+ then the file will be taken from the same directory \n+ as the fastq_screen program\n+\n+ --color FastQ files are in colorspace. This requires that \n+ the libraries configures in the config file are \n+ colorspace indices.\n+\n+ --bowtie Specify extra parameters to be passed to bowtie. \n+ These parameters should be quoted to clearly \n+ delimit bowtie parameters from fastq_screen \n+ parameters. You should not try to use this option \n+ to override the normal search or reporting options \n+ for bowtie which are set automatically but it might \n+ be useful to allow reads to be trimmed before\n+ alignment etc.\n+\n+ --bowtie2 Specify extra parameters to be passed to bowtie 2. \n+ These parameters should be quoted to clearly \n+ delimit bowtie2 parameters from fastq_screen \n+ parameters. You should not try to use this option \n+ to override the normal search or reporting options \n+ for bowtie which are set automatically but it might \n+ be useful to allow reads to be trimmed before\n+ alignment etc.\n+\n+ --nohits Writes to a file the sequences that did not map to \n+ any of the specified genome libraries. If the \n+ subset option is also specified, only reads from \n+ the temporary dataset that failed to align to the\n+ reference genomes will be written to the output file.\n+\n+ --aligner Specify the aligner to use for the mapping. Valid \n+ arguments are \'bowtie\' or \'bowtie2\'.\n+ \n' |
| b |
| diff -r 000000000000 -r 8a8adbf98ecc fastq_screen.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_screen.xml Fri May 16 07:57:33 2014 -0400 |
| b |
| b'@@ -0,0 +1,189 @@\n+<tool id="fastq_screen" name="fastq_screen" version="0.4.2">\n+ <description>Screen for contamination</description>\n+ <requirements>\n+ <requirement type="package" version="0.4.2">fastq_screen</requirement>\n+ <requirement type="package" version="2.1.0">bowtie2</requirement>\n+ </requirements>\n+ <command> \n+ fastq_screen --aligner="bowtie2" --outdir="." --conf="$fastqrunconf"\n+ #if $sampN > 0:\n+ --subset "$sampN"\n+ #end if\n+ "$input1"\n+ #if $singlePaired.sPaired == "paired":\n+ "$input2" \n+ #end if\n+ ; mv *_screen.png ${outpng} ; mv *_screen.txt ${outtext}\n+ </command>\n+\n+ <stdio>\n+ <regex match=".*" source="both" level="warning" description="fastqc_screen perl script output"/>\n+ </stdio>\n+\n+ <inputs>\n+ <param name="jobName" type="text" size="120" value="fastq_screen" label="Job narrative (included in output names as a reminder)" \n+ help="Only letters, numbers and underscores _ will be retained in this field">\n+ <sanitizer invalid_char="">\n+ <valid initial="string.letters,string.digits"><add value="_" /> </valid>\n+ </sanitizer>\n+ </param>\n+ <param name="sampN" type="integer" size="20" value="500000" label="Sample this number of reads. 
Set to 0 or less to use all"\n+ help="Time/precision trade off - fewer reads takes a little less time trading off precision of the estimates."/>\n+ <conditional name="singlePaired">\n+ <param name="sPaired" type="select" label="Single ended or mate-pair ended reads in this library?">\n+ <option value="single" selected="true">Single-end</option>\n+ <option value="paired">Paired-end</option>\n+ </param>\n+ <when value="single">\n+ <param format="fastqsanger,fastq" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>\n+ </when>\n+ <when value="paired">\n+ <param format="fastqsanger,fastq" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />\n+ <param format="fastqsanger,fastq" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />\n+ </when>\n+ </conditional>\n+\n+ <!-- Genome source. 
-->\n+ <repeat name="refGenomes" title="Installed organism reference sequences to check for alignment to your fastq" min="1"\n+ help="For checking cell culture sequence for contamination, Mycoplasma Genitalium might be a good choice eg">\n+ <param name="ref" type="select" label="Bowtie2 reference genome">\n+ <options from_data_table="bowtie2_indexes">\n+ <filter type="sort_by" column="3"/>\n+ <validator type="no_options" message="No indexes are available for bowtie2"/>\n+ </options>\n+ </param>\n+ </repeat>\n+ </inputs>\n+\n+ <outputs>\n+ <data format="tabular" name="outtext" label="${jobName}.xls"/>\n+ <data format="png" name="outpng" label="${jobName}.png"/>\n+ </outputs>\n+ <configfiles>\n+ <configfile name="fastqrunconf">\n+###### autogenerated by fastq_screen.xml for fastq_screen run\n+BOWTIE2 /data/app/bin/bowtie2\n+#for $refs in $refGenomes:\n+DATABASE $refs.ref.fields.value $refs.ref.fields.path BOWTIE2\n+#end for\n+ </configfile>\n+ </configfiles>\n+ \n+<help>\n+\n+**What it does**\n+This is a Galaxy wrapper exposing software from Babraham -fastq_screen_\n+Designed to search sequence data in fastq files for matches to contaminants or to check the likely\n+species.\n+In QC checking, you can use it to look for (eg) sequence from contaminating mycoplasmae in cell cultures - it may be non-differential but it wil'..b"e whole sequence file to search, but\n+ create a temporary dataset of this size. The\n+ dataset created will be of approximately (within\n+ a factor of 2) of this size. If the real dataset\n+ is smaller than twice the specified size then the\n+ whole dataset will be used. Subsets will be taken\n+ evenly from throughout the whole original dataset\n+\n+ --paired Files are paired end. Files must be specified in\n+ the correct order with pairs of files coming\n+ immediately after one another. 
Results files will\n+ be named after the first file in the pair if the\n+ names differ between the two files.\n+\n+ --outdir Specify a directory in which to save output files.\n+ If no directory is specified then output files\n+ are saved into the same directory as the input\n+ file.\n+\n+ --illumina1_3 Assume that the quality values are in encoded in\n+ Illumina v1.3 format. Defaults to Sanger format\n+ if this flag is not specified\n+\n+ --quiet Supress all progress reports on stderr and only\n+ report errors\n+\n+ --version Print the program version and exit\n+\n+ --threads Specify across how many threads bowtie will be\n+ allowed to run. Overrides the default value set\n+ in the conf file\n+\n+ --conf Manually specify a location for the configuration\n+ file to be used for this run. If not specified \n+ then the file will be taken from the same directory \n+ as the fastq_screen program\n+\n+ --color FastQ files are in colorspace. This requires that \n+ the libraries configures in the config file are \n+ colorspace indices.\n+\n+ --bowtie Specify extra parameters to be passed to bowtie. \n+ These parameters should be quoted to clearly \n+ delimit bowtie parameters from fastq_screen \n+ parameters. You should not try to use this option \n+ to override the normal search or reporting options \n+ for bowtie which are set automatically but it might \n+ be useful to allow reads to be trimmed before\n+ alignment etc.\n+\n+ --bowtie2 Specify extra parameters to be passed to bowtie 2. \n+ These parameters should be quoted to clearly \n+ delimit bowtie2 parameters from fastq_screen \n+ parameters. You should not try to use this option \n+ to override the normal search or reporting options \n+ for bowtie which are set automatically but it might \n+ be useful to allow reads to be trimmed before\n+ alignment etc.\n+\n+ --nohits Writes to a file the sequences that did not map to \n+ any of the specified genome libraries. 
If the \n+ subset option is also specified, only reads from \n+ the temporary dataset that failed to align to the\n+ reference genomes will be written to the output file.\n+\n+ --aligner Specify the aligner to use for the mapping. Valid \n+ arguments are 'bowtie' or 'bowtie2'.\n+ \n+ \n+**Attributions**\n+\n+Note that each component has its own license.\n+Good luck with figuring out your obligations.\n+\n+fastq_screen - see the web site at Fastq_screen_\n+\n+Galaxy_ (that's what you are using right now!) for gluing everything together \n+\n+\n+Code and documentation comprising this tool was written by Ross Lazarus and that part is Licensed_ the same way as other rgenetics artefacts\n+\n+.. _Fastq_screen: http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen\n+\n+.. _Galaxy: http://getgalaxy.org\n+\n+.. _Licensed: https://www.gnu.org/licenses/lgpl.html\n+\n+</help>\n+</tool>\n" |
| b |
| diff -r 000000000000 -r 8a8adbf98ecc tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri May 16 07:57:33 2014 -0400 |
| b |
| @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of indexes in the Bowtie2 mapper format --> + <table name="bowtie2_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/bowtie2_indices.loc" /> + </table> +</tables> |
| b |
| diff -r 000000000000 -r 8a8adbf98ecc tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri May 16 07:57:33 2014 -0400 |
| b |
| @@ -0,0 +1,31 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="bowtie2" version="2.1.0"> + <repository changeset_revision="606d435a57a4" name="package_bowtie2_2_1_0" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="fastq_screen" version="0.4.2"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.4.2.tar.gz</action> + <action type="move_directory_files"> + <source_directory>.</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + <action type="set_environment"> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable> + </action> + </actions> + </install> + <readme> +Galaxy wrapper for http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/ from Simon Andrews. +Useful if you want to (eg) check your RNA sequence from a cell culture for contamination from (eg) Mycoplasma Genitalium, or if you have sequence from an unknown organism :( +Permanently set to use your Galaxy bowtie2 indices - expects Bowtie2 to be installed so the Galaxy package_bowtie2 is installed if it's not already there. + +###WARNING### The fastq_screen perl script requires that the GD::Graph http://search.cpan.org/~bwarfield/GDGraph/Graph.pm module be installed. +Otherwise, NO plot will be created. This wrapper relies on the system perl - a proper Galaxy perl dependency is just too icky to think about. + +Send code if you think otherwise. + </readme> + </package> + +</tool_dependency> |