changeset 3:8d17799a1f91 draft

Uploaded
author bgruening
date Mon, 21 Oct 2013 12:33:32 -0400
parents 0deb03bc35df
children 4882bc140680
files blockclust.xml readme.rst repository_dependencies.xml tool_dependencies.xml
diffstat 4 files changed, 116 insertions(+), 161 deletions(-) [+]
line wrap: on
line diff
--- a/blockclust.xml	Mon Oct 21 11:34:09 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-<tool id="blockclust" name="BlockClust" version="0.1">
-    <description>Non-coding RNA clustering from deep sequencing read profiles</description>
-    <requirements>
-        <requirement type="package" version="1.0">blockclust</requirement>
-    </requirements>
-    <!--<version_command> -version</version_command>-->
-    <command>
-        #set $blockclust_data_path = \$BLOCKCLUST_DATA_PATH
-        #if str($mode.operation) == "pre":
-            #set $outputdir = $tags_bed.extra_files_path
-            BlockClustPipeLine.pl -m PRE -bam $mode.reads_bam -tbed $tags_bed;
-        #end if
-        ## Clustering mode: pick the accept/reject annotation BED files for the chosen genome, run the pipeline, then copy its results to the Galaxy outputs.
-        #if str($mode.operation) == "clust":
-            #set $outputdir = $clusters.extra_files_path
-            #set $accept_bed=list()
-            #set $reject_bed=list()
-            ## prepare annotations
-            #if str($mode.reference) == "hg19":
-                $accept_bed.append("$blockclust_data_path/annotations/hg19/hg19.accept.bed")
-                $reject_bed.append("$blockclust_data_path/annotations/hg19/hg19.reject.bed")
-            #elif str($mode.reference) == "mm10":
-                $accept_bed.append("$blockclust_data_path/annotations/mm10/mm10.accept.bed")
-                $reject_bed.append("$blockclust_data_path/annotations/mm10/mm10.reject.bed")
-            #elif str($mode.reference) == "dm3":
-                $accept_bed.append("$blockclust_data_path/annotations/dm3/dm3.accept.bed")
-                $reject_bed.append("$blockclust_data_path/annotations/dm3/dm3.reject.bed")
-            #end if
-            BlockClustPipeLine.pl -m TEST -f SEQUENCE -c $blockclust_data_path/blockclust.config
-            -t $mode.input_bbo
-            -a #echo ''.join( $accept_bed )
-            -r #echo ''.join( $reject_bed )
-            -o $outputdir;
-            cp #echo os.path.join($outputdir, 'mcl_clusters','all_clusters.bed')# $clusters;
-            cp #echo os.path.join($outputdir, 'hclust_tree.pdf')# $hclust_plot;
-            cp #echo os.path.join($outputdir, 'discretized.gspan.tab')# $sim_tab_out;
-        #end if
-        ## Post-processing mode: the working directory hangs off the cluster_dist output, because the clusters BED is an input that is only reachable as mode.clusters_bed.
-        #if str($mode.operation) == "post":
-            #set $outputdir = $cluster_dist.extra_files_path
-            BlockClustPipeLine.pl -m POST -cbed $mode.clusters_bed -cm $mode.cmsearch_out -tab $mode.sim_tab_in -o $outputdir;
-            cp #echo os.path.join($outputdir, 'cluster_distribution.pdf')# $cluster_dist;
-        #end if
-    </command>
-    <inputs>
-        <conditional name="mode">
-            <param name="operation" type="select" label="Select mode of operation">
-                <option value="pre">Pre-processing</option>
-                <option value="clust">Clustering</option>
-                <option value="post">Post-processing</option>
-            </param>
-            <when value="pre">
-                <param name="reads_bam" type="data" format="bam" label="BAM file containing alignments" />
-            </when>
-            <when value="clust">
-                <param name="input_bbo" type="data" format="tabular" label="Input blockgroups file" />
-                <param name="reference" type="select" label="Select reference genome">
-                    <option value="hg19">Human (hg19)</option>
-                    <option value="mm10">Mouse (mm10)</option>
-                    <option value="dm3">Fly (dm3)</option>
-                </param>
-            </when>
-            <when value="post">
-                <param name="clusters_bed" type="data" format="bed" label="BED file containing clusters (output of BlockClust)" />
-                <param name="cmsearch_out" type="data" format="tabular" label="Output of cmsearch tool" />
-                <param name="sim_tab_in" type="data" format="tabular" label="Pairwise similarities file" />
-            </when>
-        </conditional>
-    </inputs>
-    <!-- Each output is created only for the mode named in its filter. NOTE(review): no command line writes the "analysis" output; confirm which POST result belongs there. -->
-    <outputs>
-        <data format="bed" name="tags_bed" label="BlockClust: BAM to BED on ${on_string}">
-            <filter> mode["operation"]=="pre"</filter>
-        </data>
-        <data format="bed" name="clusters" label="BlockClust: Clustering BED on ${on_string}">
-            <filter> mode["operation"]=="clust"</filter>
-        </data>
-        <data format="pdf" name="hclust_plot" label="BlockClust: Hierarchical clustering plot on ${on_string}" >
-            <filter> mode["operation"]=="clust"</filter>
-        </data>
-        <data format="tabular" name="sim_tab_out" label="BlockClust: Pairwise similarities on ${on_string}">
-            <filter> mode["operation"]=="clust"</filter>
-        </data>
-        <data format="tabular" name="analysis" label="BlockClust: Cluster analysis on ${on_string}" >
-            <filter> mode["operation"]=="post"</filter>
-        </data>
-        <data format="pdf" name="cluster_dist" label="BlockClust: Cluster distribution on ${on_string}" >
-            <filter> mode["operation"]=="post"</filter>
-        </data>
-    </outputs>
-    <help>
-
-.. class:: infomark
-
-**What it does** 
-
-Clusters the read profiles (i.e., blockgroups) from the blockbuster tool.
-
-**Inputs**
-
-BlockClust needs the output of the blockbuster tool as input.
-
-**Output**
-
-In clustering mode BlockClust produces a BED file containing the clusters, a hierarchical clustering plot (PDF) and a table of pairwise similarities.
-
-------
-
-**Licenses**
-
-If **BlockClust** is used to obtain results for scientific publications it
-should be cited as [1]_.
-
-**References** 
-
-.. [1] 
-
-------
-
-    </help>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Mon Oct 21 12:33:32 2013 -0400
@@ -0,0 +1,106 @@
+This package is a Galaxy workflow for the BlockClust pipeline.
+
+It uses the Glimmer3 tool (Delcher et al. 2007) trained on a known set of
+genes to generate gene predictions on a new genome, and then calls EMBOSS
+(Rice et al. 2000) to translate the predictions into a FASTA file of
+predicted protein sequences. The workflow requires two input files:
+
+* Nucleotide FASTA file of known gene sequences (training set)
+* Nucleotide FASTA file of genome sequence or assembled contigs
+
+First an interpolated context model (ICM) is built from the set of known
+genes, preferably from the closest relative organism(s) available. Next this
+ICM model is used to predict genes on the genomic FASTA file. This produces
+a FASTA file of the predicted gene nucleotide sequences, which is translated
+into protein sequences using the EMBOSS tool transeq.
+
+Glimmer is intended for finding genes in microbial DNA, especially bacteria,
+archaea, and viruses.
+
+See http://www.galaxyproject.org for information about the Galaxy Project.
+
+
+Sample Data
+===========
+
+As an example, we will use the first public assembly of the 2011 Shiga-toxin
+producing *Escherichia coli* O104:H4 outbreak in Germany. This was part of the
+open-source crowd-sourcing analysis described in Rohde et al. (2011) and here:
+https://github.com/ehec-outbreak-crowdsourced/BGI-data-analysis/wiki
+
+You can upload this assembly directly into Galaxy using the "Upload File" tool
+with either of these URLs - Galaxy should recognise this is a FASTA file with
+3,057 sequences:
+
+* http://static.xbase.ac.uk/files/results/nick/TY2482/TY2482.fasta.txt
+* https://github.com/ehec-outbreak-crowdsourced/BGI-data-analysis/blob/master/strains/TY2482/seqProject/BGI/assemblies/NickLoman/TY2482.fasta.txt
+
+This FASTA file ``TY2482.fasta.txt`` was the initial TY-2482 strain assembled
+by Nick Loman from 5 runs of Ion Torrent data released by the BGI, using the
+MIRA 3.2 assembler. It was initially released via his blog,
+http://pathogenomics.bham.ac.uk/blog/2011/06/ehec-genome-assembly/
+
+We will also need a training set of known *E. coli* genes, for example the
+model strain *Escherichia coli* str. K-12 substr. MG1655 which is well
+annotated. You can upload the NCBI FASTA file ``NC_000913.ffn`` of the
+gene nucleotide sequences directly into Galaxy via this URL, which Galaxy
+should recognise as a FASTA file with 4,321 sequences:
+
+* ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Escherichia_coli_K_12_substr__MG1655_uid57779/NC_000913.ffn
+
+Then run the workflow, which should produce 2,333 predicted genes for the
+TY2482 assembly (two FASTA files, nucleotide and protein sequences).
+
+
+Citation
+========
+
+If you use this workflow directly, or a derivative of it, or the associated
+wrappers for Galaxy, in work leading to a scientific publication,
+please cite:
+
+P. Videm et al...
+
+For Glimmer3 please cite:
+
+Delcher, A.L., Bratke, K.A., Powers, E.C., and Salzberg, S.L. (2007)
+Identifying bacterial genes and endosymbiont DNA with Glimmer.
+Bioinformatics 23(6), 673-679.
+http://dx.doi.org/10.1093/bioinformatics/btm009
+
+For EMBOSS please cite:
+
+Rice, P., Longden, I. and Bleasby, A. (2000)
+EMBOSS: The European Molecular Biology Open Software Suite
+Trends in Genetics 16(6), 276-277.
+http://dx.doi.org/10.1016/S0168-9525(00)02024-2
+
+
+Additional References
+=====================
+
+Rohde, H., Qin, J., Cui, Y., Li, D., Loman, N.J., et al. (2011)
+Open-source genomic analysis of shiga-toxin-producing E. coli O104:H4.
+New England Journal of Medicine 365, 718-724.
+http://dx.doi.org/10.1056/NEJMoa1107643
+
+
+Availability
+============
+
+This workflow is available on the main Galaxy Tool Shed:
+
+http://toolshed.g2.bx.psu.edu/view/bgruening/glimmer_gene_calling_workflow
+
+Development is being done on GitHub:
+
+https://github.com/bgruening/galaxytools/workflows/glimmer3/
+
+
+Dependencies
+============
+
+These dependencies should be resolved automatically via the Galaxy Tool Shed:
+
+* http://toolshed.g2.bx.psu.edu/view/bgruening/glimmer3
+* http://toolshed.g2.bx.psu.edu/view/devteam/emboss_5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Mon Oct 21 12:33:32 2013 -0400
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<repositories description="This workflow requires a number of different repositories.">
+    <repository changeset_revision="0deb03bc35df" name="blockclust" owner="rnateam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="94926c35b6f3" name="segemehl" owner="rnateam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="2f9d4b518b03" name="blockbuster" owner="rnateam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <!-- Disabled entry, so graphclust is not declared as a dependency; unlike the active entries it carries no changeset_revision or toolshed attribute: <repository name="graphclust" owner="rnateam" /> -->
+    <repository changeset_revision="5c6344f67ad0" name="infernal" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="e86bc4b0ddb5" name="package_eden_1_1" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="7003196be1b1" name="package_mcl_12_135" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+</repositories>
--- a/tool_dependencies.xml	Mon Oct 21 11:34:09 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="blockclust" version="1.0">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-1.0.tar.gz</action>
-                <action type="shell_command">make</action>
-                <action type="move_file">
-                    <source>BlockClust</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>BlockClustPipeLine.pl</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>blockclust.config</source>
-                    <destination>$INSTALL_DIR/data/</destination>
-                </action>
-                <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/dm3.tar.gz</action>
-                <action type="shell_command">mkdir -p $INSTALL_DIR/data/annotations &amp;&amp; tar -xzvf dm3.tar.gz -C $INSTALL_DIR/data/annotations/</action>
-                <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/hg19.tar.gz</action>
-                <action type="shell_command">mkdir -p $INSTALL_DIR/data/annotations &amp;&amp; tar -xzvf hg19.tar.gz -C $INSTALL_DIR/data/annotations/</action>
-                <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/mm10.tar.gz</action>
-                <action type="shell_command">mkdir -p $INSTALL_DIR/data/annotations &amp;&amp; tar -xzvf mm10.tar.gz -C $INSTALL_DIR/data/annotations/</action>
-                <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/panTro4.tar.gz</action>
-                <action type="shell_command">mkdir -p $INSTALL_DIR/data/annotations &amp;&amp; tar -xzvf panTro4.tar.gz -C $INSTALL_DIR/data/annotations/</action>
-                <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/rheMac3.tar.gz</action>
-                <action type="shell_command">mkdir -p $INSTALL_DIR/data/annotations &amp;&amp; tar -xzvf rheMac3.tar.gz -C $INSTALL_DIR/data/annotations/</action>
-                <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-                    <environment_variable name="BLOCKCLUST_DATA_PATH" action="set_to">$INSTALL_DIR/data</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-            Builds BlockClust 1.0 with make, installs BlockClust and BlockClustPipeLine.pl into bin (prepended to PATH), and places blockclust.config plus the dm3, hg19, mm10, panTro4 and rheMac3 annotation archives under data, which BLOCKCLUST_DATA_PATH is set to.
-        </readme>
-    </package>
-</tool_dependency>