Mercurial > repos > rnateam > blockclust
changeset 3:8d17799a1f91 draft
Uploaded
| author | bgruening |
|---|---|
| date | Mon, 21 Oct 2013 12:33:32 -0400 |
| parents | 0deb03bc35df |
| children | 4882bc140680 |
| files | blockclust.xml readme.rst repository_dependencies.xml tool_dependencies.xml |
| diffstat | 4 files changed, 116 insertions(+), 161 deletions(-) [+] |
line wrap: on
line diff
--- a/blockclust.xml Mon Oct 21 11:34:09 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,121 +0,0 @@ -<tool id="blockclust" name="BlockClust" version="0.1"> - <description>Non-coding RNA clustering from deep sequencing read profiles</description> - <requirements> - <requirement type="package" version="1.0">blockclust</requirement> - </requirements> - <!--<version_command> -version</version_command>--> - <command> - #set $blockclust_data_path = \$BLOCKCLUST_DATA_PATH - #if str($mode.operation) == "pre": - #set $outputdir = $tags_bed.extra_files_path - BlockClustPipeLine.pl -m PRE -bam $mode.reads_bam -tbed $tags_bed; - #end if - - #if str($mode.operation) == "clust": - #set $outputdir = $clusters.extra_files_path - #set $accept_bed=list() - #set $reject_bed=list() - ## prepare annotations - #if str($mode.reference) == "hg19": - $accept_bed.append("$blockclust_data_path/annotations/hg19/hg19.accept.bed") - $reject_bed.append("$blockclust_data_path/annotations/hg19/hg19.reject.bed") - #elif str($mode.reference) == "mm10": - $accept_bed.append("$blockclust_data_path/annotations/mm10/mm10.accept.bed") - $reject_bed.append("$blockclust_data_path/annotations/mm10/mm10.reject.bed") - #elif str($mode.reference) == "dm3": - $accept_bed.append("$blockclust_data_path/annotations/dm3/dm3.accept.bed") - $reject_bed.append("$blockclust_data_path/annotations/dm3/dm3.reject.bed") - #end if - BlockClustPipeLine.pl -m TEST -f SEQUENCE -c $blockclust_data_path/blockclust.config - -t $mode.input_bbo - -a #echo ''.join( $accept_bed ) - -r #echo ''.join( $reject_bed ) - -o $outputdir; - cp #echo os.path.join($outputdir, 'mcl_clusters','all_clusters.bed')# $clusters; - cp #echo os.path.join($outputdir, 'hclust_tree.pdf')# $hclust_plot; - cp #echo os.path.join($outputdir, 'discretized.gspan.tab')# $sim_tab_out; - #end if - - #if str($mode.operation) == "post": - #set $outputdir = $clusters_bed.extra_files_path - BlockClustPipeLine.pl -m POST -cbed $mode.clusters_bed -cm 
$mode.cmsearch_out -tab $mode.sim_tab_in -o $outputdir; - cp #echo os.path.join($outputdir, 'cluster_distribution.pdf')# $cluster_dist; - #end if - </command> - <inputs> - <conditional name="mode"> - <param name="operation" type="select" label="Select mode of operation"> - <option value="pre">Pre-processing </option> - <option value="clust">Clustering</option> - <option value="post">Post-processing</option> - </param> - <when value="pre"> - <param name="reads_bam" type="data" format="bam" label="BAM file containing alignments" /> - </when> - <when value="clust"> - <param name="input_bbo" type="data" format="tabular" label="Input blockgroups file" /> - <param name="reference" type="select" label="Select reference genome"> - <option value="hg19">Human (hg19)</option> - <option value="mm10">Mouse (mm10)</option> - <option value="dm3">Fly (dm3)</option> - </param> - </when> - <when value="post"> - <param name="clusters_bed" type="data" format="bed" label="BED file containing clusters (output of BlockClust)" /> - <param name="cmsearch_out" type="data" format="tabular" label="Output of cmsearch tool" /> - <param name="sim_tab_in" type="data" format="tabular" label="Pairwise similarities file" /> - </when> - </conditional> - </inputs> - - <outputs> - <data format="bed" name="tags_bed" label="BlockClust: BAM to BED on ${on_string}"> - <filter> mode["operation"]=="pre"</filter> - </data> - <data format="bed" name="clusters" label="BlockClust: Clustering BED on ${on_string}"> - <filter> mode["operation"]=="clust"</filter> - </data> - <data format="pdf" name="hclust_plot" label="BlockClust: Hierarchical clustering plot on ${on_string}" > - <filter> mode["operation"]=="clust"</filter> - </data> - <data format="tabular" name="sim_tab_out" label="BlockClust: Pairwise similarities on ${on_string}"> - <filter> mode["operation"]=="clust"</filter> - </data> - <data format="tabular" name="analysis" label="BlockClust: Cluster analysis on ${on_string}" > - <filter> 
mode["operation"]=="post"</filter> - </data> - <data format="pdf" name="cluster_dist" label="BlockClust: Cluster distribution on ${on_string}" > - <filter> mode["operation"]=="post"</filter> - </data> - </outputs> - <help> - -.. class:: infomark - -**What it does** - -Clusters the read profiles (i.e., blockgroups) from the blockbuster tool. - -**Inputs** - -BlockClust needs output of tool blockbuster as input - -**Output** - -BlockClust produces a fasta file containing clusters. - ------- - -**Licenses** - -If **BlockClust** is used to obtain results for scientific publications it -should be cited as [1]_. - -**References** - -.. [1] - ------- - - </help> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Mon Oct 21 12:33:32 2013 -0400 @@ -0,0 +1,106 @@ +This package is a Galaxy workflow for the BlockClust pipeline. + +It uses the Glimmer3 tool (Delcher et al. 2007) trained on a known set of +genes to generate gene predictions on a new genome, and then calls EMBOSS +(Rice et al. 2000) to translate the predictions into a FASTA file of +predicted protein sequences. The workflow requires two input files: + +* Nucleotide FASTA file of known gene sequences (training set) +* Nucleotide FASTA file of genome sequence or assembled contigs + +First an interpolated context model (ICM) is built from the set of known +genes, preferably from the closest relative organism(s) available. Next this +ICM model is used to predict genes on the genomic FASTA file. This produces +a FASTA file of the predicted gene nucleotide sequences, which is translated +into protein sequences using the EMBOSS tool transeq. + +Glimmer is intended for finding genes in microbial DNA, especially bacteria, +archaea, and viruses. + +See http://www.galaxyproject.org for information about the Galaxy Project. + + +Sample Data +=========== + +As an example, we will use the first public assembly of the 2011 Shiga-toxin +producing *Escherichia coli* O104:H4 outbreak in Germany. This was part of the +open-source crowd-sourcing analysis described in Rohde et al. 
(2011) and here: +https://github.com/ehec-outbreak-crowdsourced/BGI-data-analysis/wiki + +You can upload this assembly directly into Galaxy using the "Upload File" tool +with either of these URLs - Galaxy should recognise this is a FASTA file with +3,057 sequences: + +* http://static.xbase.ac.uk/files/results/nick/TY2482/TY2482.fasta.txt +* https://github.com/ehec-outbreak-crowdsourced/BGI-data-analysis/blob/master/strains/TY2482/seqProject/BGI/assemblies/NickLoman/TY2482.fasta.txt + +This FASTA file ``TY2482.fasta.txt`` was the initial TY-2482 strain assembled +by Nick Loman from 5 runs of Ion Torrent data released by the BGI, using the +MIRA 3.2 assembler. It was initially released via his blog, +http://pathogenomics.bham.ac.uk/blog/2011/06/ehec-genome-assembly/ + +We will also need a training set of known *E. coli* genes, for example the +model strain *Escherichia coli* str. K-12 substr. MG1655 which is well +annotated. You can upload the NCBI FASTA file ``NC_000913.ffn`` of the +gene nucleotide sequences directly into Galaxy via this URL, which Galaxy +should recognise as a FASTA file with 4,321 sequences: + +* ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Escherichia_coli_K_12_substr__MG1655_uid57779/NC_000913.ffn + +Then run the workflow, which should produce 2,333 predicted genes for the +TY2482 assembly (two FASTA files, nucleotide and protein sequences). + + +Citation +======== + +If you use this workflow directly, or a derivative of it, or the associated +wrappers for Galaxy, in work leading to a scientific publication, +please cite: + +P. Videm et al... + +For Glimmer3 please cite: + +Delcher, A.L., Bratke, K.A., Powers, E.C., and Salzberg, S.L. (2007) +Identifying bacterial genes and endosymbiont DNA with Glimmer. +Bioinformatics 23(6), 673-679. +http://dx.doi.org/10.1093/bioinformatics/btm009 + +For EMBOSS please cite: + +Rice, P., Longden, I. and Bleasby, A. 
(2000) +EMBOSS: The European Molecular Biology Open Software Suite +Trends in Genetics 16(6), 276-277. +http://dx.doi.org/10.1016/S0168-9525(00)02024-2 + + +Additional References +===================== + +Rohde, H., Qin, J., Cui, Y., Li, D., Loman, N.J., et al. (2011) +Open-source genomic analysis of shiga-toxin-producing E. coli O104:H4. +New England Journal of Medicine 365, 718-724. +http://dx.doi.org/10.1056/NEJMoa1107643 + + +Availability +============ + +This workflow is available on the main Galaxy Tool Shed: + +http://toolshed.g2.bx.psu.edu/view/bgruening/glimmer_gene_calling_workflow + +Development is being done on github: + +https://github.com/bgruening/galaxytools/workflows/glimmer3/ + + +Dependencies +============ + +These dependencies should be resolved automatically via the Galaxy Tool Shed: + +* http://toolshed.g2.bx.psu.edu/view/bgruening/glimmer3 +* http://toolshed.g2.bx.psu.edu/view/devteam/emboss_5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Mon Oct 21 12:33:32 2013 -0400 @@ -0,0 +1,10 @@ +<?xml version="1.0"?> +<repositories description="This workflow requires a number of different repositories."> + <repository changeset_revision="0deb03bc35df" name="blockclust" owner="rnateam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="94926c35b6f3" name="segemehl" owner="rnateam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="2f9d4b518b03" name="blockbuster" owner="rnateam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <!--<repository name="graphclust" owner="rnateam" />--> + <repository changeset_revision="5c6344f67ad0" name="infernal" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="e86bc4b0ddb5" name="package_eden_1_1" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="7003196be1b1" name="package_mcl_12_135" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> +</repositories>
--- a/tool_dependencies.xml Mon Oct 21 11:34:09 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="blockclust" version="1.0"> - <install version="1.0"> - <actions> - <action type="download_by_url">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-1.0.tar.gz</action> - <action type="shell_command">make</action> - <action type="move_file"> - <source>BlockClust</source> - <destination>$INSTALL_DIR/bin</destination> - </action> - <action type="move_file"> - <source>BlockClustPipeLine.pl</source> - <destination>$INSTALL_DIR/bin</destination> - </action> - <action type="move_file"> - <source>blockclust.config</source> - <destination>$INSTALL_DIR/data/</destination> - </action> - <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/dm3.tar.gz</action> - <action type="shell_command">tar -xfvz dm3.tar.gz -C $INSTALL_DIR/data/annotations/</action> - <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/hg19.tar.gz</action> - <action type="shell_command">tar -xfvz hg19.tar.gz -C $INSTALL_DIR/data/annotations/</action> - <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/mm10.tar.gz</action> - <action type="shell_command">tar -xfvz mm10.tar.gz -C $INSTALL_DIR/data/annotations/</action> - <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/panTro4.tar.gz</action> - <action type="shell_command">tar -xfvz panTro4.tar.gz -C $INSTALL_DIR/data/annotations/</action> - <action type="download_file">https://github.com/bgruening/download_store/raw/master/blockclust/blockclust-data-1.0/annotations/rheMac3.tar.gz</action> - <action type="shell_command">tar -xfvz rheMac3.tar.gz -C 
$INSTALL_DIR/data/annotations/</action> - <action type="set_environment"> - <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> - <environment_variable name="BLOCKCLUST_DATA_PATH" action="prepend_to">$INSTALL_DIR/data</environment_variable> - </action> - </actions> - </install> - <readme> - - </readme> - </package> -</tool_dependency>
