Mercurial > repos > diodupima > coast_search_blast
changeset 0:88fe28c4976d draft
"planemo upload commit 4ee329c2779b2e6e1ea0b58e4c636ff0658a8852-dirty"
author | diodupima |
---|---|
date | Wed, 07 Jul 2021 16:19:40 +0000 |
parents | |
children | 42cb02af7874 |
files | coast_search_blast.xml macros.xml table_data_table_conf.xml.sample tool-data/coast_taxonomic_filters.loc.sample |
diffstat | 4 files changed, 309 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/coast_search_blast.xml Wed Jul 07 16:19:40 2021 +0000
@@ -0,0 +1,53 @@
+<tool id="coast_search_blast" name="COAST - Search with BLAST" version="@TOOL_VERSION@">
+    <!-- Galaxy wrapper around the coast command line using the BLAST engine.
+         The input widgets and the @...@ command fragments are expanded from macros.xml. -->
+    <description>Run a new job, and identify the closest proteomes, using BLAST</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="2.10">blast</requirement>
+        <expand macro="requirements"/>
+    </requirements>
+    <!-- Judge job success by the exit status instead of the legacy rule that
+         treats any output on standard error as a failure. -->
+    <command detect_errors="exit_code"><![CDATA[
+        coast
+            @QUERY@
+            @DB@
+            --engine blast
+            --inplace_workspace
+            @AAI_FILTER@
+            @OUTPUT@
+            @QUERY_KEYWORDS@
+            @OUTPUT_FORMAT@
+            @BLAST_TAX_FILTER@
+            @BLAST_ALN_OPTIONS@
+            @GENERIC_ALN_OPTIONS@
+    ]]></command>
+    <inputs>
+        <expand macro="input_query"/>
+        <expand macro="protein_db"/>
+        <expand macro="blast_taxon_filter"/>
+        <expand macro="output_format"/>
+        <expand macro="aai_filter"/>
+        <!-- The section name "aln_adv" is the prefix the alignment tokens use ($aln_adv.*). -->
+        <section name="aln_adv" title="Advanced Alignment Settings">
+            <expand macro="blast_aln_options"/>
+            <expand macro="generic_aln_options"/>
+        </section>
+    </inputs>
+    <outputs>
+        <expand macro="results_alignment"/>
+        <expand macro="results_report"/>
+    </outputs>
+    <!-- No functional tests are defined yet. -->
+    <tests>
+    </tests>
+    <help><![CDATA[
+**COAST BLASTp**
+
+Alignment search tool that identifies similar proteomes using the BLASTp alignment engine.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jul 07 16:19:40 2021 +0000
@@ -0,0 +1,251 @@
+<macros>
+    <!-- Shared building blocks for the COAST Galaxy tools: each <xml> macro declares
+         input or output elements and the matching <token> holds the Cheetah fragment
+         that turns those parameters into coast command-line arguments. -->
+    <token name="@TOOL_VERSION@">0.1.1</token>
+    <xml name="requirements">
+        <!-- Reuse the version token so the package pin has a single source. -->
+        <requirement type="package" version="@TOOL_VERSION@">coast</requirement>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">@misc{noauthor_coast_nodate,
+                title = {{COAST} - {Comparative} {Omic} {Alignment} {Search} {Tool}},
+                url = {https://gitlab.com/coast_tool/COAST},
+                abstract = {Alignment search tool that identifies similar proteomes},
+                language = {en},
+                urldate = {2021-06-22},
+                }
+            </citation>
+        </citations>
+    </xml>
+
+    <!-- Query proteome: a GenBank file (plus the feature key to read proteins from) or a FASTA file. -->
+    <xml name="input_query">
+        <conditional name="query_type">
+            <param name="source" type="select" label="Select the type of input file">
+                <option value="coast_gb">COAST from GenBank</option>
+                <option value="coast_fa">COAST from FASTA</option>
+            </param>
+            <when value="coast_gb">
+                <param name="query_file" type="data" format="genbank" label="Load a query proteome in GenBank"/>
+                <param name="query_key" type="select" label="List the GB file Features to be used as Proteins, do so in a way to prevent duplicated proteins.">
+                    <option value="CDS" selected="true">CDS</option>
+                    <option value="product">product</option>
+                </param>
+            </when>
+            <when value="coast_fa">
+                <param name="query_file" type="data" format="fasta" label="Load a query proteome in FASTA"/>
+            </when>
+        </conditional>
+    </xml>
+    <token name="@QUERY@"><![CDATA[
+        '$query_type.query_file'
+    ]]></token>
+    <token name="@QUERY_KEYWORDS@"><![CDATA[
+        #if $query_type.source == 'coast_gb'
+            --keywords '$query_type.query_key'
+        #end if
+    ]]></token>
+
+    <!-- NOTE(review): '$db' passes the selected entry's value column to coast; confirm that
+         coast expects that identifier and not the data table's path column. -->
+    <xml name="protein_db">
+        <param name="db" type="select" optional="false" label="BLAST-Ready protein sequences database.">
+            <options from_data_table="blastdb_p"/>
+        </param>
+    </xml>
+    <token name="@DB@"><![CDATA[
+        '$db'
+    ]]></token>
+
+    <!-- Checkbox list of result files; the single-letter values are also used by the output filters. -->
+    <xml name="output_format">
+        <param name="outfmt" type="select" optional="true" multiple="true" display="checkboxes" label="Select outputs">
+            <option value="b" selected="true">Best Hits</option>
+            <option value="a" selected="true">Aggregated Metrics</option>
+            <option value="r" selected="true">Report</option>
+        </param>
+    </xml>
+    <token name="@OUTPUT_FORMAT@"><![CDATA[
+        #if $outfmt
+            --outfmt
+            #for $format in $outfmt
+                '${format}'
+            #end for
+        #end if
+    ]]></token>
+    <token name="@OUTPUT@"><![CDATA[
+        --quiet
+    ]]></token>
+
+    <xml name="aai_filter">
+        <param name="aai" type="integer" value="10" label="AAIc filtering score"/>
+        <param name="min_cov" type="integer" value="50" label="Minimum Coverage for AAIbd hit selection"/>
+        <param name="min_id" type="integer" value="40" label="Minimum Amino Acid Identity for AAIbd hit selection"/>
+    </xml>
+    <token name="@AAI_FILTER@"><![CDATA[
+        --aai '$aai'
+        --cov '$min_cov'
+        --id '$min_id'
+    ]]></token>
+
+    <xml name="hypothetical_filter">
+        <param name="hypothetical" type="boolean" checked="false" label="Filter hypothetical proteins from query. Read description for more information." truevalue="--filter_hypothetical" falsevalue=""/>
+    </xml>
+    <token name="@HYPO_FILTER@"><![CDATA[
+        #if $hypothetical
+            '$hypothetical'
+        #end if
+    ]]></token>
+
+    <!-- Outputs are collected from fixed file names in the job working directory. -->
+    <xml name="results_alignment">
+        <data format="tabular" name="blast_results" label="COAST - Batch alignment results" from_work_dir="blast_results.tab"/>
+    </xml>
+
+    <!-- Each optional output is kept only when its letter is ticked in "outfmt"; the leading
+         "outfmt and" guard covers the case where no box is ticked and the value is empty. -->
+    <xml name="results_report">
+        <data format="html" name="coast_report" label="COAST - Summarized report" from_work_dir="coast_report.html">
+            <filter>outfmt and "r" in outfmt</filter>
+        </data>
+        <data format="tabular" label="COAST - Best-hits table" name="bh_results" from_work_dir="bh_results.tab">
+            <filter>outfmt and "b" in outfmt</filter>
+        </data>
+        <data format="tabular" label="COAST - Results table" name="coast_results" from_work_dir="coast_results.tab">
+            <filter>outfmt and "a" in outfmt</filter>
+        </data>
+    </xml>
+
+    <xml name="blast_taxon_filter">
+        <conditional name="filter_type">
+            <param name="taxon_filter_type" type="select" label="Type of taxonomic filter">
+                <option value="taxidlist_dm">Pre-defined taxonomic filters</option>
+                <option value="taxidlist_user">User-provided file based list</option>
+                <option value="taxonlist">Comma separated list</option>
+            </param>
+            <when value="taxidlist_dm">
+                <param name="taxidlist" type="select" optional="true" label="Select pre-defined taxonomic filters">
+                    <options from_data_table="coast_taxonomic_filters"/>
+                </param>
+            </when>
+            <when value="taxidlist_user">
+                <param name="taxidlist" type="data" format="txt" optional="true" label="Load file with filtering taxids."/>
+            </when>
+            <when value="taxonlist">
+                <param name="taxonlist" type="text" optional="true" label="Comma separated list of TAXIDs nodes, ranking species or lower"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Every filter value is optional, so an argument is emitted only when one was actually supplied. -->
+    <token name="@BLAST_TAX_FILTER@"><![CDATA[
+        #if $filter_type.taxon_filter_type == "taxidlist_dm"
+            #if $filter_type.taxidlist
+                --taxidlist '$filter_type.taxidlist.fields.path'
+            #end if
+        #elif $filter_type.taxon_filter_type == "taxidlist_user"
+            #if $filter_type.taxidlist
+                --taxidlist '$filter_type.taxidlist'
+            #end if
+        #elif $filter_type.taxon_filter_type == "taxonlist"
+            #if str($filter_type.taxonlist).strip()
+                --taxonlist '$filter_type.taxonlist'
+            #end if
+        #end if
+    ]]></token>
+
+    <xml name="diamond_taxon_filter">
+        <conditional name="filter_type">
+            <param name="taxon_filter_type" type="select" label="Type of taxonomic filter">
+                <option value="taxonlist_pre_defined">Pre-defined taxonomic filters</option>
+                <option value="taxonlist">Comma separated list</option>
+            </param>
+            <when value="taxonlist_pre_defined">
+                <param name="taxonlist" type="select" optional="true" label="Select pre-defined taxonomic filters">
+                    <option value="10239">Viruses - 10239</option>
+                    <option value="2157">Archaea - 2157</option>
+                    <option value="2">Bacteria - 2</option>
+                </param>
+            </when>
+            <when value="taxonlist">
+                <param name="taxonlist" type="text" optional="true" label="Comma separated list of TAXIDs nodes, ranking species or lower"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- "taxonlist" lives inside the "filter_type" conditional, so it is addressed through it
+         (assumes the macro is expanded directly under <inputs>, like blast_taxon_filter; confirm). -->
+    <token name="@DIAMOND_TAX_FILTER@"><![CDATA[
+        #if $filter_type.taxonlist
+            --taxonlist '$filter_type.taxonlist'
+        #end if
+    ]]></token>
+
+    <xml name="generic_aln_options">
+        <!-- E-values are usually fractional (for example 1e-5 or 0.001), hence a float parameter. -->
+        <param name="threshold_no" type="float" optional="true" label="E-Value Threshold"/>
+        <param name="scoring_matrix" type="select" optional="true" label="Scoring matrix">
+            <option value="BLOSUM45">BLOSUM45</option>
+            <option value="BLOSUM50">BLOSUM50</option>
+            <option value="BLOSUM62">BLOSUM62</option>
+            <option value="BLOSUM80">BLOSUM80</option>
+            <option value="BLOSUM90">BLOSUM90</option>
+            <option value="PAM250">PAM250</option>
+            <option value="PAM70">PAM70</option>
+            <option value="PAM30">PAM30</option>
+        </param>
+        <param name="gap_open" type="integer" optional="true" label="Gap opening penalty"/>
+        <param name="gap_ext" type="integer" optional="true" label="Gap extension penalty"/>
+    </xml>
+    <!-- Numeric options are tested against the empty string so that an explicit 0 is still passed on. -->
+    <token name="@GENERIC_ALN_OPTIONS@"><![CDATA[
+        #if $aln_adv.scoring_matrix
+            --matrix '$aln_adv.scoring_matrix'
+        #end if
+        #if str($aln_adv.threshold_no) != ''
+            --evalue '$aln_adv.threshold_no'
+        #end if
+        #if str($aln_adv.gap_open) != ''
+            --gapopen '$aln_adv.gap_open'
+        #end if
+        #if str($aln_adv.gap_ext) != ''
+            --gapextend '$aln_adv.gap_ext'
+        #end if
+    ]]></token>
+
+    <!-- NOTE(review): BLAST+ names its blastp tasks blastp, blastp-fast and blastp-short; confirm
+         that coast accepts the "blast" value offered below. -->
+    <xml name="blast_aln_options">
+        <param name="task" type="select" optional="true" label="Type of BLAST">
+            <option value="blast">blast</option>
+            <option value="blastp-fast">blastp-fast</option>
+            <option value="blastp-short">blastp-short</option>
+        </param>
+    </xml>
+    <token name="@BLAST_ALN_OPTIONS@"><![CDATA[
+        #if $aln_adv.task
+            --task '$aln_adv.task'
+        #end if
+    ]]></token>
+
+    <xml name="diamond_aln_options">
+        <param name="diamond_sens" type="select" label="Select the desired sensitivity">
+            <option value="sensitive" selected="true">sensitive</option>
+            <option value="more-sensitive">more sensitive</option>
+            <option value="very-sensitive">very sensitive</option>
+            <option value="ultra-sensitive">ultra sensitive</option>
+        </param>
+    </xml>
+    <token name="@DIAMOND_ALN_OPTIONS@"><![CDATA[
+        #if $aln_adv.diamond_sens
+            --sens '$aln_adv.diamond_sens'
+        #end if
+    ]]></token>
+
+    <xml name="merlin_db_selection">
+        <param name="db" type="select" label="Select the desired database">
+            <option value="UniProtKB_SwissProt">SwissProt</option>
+            <option value="UniProtKB_Trembl">Trembl</option>
+        </param>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/table_data_table_conf.xml.sample Wed Jul 07 16:19:40 2021 +0000
@@ -0,0 +1,11 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc -->
+<!-- NOTE(review): Galaxy tool repositories conventionally name this file tool_data_table_conf.xml.sample;
+     this one is named table_data_table_conf.xml.sample. Confirm that the data table is still picked up. -->
+<tables>
+    <!-- Table queried by the tool's from_data_table="coast_taxonomic_filters" option list. -->
+    <table allow_duplicate_entries="False" comment_char="#" name="coast_taxonomic_filters">
+        <!-- Column order of each row in the .loc file. -->
+        <columns>value, name, node_name, path</columns>
+        <file path="tool-data/coast_taxonomic_filters.loc"/>
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/coast_taxonomic_filters.loc.sample Wed Jul 07 16:19:40 2021 +0000
@@ -0,0 +1,22 @@
+#This is a sample file distributed with the COAST Galaxy tools that enables the
+#"Pre-defined taxonomic filters" option to list taxid files. You will need
+#to create these taxid list files and then create a coast_taxonomic_filters.loc
+#file similar to this one (store it in this directory) that points to
+#the locations in which those files are stored. The coast_taxonomic_filters.loc
+#file has this format (white space characters are TAB characters):
+#
+#<value> <name> <node_name> <path>
+#
+#The columns follow the order declared for the coast_taxonomic_filters data
+#table. <name> is the label shown in the select list and <path> is the file
+#handed to coast as the taxid list.
+#
+#So, for example, if you had a taxid list for Bacteria stored in
+#/depot/data2/galaxy/coast/,
+#then the coast_taxonomic_filters.loc entry could look like this
+#(illustrative values, adapt them to your installation):
+#
+#bacteria Bacteria Bacteria /depot/data2/galaxy/coast/bacteria_taxids.txt
+#
+#Your coast_taxonomic_filters.loc file should include an entry per line for
+#each taxid list you have stored.
\ No newline at end of file