changeset 56:4ffaf58c9791 draft

"planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/tools/ncbi_blast_plus commit 7eaaba230e124ac12114584e7ee5ff3b8aeda859"
author peterjc
date Tue, 29 Mar 2022 14:51:48 +0000
parents 673539b0f155
children fbff6786b2f6
files tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/get_species_taxids.xml tools/ncbi_blast_plus/ncbi_macros.xml
diffstat 3 files changed, 130 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/README.rst	Wed Mar 17 19:17:18 2021 +0000
+++ b/tools/ncbi_blast_plus/README.rst	Tue Mar 29 14:51:48 2022 +0000
@@ -136,7 +136,15 @@
 ============== ===============================================================
 Version        Changes
 -------------- ---------------------------------------------------------------
-2.10.1+galaxy1 - Make locally installed database selector non-optional.
+2.10.1+galaxy1 - Add tool `NCBI get species taxids` that wraps NCBI's
+                 `get_species_taxids.sh` script
+                 (https://www.ncbi.nlm.nih.gov/books/NBK546209/).
+                 It determines all species taxids below a given
+                 taxon.
+               - Add the possibility to restrict BLAST searches taxonomically
+                 by species taxids given in a file.
+               - Properly quote cached databases.
+               - Make locally installed database selector non-optional.
 2.10.1+galaxy0 - Updated for NCBI BLAST+ 2.10.1 release.
                - Supports locally installed v4 or v5 format BLAST databases
                  (listed in the ``blastdb*.loc`` files).
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/get_species_taxids.xml	Tue Mar 29 14:51:48 2022 +0000
@@ -0,0 +1,106 @@
+<tool id="get_species_taxids" name="NCBI get species taxids" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description></description>
+    <macros>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="preamble"/>
+    <command detect_errors="aggressive"><![CDATA[
+## Append the species taxids of each requested taxon, then sort and de-duplicate them.
+#if $type_cond.type_sel == 'names'
+    #for name in $type_cond.names.split(',')
+        taxid=\$(get_species_taxids.sh -n '$name' | grep Taxid | sed 's/Taxid://') &&
+        ## Quote the operand so that -z always tests exactly one argument.
+        if [ -z "\$taxid" ]; then
+            >&2 echo "could not find taxid for $name" && exit 1;
+        else echo " $name -> \$taxid"; fi &&
+        get_species_taxids.sh -t \$taxid >> species_ids.txt &&
+    #end for
+#else
+    #for taxid in $type_cond.ids.split(',')
+       get_species_taxids.sh -t $taxid >> species_ids.txt &&
+    #end for
+#end if
+sort -n -u  species_ids.txt > '$output'
+    ]]></command>
+    <inputs>
+        <conditional name="type_cond">
+            <param name="type_sel" type="select" label="Get taxids by">
+                <option value="names">Taxon names</option>
+                <option value="ids">Taxon ids</option>
+            </param>
+            <when value="names"> <!-- validator is anchored and rejects empty list items such as "a,,b" -->
+                <param name="names" type="text" label="Taxon names" help="comma separated">
+                    <validator type="regex" message="Enter a comma separated list of taxon names">^[a-zA-Z ]+(,[a-zA-Z ]+)*$</validator>
+                </param>
+            </when>
+            <when value="ids"> <!-- validator is anchored and rejects empty list items such as "1,,2" -->
+                <param name="ids" type="text" label="Taxon ids" help="comma separated">
+                    <validator type="regex" message="Enter a comma separated list of taxids">^[0-9]+(,[0-9]+)*$</validator>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="txt" name="output"/>
+    </outputs>
+    <tests>
+        <test> <!-- one taxon looked up by name -->
+            <param name="type_cond|type_sel" value="names"/>
+            <param name="type_cond|names" value="Enterobacterales"/>
+            <output name="output" ftype="txt">
+                <assert_contents>
+                    <has_line line="9"/>
+                    <has_line line="2791989"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- one taxon given by id; asserts the same taxids as the name-based test above -->
+            <param name="type_cond|type_sel" value="ids"/>
+            <param name="type_cond|ids" value="91347"/>
+            <output name="output" ftype="txt">
+                <assert_contents>
+                    <has_line line="9"/>
+                    <has_line line="2791989"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- two comma separated names; additionally expects taxid 9606 in the output -->
+            <param name="type_cond|type_sel" value="names"/>
+            <param name="type_cond|names" value="Enterobacterales,Hominidae"/>
+            <output name="output" ftype="txt">
+                <assert_contents>
+                    <has_line line="9"/>
+                    <has_line line="9606"/>
+                    <has_line line="2791989"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- two comma separated ids; same assertions as the two-name test above -->
+            <param name="type_cond|type_sel" value="ids"/>
+            <param name="type_cond|ids" value="91347,9604"/>
+            <output name="output" ftype="txt">
+                <assert_contents>
+                    <has_line line="9"/>
+                    <has_line line="9606"/>
+                    <has_line line="2791989"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+Returns the sorted, de-duplicated list of species taxids below one or more taxa, given either by name or by taxid. It relies on the ``get_species_taxids.sh`` script of the BLAST+ package (see https://www.ncbi.nlm.nih.gov/books/NBK546209/).
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations"/>
+</tool>
--- a/tools/ncbi_blast_plus/ncbi_macros.xml	Wed Mar 17 19:17:18 2021 +0000
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml	Tue Mar 29 14:51:48 2022 +0000
@@ -558,6 +558,8 @@
                 <option value="gilist">GI identifiers</option>
                 <option value="negative_gilist">Negative GI identifiers</option>
                 <option value="seqidlist">Sequence identifiers (SeqId's)</option>
+                <option value="taxidlist">Taxonomy identifiers (TaxId's)</option>
+                <option value="negative_taxidlist">Negative taxonomy identifiers (TaxId's)</option>
             </param>
             <when value="none" />
             <when value="gilist">
@@ -572,8 +574,17 @@
                 <param argument="-seqidlist" type="data" format="txt" label=" Restrict search of database to list of SeqId's"
                        help="This option is only available for database searches."/>
             </when>
+            <when value="taxidlist">
+                <param argument="-taxidlist" type="data" format="txt" label="Restrict search of database to list of TaxId's"
+                       help="This option is only available for database searches."/>
+            </when>
+            <when value="negative_taxidlist"> <!-- inverse of taxidlist: the label must say the listed TaxId's are excluded -->
+                <param argument="-negative_taxidlist" type="data" format="txt" label="Restrict search of database to everything except the listed TaxId's"
+                       help="This option is only available for database searches."/>
+            </when>
         </conditional>
     </xml>
+
 <!--Tokens-->
     <token name="@ADV_MATRIX_GAPCOSTS@"><![CDATA[
 #if str($adv_opts.matrix_gapcosts.matrix):
@@ -595,6 +606,10 @@
     -gilist '{$adv_opts.adv_optional_id_files_opts.gilist}'
 #elif $adv_opts.adv_optional_id_files_opts.adv_optional_id_files_opts_selector == 'seqidlist':
     -seqidlist '${adv_opts.adv_optional_id_files_opts.seqidlist}'
+#elif $adv_opts.adv_optional_id_files_opts.adv_optional_id_files_opts_selector == 'taxidlist':
+    -taxidlist '${adv_opts.adv_optional_id_files_opts.taxidlist}'
+#elif $adv_opts.adv_optional_id_files_opts.adv_optional_id_files_opts_selector == 'negative_taxidlist':
+    -negative_taxidlist '${adv_opts.adv_optional_id_files_opts.negative_taxidlist}'
 #end if
     ]]></token>