changeset 36:f0edead5615e draft

planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/tools/ncbi_blast_plus commit ed6325d44993c65dd9fbab02902ede0a9c0eeb80-dirty
author peterjc
date Tue, 23 Oct 2018 06:27:03 -0400
parents 782a1a9c87d7
children 996c3ef26f37
files tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/blastxml_to_tabular.py tools/ncbi_blast_plus/ncbi_macros.xml tools/ncbi_blast_plus/ncbi_makeprofiledb.xml tools/ncbi_blast_plus/repository_dependencies.xml tools/ncbi_blast_plus/tool_dependencies.xml
diffstat 6 files changed, 39 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/README.rst	Mon Jul 09 10:05:00 2018 -0400
+++ b/tools/ncbi_blast_plus/README.rst	Tue Oct 23 06:27:03 2018 -0400
@@ -213,7 +213,7 @@
           setup via ``tool-data/tool_data_table_conf.xml.sample``
         - Replace ``.extra_files_path`` with ``.files_path`` (internal change,
           thanks to Bjoern Gruening and John Chilton).
-        - Added "NCBI BLAST+ integrated into Galaxy" preprint citation.
+        - Added *"NCBI BLAST+ integrated into Galaxy"* preprint citation.
 v0.1.03 - Reorder XML elements (internal change only).
         - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
 v0.1.04 - Fixed regression using BLAST databases from the history. Currently
@@ -259,6 +259,8 @@
         - Document the BLAST+ 2.6.0 change in the standard 12 column output
           from ``qacc,sacc,...`` to ``qaccver,saccver,...`` instead.
         - Accept gzipped FASTA inputs (contribution from Anton Nekrutenko).
+v0.3.1  - Clarify help text for max hits option, which was confusing since the
+          output format determines which command line arguments it maps to.
 ======= ======================================================================
 
 
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py	Mon Jul 09 10:05:00 2018 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py	Tue Oct 23 06:27:03 2018 -0400
@@ -168,6 +168,7 @@
 
 
 def convert(blastxml_filename, output_handle):
+    """Convert BLAST XML input from a file to tabular on given handle."""
     blast_program = None
     # get an iterable
     try:
--- a/tools/ncbi_blast_plus/ncbi_macros.xml	Mon Jul 09 10:05:00 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml	Tue Oct 23 06:27:03 2018 -0400
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@WRAPPER_VERSION@">0.3.0</token>
+    <token name="@WRAPPER_VERSION@">0.3.1</token>
     <xml name="parallelism">
         <!-- If job splitting is enabled, break up the query file into parts -->
         <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
@@ -443,7 +443,7 @@
     </xml>
 
     <xml name="input_max_hits">
-        <param name="max_hits" type="integer" min="0" value="0" label="Maximum hits to show" help="Use zero for default limits" />
+        <param name="max_hits" type="integer" min="0" value="0" label="Maximum hits to consider/show" help="Use zero for default limits. For HTML and plain text output this value is passed to -num_descriptions and -num_alignments but for XML and tabular etc this is used with -max_target_seqs instead. In either case, in addition to limiting the final output, this alters internal limits during the search, which can in some cases exclude matches which would otherwise become the best hit." />
         <param argument="-max_hsps" type="integer" min="1" optional="true" value="" label="Maximum number of HSPs (alignments) to keep for any single query-subject pair" help="The HSPs shown will be the best as judged by expect value. If this option is not set, BLAST shows all HSPs meeting the expect value criteria" />
     </xml>
 
@@ -589,15 +589,41 @@
     <token name="@ADV_FILTER_QUERY@">$adv_opts.filter_query</token>
     <token name="@ADV_MAX_HITS@"><![CDATA[
 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs used to simply override -num_descriptions and -num_alignments
-## but this was changed in BLAST+ 2.2.27 onwards to force their use (raised with NCBI)
+##
+## Quoting BLAST 2.7.1+ output from "blastp --help" or "blastn --help":
+##
+##  *** Formatting options
+##   -num_descriptions <Integer, >=0>
+##    Number of database sequences to show one-line descriptions for
+##    Not applicable for outfmt > 4
+##    Default = `500'
+##     * Incompatible with:  max_target_seqs
+##  -num_alignments <Integer, >=0>
+##    Number of database sequences to show alignments for
+##    Default = `250'
+##     * Incompatible with:  max_target_seqs
+##
+##  *** Restrict search or results
+##
+##  -max_target_seqs <Integer, >=1>
+##   Maximum number of aligned sequences to keep
+##   Not applicable for outfmt <= 4
+##   Default = `500'
+##    * Incompatible with:  num_descriptions, num_alignments
+##
+## So, taken at face value we do still need to treat the Text and HTML output
+## differently from the Tabular and XML, yet the treatment of these limits is
+## different (during search or after the search when writing the output):
+## https://blastedbio.blogspot.com/2015/12/blast-max-target-sequences-bug.html
+##
+## See also our user-facing help text.
 #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
     #if str($output.out_format) in ["6", "ext", "cols", "5"]:
         ## Most output formats use this, including tabular and XML:
         -max_target_seqs '${adv_opts.max_hits}'
     #else
         ## Text and HTML output formats 0-4 currently need this instead:
-        -num_descriptions $adv_opts.max_hits -num_alignments $adv_opts.max_hits
+        -num_descriptions '${adv_opts.max_hits}' -num_alignments '${adv_opts.max_hits}'
     #end if
 #end if
 #if str($adv_opts.max_hsps)
--- a/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml	Mon Jul 09 10:05:00 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml	Tue Oct 23 06:27:03 2018 -0400
@@ -90,7 +90,7 @@
             <param name="contain_pssm_scores_type" value="yes" />
             <output name="outfile" file="empty_file.dat" ftype="blastdbd" >
                 <extra_files type="file" value="cd00003_and_cd00008.phr" name="blastdb.phr" />
-                <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" lines_diff="2" />
+                <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" compare="sim_size" delta="0" />
                 <extra_files type="file" value="cd00003_and_cd00008.psq" name="blastdb.psq" />
                 <extra_files type="file" value="cd00003_and_cd00008.freq" name="blastdb.freq" />
                 <extra_files type="file" value="cd00003_and_cd00008.loo" name="blastdb.loo" />
--- a/tools/ncbi_blast_plus/repository_dependencies.xml	Mon Jul 09 10:05:00 2018 -0400
+++ b/tools/ncbi_blast_plus/repository_dependencies.xml	Tue Oct 23 06:27:03 2018 -0400
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format).">
-    <repository changeset_revision="63befb860c3e" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="3eada762af11" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
 </repositories>
--- a/tools/ncbi_blast_plus/tool_dependencies.xml	Mon Jul 09 10:05:00 2018 -0400
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml	Tue Oct 23 06:27:03 2018 -0400
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="blast" version="2.7.1">
-        <repository changeset_revision="fe6e1a80853c" name="package_blast_plus_2_7_1" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    <package name="blast" version="2.5.0">
+        <repository changeset_revision="de5976f2c96d" name="package_blast_plus_2_5_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>