# HG changeset patch
# User peterjc
# Date 1530386018 14400
# Node ID 42e6f199d11f5d81722c3459db1adcf42f376cef
# Parent 360352490a06dca849a0526fe2e8ceb9d4054f74
v0.3.0 Updated for NCBI BLAST+ 2.7.1
diff -r 360352490a06 -r 42e6f199d11f test-data/chimera.fasta.gz
Binary file test-data/chimera.fasta.gz has changed
diff -r 360352490a06 -r 42e6f199d11f test-data/rhodopsin_nucs.fasta.gz
Binary file test-data/rhodopsin_nucs.fasta.gz has changed
diff -r 360352490a06 -r 42e6f199d11f test-data/three_human_mRNA.fasta.gz
Binary file test-data/three_human_mRNA.fasta.gz has changed
diff -r 360352490a06 -r 42e6f199d11f tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Tue Jun 05 11:42:10 2018 -0400
+++ b/tools/ncbi_blast_plus/README.rst Sat Jun 30 15:13:38 2018 -0400
@@ -1,10 +1,9 @@
Galaxy wrappers for NCBI BLAST+ suite
=====================================
-These wrappers are copyright 2010-2017 by Peter Cock (The James Hutton Institute,
-UK) and additional contributors including Edward Kirton, John Chilton,
-Nicola Soranzo, Jim Johnson, Bjoern Gruening, and Caleb Easterly.
-
+These wrappers are copyright 2010-2018 by Peter Cock (James Hutton Institute,
+UK) and additional contributors including Edward Kirton, John Chilton, Nicola
+Soranzo, Jim Johnson, Bjoern Gruening, Caleb Easterly, and Anton Nekrutenko.
See the licence text below.
Note this does not work with the NCBI 'legacy' BLAST suite written in C
@@ -259,6 +258,7 @@
- Depends on BioConda or legacy ToolShed ``package_blast_plus_2_7_1``.
- Document the BLAST+ 2.6.0 change in the standard 12 column output
from ``qacc,sacc,...`` to ``qaccver,saccver,...`` instead.
+ - Accept gzipped FASTA inputs (contribution from Anton Nekrutenko).
======= ======================================================================
diff -r 360352490a06 -r 42e6f199d11f tools/ncbi_blast_plus/check_no_duplicates.py
--- a/tools/ncbi_blast_plus/check_no_duplicates.py Tue Jun 05 11:42:10 2018 -0400
+++ b/tools/ncbi_blast_plus/check_no_duplicates.py Sat Jun 30 15:13:38 2018 -0400
@@ -9,10 +9,11 @@
will return a non-zero error if any duplicate identifiers
are found.
"""
-
+import gzip
import os
import sys
+
if "-v" in sys.argv or "--version" in sys.argv:
print("v0.0.23")
sys.exit(0)
@@ -24,7 +25,19 @@
sys.stderr.write("Missing FASTA file %r\n" % filename)
sys.exit(2)
files += 1
- handle = open(filename)
+
+ with open(filename, "rb") as binary_handle:
+ magic = binary_handle.read(2)
+ if not magic:
+ # Empty file, special case
+ continue
+ elif magic == b'\x1f\x8b':
+ # Gzipped
+ handle = gzip.open(filename, "rt")
+ elif magic[0:1] == b">":
+ # Not gzipped, should be plain FASTA
+ handle = open(filename, "r")
+
for line in handle:
if line.startswith(">"):
# The split will also take care of the new line character,
diff -r 360352490a06 -r 42e6f199d11f tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Tue Jun 05 11:42:10 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Sat Jun 30 15:13:38 2018 -0400
@@ -7,24 +7,29 @@
+ 0 ):
--perc_identity $adv_opts.identity_cutoff
+-perc_identity '${adv_opts.identity_cutoff}'
#end if
-$adv_opts.ungapped
+${adv_opts.ungapped}
@ADV_ID_LIST_FILTER@
@ADV_QCOV_HSP_PERC@
## only use window size if dc-megablast mode is used
@@ -35,9 +40,10 @@
@ADV_GAPEXTEND@
## End of advanced options:
#end if
+]]>
-
+
@@ -102,6 +108,16 @@
+
+
+
+
+
+
+
+
+
+
diff -r 360352490a06 -r 42e6f199d11f tools/ncbi_blast_plus/ncbi_macros.xml
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Tue Jun 05 11:42:10 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Sat Jun 30 15:13:38 2018 -0400
@@ -357,7 +357,7 @@
-
+
@@ -533,42 +533,46 @@
-
+
+ ]]>
-
-#if float(str($adv_opts.qcov_hsp_perc)) > 0:
- -qcov_hsp_perc $adv_opts.qcov_hsp_perc
+ 0:
+ -qcov_hsp_perc '${adv_opts.qcov_hsp_perc}'
#end if
-
+ ]]>
-
+
+ ]]>-num_threads "\${GALAXY_SLOTS:-8}"
-
+
+ ]]>
- -out '$output1'
+
+ ]]>$adv_opts.filter_query
-
+ 0):
#if str($output.out_format) in ["6", "ext", "cols", "5"]:
## Most output formats use this, including tabular and XML:
- -max_target_seqs $adv_opts.max_hits
+ -max_target_seqs '${adv_opts.max_hits}'
#else
## Text and HTML output formats 0-4 currently need this instead:
-num_descriptions $adv_opts.max_hits -num_alignments $adv_opts.max_hits
#end if
#end if
#if str($adv_opts.max_hsps)
- -max_hsps $adv_opts.max_hsps
+ -max_hsps '${adv_opts.max_hsps}'
#end if
-
-
+ ]]>
+
-
+ ]]>
+
+ ]]>
-
+
+ ]]>
-
+
+ ]]>
-
+
+ ]]>
-
+
+ ]]>
-
+
+ ]]>
- #if str($db_opts.db_opts_selector)=='db'
+
+#end if
+]]>
-
+
+ ]]>
+ <citation type="doi">10.1093/nar/25.17.3389</citation> <citation type="doi">10.1186/1471-2105-10-421</citation> <citation type="doi">10.1186/s13742-015-0080-7</citation>
- **Output format**
+ '
====== ============= ===========================================
The third option is to customise the tabular output by selecting which
@@ -735,8 +743,9 @@
The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
-
- .. class:: warningmark
+ ]]>
+
- .. class:: warningmark
+ ]]>
+
-
- **Advanced Options**
+ ]]>
+ `_.
For amino acid substitution matrices, see `BLAST Substitution Matrices
-<https://www.ncbi.nlm.nih.gov/books/NBK279684/#_appendices_BLAST_Substitution_Matrices_>`_ in the same
+<https://www.ncbi.nlm.nih.gov/books/NBK279684/#_appendices_BLAST_Substitution_Matrices_>`_ in the same
appendices.
-
+ ]]>
diff -r 360352490a06 -r 42e6f199d11f tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Tue Jun 05 11:42:10 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Sat Jun 30 15:13:38 2018 -0400
@@ -5,21 +5,30 @@
ncbi_macros.xml
-
+
+> '$outfile'
+ ]]>
@@ -57,7 +66,7 @@
NOTE Double check the new database would be self contained first
-->
-
+
@@ -158,7 +167,7 @@
-
+