changeset 10:e6337ef07e9a draft

v0.1.08 can search against multiple locally installed databases
author peterjc
date Wed, 09 Mar 2016 06:02:38 -0500
parents 1039cd165d92
children 4c47abb4c993
files test-data/blastdb.loc test-data/blastn_chimera_vs_rhodopsin_db.tabular test-data/blastn_chimera_vs_three_human_and_rhodopsin_db.tabular test-data/blastn_chimera_vs_three_human_db.tabular test-data/rhodopsin_nucs.dbinfo.txt test-data/rhodopsin_nucs.fasta.log.txt test-data/rhodopsin_nucs.fasta.nhd test-data/rhodopsin_nucs.fasta.nhi test-data/rhodopsin_nucs.fasta.nhr test-data/rhodopsin_nucs.fasta.nin test-data/rhodopsin_nucs.fasta.nog test-data/rhodopsin_nucs.fasta.nsd test-data/rhodopsin_nucs.fasta.nsi test-data/rhodopsin_nucs.fasta.nsq tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/blastxml_to_tabular.py tools/ncbi_blast_plus/check_no_duplicates.py tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml tools/ncbi_blast_plus/ncbi_macros.xml
diffstat 20 files changed, 163 insertions(+), 93 deletions(-) [+]
line wrap: on
line diff
--- a/test-data/blastdb.loc	Thu Nov 19 06:15:32 2015 -0500
+++ b/test-data/blastdb.loc	Wed Mar 09 06:02:38 2016 -0500
@@ -5,3 +5,4 @@
 # See the file tool-data/blastdb.loc.sample for more information.
 #
 three_human_mRNA	Three Human mRNAs	${__HERE__}/three_human_mRNA.fasta
+rhodopsin_nucs	Rhodopsin nucleotides	${__HERE__}/rhodopsin_nucs.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_chimera_vs_rhodopsin_db.tabular	Wed Mar 09 06:02:38 2016 -0500
@@ -0,0 +1,8 @@
+chimera	gi|57163782|ref|NM_001009242.1|	92.31	1014	78	0	8881	9894	34	1047	0.0	1441
+chimera	gi|283855822|gb|GQ290312.1|	91.53	956	81	0	8881	9836	4	959	0.0	1317
+chimera	gi|18148870|dbj|AB062417.1|	87.65	1012	123	2	8884	9894	37	1047	0.0	1175
+chimera	gi|283855845|gb|GQ290303.1|	91.52	330	28	0	8881	9210	4	333	8e-130	455
+chimera	gi|283855845|gb|GQ290303.1|	91.36	243	19	2	9542	9783	3127	3368	1e-92	331
+chimera	gi|283855845|gb|GQ290303.1|	94.22	173	10	0	9208	9380	1410	1582	1e-72	265
+chimera	gi|283855845|gb|GQ290303.1|	92.94	170	12	0	9375	9544	2854	3023	2e-67	248
+chimera	gi|283855845|gb|GQ290303.1|	95.59	68	3	0	9781	9848	4222	4289	7e-26	110
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_chimera_vs_three_human_and_rhodopsin_db.tabular	Wed Mar 09 06:02:38 2016 -0500
@@ -0,0 +1,11 @@
+chimera	ENA|AB011145|AB011145.1	100.00	4560	0	0	1	4560	121	4680	0.0	8421
+chimera	ENA|M10051|M10051.1	99.93	4331	3	0	4560	8890	60	4390	0.0	7982
+chimera	ENA|BC112106|BC112106.1	100.00	1093	0	0	8881	9973	121	1213	0.0	2019
+chimera	gi|57163782|ref|NM_001009242.1|	92.31	1014	78	0	8881	9894	34	1047	0.0	1441
+chimera	gi|283855822|gb|GQ290312.1|	91.53	956	81	0	8881	9836	4	959	0.0	1317
+chimera	gi|18148870|dbj|AB062417.1|	87.65	1012	123	2	8884	9894	37	1047	0.0	1175
+chimera	gi|283855845|gb|GQ290303.1|	91.52	330	28	0	8881	9210	4	333	2e-129	455
+chimera	gi|283855845|gb|GQ290303.1|	91.36	243	19	2	9542	9783	3127	3368	3e-92	331
+chimera	gi|283855845|gb|GQ290303.1|	94.22	173	10	0	9208	9380	1410	1582	3e-72	265
+chimera	gi|283855845|gb|GQ290303.1|	92.94	170	12	0	9375	9544	2854	3023	3e-67	248
+chimera	gi|283855845|gb|GQ290303.1|	95.59	68	3	0	9781	9848	4222	4289	2e-25	110
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_chimera_vs_three_human_db.tabular	Wed Mar 09 06:02:38 2016 -0500
@@ -0,0 +1,3 @@
+chimera	ENA|AB011145|AB011145.1	100.00	4560	0	0	1	4560	121	4680	0.0	8421
+chimera	ENA|M10051|M10051.1	99.93	4331	3	0	4560	8890	60	4390	0.0	7982
+chimera	ENA|BC112106|BC112106.1	100.00	1093	0	0	8881	9973	121	1213	0.0	2019
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_nucs.dbinfo.txt	Wed Mar 09 06:02:38 2016 -0500
@@ -0,0 +1,7 @@
+Database: Rhodopsin nucleotides
+	6 sequences; 10,296 total bases
+
+Date: Mar 8, 2016  4:58 PM	Longest sequence: 4,301 bases
+
+Volumes:
+	/mnt/galaxy/repositories/galaxy_blast/test-data/rhodopsin_nucs.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_nucs.fasta.log.txt	Wed Mar 09 06:02:38 2016 -0500
@@ -0,0 +1,5 @@
+New DB title:  Rhodopsin nucleotides
+Sequence type: Nucleotide
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_nucs.fasta.nhd	Wed Mar 09 06:02:38 2016 -0500
@@ -0,0 +1,6 @@
+12397459091
+20759409394
+22689758313
+28815213262
+36620822910
+40074407105
Binary file test-data/rhodopsin_nucs.fasta.nhi has changed
Binary file test-data/rhodopsin_nucs.fasta.nhr has changed
Binary file test-data/rhodopsin_nucs.fasta.nin has changed
Binary file test-data/rhodopsin_nucs.fasta.nog has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_nucs.fasta.nsd	Wed Mar 09 06:02:38 2016 -0500
@@ -0,0 +1,6 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
+gnl|bl_ord_id|33
+gnl|bl_ord_id|44
+gnl|bl_ord_id|55
Binary file test-data/rhodopsin_nucs.fasta.nsi has changed
Binary file test-data/rhodopsin_nucs.fasta.nsq has changed
--- a/tools/ncbi_blast_plus/README.rst	Thu Nov 19 06:15:32 2015 -0500
+++ b/tools/ncbi_blast_plus/README.rst	Wed Mar 09 06:02:38 2016 -0500
@@ -1,14 +1,14 @@
 Galaxy wrappers for NCBI BLAST+ suite
 =====================================
 
-These wrappers are copyright 2010-2014 by Peter Cock (The James Hutton Institute,
+These wrappers are copyright 2010-2016 by Peter Cock (The James Hutton Institute,
 UK) and additional contributors including Edward Kirton, John Chilton,
 Nicola Soranzo, Jim Johnson, and Bjoern Gruening.
 
 See the licence text below.
 
-Currently tested with NCBI BLAST 2.2.30+ (i.e. version 2.2.30 of BLAST+),
-and does not work with the NCBI 'legacy' BLAST suite (e.g. ``blastall``).
+Currently tested with NCBI BLAST+ 2.2.31, and does not work with the NCBI
+'legacy' BLAST suite (e.g. ``blastall``).
 
 Note that these wrappers (and the associated datatypes) were originally
 distributed as part of the main Galaxy repository, but as of August 2012
@@ -227,6 +227,8 @@
           on the Tool Shed test framework, but that is not currently in use).
         - Fixed macro problem with version field in blastxml_to_tabular.xml
           (contribution from Bjoern Gruening and Daniel Blankenberg).
+v0.1.08 - Allow searching against multiple locally installed databases
+          (contribution from Gildas Le Corguillé and Emma Prudent).
 ======= ======================================================================
 
 
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py	Thu Nov 19 06:15:32 2015 -0500
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py	Wed Mar 09 06:02:38 2016 -0500
@@ -8,7 +8,7 @@
 The 12 columns output are 'qseqid sseqid pident length mismatch gapopen qstart
 qend sstart send evalue bitscore' or 'std' at the BLAST+ command line, which
 mean:
-   
+
 ====== ========= ============================================
 Column NCBI name Description
 ------ --------- --------------------------------------------
@@ -69,23 +69,20 @@
     print "v0.1.04"
     sys.exit(0)
 
-if sys.version_info[:2] >= ( 2, 5 ):
+if sys.version_info[:2] >= (2, 5):
     try:
         from xml.etree import cElementTree as ElementTree
     except ImportError:
         from xml.etree import ElementTree as ElementTree
 else:
     from galaxy import eggs
-    import pkg_resources; pkg_resources.require( "elementtree" )
+    import pkg_resources
+    pkg_resources.require("elementtree")
     from elementtree import ElementTree
 
-def stop_err( msg ):
-    sys.stderr.write("%s\n" % msg)
-    sys.exit(1)
-
 if len(sys.argv) == 4 and sys.argv[3] in ["std", "x22", "ext"]:
-    #False positive if user really has a BLAST XML file called 'std' or 'ext'...
-    stop_err("""ERROR: The script API has changed, sorry.
+    # False positive if user really has a BLAST XML file called 'std' or 'ext'...
+    sys.exit("""ERROR: The script API has changed, sorry.
 
 Instead of the old style:
 
@@ -116,35 +113,35 @@
 colnames = 'qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles'.split(',')
 
 if len(args) < 1:
-    stop_err("ERROR: No BLASTXML input files given; run with --help to see options.")
+    sys.exit("ERROR: No BLASTXML input files given; run with --help to see options.")
 
 out_fmt = options.columns
 if out_fmt == "std":
     extended = False
     cols = None
 elif out_fmt == "x22":
-    stop_err("Format argument x22 has been replaced with ext (extended 25 columns)")
+    sys.exit("Format argument x22 has been replaced with ext (extended 25 columns)")
 elif out_fmt == "ext":
     extended = True
     cols = None
 else:
-    cols = out_fmt.replace(" ", ",").split(",") #Allow space or comma separated
-    #Remove any blank entries due to trailing comma,
-    #or annoying "None" dummy value from Galaxy if no columns
+    cols = out_fmt.replace(" ", ",").split(",")  # Allow space or comma separated
+    # Remove any blank entries due to trailing comma,
+    # or annoying "None" dummy value from Galaxy if no columns
     cols = [c for c in cols if c and c != "None"]
     extra = set(cols).difference(colnames)
     if extra:
-        stop_err("These are not recognised column names: %s" % ",".join(sorted(extra)))
+        sys.exit("These are not recognised column names: %s" % ",".join(sorted(extra)))
     del extra
     assert set(colnames).issuperset(cols), cols
     if not cols:
-        stop_err("No columns selected!")
-    extended = max(colnames.index(c) for c in cols) >= 12 #Do we need any higher columns?
+        sys.exit("No columns selected!")
+    extended = max(colnames.index(c) for c in cols) >= 12  # Do we need any higher columns?
 del out_fmt
 
 for in_file in args:
     if not os.path.isfile(in_file):
-        stop_err("Input BLAST XML file not found: %s" % in_file)
+        sys.exit("Input BLAST XML file not found: %s" % in_file)
 
 
 re_default_query_id = re.compile("^Query_\d+$")
@@ -161,156 +158,157 @@
 def convert(blastxml_filename, output_handle):
     blast_program = None
     # get an iterable
-    try: 
+    try:
         context = ElementTree.iterparse(blastxml_filename, events=("start", "end"))
-    except:
-        stop_err("Invalid data format.")
+    except Exception:
+        sys.exit("Invalid data format.")
     # turn it into an iterator
     context = iter(context)
     # get the root element
     try:
         event, root = context.next()
-    except:
-        stop_err( "Invalid data format." )
+    except Exception:
+        sys.exit("Invalid data format.")
     for event, elem in context:
         if event == "end" and elem.tag == "BlastOutput_program":
             blast_program = elem.text
         # for every <Iteration> tag
         if event == "end" and elem.tag == "Iteration":
-            #Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA
+            # Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA
             # <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
             # <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
             # <Iteration_query-len>406</Iteration_query-len>
             # <Iteration_hits></Iteration_hits>
             #
-            #Or, from BLAST 2.2.24+ run online
+            # Or, from BLAST 2.2.24+ run online
             # <Iteration_query-ID>Query_1</Iteration_query-ID>
             # <Iteration_query-def>Sample</Iteration_query-def>
             # <Iteration_query-len>516</Iteration_query-len>
             # <Iteration_hits>...
             qseqid = elem.findtext("Iteration_query-ID")
             if re_default_query_id.match(qseqid):
-                #Place holder ID, take the first word of the query definition
-                qseqid = elem.findtext("Iteration_query-def").split(None,1)[0]
+                # Place holder ID, take the first word of the query definition
+                qseqid = elem.findtext("Iteration_query-def").split(None, 1)[0]
             qlen = int(elem.findtext("Iteration_query-len"))
 
             # for every <Hit> within <Iteration>
             for hit in elem.findall("Iteration_hits/Hit"):
-                #Expecting either this,
+                # Expecting either this,
                 # <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id>
                 # <Hit_def>RecName: Full=Rhodopsin</Hit_def>
                 # <Hit_accession>P56514</Hit_accession>
-                #or,
+                # or,
                 # <Hit_id>Subject_1</Hit_id>
                 # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def>
                 # <Hit_accession>Subject_1</Hit_accession>
                 #
-                #apparently depending on the parse_deflines switch
+                # apparently depending on the parse_deflines switch
                 #
-                #Or, with a local database not using -parse_seqids can get this,
+                # Or, with a local database not using -parse_seqids can get this,
                 # <Hit_id>gnl|BL_ORD_ID|2</Hit_id>
                 # <Hit_def>chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence</Hit_def>
                 # <Hit_accession>2</Hit_accession>
-                sseqid = hit.findtext("Hit_id").split(None,1)[0]
+                sseqid = hit.findtext("Hit_id").split(None, 1)[0]
                 hit_def = sseqid + " " + hit.findtext("Hit_def")
                 if re_default_subject_id.match(sseqid) \
                 and sseqid == hit.findtext("Hit_accession"):
-                    #Place holder ID, take the first word of the subject definition
+                    # Place holder ID, take the first word of the subject definition
                     hit_def = hit.findtext("Hit_def")
-                    sseqid = hit_def.split(None,1)[0]
+                    sseqid = hit_def.split(None, 1)[0]
                 if sseqid.startswith("gnl|BL_ORD_ID|") \
                 and sseqid == "gnl|BL_ORD_ID|" + hit.findtext("Hit_accession"):
-                    #Alternative place holder ID, again take the first word of hit_def
+                    # Alternative place holder ID, again take the first word of hit_def
                     hit_def = hit.findtext("Hit_def")
-                    sseqid = hit_def.split(None,1)[0]
+                    sseqid = hit_def.split(None, 1)[0]
                 # for every <Hsp> within <Hit>
                 for hsp in hit.findall("Hit_hsps/Hsp"):
                     nident = hsp.findtext("Hsp_identity")
                     length = hsp.findtext("Hsp_align-len")
-                    pident = "%0.2f" % (100*float(nident)/float(length))
+                    pident = "%0.2f" % (100 * float(nident) / float(length))
 
                     q_seq = hsp.findtext("Hsp_qseq")
                     h_seq = hsp.findtext("Hsp_hseq")
                     m_seq = hsp.findtext("Hsp_midline")
                     assert len(q_seq) == len(h_seq) == len(m_seq) == int(length)
-                    gapopen = str(len(q_seq.replace('-', ' ').split())-1  + \
-                                  len(h_seq.replace('-', ' ').split())-1)
+                    gapopen = str(len(q_seq.replace('-', ' ').split()) - 1 +
+                                  len(h_seq.replace('-', ' ').split()) - 1)
 
                     mismatch = m_seq.count(' ') + m_seq.count('+') \
                              - q_seq.count('-') - h_seq.count('-')
-                    #TODO - Remove this alternative mismatch calculation and test
-                    #once satisifed there are no problems
+                    # TODO - Remove this alternative mismatch calculation and test
+                    # once satisifed there are no problems
                     expected_mismatch = len(q_seq) \
-                                      - sum(1 for q,h in zip(q_seq, h_seq) \
+                                      - sum(1 for q, h in zip(q_seq, h_seq)
                                             if q == h or q == "-" or h == "-")
-                    xx = sum(1 for q,h in zip(q_seq, h_seq) if q=="X" and h=="X")
+                    xx = sum(1 for q, h in zip(q_seq, h_seq) if q == "X" and h == "X")
                     if not (expected_mismatch - q_seq.count("X") <= int(mismatch) <= expected_mismatch + xx):
-                        stop_err("%s vs %s mismatches, expected %i <= %i <= %i" \
+                        sys.exit("%s vs %s mismatches, expected %i <= %i <= %i"
                                  % (qseqid, sseqid, expected_mismatch - q_seq.count("X"),
                                     int(mismatch), expected_mismatch))
 
-                    #TODO - Remove this alternative identity calculation and test
-                    #once satisifed there are no problems
-                    expected_identity = sum(1 for q,h in zip(q_seq, h_seq) if q == h)
+                    # TODO - Remove this alternative identity calculation and test
+                    # once satisifed there are no problems
+                    expected_identity = sum(1 for q, h in zip(q_seq, h_seq) if q == h)
                     if not (expected_identity - xx <= int(nident) <= expected_identity + q_seq.count("X")):
-                        stop_err("%s vs %s identities, expected %i <= %i <= %i" \
+                        sys.exit("%s vs %s identities, expected %i <= %i <= %i"
                                  % (qseqid, sseqid, expected_identity, int(nident),
                                     expected_identity + q_seq.count("X")))
 
-
                     evalue = hsp.findtext("Hsp_evalue")
                     if evalue == "0":
                         evalue = "0.0"
                     else:
                         evalue = "%0.0e" % float(evalue)
-                
+
                     bitscore = float(hsp.findtext("Hsp_bit-score"))
                     if bitscore < 100:
-                        #Seems to show one decimal place for lower scores
+                        # Seems to show one decimal place for lower scores
                         bitscore = "%0.1f" % bitscore
                     else:
-                        #Note BLAST does not round to nearest int, it truncates
+                        # Note BLAST does not round to nearest int, it truncates
                         bitscore = "%i" % bitscore
 
                     values = [qseqid,
                               sseqid,
                               pident,
-                              length, #hsp.findtext("Hsp_align-len")
+                              length,  # hsp.findtext("Hsp_align-len")
                               str(mismatch),
                               gapopen,
-                              hsp.findtext("Hsp_query-from"), #qstart,
-                              hsp.findtext("Hsp_query-to"), #qend,
-                              hsp.findtext("Hsp_hit-from"), #sstart,
-                              hsp.findtext("Hsp_hit-to"), #send,
-                              evalue, #hsp.findtext("Hsp_evalue") in scientific notation
-                              bitscore, #hsp.findtext("Hsp_bit-score") rounded
+                              hsp.findtext("Hsp_query-from"),  # qstart,
+                              hsp.findtext("Hsp_query-to"),  # qend,
+                              hsp.findtext("Hsp_hit-from"),  # sstart,
+                              hsp.findtext("Hsp_hit-to"),  # send,
+                              evalue,  # hsp.findtext("Hsp_evalue") in scientific notation
+                              bitscore,  # hsp.findtext("Hsp_bit-score") rounded
                               ]
 
                     if extended:
                         try:
-                            sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >"))
-                            salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >"))
+                            sallseqid = ";".join(name.split(None, 1)[0] for name in hit_def.split(" >"))
+                            salltitles = "<>".join(name.split(None, 1)[1] for name in hit_def.split(" >"))
                         except IndexError as e:
-                            stop_err("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e))
-                        #print hit_def, "-->", sallseqid
+                            sys.exit("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e))
+                        # print hit_def, "-->", sallseqid
                         positive = hsp.findtext("Hsp_positive")
-                        ppos = "%0.2f" % (100*float(positive)/float(length))
+                        ppos = "%0.2f" % (100 * float(positive) / float(length))
                         qframe = hsp.findtext("Hsp_query-frame")
                         sframe = hsp.findtext("Hsp_hit-frame")
                         if blast_program == "blastp":
-                            #Probably a bug in BLASTP that they use 0 or 1 depending on format
-                            if qframe == "0": qframe = "1"
-                            if sframe == "0": sframe = "1"
+                            # Probably a bug in BLASTP that they use 0 or 1 depending on format
+                            if qframe == "0":
+                                qframe = "1"
+                            if sframe == "0":
+                                sframe = "1"
                         slen = int(hit.findtext("Hit_len"))
                         values.extend([sallseqid,
-                                       hsp.findtext("Hsp_score"), #score,
+                                       hsp.findtext("Hsp_score"),  # score,
                                        nident,
                                        positive,
-                                       hsp.findtext("Hsp_gaps"), #gaps,
+                                       hsp.findtext("Hsp_gaps"),  # gaps,
                                        ppos,
                                        qframe,
                                        sframe,
-                                       #NOTE - for blastp, XML shows original seq, tabular uses XXX masking
+                                       # NOTE - for blastp, XML shows original seq, tabular uses XXX masking
                                        q_seq,
                                        h_seq,
                                        str(qlen),
@@ -318,9 +316,9 @@
                                        salltitles,
                                        ])
                     if cols:
-                        #Only a subset of the columns are needed
+                        # Only a subset of the columns are needed
                         values = [values[colnames.index(c)] for c in cols]
-                    #print "\t".join(values) 
+                    # print "\t".join(values)
                     output_handle.write("\t".join(values) + "\n")
             # prevents ElementTree from growing large datastructure
             root.clear()
@@ -339,6 +337,5 @@
 if options.output:
     outfile.close()
 else:
-    #Using stdout
+    # Using stdout
     pass
-
--- a/tools/ncbi_blast_plus/check_no_duplicates.py	Thu Nov 19 06:15:32 2015 -0500
+++ b/tools/ncbi_blast_plus/check_no_duplicates.py	Wed Mar 09 06:02:38 2016 -0500
@@ -16,30 +16,27 @@
     print("v0.0.22")
     sys.exit(0)
 
-def stop_err(msg, error=1):
-    sys.stderr.write("%s\n" % msg)
-    sys.exit(error)
-
-
 identifiers = set()
 files = 0
 for filename in sys.argv[1:]:
     if not os.path.isfile(filename):
-        stop_err("Missing FASTA file %r" % filename, 2)
+        sys.stderr.write("Missing FASTA file %r\n" % filename)
+        sys.exit(2)
     files += 1
     handle = open(filename)
     for line in handle:
         if line.startswith(">"):
-            #The split will also take care of the new line character,
-            #e.g. ">test\n" and ">test description here\n" both give "test"
+            # The split will also take care of the new line character,
+            # e.g. ">test\n" and ">test description here\n" both give "test"
             seq_id = line[1:].split(None, 1)[0]
             if seq_id in identifiers:
                 handle.close()
-                stop_err("Repeated identifiers, e.g. %r" % seq_id, 1)
+                sys.exit("Repeated identifiers, e.g. %r" % seq_id)
             identifiers.add(seq_id)
     handle.close()
 if not files:
-    stop_err("No FASTA files given to check for duplicates", 3)
+    sys.stderr.write("No FASTA files given to check for duplicates\n")
+    sys.exit(3)
 elif files == 1:
     print("%i sequences" % len(identifiers))
 else:
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml	Thu Nov 19 06:15:32 2015 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml	Wed Mar 09 06:02:38 2016 -0500
@@ -25,6 +25,11 @@
             <param name="db_opts|database" value="three_human_mRNA" />
             <output name="info" file="three_human_mRNA.dbinfo.txt" ftype="txt" lines_diff="4" />
         </test>
+        <test>
+            <param name="db_opts|db_type" value="nucl" />
+            <param name="db_opts|database" value="rhodopsin_nucs" />
+            <output name="info" file="rhodopsin_nucs.dbinfo.txt" ftype="txt" lines_diff="4" />
+        </test>
     </tests>
     <help>
     
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Thu Nov 19 06:15:32 2015 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Wed Mar 09 06:02:38 2016 -0500
@@ -120,6 +120,28 @@
             <param name="max_hits" value="1" />
             <output name="output1" file="blastn_chimera_vs_three_human_max1.txt" ftype="txt" />
         </test>
+        <test>
+            <param name="query" value="chimera.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="three_human_mRNA" />
+            <param name="out_format" value="6" />
+            <output name="output1" file="blastn_chimera_vs_three_human_db.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="chimera.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="rhodopsin_nucs" />
+            <param name="out_format" value="6" />
+            <output name="output1" file="blastn_chimera_vs_rhodopsin_db.tabular" ftype="tabular" />
+        </test>
+        <!-- next test is passing in two blast databases -->
+        <test>
+            <param name="query" value="chimera.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="three_human_mRNA,rhodopsin_nucs" />
+            <param name="out_format" value="6" />
+            <output name="output1" file="blastn_chimera_vs_three_human_and_rhodopsin_db.tabular" ftype="tabular" />
+        </test>
     </tests>
     <help>
     
--- a/tools/ncbi_blast_plus/ncbi_macros.xml	Thu Nov 19 06:15:32 2015 -0500
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml	Wed Mar 09 06:02:38 2016 -0500
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@WRAPPER_VERSION@">0.1.07</token>
+    <token name="@WRAPPER_VERSION@">0.1.08</token>
     <xml name="parallelism">
         <!-- If job splitting is enabled, break up the query file into parts -->
         <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
@@ -184,7 +184,7 @@
               <option value="file">FASTA file from your history (see warning note below)</option>
             </param>
             <when value="db">
-                <param name="database" type="select" label="Nucleotide BLAST database">
+                <param name="database" type="select" multiple="true" label="Nucleotide BLAST database">
                     <options from_data_table="blastdb" />
                 </param>
                 <param name="histdb" type="hidden" value="" />
@@ -210,7 +210,7 @@
               <option value="file">FASTA file from your history (see warning note below)</option>
             </param>
             <when value="db">
-                <param name="database" type="select" label="Protein BLAST database">
+                <param name="database" type="select" multiple="true" label="Protein BLAST database">
                     <options from_data_table="blastdb_p" />
                 </param>
                 <param name="histdb" type="hidden" value="" />
@@ -255,12 +255,12 @@
               <option value="prot">Protein</option>
             </param>
             <when value="nucl">
-                <param name="database" type="select" label="Nucleotide BLAST database">
+                <param name="database" type="select" multiple="true" label="Nucleotide BLAST database">
                     <options from_data_table="blastdb" />
                 </param>
             </when>
             <when value="prot">
-                <param name="database" type="select" label="Protein BLAST database">
+                <param name="database" type="select" multiple="true" label="Protein BLAST database">
                     <options from_data_table="blastdb_p" />
                 </param>
             </when>
@@ -352,7 +352,7 @@
     <token name="@THREADS@">-num_threads "\${GALAXY_SLOTS:-8}"</token>
     <token name="@BLAST_DB_SUBJECT@">
 #if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
+  -db "${' '.join(str( $db_opts.database.fields.path ).split( ',' ))}"
 #elif $db_opts.db_opts_selector == "histdb":
   -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
 #else: