Repository 'ncbi_blast_plus'
hg clone https://testtoolshed.g2.bx.psu.edu/repos/peterjc/ncbi_blast_plus

Changeset 40:f83e5d79b6ab (2014-02-26)
Previous changeset 39:22b7cdcf4960 (2014-02-20) Next changeset 41:af4da561893b (2014-02-26)
Commit message:
Uploaded v0.1.0 preview 3, adds a missing test file & more tests for makeblastdb
modified:
test-data/four_human_proteins_taxid.fasta.pin
tools/ncbi_blast_plus/README.rst
tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
tools/ncbi_blast_plus/ncbi_makeblastdb.xml
added:
test-data/blastn_rhodopsin_vs_three_human_converted.tabular
test-data/convert2blastmask_four_human_masked.maskinfo-asn1
test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary
test-data/four_human_proteins.fasta.log.txt
test-data/four_human_proteins_taxid.fasta.log.txt
test-data/segmasker_four_human.fasta
test-data/segmasker_four_human.maskinfo-asn1
test-data/segmasker_four_human.maskinfo-asn1-binary
tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
removed:
test-data/four_human_proteins.fasta.log
test-data/four_human_proteins_taxid.fasta.log
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/blastn_rhodopsin_vs_three_human_converted.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human_converted.tabular Wed Feb 26 10:35:01 2014 -0500
b
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/convert2blastmask_four_human_masked.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert2blastmask_four_human_masked.maskinfo-asn1 Wed Feb 26 10:35:01 2014 -0500
b
@@ -0,0 +1,158 @@
+Blast-db-mask-info ::= {
+  algo-id 0,
+  algo-program seg,
+  algo-options "window=12; locut=2.2; hicut=2.5",
+  masks {
+    masks {
+      int {
+        from 6,
+        to 18,
+        id swissprot {
+          name "ERP44_HUMAN",
+          accession "Q9BS26",
+          release "reviewed"
+        }
+      },
+      packed-int {
+        {
+          from 11,
+          to 46,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 325,
+          to 332,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 421,
+          to 496,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 501,
+          to 516,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 536,
+          to 558,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 636,
+          to 648,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 737,
+          to 762,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 789,
+          to 806,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 970,
+          to 983,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 999,
+          to 1010,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        }
+      },
+      packed-int {
+        {
+          from 3,
+          to 26,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        },
+        {
+          from 372,
+          to 390,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        },
+        {
+          from 766,
+          to 791,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        },
+        {
+          from 1312,
+          to 1324,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        }
+      },
+      int {
+        from 230,
+        to 246,
+        id swissprot {
+          name "OPSD_HUMAN",
+          accession "P08100",
+          release "reviewed"
+        }
+      }
+    },
+    more FALSE
+  }
+}
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary
b
Binary file test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary has changed
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins.fasta.log
--- a/test-data/four_human_proteins.fasta.log Thu Feb 20 05:39:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,10 +0,0 @@
-
-
-Building a new DB, current time: 11/21/2013 11:16:27
-New DB name:   /tmp/tmpnSjpCP/tmpwAbNo4/database/files/000/dataset_2_files/blastdb
-New DB title:  Just 4 human proteins
-Sequence type: Protein
-Keep Linkouts: T
-Keep MBits: T
-Maximum file size: 1000000000B
-Adding sequences from FASTA; added 4 sequences in 0.00202417 seconds.
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.log.txt Wed Feb 26 10:35:01 2014 -0500
b
@@ -0,0 +1,5 @@
+New DB title:  Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.log
--- a/test-data/four_human_proteins_taxid.fasta.log Thu Feb 20 05:39:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,10 +0,0 @@
-
-
-Building a new DB, current time: 02/10/2014 18:40:09
-New DB name:   four_human_proteins_taxid.fasta
-New DB title:  Just 4 human proteins
-Sequence type: Protein
-Keep Linkouts: T
-Keep MBits: T
-Maximum file size: 1000000000B
-Adding sequences from FASTA; added 4 sequences in 0.00230002 seconds.
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.log.txt Wed Feb 26 10:35:01 2014 -0500
b
@@ -0,0 +1,5 @@
+New DB title:  Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.pin
b
Binary file test-data/four_human_proteins_taxid.fasta.pin has changed
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.fasta Wed Feb 26 10:35:01 2014 -0500
b
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
+qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
+qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
+NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
+pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
+fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
+attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.maskinfo-asn1 Wed Feb 26 10:35:01 2014 -0500
b
@@ -0,0 +1,114 @@
+Blast-db-mask-info ::= {
+  algo-id 1,
+  algo-program seg,
+  algo-options "window=12; locut=2.2; hicut=2.5",
+  masks {
+    masks {
+      int {
+        from 6,
+        to 18,
+        id local id 1
+      },
+      packed-int {
+        {
+          from 11,
+          to 46,
+          id local id 2
+        },
+        {
+          from 325,
+          to 332,
+          id local id 2
+        },
+        {
+          from 421,
+          to 443,
+          id local id 2
+        },
+        {
+          from 437,
+          to 450,
+          id local id 2
+        },
+        {
+          from 447,
+          to 496,
+          id local id 2
+        },
+        {
+          from 501,
+          to 516,
+          id local id 2
+        },
+        {
+          from 536,
+          to 554,
+          id local id 2
+        },
+        {
+          from 545,
+          to 558,
+          id local id 2
+        },
+        {
+          from 636,
+          to 648,
+          id local id 2
+        },
+        {
+          from 737,
+          to 762,
+          id local id 2
+        },
+        {
+          from 789,
+          to 806,
+          id local id 2
+        },
+        {
+          from 970,
+          to 983,
+          id local id 2
+        },
+        {
+          from 999,
+          to 1010,
+          id local id 2
+        }
+      },
+      packed-int {
+        {
+          from 3,
+          to 26,
+          id local id 3
+        },
+        {
+          from 372,
+          to 390,
+          id local id 3
+        },
+        {
+          from 766,
+          to 782,
+          id local id 3
+        },
+        {
+          from 780,
+          to 791,
+          id local id 3
+        },
+        {
+          from 1312,
+          to 1324,
+          id local id 3
+        }
+      },
+      int {
+        from 230,
+        to 246,
+        id local id 4
+      }
+    },
+    more FALSE
+  }
+}
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.maskinfo-asn1-binary
b
Binary file test-data/segmasker_four_human.maskinfo-asn1-binary has changed
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Thu Feb 20 05:39:48 2014 -0500
+++ b/tools/ncbi_blast_plus/README.rst Wed Feb 26 10:35:01 2014 -0500
b
@@ -136,25 +136,29 @@
         - Development moved to GitHub, https://github.com/peterjc/galaxy_blast
         - Updated citation information (Cock et al. 2013).
 v0.0.21 - Use macros to simplify the XML wrappers.
-        - Added wrapper for dustmasker
-        - Enabled masking for makeblastdb
-        - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes
+        - Added wrapper for dustmasker.
+        - Enabled masking for makeblastdb.
+        - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes
           defined in updated blast_datatypes on Galaxy ToolShed.
-        - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26
-        - Now depends on package_blast_plus_2_2_27 in ToolShed
-v0.0.22 - More use macros to simplify the wrappers
-        - Set number of threads via $GALAXY_SLOTS environment variable
-        - More descriptive default output names
-        - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18)
+        - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26.
+        - Now depends on package_blast_plus_2_2_27 in ToolShed.
+v0.0.22 - More use of macros to simplify the wrappers.
+        - Set number of threads via $GALAXY_SLOTS environment variable.
+        - More descriptive default output names.
+        - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18).
         - Pre-check for duplicate identifiers in makeblastdb wrapper.
-        - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27
-        - Now depends on package_blast_plus_2_2_28 in ToolShed
+        - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27.
+        - Now depends on package_blast_plus_2_2_28 in ToolShed.
         - Extended tabular output includes 'salltitles' as column 25.
-v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed
-        - Tablar output now includes option to pick specific columns
+v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed.
+        - Tabular output now includes option to pick specific columns.
         - BLAST XML to tabular tool supports multiple input files.
-        - More detailed descriptions for BLASTN and BLASTP task option
+        - More detailed descriptions for BLASTN and BLASTP task option.
+        - Wrappers for segmasker, dustmasker and convert2blastmask.
+        - Supports using maskinfo with makeblastdb wrapper.
         - Supports setting a taxonomy ID in makeblastdb wrapper.
+        - Subtle changes like new conditional settings will require some old
+          workflows to be updated to cope.
 ======= ======================================================================
 
 
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Wed Feb 26 10:35:01 2014 -0500
b
@@ -0,0 +1,87 @@
+<tool id="ncbi_convert2blastmask_wrapper" name="NCBI BLAST+ convert2blastmask" version="0.1.00">
+    <description>Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb</description>
+    <macros>
+        <token name="@BINARY@">convert2blastmask</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## Cheetah template ("##" lines are comments); Galaxy turns newlines into spaces.
+## Paths and free-text values are double quoted so each reaches the tool as one argument.
+convert2blastmask
+-in "$infile"
+-masking_algorithm "$masking_algorithm"
+-masking_options "$masking_options"
+$parse_seqids
+-out "$outfile"
+-outfmt $outformat
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="masked FASTA file"/>
+        <param name="masking_algorithm" type="select" label="Used masking algorithm">
+            <option value="dust">DUST</option>
+            <option value="seg" selected="true">SEG</option>
+            <option value="windowmasker">windowmasker</option>
+            <option value="repeat">repeat</option>
+            <option value="other">other</option>
+        </param>
+        <param name="masking_options" type="text" value="" size="20" label="Masking algorithm options to create the masked input"
+            help="free text to describe the options used to create the masking files. (-masking_options)">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable"><remove value="&quot;" /><remove value="`" /><remove value="$" /><remove value="\" /></valid>
+            </sanitizer>
+        </param>
+        <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="true" label="Parse Seq-ids in FASTA input" help="(-parse_seqids)" />
+        <param name="outformat" type="select" label="Output format">
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo_xml</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="maskinfo-asn1" label="SEG Masked File">
+            <change_format>
+                <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+ <!-- Not needed: maskinfo_asn1_text already matches the default format declared on the data element.
+                <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" />
+ -->
+                <when input="outformat" value="maskinfo_xml" format="xml" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" />
+            <param name="masking_algorithm" value="seg" />
+            <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" />
+            <param name="parse_seqids" value="True" />
+            <param name="outformat" value="maskinfo_asn1_bin" />
+            <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1-binary" />
+        </test>
+        <test>
+            <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" />
+            <param name="masking_algorithm" value="seg" />
+            <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" />
+            <param name="parse_seqids" value="True" />
+            <param name="outformat" value="maskinfo_asn1_text" />
+            <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb.
+
+More information about convert2blastmask can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+    </help>
+</tool>
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Feb 20 05:39:48 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Wed Feb 26 10:35:01 2014 -0500
b
@@ -27,27 +27,24 @@
         <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" />
         <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" />
         <param name="outformat" type="select" label="Output format">
-<!-- acclist and maskinfo_xml are listed as possible output formats in
-     "dustmasker -help", but were not recognized by NCBI BLAST up to
-     release 2.2.27+. Fixed in BLAST 2.2.28+.
-     seqloc_* formats are not very useful -->
-<!--            <option value="acclist">acclist</option>-->
+            <!-- seqloc_* formats are not very useful
+                 and what BLAST+ calls 'interval' is not what Galaxy calls interval format
+            -->
             <option value="fasta">FASTA</option>
-            <option value="interval" selected="true">interval</option>
             <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
-            <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option>
-<!--            <option value="maskinfo_xml">maskinfo_xml</option>
-            <option value="seqloc_asn1_bin">seqloc_asn1_bin</option>
-            <option value="seqloc_asn1_text">seqloc_asn1_text</option>
-            <option value="seqloc_xml">seqloc_xml</option>-->
+            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo_xml</option>
         </param>
     </inputs>
     <outputs>
-        <data name="outfile" format="interval" label="DUST Masked File">
+        <data name="outfile" format="maskinfo-asn1" label="DUST Masked File">
             <change_format>
                 <when input="outformat" value="fasta" format="fasta" />
                 <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+ <!--
                 <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" />
+ -->
+                <when input="outformat" value="maskinfo_xml" format="xml" />
             </change_format>
         </data>
     </outputs>
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Feb 20 05:39:48 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Feb 26 10:35:01 2014 -0500
b
b'@@ -8,21 +8,15 @@\n     <command interpreter="python">check_no_duplicates.py\n ##First check for duplicates (since BLAST+ 2.2.28 fails to do so)\n ##and abort (via the ampersand ampersand trick) if any are found.\n-#for $i in $in\n-"${i.file}"\n-#end for\n+#for i in $input_file#"${i}" #end for#\n &amp;&amp;\n makeblastdb -out "${os.path.join($outfile.extra_files_path,\'blastdb\')}"\n $parse_seqids\n $hash_index\n ## Single call to -in with multiple filenames space separated with outer quotes\n ## (presumably any filenames with spaces would be a problem). Note this gives\n-## some extra spaces, e.g. -in " file1 file2 file3  " but BLAST seems happy:\n--in "\n-#for $i in $in\n-${i.file}\n-#end for\n-"\n+## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy:\n+-in "#for i in $input_file#${i} #end for#"\n #if $title:\n -title "$title"\n #else:\n@@ -33,20 +27,13 @@\n ## --------------------------------------------------------------------\n ## Masking\n ## --------------------------------------------------------------------\n-#set $mask_string = \'\'\n-#set $sep = \'-mask_data \'\n-#for $i in $mask_data\n-#set $mask_string += $sep + str($i.file)\n-#set $sep = \',\'\n+## HACK: If no mask files, evaluates as a list with just None in it:\n+## See Trello issue https://trello.com/c/lp5YmA1O\n+#if \' \'.join( map(str, $mask_data_file) ) != \'None\':\n+#for i in $mask_data_file:\n+-mask_data "${i}"\n #end for\n-$mask_string\n-## #set $gi_mask_string = \'\'\n-## #set $sep = \'-gi_mask -gi_mask_name \'\n-## #for $i in $gi_mask\n-## #set $gi_mask_string += $sep + str($i.file)\n-## #set $sep = \',\'\n-## #end for\n-## $gi_mask_string\n+#end if\n ## --------------------------------------------------------------------\n ## Taxonomy\n ## --------------------------------------------------------------------\n@@ -58,7 +45,7 @@\n #end if\n ## --------------------------------------------------------------------\n ## Capture the stdout log information to the primary 
file (plain text):\n-&gt;&gt; "$outfile"\n+&gt; "$outfile"\n     </command>\n     <expand macro="stdio" />\n     <inputs>\n@@ -66,29 +53,18 @@\n             <option value="prot">protein</option>\n             <option value="nucl">nucleotide</option>\n         </param>\n-        <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)\n+        <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)?\n              NOTE Double check the new database would be self contained first\n-        <repeat name="in" title="BLAST or FASTA Database" min="1">\n-            <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" />\n-        </repeat>\n         -->\n-        <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? -->\n-        <repeat name="in" title="FASTA file" min="1">\n-            <param name="file" type="data" format="fasta" />\n-        </repeat>\n+        <!-- Note this is a mandatory parameter - default should be most recent FASTA file -->\n+        <param name="input_file" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" />\n         <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />\n         <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe \'|\' symbols" />\n         <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." 
/>\n         <!-- SEQUENCE MASKING OPTIONS -->\n-        <repeat name="mask_data" title="Masking data file">\n-            <param name="mask_data_file" type="data" format='..b'p in the log file as well.\n+             Likewise there is a datestamp in the log file as well, so use contains comparison\n+             With and without the masking makes no difference.\n              With and without the taxid the only real difference is in the *.phr file.\n         -->\n         <test>\n             <param name="dbtype" value="prot" />\n-            <param name="file" value="four_human_proteins.fasta" ftype="fasta" />\n+            <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n             <param name="title" value="Just 4 human proteins" />\n             <param name="parse_seqids" value="" />\n             <param name="hash_index" value="true" />\n-            <output name="out_file" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6">\n+            <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">\n                 <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />\n                 <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />\n                 <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />\n@@ -143,13 +120,13 @@\n         </test>\n         <test>\n             <param name="dbtype" value="prot" />\n-            <param name="file" value="four_human_proteins.fasta" ftype="fasta" />\n+            <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n             <param name="title" value="Just 4 human proteins" />\n             <param name="parse_seqids" value="" />\n             <param name="hash_index" value="true" />\n             <param name="taxselect" value="id" />\n             <param name="taxid" value="9606" />\n-     
       <output name="out_file" file="four_human_proteins_taxid.fasta.log" ftype="blastdbp" lines_diff="6">\n+            <output name="out_file" compare="contains" file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp">\n                 <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" />\n                 <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" />\n                 <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" />\n@@ -160,6 +137,24 @@\n                 <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" name="blastdb.psi" />\n             </output>\n         </test>\n+        <test>\n+            <param name="dbtype" value="prot" />\n+            <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n+            <param name="title" value="Just 4 human proteins" />\n+            <param name="parse_seqids" value="" />\n+            <param name="hash_index" value="true" />\n+            <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" />\n+            <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">\n+                <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />\n+                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />\n+                <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />\n+                <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />\n+                <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />\n+                <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" />\n+                <extra_files type="file" 
value="four_human_proteins.fasta.psd" name="blastdb.psd" />\n+                <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />\n+            </output>\n+        </test>\n     </tests>\n     <help>\n **What it does**\n'
b
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Wed Feb 26 10:35:01 2014 -0500
b
@@ -0,0 +1,101 @@
+<tool id="ncbi_segmasker_wrapper" name="NCBI BLAST+ segmasker" version="0.1.00">
+    <description>low-complexity regions in protein sequences</description>
+    <macros>
+        <token name="@BINARY@">segmasker</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template, which allows some Python-based syntax. Lines starting with hash hash are comments; Galaxy will turn newlines into spaces.
+## The input source follows db_opts_selector: "db" and "histdb" are read as BLAST databases (-infmt blastdb); any other value reads the subject file as FASTA.
+segmasker
+#if $db_opts.db_opts_selector == "db":
+  -in "${db_opts.database.fields.path}" -infmt blastdb
+#elif $db_opts.db_opts_selector == "histdb":
+  -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb
+#else:
+  -in "$subject" -infmt fasta
+#end if
+-out "$outfile"
+-window $window
+-locut $locut
+-hicut $hicut
+-outfmt $outformat
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <expand macro="input_conditional_protein_db" />
+        <param name="window" type="integer" value="12" label="SEG window length" help="(-window)" />
+        <param name="locut" type="float" value="2.2" label="SEG low cutoff" help="(-locut)" />
+        <param name="hicut" type="float" value="2.5" label="SEG high cutoff" help="(-hicut)" />
+        <param name="outformat" type="select" label="Output format">
+            <!-- The selected value is passed to segmasker as -outfmt. The seqloc_* formats are not offered
+                 because they are not very useful, and what BLAST+ calls 'interval' is not what Galaxy calls interval format.
+            -->
+            <option value="fasta">FASTA</option>
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo_xml</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="maskinfo-asn1" label="SEG Masked File">
+            <change_format>
+                <when input="outformat" value="fasta" format="fasta" />
+                <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+ <!-- Left disabled; presumably redundant, as it would set maskinfo-asn1, which is already the default format of this output.
+                <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" />
+ -->
+                <when input="outformat" value="maskinfo_xml" format="xml" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests> <!-- All three tests run four_human_proteins.fasta as a FASTA subject with window=12, locut=2.2, hicut=2.5 (the same values as the input defaults), varying only outformat: fasta, maskinfo_asn1_bin, maskinfo_asn1_text. -->
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="window" value="12" />
+            <param name="locut" value="2.2" />
+            <param name="hicut" value="2.5" />
+            <param name="outformat" value="fasta" />
+            <output name="outfile" file="segmasker_four_human.fasta" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="window" value="12" />
+            <param name="locut" value="2.2" />
+            <param name="hicut" value="2.5" />
+            <param name="outformat" value="maskinfo_asn1_bin" />
+            <output name="outfile" file="segmasker_four_human.maskinfo-asn1-binary" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="window" value="12" />
+            <param name="locut" value="2.2" />
+            <param name="hicut" value="2.5" />
+            <param name="outformat" value="maskinfo_asn1_text" />
+            <output name="outfile" file="segmasker_four_human.maskinfo-asn1" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+This tool identifies and masks out low-complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm.
+
+If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for the NCBI BLAST+ makeblastdb tool.
+
+More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+.. _SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+    </help>
+</tool>