# HG changeset patch
# User peterjc
# Date 1393428901 18000
# Node ID f83e5d79b6abaeaf774f9299a0b646be3f8a689c
# Parent 22b7cdcf49603e8626ee5a7d15e961df8d947b5f
Uploaded v0.1.0 preview 3, adds a missing test file & more tests for makeblastdb
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/blastn_rhodopsin_vs_three_human_converted.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human_converted.tabular Wed Feb 26 10:35:01 2014 -0500
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/convert2blastmask_four_human_masked.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert2blastmask_four_human_masked.maskinfo-asn1 Wed Feb 26 10:35:01 2014 -0500
@@ -0,0 +1,158 @@
+Blast-db-mask-info ::= {
+ algo-id 0,
+ algo-program seg,
+ algo-options "window=12; locut=2.2; hicut=2.5",
+ masks {
+ masks {
+ int {
+ from 6,
+ to 18,
+ id swissprot {
+ name "ERP44_HUMAN",
+ accession "Q9BS26",
+ release "reviewed"
+ }
+ },
+ packed-int {
+ {
+ from 11,
+ to 46,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 325,
+ to 332,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 421,
+ to 496,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 501,
+ to 516,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 536,
+ to 558,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 636,
+ to 648,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 737,
+ to 762,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 789,
+ to 806,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 970,
+ to 983,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 999,
+ to 1010,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ }
+ },
+ packed-int {
+ {
+ from 3,
+ to 26,
+ id swissprot {
+ name "INSR_HUMAN",
+ accession "P06213",
+ release "reviewed"
+ }
+ },
+ {
+ from 372,
+ to 390,
+ id swissprot {
+ name "INSR_HUMAN",
+ accession "P06213",
+ release "reviewed"
+ }
+ },
+ {
+ from 766,
+ to 791,
+ id swissprot {
+ name "INSR_HUMAN",
+ accession "P06213",
+ release "reviewed"
+ }
+ },
+ {
+ from 1312,
+ to 1324,
+ id swissprot {
+ name "INSR_HUMAN",
+ accession "P06213",
+ release "reviewed"
+ }
+ }
+ },
+ int {
+ from 230,
+ to 246,
+ id swissprot {
+ name "OPSD_HUMAN",
+ accession "P08100",
+ release "reviewed"
+ }
+ }
+ },
+ more FALSE
+ }
+}
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary
Binary file test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary has changed
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins.fasta.log
--- a/test-data/four_human_proteins.fasta.log Thu Feb 20 05:39:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-
-
-Building a new DB, current time: 11/21/2013 11:16:27
-New DB name: /tmp/tmpnSjpCP/tmpwAbNo4/database/files/000/dataset_2_files/blastdb
-New DB title: Just 4 human proteins
-Sequence type: Protein
-Keep Linkouts: T
-Keep MBits: T
-Maximum file size: 1000000000B
-Adding sequences from FASTA; added 4 sequences in 0.00202417 seconds.
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.log.txt Wed Feb 26 10:35:01 2014 -0500
@@ -0,0 +1,5 @@
+New DB title: Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.log
--- a/test-data/four_human_proteins_taxid.fasta.log Thu Feb 20 05:39:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-
-
-Building a new DB, current time: 02/10/2014 18:40:09
-New DB name: four_human_proteins_taxid.fasta
-New DB title: Just 4 human proteins
-Sequence type: Protein
-Keep Linkouts: T
-Keep MBits: T
-Maximum file size: 1000000000B
-Adding sequences from FASTA; added 4 sequences in 0.00230002 seconds.
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.log.txt Wed Feb 26 10:35:01 2014 -0500
@@ -0,0 +1,5 @@
+New DB title: Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.pin
Binary file test-data/four_human_proteins_taxid.fasta.pin has changed
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.fasta Wed Feb 26 10:35:01 2014 -0500
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
+qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
+qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
+NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
+pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
+fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
+attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.maskinfo-asn1 Wed Feb 26 10:35:01 2014 -0500
@@ -0,0 +1,114 @@
+Blast-db-mask-info ::= {
+ algo-id 1,
+ algo-program seg,
+ algo-options "window=12; locut=2.2; hicut=2.5",
+ masks {
+ masks {
+ int {
+ from 6,
+ to 18,
+ id local id 1
+ },
+ packed-int {
+ {
+ from 11,
+ to 46,
+ id local id 2
+ },
+ {
+ from 325,
+ to 332,
+ id local id 2
+ },
+ {
+ from 421,
+ to 443,
+ id local id 2
+ },
+ {
+ from 437,
+ to 450,
+ id local id 2
+ },
+ {
+ from 447,
+ to 496,
+ id local id 2
+ },
+ {
+ from 501,
+ to 516,
+ id local id 2
+ },
+ {
+ from 536,
+ to 554,
+ id local id 2
+ },
+ {
+ from 545,
+ to 558,
+ id local id 2
+ },
+ {
+ from 636,
+ to 648,
+ id local id 2
+ },
+ {
+ from 737,
+ to 762,
+ id local id 2
+ },
+ {
+ from 789,
+ to 806,
+ id local id 2
+ },
+ {
+ from 970,
+ to 983,
+ id local id 2
+ },
+ {
+ from 999,
+ to 1010,
+ id local id 2
+ }
+ },
+ packed-int {
+ {
+ from 3,
+ to 26,
+ id local id 3
+ },
+ {
+ from 372,
+ to 390,
+ id local id 3
+ },
+ {
+ from 766,
+ to 782,
+ id local id 3
+ },
+ {
+ from 780,
+ to 791,
+ id local id 3
+ },
+ {
+ from 1312,
+ to 1324,
+ id local id 3
+ }
+ },
+ int {
+ from 230,
+ to 246,
+ id local id 4
+ }
+ },
+ more FALSE
+ }
+}
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.maskinfo-asn1-binary
Binary file test-data/segmasker_four_human.maskinfo-asn1-binary has changed
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Thu Feb 20 05:39:48 2014 -0500
+++ b/tools/ncbi_blast_plus/README.rst Wed Feb 26 10:35:01 2014 -0500
@@ -136,25 +136,29 @@
- Development moved to GitHub, https://github.com/peterjc/galaxy_blast
- Updated citation information (Cock et al. 2013).
v0.0.21 - Use macros to simplify the XML wrappers.
- - Added wrapper for dustmasker
- - Enabled masking for makeblastdb
- - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes
+ - Added wrapper for dustmasker.
+ - Enabled masking for makeblastdb.
+ - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes
defined in updated blast_datatypes on Galaxy ToolShed.
- - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26
- - Now depends on package_blast_plus_2_2_27 in ToolShed
-v0.0.22 - More use macros to simplify the wrappers
- - Set number of threads via $GALAXY_SLOTS environment variable
- - More descriptive default output names
- - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18)
+ - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26.
+ - Now depends on package_blast_plus_2_2_27 in ToolShed.
+v0.0.22 - More use of macros to simplify the wrappers.
+ - Set number of threads via $GALAXY_SLOTS environment variable.
+ - More descriptive default output names.
+ - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18).
- Pre-check for duplicate identifiers in makeblastdb wrapper.
- - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27
- - Now depends on package_blast_plus_2_2_28 in ToolShed
+ - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27.
+ - Now depends on package_blast_plus_2_2_28 in ToolShed.
- Extended tabular output includes 'salltitles' as column 25.
-v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed
- - Tablar output now includes option to pick specific columns
+v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed.
+ - Tabular output now includes option to pick specific columns.
- BLAST XML to tabular tool supports multiple input files.
- - More detailed descriptions for BLASTN and BLASTP task option
+ - More detailed descriptions for BLASTN and BLASTP task option.
+ - Wrappers for segmasker, dustmasker and convert2blastmask.
+ - Supports using maskinfo with makeblastdb wrapper.
- Supports setting a taxonomy ID in makeblastdb wrapper.
+ - Subtle changes like new conditional settings will require some old
+ workflows be updated to cope.
======= ======================================================================
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Wed Feb 26 10:35:01 2014 -0500
@@ -0,0 +1,87 @@
+
+ Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb
+
+ convert2blastmask
+ ncbi_macros.xml
+
+
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+convert2blastmask
+-in $infile
+-masking_algorithm "$masking_algorithm"
+-masking_options "$masking_options"
+$parse_seqids
+-out "$outfile"
+-outfmt $outformat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb.
+
+More information about convert2blastmask can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+
+
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Feb 20 05:39:48 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Wed Feb 26 10:35:01 2014 -0500
@@ -27,27 +27,24 @@
-
-
+
-
-
-
+
+
-
+
+
+
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Feb 20 05:39:48 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Feb 26 10:35:01 2014 -0500
@@ -8,21 +8,15 @@
check_no_duplicates.py
##First check for duplicates (since BLAST+ 2.2.28 fails to do so)
##and abort (via the ampersand ampersand trick) if any are found.
-#for $i in $in
-"${i.file}"
-#end for
+#for i in $input_file#"${i}" #end for#
&&
makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}"
$parse_seqids
$hash_index
## Single call to -in with multiple filenames space separated with outer quotes
## (presumably any filenames with spaces would be a problem). Note this gives
-## some extra spaces, e.g. -in " file1 file2 file3 " but BLAST seems happy:
--in "
-#for $i in $in
-${i.file}
-#end for
-"
+## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy:
+-in "#for i in $input_file#${i} #end for#"
#if $title:
-title "$title"
#else:
@@ -33,20 +27,13 @@
## --------------------------------------------------------------------
## Masking
## --------------------------------------------------------------------
-#set $mask_string = ''
-#set $sep = '-mask_data '
-#for $i in $mask_data
-#set $mask_string += $sep + str($i.file)
-#set $sep = ','
+## HACK: If no mask files, evaluates as a list with just None in it:
+## See Trello issue https://trello.com/c/lp5YmA1O
+#if ' '.join( map(str, $mask_data_file) ) != 'None':
+#for i in $mask_data_file:
+-mask_data "${i}"
#end for
-$mask_string
-## #set $gi_mask_string = ''
-## #set $sep = '-gi_mask -gi_mask_name '
-## #for $i in $gi_mask
-## #set $gi_mask_string += $sep + str($i.file)
-## #set $sep = ','
-## #end for
-## $gi_mask_string
+#end if
## --------------------------------------------------------------------
## Taxonomy
## --------------------------------------------------------------------
@@ -58,7 +45,7 @@
#end if
## --------------------------------------------------------------------
## Capture the stdout log information to the primary file (plain text):
->> "$outfile"
+> "$outfile"
@@ -66,29 +53,18 @@
-
-
-
-
-
+
+
-
-
-
-
-
+
+
+
@@ -121,16 +97,17 @@
-
+
-
-
+
-
+
@@ -160,6 +137,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
**What it does**
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Wed Feb 26 10:35:01 2014 -0500
@@ -0,0 +1,101 @@
+
+ low-complexity regions in protein sequences
+
+ segmasker
+ ncbi_macros.xml
+
+
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+segmasker
+#if $db_opts.db_opts_selector == "db":
+ -in "${db_opts.database.fields.path}" -infmt blastdb
+#elif $db_opts.db_opts_selector == "histdb":
+ -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb
+#else:
+ -in "$subject" -infmt fasta
+#end if
+-out "$outfile"
+-window $window
+-locut $locut
+-hicut $hicut
+-outfmt $outformat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool identifies and masks out low complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm.
+
+If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
+
+More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+.. _SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+
+