Previous changeset 39:22b7cdcf4960 (2014-02-20) Next changeset 41:af4da561893b (2014-02-26) |
Commit message:
Uploaded v0.1.0 preview 3, adds a missing test file & more tests for makeblastdb |
modified:
test-data/four_human_proteins_taxid.fasta.pin tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml tools/ncbi_blast_plus/ncbi_makeblastdb.xml |
added:
test-data/blastn_rhodopsin_vs_three_human_converted.tabular test-data/convert2blastmask_four_human_masked.maskinfo-asn1 test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary test-data/four_human_proteins.fasta.log.txt test-data/four_human_proteins_taxid.fasta.log.txt test-data/segmasker_four_human.fasta test-data/segmasker_four_human.maskinfo-asn1 test-data/segmasker_four_human.maskinfo-asn1-binary tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml |
removed:
test-data/four_human_proteins.fasta.log test-data/four_human_proteins_taxid.fasta.log |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/blastn_rhodopsin_vs_three_human_converted.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human_converted.tabular Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208 |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/convert2blastmask_four_human_masked.maskinfo-asn1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/convert2blastmask_four_human_masked.maskinfo-asn1 Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -0,0 +1,158 @@ +Blast-db-mask-info ::= { + algo-id 0, + algo-program seg, + algo-options "window=12; locut=2.2; hicut=2.5", + masks { + masks { + int { + from 6, + to 18, + id swissprot { + name "ERP44_HUMAN", + accession "Q9BS26", + release "reviewed" + } + }, + packed-int { + { + from 11, + to 46, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 325, + to 332, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 421, + to 496, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 501, + to 516, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 536, + to 558, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 636, + to 648, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 737, + to 762, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 789, + to 806, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 970, + to 983, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 999, + to 1010, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + } + }, + packed-int { + { + from 3, + to 26, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 372, + to 390, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 766, + to 791, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 1312, + to 1324, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + } + }, + int { + from 230, + to 246, + id swissprot { + name "OPSD_HUMAN", + accession "P08100", + release "reviewed" + } + } + }, + more FALSE + } +} |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary |
b |
Binary file test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary has changed |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins.fasta.log --- a/test-data/four_human_proteins.fasta.log Thu Feb 20 05:39:48 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,10 +0,0 @@ - - -Building a new DB, current time: 11/21/2013 11:16:27 -New DB name: /tmp/tmpnSjpCP/tmpwAbNo4/database/files/000/dataset_2_files/blastdb -New DB title: Just 4 human proteins -Sequence type: Protein -Keep Linkouts: T -Keep MBits: T -Maximum file size: 1000000000B -Adding sequences from FASTA; added 4 sequences in 0.00202417 seconds. |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins.fasta.log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta.log.txt Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -0,0 +1,5 @@ +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.log --- a/test-data/four_human_proteins_taxid.fasta.log Thu Feb 20 05:39:48 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,10 +0,0 @@ - - -Building a new DB, current time: 02/10/2014 18:40:09 -New DB name: four_human_proteins_taxid.fasta -New DB title: Just 4 human proteins -Sequence type: Protein -Keep Linkouts: T -Keep MBits: T -Maximum file size: 1000000000B -Adding sequences from FASTA; added 4 sequences in 0.00230002 seconds. |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.log.txt Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -0,0 +1,5 @@ +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.pin |
b |
Binary file test-data/four_human_proteins_taxid.fasta.pin has changed |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/segmasker_four_human.fasta Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -0,0 +1,61 @@ +>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 +MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF +SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK +REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER +VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK +CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD +CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF +HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL +>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 +MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG +GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS +DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD +LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG +KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP +DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT +IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE +Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq +qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy +qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV +ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD +KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD +QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE +NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA +QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK +APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD +EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR +HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS +WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ +SQQSQPVELDPFGAAPFPSKQ +>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 +MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL +QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL +VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE +ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL +GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG +CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC +TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL +EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE +RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ +NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS +DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE +RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL +KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf +pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV +SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV +SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG +PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR +EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG +FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA +AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV +RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN +CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme +fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN +PS +>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY +VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG +GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP +EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes +attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI +YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.maskinfo-asn1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/segmasker_four_human.maskinfo-asn1 Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -0,0 +1,114 @@ +Blast-db-mask-info ::= { + algo-id 1, + algo-program seg, + algo-options "window=12; locut=2.2; hicut=2.5", + masks { + masks { + int { + from 6, + to 18, + id local id 1 + }, + packed-int { + { + from 11, + to 46, + id local id 2 + }, + { + from 325, + to 332, + id local id 2 + }, + { + from 421, + to 443, + id local id 2 + }, + { + from 437, + to 450, + id local id 2 + }, + { + from 447, + to 496, + id local id 2 + }, + { + from 501, + to 516, + id local id 2 + }, + { + from 536, + to 554, + id local id 2 + }, + { + from 545, + to 558, + id local id 2 + }, + { + from 636, + to 648, + id local id 2 + }, + { + from 737, + to 762, + id local id 2 + }, + { + from 789, + to 806, + id local id 2 + }, + { + from 970, + to 983, + id local id 2 + }, + { + from 999, + to 1010, + id local id 2 + } + }, + packed-int { + { + from 3, + to 26, + id local id 3 + }, + { + from 372, + to 390, + id local id 3 + }, + { + from 766, + to 782, + id local id 3 + }, + { + from 780, + to 791, + id local id 3 + }, + { + from 1312, + to 1324, + id local id 3 + } + }, + int { + from 230, + to 246, + id local id 4 + } + }, + more FALSE + } +} |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.maskinfo-asn1-binary |
b |
Binary file test-data/segmasker_four_human.maskinfo-asn1-binary has changed |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/README.rst --- a/tools/ncbi_blast_plus/README.rst Thu Feb 20 05:39:48 2014 -0500 +++ b/tools/ncbi_blast_plus/README.rst Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -136,25 +136,29 @@ - Development moved to GitHub, https://github.com/peterjc/galaxy_blast - Updated citation information (Cock et al. 2013). v0.0.21 - Use macros to simplify the XML wrappers. - - Added wrapper for dustmasker - - Enabled masking for makeblastdb - - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes + - Added wrapper for dustmasker. + - Enabled masking for makeblastdb. + - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes. defined in updated blast_datatypes on Galaxy ToolShed. - - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26 - - Now depends on package_blast_plus_2_2_27 in ToolShed -v0.0.22 - More use macros to simplify the wrappers - - Set number of threads via $GALAXY_SLOTS environment variable - - More descriptive default output names - - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18) + - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26. + - Now depends on package_blast_plus_2_2_27 in ToolShed. +v0.0.22 - More use macros to simplify the wrappers. + - Set number of threads via $GALAXY_SLOTS environment variable. + - More descriptive default output names. + - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18). - Pre-check for duplicate identifiers in makeblastdb wrapper. - - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27 - - Now depends on package_blast_plus_2_2_28 in ToolShed + - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27. + - Now depends on package_blast_plus_2_2_28 in ToolShed. - Extended tabular output includes 'salltitles' as column 25. -v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed - - Tablar output now includes option to pick specific columns +v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed. + - Tablar output now includes option to pick specific columns. - BLAST XML to tabular tool supports multiple input files. - - More detailed descriptions for BLASTN and BLASTP task option + - More detailed descriptions for BLASTN and BLASTP task option. + - Wrappers for segmasker, dustmasker and convert2blastmask. + - Supports using maskinfo with makeblastdb wrapper. - Supports setting a taxonomy ID in makeblastdb wrapper. + - Subtle changes like new conditional settings will require some old + workflows be updated to cope. ======= ====================================================================== |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -0,0 +1,87 @@ +<tool id="ncbi_convert2blastmask_wrapper" name="NCBI BLAST+ convert2blastmask" version="0.1.00"> + <description>Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb</description> + <macros> + <token name="@BINARY@">convert2blastmask</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +convert2blastmask +-in $infile +-masking_algorithm "$masking_algorithm" +-masking_options "$masking_options" +$parse_seqids +-out "$outfile" +-outfmt $outformat + </command> + <expand macro="stdio" /> + <inputs> + <param name="infile" type="data" format="fasta" label="masked FASTA file"/> + <param name="masking_algorithm" type="select" label="Used masking algorithm"> + <option value="dust">DUST</option> + <option value="seg" selected="true">SEG</option> + <option value="windowmasker">windowmasker</option> + <option value="repeat">repeat</option> + <option value="other">other</option> + </param> + <param name="masking_options" type="text" value="" size="20" label="Masking algorithm options to create the masked input" + help ="free text to describe the options used to create the masking files. (-masking_options)"> + <sanitizer invalid_char=""> + <valid initial="string.printable" /> + </sanitizer> + </param> + <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="true" label="Parse Seq-ids in FASTA input" help="(-parse_seqids)" /> + <param name="outformat" type="select" label="Output format"> + <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option> + <option value="maskinfo_asn1_text" selected="True">maskinfo ASN.1 text</option> + <option value="maskinfo_xml">maskinfo_xml</option> + </param> + </inputs> + <outputs> + <data name="outfile" format="maskinfo-asn1" label="SEG Masked File"> + <change_format> + <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" /> + <!-- + <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" /> + --> + <when input="outformat" value="maskinfo_xml" format="xml" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" /> + <param name="masking_algorithm" value="seg" /> + <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" /> + <param name="parse_seqids" value="True" /> + <param name="outformat" value="maskinfo_asn1_bin" /> + <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1-binary" /> + </test> + <test> + <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" /> + <param name="masking_algorithm" value="seg" /> + <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" /> + <param name="parse_seqids" value="True" /> + <param name="outformat" value="maskinfo_asn1_text" /> + <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1" /> + </test> + </tests> + <help> +**What it does** + +Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb. + +More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers (a more specific paper covering this wrapper is planned): + +@REFERENCES@ + </help> +</tool> |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Feb 20 05:39:48 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -27,27 +27,24 @@ <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" /> <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" /> <param name="outformat" type="select" label="Output format"> -<!-- acclist and maskinfo_xml are listed as possible output formats in - "dustmasker -help", but were not recognized by NCBI BLAST up to - release 2.2.27+. Fixed in BLAST 2.2.28+. - seqloc_* formats are not very useful --> -<!-- <option value="acclist">acclist</option>--> + <!-- seqloc_* formats are not very useful + and what BLAST+ calls 'interval' is not what Galaxy calls interval format + --> <option value="fasta">FASTA</option> - <option value="interval" selected="true">interval</option> <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option> - <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option> -<!-- <option value="maskinfo_xml">maskinfo_xml</option> - <option value="seqloc_asn1_bin">seqloc_asn1_bin</option> - <option value="seqloc_asn1_text">seqloc_asn1_text</option> - <option value="seqloc_xml">seqloc_xml</option>--> + <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option> + <option value="maskinfo_xml">maskinfo_xml</option> </param> </inputs> <outputs> - <data name="outfile" format="interval" label="DUST Masked File"> + <data name="outfile" format="maskinfo-asn1" label="DUST Masked File"> <change_format> <when input="outformat" value="fasta" format="fasta" /> <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" /> + <!-- <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" /> + --> + <when input="outformat" value="maskinfo_xml" format="xml" /> </change_format> </data> </outputs> |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_makeblastdb.xml --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Feb 20 05:39:48 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Feb 26 10:35:01 2014 -0500 |
b |
b'@@ -8,21 +8,15 @@\n <command interpreter="python">check_no_duplicates.py\n ##First check for duplicates (since BLAST+ 2.2.28 fails to do so)\n ##and abort (via the ampersand ampersand trick) if any are found.\n-#for $i in $in\n-"${i.file}"\n-#end for\n+#for i in $input_file#"${i}" #end for#\n &&\n makeblastdb -out "${os.path.join($outfile.extra_files_path,\'blastdb\')}"\n $parse_seqids\n $hash_index\n ## Single call to -in with multiple filenames space separated with outer quotes\n ## (presumably any filenames with spaces would be a problem). Note this gives\n-## some extra spaces, e.g. -in " file1 file2 file3 " but BLAST seems happy:\n--in "\n-#for $i in $in\n-${i.file}\n-#end for\n-"\n+## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy:\n+-in "#for i in $input_file#${i} #end for#"\n #if $title:\n -title "$title"\n #else:\n@@ -33,20 +27,13 @@\n ## --------------------------------------------------------------------\n ## Masking\n ## --------------------------------------------------------------------\n-#set $mask_string = \'\'\n-#set $sep = \'-mask_data \'\n-#for $i in $mask_data\n-#set $mask_string += $sep + str($i.file)\n-#set $sep = \',\'\n+## HACK: If no mask files, evaluates as a list with just None in it:\n+## See Trello issue https://trello.com/c/lp5YmA1O\n+#if \' \'.join( map(str, $mask_data_file) ) != \'None\':\n+#for i in $mask_data_file:\n+-mask_data "${i}"\n #end for\n-$mask_string\n-## #set $gi_mask_string = \'\'\n-## #set $sep = \'-gi_mask -gi_mask_name \'\n-## #for $i in $gi_mask\n-## #set $gi_mask_string += $sep + str($i.file)\n-## #set $sep = \',\'\n-## #end for\n-## $gi_mask_string\n+#end if\n ## --------------------------------------------------------------------\n ## Taxonomy\n ## --------------------------------------------------------------------\n@@ -58,7 +45,7 @@\n #end if\n ## --------------------------------------------------------------------\n ## Capture the stdout log information to the primary file (plain text):\n->> "$outfile"\n+> "$outfile"\n </command>\n <expand macro="stdio" />\n <inputs>\n@@ -66,29 +53,18 @@\n <option value="prot">protein</option>\n <option value="nucl">nucleotide</option>\n </param>\n- <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)\n+ <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)?\n NOTE Double check the new database would be self contained first\n- <repeat name="in" title="BLAST or FASTA Database" min="1">\n- <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" />\n- </repeat>\n -->\n- <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? -->\n- <repeat name="in" title="FASTA file" min="1">\n- <param name="file" type="data" format="fasta" />\n- </repeat>\n+ <!-- Note this is a mandatory parameter - default should be most recent FASTA file -->\n+ <param name="input_file" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" />\n <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />\n <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe \'|\' symbols" />\n <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />\n <!-- SEQUENCE MASKING OPTIONS -->\n- <repeat name="mask_data" title="Masking data file">\n- <param name="mask_data_file" type="data" format='..b'p in the log file as well.\n+ Likewise there is a datestamp in the log file as well, so use contains comparison\n+ With and without the masking makes no difference.\n With and without the taxid the only real difference is in the *.phr file.\n -->\n <test>\n <param name="dbtype" value="prot" />\n- <param name="file" value="four_human_proteins.fasta" ftype="fasta" />\n+ <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n <param name="title" value="Just 4 human proteins" />\n <param name="parse_seqids" value="" />\n <param name="hash_index" value="true" />\n- <output name="out_file" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6">\n+ <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">\n <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />\n <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />\n <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />\n@@ -143,13 +120,13 @@\n </test>\n <test>\n <param name="dbtype" value="prot" />\n- <param name="file" value="four_human_proteins.fasta" ftype="fasta" />\n+ <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n <param name="title" value="Just 4 human proteins" />\n <param name="parse_seqids" value="" />\n <param name="hash_index" value="true" />\n <param name="taxselect" value="id" />\n <param name="taxid" value="9606" />\n- <output name="out_file" file="four_human_proteins_taxid.fasta.log" ftype="blastdbp" lines_diff="6">\n+ <output name="out_file" compare="contains" file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp">\n <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" />\n <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" />\n <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" />\n@@ -160,6 +137,24 @@\n <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" name="blastdb.psi" />\n </output>\n </test>\n+ <test>\n+ <param name="dbtype" value="prot" />\n+ <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n+ <param name="title" value="Just 4 human proteins" />\n+ <param name="parse_seqids" value="" />\n+ <param name="hash_index" value="true" />\n+ <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" />\n+ <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">\n+ <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />\n+ <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />\n+ <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />\n+ <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />\n+ <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />\n+ <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" />\n+ <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" />\n+ <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />\n+ </output>\n+ </test>\n </tests>\n <help>\n **What it does**\n' |
b |
diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Wed Feb 26 10:35:01 2014 -0500 |
b |
@@ -0,0 +1,101 @@ +<tool id="ncbi_segmasker_wrapper" name="NCBI BLAST+ segmasker" version="0.1.00"> + <description>low-complexity regions in protein sequences</description> + <macros> + <token name="@BINARY@">segmasker</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +segmasker +#if $db_opts.db_opts_selector == "db": + -in "${db_opts.database.fields.path}" -infmt blastdb +#elif $db_opts.db_opts_selector == "histdb": + -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb +#else: + -in "$subject" -infmt fasta +#end if +-out "$outfile" +-window $window +-locut $locut +-hicut $hicut +-outfmt $outformat + </command> + <expand macro="stdio" /> + <inputs> + <expand macro="input_conditional_protein_db" /> + <param name="window" type="integer" value="12" label="SEG window length" help="(-window)" /> + <param name="locut" type="float" value="2.2" label="SEG low cutoff" help="(-locut)" /> + <param name="hicut" type="float" value="2.5" label="SEG high cutoff" help="(-hicut)" /> + <param name="outformat" type="select" label="Output format"> + <!-- seqloc_* formats are not very useful + and what BLAST+ calls 'interval' is not what Galaxy calls interval format + --> + <option value="fasta">FASTA</option> + <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option> + <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option> + <option value="maskinfo_xml">maskinfo_xml</option> + </param> + </inputs> + <outputs> + <data name="outfile" format="maskinfo-asn1" label="SEG Masked File"> + <change_format> + <when input="outformat" value="fasta" format="fasta" /> + <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" /> + <!-- + <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" /> + --> + <when input="outformat" value="maskinfo_xml" format="xml" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="window" value="12" /> + <param name="locut" value="2.2" /> + <param name="hicut" value="2.5" /> + <param name="outformat" value="fasta" /> + <output name="outfile" file="segmasker_four_human.fasta" /> + </test> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="window" value="12" /> + <param name="locut" value="2.2" /> + <param name="hicut" value="2.5" /> + <param name="outformat" value="maskinfo_asn1_bin" /> + <output name="outfile" file="segmasker_four_human.maskinfo-asn1-binary" /> + </test> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="window" value="12" /> + <param name="locut" value="2.2" /> + <param name="hicut" value="2.5" /> + <param name="outformat" value="maskinfo_asn1_text" /> + <output name="outfile" file="segmasker_four_human.maskinfo-asn1" /> + </test> + </tests> + <help> +**What it does** + +This tool identifies and masks out low complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm. + +If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool. + +More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ +.. _SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706 + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers (a more specific paper covering this wrapper is planned): + +@REFERENCES@ + </help> +</tool> |