# HG changeset patch # User peterjc # Date 1393428901 18000 # Node ID f83e5d79b6abaeaf774f9299a0b646be3f8a689c # Parent 22b7cdcf49603e8626ee5a7d15e961df8d947b5f Uploaded v0.1.0 preview 3, adds a missing test file & more tests for makeblastdb diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/blastn_rhodopsin_vs_three_human_converted.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human_converted.tabular Wed Feb 26 10:35:01 2014 -0500 @@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208 diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/convert2blastmask_four_human_masked.maskinfo-asn1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/convert2blastmask_four_human_masked.maskinfo-asn1 Wed Feb 26 10:35:01 2014 -0500 @@ -0,0 +1,158 @@ +Blast-db-mask-info ::= { + algo-id 0, + algo-program seg, + algo-options "window=12; locut=2.2; hicut=2.5", + masks { + masks { + int { + from 6, + to 18, + id swissprot { + name "ERP44_HUMAN", + accession "Q9BS26", + release "reviewed" + } + }, + packed-int { + { + from 11, + to 46, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 325, + to 332, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 421, + to 496, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", 
+ release "reviewed" + } + }, + { + from 501, + to 516, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 536, + to 558, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 636, + to 648, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 737, + to 762, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 789, + to 806, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 970, + to 983, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 999, + to 1010, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + } + }, + packed-int { + { + from 3, + to 26, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 372, + to 390, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 766, + to 791, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 1312, + to 1324, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + } + }, + int { + from 230, + to 246, + id swissprot { + name "OPSD_HUMAN", + accession "P08100", + release "reviewed" + } + } + }, + more FALSE + } +} diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary Binary file test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary has changed diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins.fasta.log --- a/test-data/four_human_proteins.fasta.log Thu Feb 20 05:39:48 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ - - -Building a new DB, current time: 11/21/2013 11:16:27 -New DB name: 
/tmp/tmpnSjpCP/tmpwAbNo4/database/files/000/dataset_2_files/blastdb -New DB title: Just 4 human proteins -Sequence type: Protein -Keep Linkouts: T -Keep MBits: T -Maximum file size: 1000000000B -Adding sequences from FASTA; added 4 sequences in 0.00202417 seconds. diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins.fasta.log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta.log.txt Wed Feb 26 10:35:01 2014 -0500 @@ -0,0 +1,5 @@ +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.log --- a/test-data/four_human_proteins_taxid.fasta.log Thu Feb 20 05:39:48 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ - - -Building a new DB, current time: 02/10/2014 18:40:09 -New DB name: four_human_proteins_taxid.fasta -New DB title: Just 4 human proteins -Sequence type: Protein -Keep Linkouts: T -Keep MBits: T -Maximum file size: 1000000000B -Adding sequences from FASTA; added 4 sequences in 0.00230002 seconds. 
diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.log.txt Wed Feb 26 10:35:01 2014 -0500 @@ -0,0 +1,5 @@ +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/four_human_proteins_taxid.fasta.pin Binary file test-data/four_human_proteins_taxid.fasta.pin has changed diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/segmasker_four_human.fasta Wed Feb 26 10:35:01 2014 -0500 @@ -0,0 +1,61 @@ +>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 +MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF +SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK +REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER +VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK +CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD +CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF +HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL +>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 +MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG +GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS +DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD +LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG +KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP +DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT +IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE +Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq +qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy +qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV 
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD +KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD +QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE +NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA +QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK +APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD +EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR +HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS +WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ +SQQSQPVELDPFGAAPFPSKQ +>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 +MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL +QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL +VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE +ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL +GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG +CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC +TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL +EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE +RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ +NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS +DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE +RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL +KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf +pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV +SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV +SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG +PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR +EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG +FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA +AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV +RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN 
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme +fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN +PS +>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY +VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG +GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP +EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes +attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI +YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA diff -r 22b7cdcf4960 -r f83e5d79b6ab test-data/segmasker_four_human.maskinfo-asn1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/segmasker_four_human.maskinfo-asn1 Wed Feb 26 10:35:01 2014 -0500 @@ -0,0 +1,114 @@ +Blast-db-mask-info ::= { + algo-id 1, + algo-program seg, + algo-options "window=12; locut=2.2; hicut=2.5", + masks { + masks { + int { + from 6, + to 18, + id local id 1 + }, + packed-int { + { + from 11, + to 46, + id local id 2 + }, + { + from 325, + to 332, + id local id 2 + }, + { + from 421, + to 443, + id local id 2 + }, + { + from 437, + to 450, + id local id 2 + }, + { + from 447, + to 496, + id local id 2 + }, + { + from 501, + to 516, + id local id 2 + }, + { + from 536, + to 554, + id local id 2 + }, + { + from 545, + to 558, + id local id 2 + }, + { + from 636, + to 648, + id local id 2 + }, + { + from 737, + to 762, + id local id 2 + }, + { + from 789, + to 806, + id local id 2 + }, + { + from 970, + to 983, + id local id 2 + }, + { + from 999, + to 1010, + id local id 2 + } + }, + packed-int { + { + from 3, + to 26, + id local id 3 + }, + { + from 372, + to 390, + id local id 3 + }, + { + from 766, + to 782, + id local id 3 + }, + { + from 780, + to 791, + id local id 3 + }, + { + from 1312, + to 1324, + id local id 3 + } + }, + int { + from 230, + to 246, + id local id 4 + } + }, + more FALSE + } +} diff -r 22b7cdcf4960 -r f83e5d79b6ab 
test-data/segmasker_four_human.maskinfo-asn1-binary Binary file test-data/segmasker_four_human.maskinfo-asn1-binary has changed diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/README.rst --- a/tools/ncbi_blast_plus/README.rst Thu Feb 20 05:39:48 2014 -0500 +++ b/tools/ncbi_blast_plus/README.rst Wed Feb 26 10:35:01 2014 -0500 @@ -136,25 +136,29 @@ - Development moved to GitHub, https://github.com/peterjc/galaxy_blast - Updated citation information (Cock et al. 2013). v0.0.21 - Use macros to simplify the XML wrappers. - - Added wrapper for dustmasker - - Enabled masking for makeblastdb - - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes + - Added wrapper for dustmasker. + - Enabled masking for makeblastdb. + - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes. defined in updated blast_datatypes on Galaxy ToolShed. - - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26 - - Now depends on package_blast_plus_2_2_27 in ToolShed -v0.0.22 - More use macros to simplify the wrappers - - Set number of threads via $GALAXY_SLOTS environment variable - - More descriptive default output names - - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18) + - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26. + - Now depends on package_blast_plus_2_2_27 in ToolShed. +v0.0.22 - More use macros to simplify the wrappers. + - Set number of threads via $GALAXY_SLOTS environment variable. + - More descriptive default output names. + - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18). - Pre-check for duplicate identifiers in makeblastdb wrapper. - - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27 - - Now depends on package_blast_plus_2_2_28 in ToolShed + - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27. + - Now depends on package_blast_plus_2_2_28 in ToolShed. - Extended tabular output includes 'salltitles' as column 25. 
-v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed - Tablar output now includes option to pick specific columns +v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed. + - Tabular output now includes option to pick specific columns. - BLAST XML to tabular tool supports multiple input files. - - More detailed descriptions for BLASTN and BLASTP task option + - More detailed descriptions for BLASTN and BLASTP task option. + - Wrappers for segmasker, dustmasker and convert2blastmask. + - Supports using maskinfo with makeblastdb wrapper. - Supports setting a taxonomy ID in makeblastdb wrapper. + - Subtle changes like new conditional settings will require some old + workflows be updated to cope. ======= ====================================================================== diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Wed Feb 26 10:35:01 2014 -0500 @@ -0,0 +1,87 @@ + + Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb + + convert2blastmask + ncbi_macros.xml + + + +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +convert2blastmask +-in $infile +-masking_algorithm "$masking_algorithm" +-masking_options "$masking_options" +$parse_seqids +-out "$outfile" +-outfmt $outformat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb. + +More information about convert2blastmask can be found in the `BLAST Command Line Applications User Manual`_. + +.. 
_BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers (a more specific paper covering this wrapper is planned): + +@REFERENCES@ + + diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Feb 20 05:39:48 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Wed Feb 26 10:35:01 2014 -0500 @@ -27,27 +27,24 @@ - - + - - - + + - + + + diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_makeblastdb.xml --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Feb 20 05:39:48 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Feb 26 10:35:01 2014 -0500 @@ -8,21 +8,15 @@ check_no_duplicates.py ##First check for duplicates (since BLAST+ 2.2.28 fails to do so) ##and abort (via the ampersand ampersand trick) if any are found. -#for $i in $in -"${i.file}" -#end for +#for i in $input_file#"${i}" #end for# && makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" $parse_seqids $hash_index ## Single call to -in with multiple filenames space separated with outer quotes ## (presumably any filenames with spaces would be a problem). Note this gives -## some extra spaces, e.g. -in " file1 file2 file3 " but BLAST seems happy: --in " -#for $i in $in -${i.file} -#end for -" +## some extra spaces, e.g. 
-in "file1 file2 file3 " but BLAST seems happy: +-in "#for i in $input_file#${i} #end for#" #if $title: -title "$title" #else: @@ -33,20 +27,13 @@ ## -------------------------------------------------------------------- ## Masking ## -------------------------------------------------------------------- -#set $mask_string = '' -#set $sep = '-mask_data ' -#for $i in $mask_data -#set $mask_string += $sep + str($i.file) -#set $sep = ',' +## HACK: If no mask files, evaluates as a list with just None in it: +## See Trello issue https://trello.com/c/lp5YmA1O +#if ' '.join( map(str, $mask_data_file) ) != 'None': +#for i in $mask_data_file: +-mask_data "${i}" #end for -$mask_string -## #set $gi_mask_string = '' -## #set $sep = '-gi_mask -gi_mask_name ' -## #for $i in $gi_mask -## #set $gi_mask_string += $sep + str($i.file) -## #set $sep = ',' -## #end for -## $gi_mask_string +#end if ## -------------------------------------------------------------------- ## Taxonomy ## -------------------------------------------------------------------- @@ -58,7 +45,7 @@ #end if ## -------------------------------------------------------------------- ## Capture the stdout log information to the primary file (plain text): ->> "$outfile" +> "$outfile" @@ -66,29 +53,18 @@ - - - - - + + - - - - - + + + @@ -121,16 +97,17 @@ - + - + @@ -143,13 +120,13 @@ - + - + @@ -160,6 +137,24 @@ + + + + + + + + + + + + + + + + + + **What it does** diff -r 22b7cdcf4960 -r f83e5d79b6ab tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Wed Feb 26 10:35:01 2014 -0500 @@ -0,0 +1,101 @@ + + low-complexity regions in protein sequences + + segmasker + ncbi_macros.xml + + + +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces +segmasker +#if $db_opts.db_opts_selector == "db": + -in "${db_opts.database.fields.path}" -infmt blastdb +#elif $db_opts.db_opts_selector == "histdb": + -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb +#else: + -in "$subject" -infmt fasta +#end if +-out "$outfile" +-window $window +-locut $locut +-hicut $hicut +-outfmt $outformat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool identifies and masks out low complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm. + +If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool. + +More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ +.. _SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706 + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers (a more specific paper covering this wrapper is planned): + +@REFERENCES@ + +