Previous changeset 38:2f7fac29bb3c (2014-01-15) Next changeset 40:f83e5d79b6ab (2014-02-26) |
Commit message:
Uploaded v0.1.0 preview 2, includes missing new test files |
modified:
tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/blastxml_to_tabular.py tools/ncbi_blast_plus/blastxml_to_tabular.xml tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml tools/ncbi_blast_plus/ncbi_macros.xml tools/ncbi_blast_plus/ncbi_makeblastdb.xml tools/ncbi_blast_plus/tool_dependencies.xml |
added:
test-data/blastn_rhodopsin_vs_three_human.columns.tabular test-data/blastn_rhodopsin_vs_three_human.xml test-data/four_human_proteins_taxid.fasta.log test-data/four_human_proteins_taxid.fasta.phd test-data/four_human_proteins_taxid.fasta.phi test-data/four_human_proteins_taxid.fasta.phr test-data/four_human_proteins_taxid.fasta.pin test-data/four_human_proteins_taxid.fasta.pog test-data/four_human_proteins_taxid.fasta.psd test-data/four_human_proteins_taxid.fasta.psi test-data/four_human_proteins_taxid.fasta.psq |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/blastn_rhodopsin_vs_three_human.columns.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human.columns.tabular Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 4301 1213 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 983 1213 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1047 1213 |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/blastn_rhodopsin_vs_three_human.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human.xml Thu Feb 20 05:39:48 2014 -0500 |
b |
b'@@ -0,0 +1,549 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastn</BlastOutput_program>\n+ <BlastOutput_version>BLASTN 2.2.29+</BlastOutput_version>\n+ <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), "A greedy algorithm for aligning DNA sequences", J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>\n+ <BlastOutput_db></BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</BlastOutput_query-def>\n+ <BlastOutput_query-len>1047</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_expect>1e-40</Parameters_expect>\n+ <Parameters_sc-match>1</Parameters_sc-match>\n+ <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>\n+ <Parameters_gap-open>0</Parameters_gap-open>\n+ <Parameters_gap-extend>0</Parameters_gap-extend>\n+ <Parameters_filter>L;m;</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>4933992</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ 
<Iteration_iter-num>2</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>4933992</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>3</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>Subject_3</Hit_id>\n+ <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def>\n+ <Hit_accession>Subject_3</Hit_accession>\n+ <Hit_len>1213</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>1474.75</Hsp_bit-score>\n+ <Hsp_score>798</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>1047</Hsp_query-to>\n+ <Hsp_hit-from>88</Hsp_hit-from>\n+ <Hsp_hit-to>1134</Hsp_hit-to>\n+ <Hsp_query-frame>1</Hsp_query-frame>\n+ <Hsp_hit-frame>1</Hsp_hit-frame>\n+ <Hsp_identity>964</Hsp_identity>\n+ <Hsp_positive>964</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>1047</Hsp_align-len>\n+ <Hsp_qseq>ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCC'..b'|||||||| |||||||||||||||||| | || || ||||||||||||||||||||||| |||||||| || ||||||||||| | || |||||||||| |||||| || 
||||||||||| || |||||||| ||||| || || || ||||| | |||||||||||||||||| | |||||||||||||| ||||||||||||||||| || ||||||||||||||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||| || ||||||||||||||||| |||||||| || || ||||| |||| ||||||||| || |||||||| ||||| ||||||||||||| || ||||| |||||||||| | | |||| |||||| ||||| || ||||||||||||||||| || ||||||| ||||||| ||||| |||| || |||||||| |||||||| |||||||||||||||||||| || ||||||||||||||||| |||||||| |||||||| |||||||||||||| || ||||||||||||||||||||||||||||| || |||||| |||||||||| | ||||| ||||||||||||||||| ||||| ||| |||| || |||||||||||||||||||| || ||||||||||||| || | ||| |||| ||||| |||||||| ||||||||||||||||||||||||||||||||| ||||||| ||||||| ||||||||||| || |||||||| |||||||| | |||||||||||||| ||||| ||||| |||||||| ||||||</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>4933992</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>16</Iteration_iter-num>\n+ <Iteration_query-ID>Query_6</Iteration_query-ID>\n+ <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+ <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>6353949</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ 
<Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>17</Iteration_iter-num>\n+ <Iteration_query-ID>Query_6</Iteration_query-ID>\n+ <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+ <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>6353949</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>18</Iteration_iter-num>\n+ <Iteration_query-ID>Query_6</Iteration_query-ID>\n+ <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+ <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>6353949</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n' |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.log Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -0,0 +1,10 @@ + + +Building a new DB, current time: 02/10/2014 18:40:09 +New DB name: four_human_proteins_taxid.fasta +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B +Adding sequences from FASTA; added 4 sequences in 0.00230002 seconds. |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.phd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.phd Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -0,0 +1,4 @@ +11117184492 +29249033410 +36665887501 +5392473183 |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.phi |
b |
Binary file test-data/four_human_proteins_taxid.fasta.phi has changed |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.phr |
b |
Binary file test-data/four_human_proteins_taxid.fasta.phr has changed |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.pin |
b |
Binary file test-data/four_human_proteins_taxid.fasta.pin has changed |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.pog |
b |
Binary file test-data/four_human_proteins_taxid.fasta.pog has changed |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.psd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.psd Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -0,0 +1,4 @@ +gnl|bl_ord_id|00 +gnl|bl_ord_id|11 +gnl|bl_ord_id|22 +gnl|bl_ord_id|33 |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.psi |
b |
Binary file test-data/four_human_proteins_taxid.fasta.psi has changed |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.psq |
b |
Binary file test-data/four_human_proteins_taxid.fasta.psq has changed |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/README.rst --- a/tools/ncbi_blast_plus/README.rst Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/README.rst Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -25,17 +25,7 @@ (``blastxml``) and protein and nucleotide BLAST databases (``blastdbp`` and ``blastdbn``). -You must tell Galaxy about any system level BLAST databases using configuration -files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein -databases like NR), and blastdb_d.loc (protein domain databases like CDD or -SMART) which are located in the tool-data/ folder. Sample files are included -which explain the tab-based format to use. - -You can download the NCBI provided databases as tar-balls from here: - -* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR) -* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD) - +See the configuration notes below. Manual Installation =================== @@ -78,6 +68,31 @@ ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools +Configuration +============= + +You must tell Galaxy about any system level BLAST databases using configuration +files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein +databases like NR), and blastdb_d.loc (protein domain databases like CDD or +SMART) which are located in the tool-data/ folder. Sample files are included +which explain the tab-based format to use. + +You can download the NCBI provided databases as tar-balls from here: + +* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR) +* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD) + +The BLAST+ binaries support multi-threaded operation, which is handled via the +$GALAXY_SLOTS environment variable. This should be set automatically by Galaxy +via your job runner settings, which allows you to (for example) allocate four +cores to each BLAST job. + +In addition, the BLAST+ wrappers also support high level parallelism by task +splitting if ``use_tasked_jobs = True`` is enabled in your ``universe_wsgi.ini`` +configuration file. 
Essentially, the FASTA input query files are broken up into +batches of 1000 sequences, a separate BLAST child job is run for each chunk, +and then the BLAST output files are merged (in order). This is transparent +for the end user. History ======= @@ -105,7 +120,7 @@ (all too often our users were having to re-run searches just to get one of the missing columns like query or subject length) v0.0.18 - Defensive quoting of filenames in case of spaces (where possible, - BLAST+ handling of some mult-file arguments is problematic). + BLAST+ handling of some multi-file arguments is problematic). v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc for the domain databases they use (e.g. CDD, PFAM or SMART). - Correct case of exception regular expression (for error handling @@ -139,6 +154,7 @@ - Tabular output now includes option to pick specific columns - BLAST XML to tabular tool supports multiple input files. - More detailed descriptions for BLASTN and BLASTP task option + - Supports setting a taxonomy ID in makeblastdb wrapper. ======= ====================================================================== |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/blastxml_to_tabular.py --- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Thu Feb 20 05:39:48 2014 -0500 |
[ |
@@ -66,7 +66,7 @@ from optparse import OptionParser if "-v" in sys.argv or "--version" in sys.argv: - print "v0.0.23" + print "v0.1.00" sys.exit(0) if sys.version_info[:2] >= ( 2, 5 ): |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/blastxml_to_tabular.xml --- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -28,33 +28,33 @@ <when value="ext"/> <when value="cols"> <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns"> - <option selected="true" value="qseqid">1 qseqid = Query Seq-id (ID of your sequence)</option> - <option selected="true" value="sseqid">2 sseqid = Subject Seq-id (ID of the database hit)</option> - <option selected="true" value="pident">3 pident = Percentage of identical matches</option> - <option selected="true" value="length">4 length = Alignment length</option> - <option selected="true" value="mismatch">5 mismatch = Number of mismatches</option> - <option selected="true" value="gapopen">6 gapopen = Number of gap openings</option> - <option selected="true" value="qstart">7 qstart = Start of alignment in query</option> - <option selected="true" value="qend">8 qend = End of alignment in query</option> - <option selected="true" value="sstart">9 sstart = Start of alignment in subject (database hit)</option> - <option selected="true" value="send">10 send = End of alignment in subject (database hit)</option> - <option selected="true" value="evalue">11 evalue = Expectation value (E-value)</option> - <option selected="true" value="bitscore">12 bitscore = Bit score</option> + <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option> + <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option> + <option selected="true" value="pident">pident = Percentage of identical matches</option> + <option selected="true" value="length">length = Alignment length</option> + <option selected="true" value="mismatch">mismatch = Number of mismatches</option> + <option selected="true" value="gapopen">gapopen = Number of gap openings</option> + <option selected="true" value="qstart">qstart = Start of alignment in query</option> + <option selected="true" value="qend">qend = End of alignment in query</option> + <option selected="true" value="sstart">sstart = Start of 
alignment in subject (database hit)</option> + <option selected="true" value="send">send = End of alignment in subject (database hit)</option> + <option selected="true" value="evalue">evalue = Expectation value (E-value)</option> + <option selected="true" value="bitscore">bitscore = Bit score</option> </param> <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns"> - <option value="sallseqid">13 sallseqid = All subject Seq-id(s), separated by a ';'</option> - <option value="score">14 score = Raw score</option> - <option value="nident">15 nident = Number of identical matches</option> - <option value="positive">16 positive = Number of positive-scoring matches</option> - <option value="gaps">17 gaps = Total number of gaps</option> - <option value="ppos">18 ppos = Percentage of positive-scoring matches</option> - <option value="qframe">19 qframe = Query frame</option> - <option value="sframe">20 sframe = Subject frame</option> - <option value="qseq">21 qseq = Aligned part of query sequence</option> - <option value="sseq">22 sseq = Aligned part of subject sequence</option> - <option value="qlen">23 qlen = Query sequence length</option> - <option value="slen">24 slen = Subject sequence length</option> - <option value="salltitles">25 salltitles = All subject title(s), separated by a '<>'</option> + <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> + <option value="score">score = Raw score</option> + <option value="nident">nident = Number of identical matches</option> + <option value="positive">positive = Number of positive-scoring matches</option> + <option value="gaps">gaps = Total number of gaps</option> + <option value="ppos">ppos = Percentage of positive-scoring matches</option> + <option value="qframe">qframe = Query frame</option> + <option value="sframe">sframe = Subject frame</option> + <option value="qseq">qseq = Aligned part of query sequence</option> + <option value="sseq">sseq = 
Aligned part of subject sequence</option> + <option value="qlen">qlen = Query sequence length</option> + <option value="slen">slen = Subject sequence length</option> + <option value="salltitles">salltitles = All subject title(s), separated by a '<>'</option> </param> </when> </conditional> |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -83,13 +83,14 @@ <help> **What it does** -This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm. +This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST_ algorithm. If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool. More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_. .. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ +.. _DUST: http://www.ncbi.nlm.nih.gov/pubmed/16796549 **References** |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/ncbi_macros.xml --- a/tools/ncbi_blast_plus/ncbi_macros.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -31,33 +31,33 @@ <when value="ext"/> <when value="cols"> <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns"> - <option selected="true" value="qseqid">1 qseqid = Query Seq-id (ID of your sequence)</option> - <option selected="true" value="sseqid">2 sseqid = Subject Seq-id (ID of the database hit)</option> - <option selected="true" value="pident">3 pident = Percentage of identical matches</option> - <option selected="true" value="length">4 length = Alignment length</option> - <option selected="true" value="mismatch">5 mismatch = Number of mismatches</option> - <option selected="true" value="gapopen">6 gapopen = Number of gap openings</option> - <option selected="true" value="qstart">7 qstart = Start of alignment in query</option> - <option selected="true" value="qend">8 qend = End of alignment in query</option> - <option selected="true" value="sstart">9 sstart = Start of alignment in subject (database hit)</option> - <option selected="true" value="send">10 send = End of alignment in subject (database hit)</option> - <option selected="true" value="evalue">11 evalue = Expectation value (E-value)</option> - <option selected="true" value="bitscore">12 bitscore = Bit score</option> + <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option> + <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option> + <option selected="true" value="pident">pident = Percentage of identical matches</option> + <option selected="true" value="length">length = Alignment length</option> + <option selected="true" value="mismatch">mismatch = Number of mismatches</option> + <option selected="true" value="gapopen">gapopen = Number of gap openings</option> + <option selected="true" value="qstart">qstart = Start of alignment in query</option> + <option selected="true" value="qend">qend = End of alignment in query</option> + <option selected="true" value="sstart">sstart = Start of 
alignment in subject (database hit)</option> + <option selected="true" value="send">send = End of alignment in subject (database hit)</option> + <option selected="true" value="evalue">evalue = Expectation value (E-value)</option> + <option selected="true" value="bitscore">bitscore = Bit score</option> </param> <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns"> - <option value="sallseqid">13 sallseqid = All subject Seq-id(s), separated by a ';'</option> - <option value="score">14 score = Raw score</option> - <option value="nident">15 nident = Number of identical matches</option> - <option value="positive">16 positive = Number of positive-scoring matches</option> - <option value="gaps">17 gaps = Total number of gaps</option> - <option value="ppos">18 ppos = Percentage of positive-scoring matches</option> - <option value="qframe">19 qframe = Query frame</option> - <option value="sframe">20 sframe = Subject frame</option> - <option value="qseq">21 qseq = Aligned part of query sequence</option> - <option value="sseq">22 sseq = Aligned part of subject sequence</option> - <option value="qlen">23 qlen = Query sequence length</option> - <option value="slen">24 slen = Subject sequence length</option> - <option value="salltitles">25 salltitles = All subject title(s), separated by a '<>'</option> + <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> + <option value="score">score = Raw score</option> + <option value="nident">nident = Number of identical matches</option> + <option value="positive">positive = Number of positive-scoring matches</option> + <option value="gaps">gaps = Total number of gaps</option> + <option value="ppos">ppos = Percentage of positive-scoring matches</option> + <option value="qframe">qframe = Query frame</option> + <option value="sframe">sframe = Subject frame</option> + <option value="qseq">qseq = Aligned part of query sequence</option> + <option value="sseq">sseq = 
Aligned part of subject sequence</option> + <option value="qlen">qlen = Query sequence length</option> + <option value="slen">slen = Subject sequence length</option> + <option value="salltitles">salltitles = All subject title(s), separated by a '<>'</option> </param> <!-- TODO, the other columns, like taxonomy --> </when> |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/ncbi_makeblastdb.xml --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -29,7 +29,10 @@ ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful -title "BLAST Database" #end if --dbtype $dbtype +-dbtype $dbtype +## -------------------------------------------------------------------- +## Masking +## -------------------------------------------------------------------- #set $mask_string = '' #set $sep = '-mask_data ' #for $i in $mask_data @@ -44,11 +47,15 @@ ## #set $sep = ',' ## #end for ## $gi_mask_string -## #if $tax.select == 'id': -## -taxid $tax.id -## #else if $tax.select == 'map': -## -taxid_map $tax.map -## #end if +## -------------------------------------------------------------------- +## Taxonomy +## -------------------------------------------------------------------- +#if $tax.taxselect == 'id': +-taxid $tax.taxid +## TODO - Can we use a tabular file for the taxonomy mapping? +## #else if $tax.taxselect == 'map': +## -taxid_map $tax.taxmap +#end if ## -------------------------------------------------------------------- ## Capture the stdout log information to the primary file (plain text): >> "$outfile" @@ -83,23 +90,25 @@ --> <!-- TAXONOMY OPTIONS --> - <!-- TODO <conditional name="tax"> - <param name="select" type="select" label="Taxonomy options"> - <option value="">Do not assign sequences to Taxonomy IDs</option> - <option value="id">Assign all sequences to one Taxonomy ID</option> + <param name="taxselect" type="select" label="Taxonomy options"> + <option value="">Do not assign a Taxonomy ID to the sequences</option> + <option value="id">Assign the same Taxonomy ID to all the sequences</option> + <!-- <option value="map">Supply text file mapping sequence IDs to taxnomy IDs</option> + --> </param> <when value=""> </when> <when value="id"> - <param name="id" type="integer" value="" label="NCBI taxonomy ID" help="Integer >=0" /> + <param name="taxid" type="integer" value="" label="NCBI taxonomy ID" help="Integer >=0, e.g. 9606 for Homo sapiens" min="0" /> </when> + <!-- TODO: File format? 
<when value="map"> - <param name="file" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> + <param name="taxmap" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> </when> + --> </conditional> - --> </inputs> <outputs> <!-- If we only accepted one FASTA file, we could use its human name here... --> @@ -112,6 +121,8 @@ </outputs> <tests> <!-- Note the (two line) PIN file is not reproducible run to run. + Likewise there is a datestamp in the log file as well. + With and without the taxid the only real difference is in the *.phr file. --> <test> <param name="dbtype" value="prot" /> @@ -130,6 +141,25 @@ <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" /> </output> </test> + <test> + <param name="dbtype" value="prot" /> + <param name="file" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="title" value="Just 4 human proteins" /> + <param name="parse_seqids" value="" /> + <param name="hash_index" value="true" /> + <param name="taxselect" value="id" /> + <param name="taxid" value="9606" /> + <output name="out_file" file="four_human_proteins_taxid.fasta.log" ftype="blastdbp" lines_diff="6"> + <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.pog" name="blastdb.pog" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.phd" name="blastdb.phd" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.phi" name="blastdb.phi" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.psd" name="blastdb.psd" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" 
name="blastdb.psi" /> + </output> + </test> </tests> <help> **What it does** |
b |
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/tool_dependencies.xml --- a/tools/ncbi_blast_plus/tool_dependencies.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/tool_dependencies.xml Thu Feb 20 05:39:48 2014 -0500 |
b |
@@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> <package name="blast+" version="2.2.29"> - <repository changeset_revision="61c4017d3bf2" name="package_blast_plus_2_2_29" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="c021862e9ea8" name="package_blast_plus_2_2_29" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency> |