Galaxy |

Changeset 7:08be1b2b0bf6 (2013-04-25)

Previous changeset 6:e77e30f1deeb (2013-04-25) Next changeset 8:e710b9446493 (2013-04-25)

Commit message:
Uploaded v0.0.19 preview take 4, added missing wrapper for RPS-TBLASTN

added:
tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml

diff -r e77e30f1deeb -r 08be1b2b0bf6 tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Thu Apr 25 09:05:13 2013 -0400

b'@@ -0,0 +1,237 @@\n+<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.3">\n+ <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description>\n+ \n+ <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>\n+ <requirements>\n+ <requirement type="binary">rpstblastn</requirement>\n+ <requirement type="package" version="2.2.26+">blast+</requirement>\n+ </requirements>\n+ <version_command>rpstblastn -version</version_command>\n+ <command>\n+## The command is a Cheetah template which allows some Python based syntax.\n+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces\n+rpstblastn\n+-query "$query"\n+#if $db_opts.db_opts_selector == "db":\n+ -db "${db_opts.database.fields.path}"\n+#elif $db_opts.db_opts_selector == "histdb":\n+ -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n+#end if\n+-evalue $evalue_cutoff\n+-out "$output1"\n+##Set the extended list here so if/when we add things, saved workflows are not affected\n+#if str($out_format)=="ext":\n+ -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n+#else:\n+ -outfmt $out_format\n+#end if\n+##Seems rpstblastn does not currently support multiple threads :(\n+##-num_threads 8\n+#if $adv_opts.adv_opts_selector=="advanced":\n+$adv_opts.filter_query\n+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n+## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n+-max_target_seqs $adv_opts.max_hits\n+#end if\n+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n+-word_size $adv_opts.word_size\n+#end if\n+$adv_opts.parse_deflines\n+## End of advanced options:\n+#end if\n+ </command>\n+ <stdio>\n+ \n+ <exit_code range="1:" />\n+ 
<exit_code range=":-1" />\n+ \n+ <regex match="Error:" />\n+ <regex match="Exception:" />\n+ </stdio>\n+ <inputs>\n+ <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> \n+ <conditional name="db_opts">\n+ <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)">\n+ <option value="db" selected="True">Locally installed BLAST database</option>\n+ \n+ </param>\n+ <when value="db">\n+ <param name="database" type="select" label="Protein domain database">\n+ <options from_file="blastdb_d.loc">\n+ <column name="value" index="0"/>\n+ <column name="name" index="1"/>\n+ <column name="path" index="2"/>\n+ </options>\n+ </param>\n+ <param name="histdb" type="hidden" value="" />\n+ <param name="subject" type="hidden" value="" />\n+ </when>\n+ \n+ </conditional>\n+ <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />\n+ <param name="out_format" type="select" label="Output format">\n+ <option value="6">Tabul'..b"rce, the eukaryotic \n+counterpart to COGs, see http://www.ncbi.nlm.nih.gov/COG/new/\n+\n+*Cog* - PSSMs from automatically aligned sequences and sequence\n+fragments classified in the COGs resource, which focuses primarily\n+on prokaryotes, see http://www.ncbi.nlm.nih.gov/COG/new/\n+\n+*Pfam* - PSSMs from Pfam-A seed alignment database, see\n+http://pfam.sanger.ac.uk/\n+\n+*Smart* - PSSMs from SMART domain alignment database, see\n+http://smart.embl-heidelberg.de/\n+\n+*Tigr* - PSSMs from TIGRFAM database of protein families, see\n+http://www.jcvi.org/cms/research/projects/tigrfams/overview/\n+\n+*Prk* - PSSMs from automatically aligned stable clusters in the\n+Protein Clusters database, see\n+http://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters\n+\n+The exact list of domain databases offered will depend on how your\n+local Galaxy has been configured.\n+\n+-----\n+\n+**Output format**\n+\n+Because Galaxy focuses on processing tabular data, 
the default output of this\n+tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n+\n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+ 1 qseqid Query Seq-id (ID of your sequence)\n+ 2 sseqid Subject Seq-id (ID of the database hit)\n+ 3 pident Percentage of identical matches\n+ 4 length Alignment length\n+ 5 mismatch Number of mismatches\n+ 6 gapopen Number of gap openings\n+ 7 qstart Start of alignment in query\n+ 8 qend End of alignment in query\n+ 9 sstart Start of alignment in subject (database hit)\n+ 10 send End of alignment in subject (database hit)\n+ 11 evalue Expectation value (E-value)\n+ 12 bitscore Bit score\n+====== ========= ============================================\n+\n+The BLAST+ tools can optionally output additional columns of information,\n+but this takes longer to calculate. Most (but not all) of these columns are\n+included by selecting the extended tabular output. The extra columns are\n+included *after* the standard 12 columns. This is so that you can write\n+workflow filtering steps that accept either the 12 or 24 column tabular\n+BLAST output. 
Galaxy now uses this extended 24 column output by default.\n+\n+====== ============= ===========================================\n+Column NCBI name Description\n+------ ------------- -------------------------------------------\n+ 13 sallseqid All subject Seq-id(s), separated by a ';'\n+ 14 score Raw score\n+ 15 nident Number of identical matches\n+ 16 positive Number of positive-scoring matches\n+ 17 gaps Total number of gaps\n+ 18 ppos Percentage of positive-scoring matches\n+ 19 qframe Query frame\n+ 20 sframe Subject frame\n+ 21 qseq Aligned part of query sequence\n+ 22 sseq Aligned part of subject sequence\n+ 23 qlen Query sequence length\n+ 24 slen Subject sequence length\n+====== ============= ===========================================\n+\n+The third option is BLAST XML output, which is designed to be parsed by\n+another program, and is understood by some Galaxy tools.\n+\n+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+\n+-------\n+\n+**References**\n+\n+Marchler-Bauer A, Bryant SH. CD-Search: protein domain annotations on the fly. Nucleic Acids Res. 2004 Jul 1;32(Web Server issue):W327-31.\n+\n+ </help>\n+</tool>\n"