Repository 'blastxml_to_top_descr'
hg clone https://testtoolshed.g2.bx.psu.edu/repos/peterjc/blastxml_to_top_descr

Changeset 1:ec31e95c56e1 (2013-04-29)
Previous changeset 0:b23e0a323bee (2013-04-29) Next changeset 2:58359ce5bde9 (2013-04-29)
Commit message:
Uploaded v0.0.5
modified:
tools/ncbi_blast_plus/blastxml_to_top_descr.py
tools/ncbi_blast_plus/blastxml_to_top_descr.txt
tools/ncbi_blast_plus/blastxml_to_top_descr.xml
added:
test-data/blastp_four_human_vs_rhodopsin.xml
test-data/blastp_four_human_vs_rhodopsin_top3.tabular
diff -r b23e0a323bee -r ec31e95c56e1 test-data/blastp_four_human_vs_rhodopsin.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_four_human_vs_rhodopsin.xml Mon Apr 29 13:08:52 2013 -0400
[
b'@@ -0,0 +1,646 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>blastp</BlastOutput_program>\n+  <BlastOutput_version>BLASTP 2.2.26+</BlastOutput_version>\n+  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+  <BlastOutput_db></BlastOutput_db>\n+  <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>\n+  <BlastOutput_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>\n+  <BlastOutput_query-len>406</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+      <Parameters_expect>1e-08</Parameters_expect>\n+      <Parameters_gap-open>11</Parameters_gap-open>\n+      <Parameters_gap-extend>1</Parameters_gap-extend>\n+      <Parameters_filter>F</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+  <BlastOutput_iterations>\n+    <Iteration>\n+      <Iteration_iter-num>1</Iteration_iter-num>\n+      <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+      <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+      <Iteration_query-len>406</Iteration_query-len>\n+      <Iteration_hits></Iteration_hits>\n+      <Iteration_stat>\n+        <Statistics>\n+          <Statistics_db-num>0</Statistics_db-num>\n+          <Statistics_db-len>0</Statistics_db-len>\n+          <Statistics_hsp-len>30</Statistics_hsp-len>\n+          <Statistics_eff-space>119568</Statistics_eff-space>\n+          <Statistics_kappa>0.041</Statistics_kappa>\n+          
<Statistics_lambda>0.267</Statistics_lambda>\n+          <Statistics_entropy>0.14</Statistics_entropy>\n+        </Statistics>\n+      </Iteration_stat>\n+      <Iteration_message>No hits found</Iteration_message>\n+    </Iteration>\n+    <Iteration>\n+      <Iteration_iter-num>2</Iteration_iter-num>\n+      <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+      <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+      <Iteration_query-len>406</Iteration_query-len>\n+      <Iteration_hits></Iteration_hits>\n+      <Iteration_stat>\n+        <Statistics>\n+          <Statistics_db-num>0</Statistics_db-num>\n+          <Statistics_db-len>0</Statistics_db-len>\n+          <Statistics_hsp-len>30</Statistics_hsp-len>\n+          <Statistics_eff-space>119568</Statistics_eff-space>\n+          <Statistics_kappa>0.041</Statistics_kappa>\n+          <Statistics_lambda>0.267</Statistics_lambda>\n+          <Statistics_entropy>0.14</Statistics_entropy>\n+        </Statistics>\n+      </Iteration_stat>\n+      <Iteration_message>No hits found</Iteration_message>\n+    </Iteration>\n+    <Iteration>\n+      <Iteration_iter-num>3</Iteration_iter-num>\n+      <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+      <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+      <Iteration_query-len>406</Iteration_query-len>\n+      <Iteration_hits></Iteration_hits>\n+      <Iteration_stat>\n+        <Statistics>\n+          <Statistics_db-num>0</Statistics_db-num>\n+          <Statistics_db-len>0</Statistics_db-len>\n+          <Statistics_hsp-len>30</Statistics_hsp-len>\n+          <Statistics_eff-space>119568</Statistics_eff-space>\n+          <Statistics_kappa>0.041</Statistics_kappa>\n+          <Statistics_lambda>0.267</Statistics_lambda>\n+          <Statistics_entropy>0.14</Statistics_entropy>\n+        
</Statistics>\n+      </Iteration_stat>\n+      <Iteration_message>No hits found</Iteratio'..b'VPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+            </Hsp>\n+          </Hit_hsps>\n+        </Hit>\n+      </Iteration_hits>\n+      <Iteration_stat>\n+        <Statistics>\n+          <Statistics_db-num>0</Statistics_db-num>\n+          <Statistics_db-len>0</Statistics_db-len>\n+          <Statistics_hsp-len>29</Statistics_hsp-len>\n+          <Statistics_eff-space>101761</Statistics_eff-space>\n+          <Statistics_kappa>0.041</Statistics_kappa>\n+          <Statistics_lambda>0.267</Statistics_lambda>\n+          <Statistics_entropy>0.14</Statistics_entropy>\n+        </Statistics>\n+      </Iteration_stat>\n+    </Iteration>\n+    <Iteration>\n+      <Iteration_iter-num>24</Iteration_iter-num>\n+      <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>\n+      <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>\n+      <Iteration_query-len>348</Iteration_query-len>\n+      <Iteration_hits>\n+        <Hit>\n+          <Hit_num>1</Hit_num>\n+          <Hit_id>gi|12583665|dbj|BAB21486.1|</Hit_id>\n+          <Hit_def>fresh water form rod opsin [Conger myriaster]</Hit_def>\n+          <Hit_accession>BAB21486</Hit_accession>\n+          <Hit_len>354</Hit_len>\n+          <Hit_hsps>\n+            <Hsp>\n+              <Hsp_num>1</Hsp_num>\n+              <Hsp_bit-score>599.356</Hsp_bit-score>\n+              <Hsp_score>1544</Hsp_score>\n+              <Hsp_evalue>0</Hsp_evalue>\n+              <Hsp_query-from>1</Hsp_query-from>\n+              <Hsp_query-to>341</Hsp_query-to>\n+              
<Hsp_hit-from>1</Hsp_hit-from>\n+              <Hsp_hit-to>342</Hsp_hit-to>\n+              <Hsp_query-frame>0</Hsp_query-frame>\n+              <Hsp_hit-frame>0</Hsp_hit-frame>\n+              <Hsp_identity>281</Hsp_identity>\n+              <Hsp_positive>314</Hsp_positive>\n+              <Hsp_gaps>1</Hsp_gaps>\n+              <Hsp_align-len>342</Hsp_align-len>\n+              <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE</Hsp_qseq>\n+              <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE</Hsp_hseq>\n+              <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV  TW MALACA PPL GWSRYIPEGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP   +D ASAT SKTE</Hsp_midline>\n+            </Hsp>\n+          </Hit_hsps>\n+        </Hit>\n+      </Iteration_hits>\n+      <Iteration_stat>\n+        <Statistics>\n+          <Statistics_db-num>0</Statistics_db-num>\n+          <Statistics_db-len>0</Statistics_db-len>\n+          <Statistics_hsp-len>29</Statistics_hsp-len>\n+          <Statistics_eff-space>101761</Statistics_eff-space>\n+          <Statistics_kappa>0.041</Statistics_kappa>\n+          <Statistics_lambda>0.267</Statistics_lambda>\n+          
<Statistics_entropy>0.14</Statistics_entropy>\n+        </Statistics>\n+      </Iteration_stat>\n+    </Iteration>\n+  </BlastOutput_iterations>\n+</BlastOutput>\n\\ No newline at end of file\n'
diff -r b23e0a323bee -r ec31e95c56e1 test-data/blastp_four_human_vs_rhodopsin_top3.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_four_human_vs_rhodopsin_top3.tabular Mon Apr 29 13:08:52 2013 -0400
@@ -0,0 +1,25 @@
+#Query BLAST hit 1 BLAST hit 2 BLAST hit 3
+sp|Q9BS26|ERP44_HUMAN
+sp|Q9BS26|ERP44_HUMAN
+sp|Q9BS26|ERP44_HUMAN
+sp|Q9BS26|ERP44_HUMAN
+sp|Q9BS26|ERP44_HUMAN
+sp|Q9BS26|ERP44_HUMAN
+sp|Q9NSY1|BMP2K_HUMAN
+sp|Q9NSY1|BMP2K_HUMAN
+sp|Q9NSY1|BMP2K_HUMAN
+sp|Q9NSY1|BMP2K_HUMAN
+sp|Q9NSY1|BMP2K_HUMAN
+sp|Q9NSY1|BMP2K_HUMAN
+sp|P06213|INSR_HUMAN
+sp|P06213|INSR_HUMAN
+sp|P06213|INSR_HUMAN
+sp|P06213|INSR_HUMAN
+sp|P06213|INSR_HUMAN
+sp|P06213|INSR_HUMAN
+sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]
+sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin
+sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
+sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus]
+sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A rhodopsin [Bos taurus]
+sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster]
diff -r b23e0a323bee -r ec31e95c56e1 tools/ncbi_blast_plus/blastxml_to_top_descr.py
--- a/tools/ncbi_blast_plus/blastxml_to_top_descr.py Mon Apr 29 13:08:32 2013 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_top_descr.py Mon Apr 29 13:08:52 2013 -0400
@@ -7,6 +7,10 @@
 import sys
 import re
 
+if "-v" in sys.argv or "--version" in sys.argv:
+    print "v0.0.5"
+    sys.exit(0)
+
 if sys.version_info[:2] >= ( 2, 5 ):
     import xml.etree.cElementTree as ElementTree
 else:
@@ -58,6 +62,7 @@
 
 
 count = 0
+pos_count = 0
 outfile = open(out_file, 'w')
 outfile.write("#Query\t%s\n" % "\t".join("BLAST hit %i" % (i+1) for i in range(topN)))
 for event, elem in context:
@@ -103,6 +108,8 @@
             assert hit_def not in hit_descrs
             hit_descrs.append(hit_def)
         #print "%r has %i hits" % (qseqid, len(hit_descrs))
+        if hit_descrs:
+            pos_count += 1
         hit_descrs = hit_descrs[:topN]
         while len(hit_descrs) < topN:
             hit_descrs.append("")
@@ -112,4 +119,4 @@
         root.clear()
         elem.clear()
 outfile.close()
-print "%i BLAST results" % count
+print "Of %i queries, %i had BLAST results" % (count, pos_count)
diff -r b23e0a323bee -r ec31e95c56e1 tools/ncbi_blast_plus/blastxml_to_top_descr.txt
--- a/tools/ncbi_blast_plus/blastxml_to_top_descr.txt Mon Apr 29 13:08:32 2013 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_top_descr.txt Mon Apr 29 13:08:52 2013 -0400
@@ -50,6 +50,9 @@
 v0.0.1 - Initial version.
 v0.0.2 - Since BLAST+ was moved out of the Galaxy core, now have a dependency
          on the 'blast_datatypes' repository in the Tool Shed.
+v0.0.3 - Include the test files required to run the unit tests
+v0.0.4 - Quote filenames in case they contain spaces (internal change)
+v0.0.5 - Include number of queries with BLAST matches in stdout (peek text)
 
 
 Developers
@@ -58,10 +61,10 @@
 This script and related tools are being developed on the following hg branch:
 http://bitbucket.org/peterjc/galaxy-central/src/tools
 
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
 the following command from the Galaxy root folder:
 
-$ tar -czf blastxml_to_top_descr.tar.gz tools/ncbi_blast_plus/blastxml_to_top_descr.* tools/ncbi_blast_plus/repository_dependencies.xml
+$ tar -czf blastxml_to_top_descr.tar.gz tools/ncbi_blast_plus/blastxml_to_top_descr.* tools/ncbi_blast_plus/repository_dependencies.xml test-data/blastp_four_human_vs_rhodopsin.xml test-data/blastp_four_human_vs_rhodopsin_top3.tabular
 
 Check this worked:
 
@@ -70,6 +73,8 @@
 tools/ncbi_blast_plus/blastxml_to_top_descr.txt
 tools/ncbi_blast_plus/blastxml_to_top_descr.xml
 tools/ncbi_blast_plus/repository_dependencies.xml
+test-data/blastp_four_human_vs_rhodopsin.xml
+test-data/blastp_four_human_vs_rhodopsin_top3.tabular
 
 
 Licence (MIT/BSD style)
diff -r b23e0a323bee -r ec31e95c56e1 tools/ncbi_blast_plus/blastxml_to_top_descr.xml
--- a/tools/ncbi_blast_plus/blastxml_to_top_descr.xml Mon Apr 29 13:08:32 2013 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_top_descr.xml Mon Apr 29 13:08:52 2013 -0400
@@ -1,7 +1,8 @@
-<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.0.1">
+<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.0.5">
     <description>Make a table from BLAST XML</description>
+    <version_command interpreter="python">blastxml_to_top_descr.py --version</version_command>
     <command interpreter="python">
-      blastxml_to_top_descr.py $blastxml_file $tabular_file $topN
+      blastxml_to_top_descr.py "${blastxml_file}" "${tabular_file}" ${topN}
     </command>
     <inputs>
         <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/>