Previous changeset 0:b23e0a323bee (2013-04-29) Next changeset 2:58359ce5bde9 (2013-04-29) |
Commit message:
Uploaded v0.0.5 |
modified:
tools/ncbi_blast_plus/blastxml_to_top_descr.py tools/ncbi_blast_plus/blastxml_to_top_descr.txt tools/ncbi_blast_plus/blastxml_to_top_descr.xml |
added:
test-data/blastp_four_human_vs_rhodopsin.xml test-data/blastp_four_human_vs_rhodopsin_top3.tabular |
b |
diff -r b23e0a323bee -r ec31e95c56e1 test-data/blastp_four_human_vs_rhodopsin.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_four_human_vs_rhodopsin.xml Mon Apr 29 13:08:52 2013 -0400 |
[ |
b'@@ -0,0 +1,646 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastp</BlastOutput_program>\n+ <BlastOutput_version>BLASTP 2.2.26+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db></BlastOutput_db>\n+ <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>\n+ <BlastOutput_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>\n+ <BlastOutput_query-len>406</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+ <Parameters_expect>1e-08</Parameters_expect>\n+ <Parameters_gap-open>11</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>F</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+ <BlastOutput_iterations>\n+ <Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+ <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+ <Iteration_hits></Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>30</Statistics_hsp-len>\n+ <Statistics_eff-space>119568</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+ </Iteration>\n+ <Iteration>\n+ <Iteration_iter-num>2</Iteration_iter-num>\n+ <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+ <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+ <Iteration_hits></Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>30</Statistics_hsp-len>\n+ <Statistics_eff-space>119568</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+ </Iteration>\n+ <Iteration>\n+ <Iteration_iter-num>3</Iteration_iter-num>\n+ <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+ <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+ <Iteration_hits></Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>30</Statistics_hsp-len>\n+ <Statistics_eff-space>119568</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteratio'..b'VPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+ </Hit>\n+ </Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>29</Statistics_hsp-len>\n+ <Statistics_eff-space>101761</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ </Iteration>\n+ <Iteration>\n+ <Iteration_iter-num>24</Iteration_iter-num>\n+ <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>\n+ <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>348</Iteration_query-len>\n+ <Iteration_hits>\n+ <Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>gi|12583665|dbj|BAB21486.1|</Hit_id>\n+ <Hit_def>fresh water form rod opsin [Conger myriaster]</Hit_def>\n+ <Hit_accession>BAB21486</Hit_accession>\n+ <Hit_len>354</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>599.356</Hsp_bit-score>\n+ <Hsp_score>1544</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>341</Hsp_query-to>\n+ <Hsp_hit-from>1</Hsp_hit-from>\n+ <Hsp_hit-to>342</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>281</Hsp_identity>\n+ <Hsp_positive>314</Hsp_positive>\n+ <Hsp_gaps>1</Hsp_gaps>\n+ <Hsp_align-len>342</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +D ASAT SKTE</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+ </Hit>\n+ </Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>29</Statistics_hsp-len>\n+ <Statistics_eff-space>101761</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ </Iteration>\n+ </BlastOutput_iterations>\n+</BlastOutput>\n\\ No newline at end of file\n' |
b |
diff -r b23e0a323bee -r ec31e95c56e1 test-data/blastp_four_human_vs_rhodopsin_top3.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_four_human_vs_rhodopsin_top3.tabular Mon Apr 29 13:08:52 2013 -0400 |
[ |
@@ -0,0 +1,25 @@ +#Query BLAST hit 1 BLAST hit 2 BLAST hit 3 +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus] +sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A rhodopsin [Bos taurus] +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster] |
b |
diff -r b23e0a323bee -r ec31e95c56e1 tools/ncbi_blast_plus/blastxml_to_top_descr.py --- a/tools/ncbi_blast_plus/blastxml_to_top_descr.py Mon Apr 29 13:08:32 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_top_descr.py Mon Apr 29 13:08:52 2013 -0400 |
[ |
@@ -7,6 +7,10 @@ import sys import re +if "-v" in sys.argv or "--version" in sys.argv: + print "v0.0.5" + sys.exit(0) + if sys.version_info[:2] >= ( 2, 5 ): import xml.etree.cElementTree as ElementTree else: @@ -58,6 +62,7 @@ count = 0 +pos_count = 0 outfile = open(out_file, 'w') outfile.write("#Query\t%s\n" % "\t".join("BLAST hit %i" % (i+1) for i in range(topN))) for event, elem in context: @@ -103,6 +108,8 @@ assert hit_def not in hit_descrs hit_descrs.append(hit_def) #print "%r has %i hits" % (qseqid, len(hit_descrs)) + if hit_descrs: + pos_count += 1 hit_descrs = hit_descrs[:topN] while len(hit_descrs) < topN: hit_descrs.append("") @@ -112,4 +119,4 @@ root.clear() elem.clear() outfile.close() -print "%i BLAST results" % count +print "Of %i queries, %i had BLAST results" % (count, pos_count) |
b |
diff -r b23e0a323bee -r ec31e95c56e1 tools/ncbi_blast_plus/blastxml_to_top_descr.txt --- a/tools/ncbi_blast_plus/blastxml_to_top_descr.txt Mon Apr 29 13:08:32 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_top_descr.txt Mon Apr 29 13:08:52 2013 -0400 |
b |
@@ -50,6 +50,9 @@ v0.0.1 - Initial version. v0.0.2 - Since BLAST+ was moved out of the Galaxy core, now have a dependency on the 'blast_datatypes' repository in the Tool Shed. +v0.0.3 - Include the test files required to run the unit tests +v0.0.4 - Quote filenames in case they contain spaces (internal change) +v0.0.5 - Include number of queries with BLAST matches in stdout (peek text) Developers @@ -58,10 +61,10 @@ This script and related tools are being developed on the following hg branch: http://bitbucket.org/peterjc/galaxy-central/src/tools -For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use the following command from the Galaxy root folder: -$ tar -czf blastxml_to_top_descr.tar.gz tools/ncbi_blast_plus/blastxml_to_top_descr.* tools/ncbi_blast_plus/repository_dependencies.xml +$ tar -czf blastxml_to_top_descr.tar.gz tools/ncbi_blast_plus/blastxml_to_top_descr.* tools/ncbi_blast_plus/repository_dependencies.xml test-data/blastp_four_human_vs_rhodopsin.xml test-data/blastp_four_human_vs_rhodopsin_top3.tabular Check this worked: @@ -70,6 +73,8 @@ tools/ncbi_blast_plus/blastxml_to_top_descr.txt tools/ncbi_blast_plus/blastxml_to_top_descr.xml tools/ncbi_blast_plus/repository_dependencies.xml +test-data/blastp_four_human_vs_rhodopsin.xml +test-data/blastp_four_human_vs_rhodopsin_top3.tabular Licence (MIT/BSD style) |
b |
diff -r b23e0a323bee -r ec31e95c56e1 tools/ncbi_blast_plus/blastxml_to_top_descr.xml --- a/tools/ncbi_blast_plus/blastxml_to_top_descr.xml Mon Apr 29 13:08:32 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_top_descr.xml Mon Apr 29 13:08:52 2013 -0400 |
b |
@@ -1,7 +1,8 @@ -<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.0.1"> +<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.0.5"> <description>Make a table from BLAST XML</description> + <version_command interpreter="python">blastxml_to_top_descr.py --version</version_command> <command interpreter="python"> - blastxml_to_top_descr.py $blastxml_file $tabular_file $topN + blastxml_to_top_descr.py "${blastxml_file}" "${tabular_file}" ${topN} </command> <inputs> <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/> |