Mercurial > repos > peterjc > blastxml_to_top_descr
changeset 1:ec31e95c56e1 draft
Uploaded v0.0.5
author | peterjc |
---|---|
date | Mon, 29 Apr 2013 13:08:52 -0400 |
parents | b23e0a323bee |
children | 58359ce5bde9 |
files | test-data/blastp_four_human_vs_rhodopsin.xml test-data/blastp_four_human_vs_rhodopsin_top3.tabular tools/ncbi_blast_plus/blastxml_to_top_descr.py tools/ncbi_blast_plus/blastxml_to_top_descr.txt tools/ncbi_blast_plus/blastxml_to_top_descr.xml |
diffstat | 5 files changed, 689 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_four_human_vs_rhodopsin.xml Mon Apr 29 13:08:52 2013 -0400 @@ -0,0 +1,646 @@ +<?xml version="1.0"?> +<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> +<BlastOutput> + <BlastOutput_program>blastp</BlastOutput_program> + <BlastOutput_version>BLASTP 2.2.26+</BlastOutput_version> + <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference> + <BlastOutput_db></BlastOutput_db> + <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID> + <BlastOutput_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def> + <BlastOutput_query-len>406</BlastOutput_query-len> + <BlastOutput_param> + <Parameters> + <Parameters_matrix>BLOSUM62</Parameters_matrix> + <Parameters_expect>1e-08</Parameters_expect> + <Parameters_gap-open>11</Parameters_gap-open> + <Parameters_gap-extend>1</Parameters_gap-extend> + <Parameters_filter>F</Parameters_filter> + </Parameters> + </BlastOutput_param> + <BlastOutput_iterations> + <Iteration> + <Iteration_iter-num>1</Iteration_iter-num> + <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> + <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>406</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119568</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>2</Iteration_iter-num> + <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> + <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>406</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119568</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>3</Iteration_iter-num> + <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> + <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>406</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119568</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>4</Iteration_iter-num> + <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> + <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>406</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119568</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>5</Iteration_iter-num> + <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> + <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>406</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119568</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>6</Iteration_iter-num> + <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> + <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>406</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119568</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>7</Iteration_iter-num> + <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> + <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> + <Iteration_query-len>1161</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>38</Statistics_hsp-len> + <Statistics_eff-space>348130</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>8</Iteration_iter-num> + <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> + <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> + <Iteration_query-len>1161</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>38</Statistics_hsp-len> + <Statistics_eff-space>348130</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>9</Iteration_iter-num> + <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> + <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> + <Iteration_query-len>1161</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>38</Statistics_hsp-len> + <Statistics_eff-space>348130</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>10</Iteration_iter-num> + <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> + <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> + <Iteration_query-len>1161</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>38</Statistics_hsp-len> + <Statistics_eff-space>348130</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>11</Iteration_iter-num> + <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> + <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> + <Iteration_query-len>1161</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>38</Statistics_hsp-len> + <Statistics_eff-space>348130</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>12</Iteration_iter-num> + <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> + <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> + <Iteration_query-len>1161</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>38</Statistics_hsp-len> + <Statistics_eff-space>348130</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>13</Iteration_iter-num> + <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> + <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> + <Iteration_query-len>1382</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>39</Statistics_hsp-len> + <Statistics_eff-space>414987</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>14</Iteration_iter-num> + <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> + <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> + <Iteration_query-len>1382</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>39</Statistics_hsp-len> + <Statistics_eff-space>414987</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>15</Iteration_iter-num> + <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> + <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> + <Iteration_query-len>1382</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>39</Statistics_hsp-len> + <Statistics_eff-space>414987</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>16</Iteration_iter-num> + <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> + <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> + <Iteration_query-len>1382</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>39</Statistics_hsp-len> + <Statistics_eff-space>414987</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>17</Iteration_iter-num> + <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> + <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> + <Iteration_query-len>1382</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>39</Statistics_hsp-len> + <Statistics_eff-space>414987</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>18</Iteration_iter-num> + <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> + <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> + <Iteration_query-len>1382</Iteration_query-len> + <Iteration_hits></Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>39</Statistics_hsp-len> + <Statistics_eff-space>414987</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> + </Iteration> + <Iteration> + <Iteration_iter-num>19</Iteration_iter-num> + <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> + <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>348</Iteration_query-len> + <Iteration_hits> + <Hit> + <Hit_num>1</Hit_num> + <Hit_id>gi|57163783|ref|NP_001009242.1|</Hit_id> + <Hit_def>rhodopsin [Felis catus]</Hit_def> + <Hit_accession>NP_001009242</Hit_accession> + <Hit_len>348</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>701.049</Hsp_bit-score> + <Hsp_score>1808</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>348</Hsp_query-to> + <Hsp_hit-from>1</Hsp_hit-from> + <Hsp_hit-to>348</Hsp_hit-to> + <Hsp_query-frame>0</Hsp_query-frame> + <Hsp_hit-frame>0</Hsp_hit-frame> + <Hsp_identity>336</Hsp_identity> + <Hsp_positive>343</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>348</Hsp_align-len> + <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq> + <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA</Hsp_hseq> + <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T SKTETSQVAPA</Hsp_midline> + </Hsp> + </Hit_hsps> + </Hit> + </Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>101761</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + </Iteration> + <Iteration> + <Iteration_iter-num>20</Iteration_iter-num> + <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> + <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>348</Iteration_query-len> + <Iteration_hits> + <Hit> + <Hit_num>1</Hit_num> + <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id> + <Hit_def>RecName: Full=Rhodopsin</Hit_def> + <Hit_accession>P56514</Hit_accession> + <Hit_len>354</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>619.002</Hsp_bit-score> + <Hsp_score>1595</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>341</Hsp_query-to> + <Hsp_hit-from>1</Hsp_hit-from> + <Hsp_hit-to>342</Hsp_hit-to> + <Hsp_query-frame>0</Hsp_query-frame> + <Hsp_hit-frame>0</Hsp_hit-frame> + <Hsp_identity>290</Hsp_identity> + <Hsp_positive>322</Hsp_positive> + <Hsp_gaps>1</Hsp_gaps> + <Hsp_align-len>342</Hsp_align-len> + <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE</Hsp_qseq> + <Hsp_hseq>MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE</Hsp_hseq> + <Hsp_midline>MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S++GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF++QGS FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+A SA SKTE</Hsp_midline> + </Hsp> + </Hit_hsps> + </Hit> + </Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>101761</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + </Iteration> + <Iteration> + <Iteration_iter-num>21</Iteration_iter-num> + <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> + <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>348</Iteration_query-len> + <Iteration_hits> + <Hit> + <Hit_num>1</Hit_num> + <Hit_id>gi|283855846|gb|ADB45242.1|</Hit_id> + <Hit_def>rhodopsin [Cynopterus brachyotis]</Hit_def> + <Hit_accession>ADB45242</Hit_accession> + <Hit_len>328</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>653.284</Hsp_bit-score> + <Hsp_score>1684</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>11</Hsp_query-from> + <Hsp_query-to>338</Hsp_query-to> + <Hsp_hit-from>1</Hsp_hit-from> + <Hsp_hit-to>328</Hsp_hit-to> + <Hsp_query-frame>0</Hsp_query-frame> + <Hsp_hit-frame>0</Hsp_hit-frame> + <Hsp_identity>311</Hsp_identity> + <Hsp_positive>321</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>328</Hsp_align-len> + <Hsp_qseq>VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS</Hsp_qseq> + <Hsp_hseq>VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS</Hsp_hseq> + <Hsp_midline>VPFSN TGVVRSPFE+PQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+A TWVMALACAAPPL GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T S</Hsp_midline> + </Hsp> + </Hit_hsps> + </Hit> + </Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>101761</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + </Iteration> + <Iteration> + <Iteration_iter-num>22</Iteration_iter-num> + <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> + <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>348</Iteration_query-len> + <Iteration_hits> + <Hit> + <Hit_num>1</Hit_num> + <Hit_id>gi|283855823|gb|ADB45229.1|</Hit_id> + <Hit_def>rhodopsin [Myotis pilosus]</Hit_def> + <Hit_accession>ADB45229</Hit_accession> + <Hit_len>328</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>631.328</Hsp_bit-score> + <Hsp_score>1627</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>11</Hsp_query-from> + <Hsp_query-to>338</Hsp_query-to> + <Hsp_hit-from>1</Hsp_hit-from> + <Hsp_hit-to>328</Hsp_hit-to> + <Hsp_query-frame>0</Hsp_query-frame> + <Hsp_hit-frame>0</Hsp_hit-frame> + <Hsp_identity>311</Hsp_identity> + <Hsp_positive>323</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>328</Hsp_align-len> + <Hsp_qseq>VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS</Hsp_qseq> + <Hsp_hseq>VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS</Hsp_hseq> + <Hsp_midline>VPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVA+LFMV GGFT+TLYTS+HGYFVFG TGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+AFTWVMALACAAPPLAGWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMV+AFLICW+PYASVAFYIFTHQGSNFGP+FMTIPAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T S</Hsp_midline> + </Hsp> + </Hit_hsps> + </Hit> + </Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>101761</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + </Iteration> + <Iteration> + <Iteration_iter-num>23</Iteration_iter-num> + <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> + <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>348</Iteration_query-len> + <Iteration_hits> + <Hit> + <Hit_num>1</Hit_num> + <Hit_id>gi|223523|prf||0811197A</Hit_id> + <Hit_def>rhodopsin [Bos taurus]</Hit_def> + <Hit_accession>0811197A</Hit_accession> + <Hit_len>347</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>673.315</Hsp_bit-score> + <Hsp_score>1736</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>348</Hsp_query-to> + <Hsp_hit-from>1</Hsp_hit-from> + <Hsp_hit-to>347</Hsp_hit-to> + <Hsp_query-frame>0</Hsp_query-frame> + <Hsp_hit-frame>0</Hsp_hit-frame> + <Hsp_identity>324</Hsp_identity> + <Hsp_positive>336</Hsp_positive> + <Hsp_gaps>1</Hsp_gaps> + <Hsp_align-len>348</Hsp_align-len> + <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq> + <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq> + <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline> + </Hsp> + </Hit_hsps> + </Hit> + </Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>101761</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + </Iteration> + <Iteration> + <Iteration_iter-num>24</Iteration_iter-num> + <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> + <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> + <Iteration_query-len>348</Iteration_query-len> + <Iteration_hits> + <Hit> + <Hit_num>1</Hit_num> + <Hit_id>gi|12583665|dbj|BAB21486.1|</Hit_id> + <Hit_def>fresh water form rod opsin [Conger myriaster]</Hit_def> + <Hit_accession>BAB21486</Hit_accession> + <Hit_len>354</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>599.356</Hsp_bit-score> + <Hsp_score>1544</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>341</Hsp_query-to> + <Hsp_hit-from>1</Hsp_hit-from> + <Hsp_hit-to>342</Hsp_hit-to> + <Hsp_query-frame>0</Hsp_query-frame> + <Hsp_hit-frame>0</Hsp_hit-frame> + <Hsp_identity>281</Hsp_identity> + <Hsp_positive>314</Hsp_positive> + <Hsp_gaps>1</Hsp_gaps> + <Hsp_align-len>342</Hsp_align-len> + <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE</Hsp_qseq> + <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE</Hsp_hseq> + <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +D ASAT SKTE</Hsp_midline> + </Hsp> + </Hit_hsps> + </Hit> + </Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>101761</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + <Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + </Iteration> + </BlastOutput_iterations> +</BlastOutput> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_four_human_vs_rhodopsin_top3.tabular Mon Apr 29 13:08:52 2013 -0400 @@ -0,0 +1,25 @@ +#Query BLAST hit 1 BLAST hit 2 BLAST hit 3 +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9BS26|ERP44_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|Q9NSY1|BMP2K_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P06213|INSR_HUMAN +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus] +sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A rhodopsin [Bos taurus] +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster]
--- a/tools/ncbi_blast_plus/blastxml_to_top_descr.py Mon Apr 29 13:08:32 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_top_descr.py Mon Apr 29 13:08:52 2013 -0400 @@ -7,6 +7,10 @@ import sys import re +if "-v" in sys.argv or "--version" in sys.argv: + print "v0.0.5" + sys.exit(0) + if sys.version_info[:2] >= ( 2, 5 ): import xml.etree.cElementTree as ElementTree else: @@ -58,6 +62,7 @@ count = 0 +pos_count = 0 outfile = open(out_file, 'w') outfile.write("#Query\t%s\n" % "\t".join("BLAST hit %i" % (i+1) for i in range(topN))) for event, elem in context: @@ -103,6 +108,8 @@ assert hit_def not in hit_descrs hit_descrs.append(hit_def) #print "%r has %i hits" % (qseqid, len(hit_descrs)) + if hit_descrs: + pos_count += 1 hit_descrs = hit_descrs[:topN] while len(hit_descrs) < topN: hit_descrs.append("") @@ -112,4 +119,4 @@ root.clear() elem.clear() outfile.close() -print "%i BLAST results" % count +print "Of %i queries, %i had BLAST results" % (count, pos_count)
--- a/tools/ncbi_blast_plus/blastxml_to_top_descr.txt Mon Apr 29 13:08:32 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_top_descr.txt Mon Apr 29 13:08:52 2013 -0400 @@ -50,6 +50,9 @@ v0.0.1 - Initial version. v0.0.2 - Since BLAST+ was moved out of the Galaxy core, now have a dependency on the 'blast_datatypes' repository in the Tool Shed. +v0.0.3 - Include the test files required to run the unit tests +v0.0.4 - Quote filenames in case they contain spaces (internal change) +v0.0.5 - Include number of queries with BLAST matches in stdout (peek text) Developers @@ -58,10 +61,10 @@ This script and related tools are being developed on the following hg branch: http://bitbucket.org/peterjc/galaxy-central/src/tools -For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use the following command from the Galaxy root folder: -$ tar -czf blastxml_to_top_descr.tar.gz tools/ncbi_blast_plus/blastxml_to_top_descr.* tools/ncbi_blast_plus/repository_dependencies.xml +$ tar -czf blastxml_to_top_descr.tar.gz tools/ncbi_blast_plus/blastxml_to_top_descr.* tools/ncbi_blast_plus/repository_dependencies.xml test-data/blastp_four_human_vs_rhodopsin.xml test-data/blastp_four_human_vs_rhodopsin_top3.tabular Check this worked: @@ -70,6 +73,8 @@ tools/ncbi_blast_plus/blastxml_to_top_descr.txt tools/ncbi_blast_plus/blastxml_to_top_descr.xml tools/ncbi_blast_plus/repository_dependencies.xml +test-data/blastp_four_human_vs_rhodopsin.xml +test-data/blastp_four_human_vs_rhodopsin_top3.tabular Licence (MIT/BSD style)
--- a/tools/ncbi_blast_plus/blastxml_to_top_descr.xml Mon Apr 29 13:08:32 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_top_descr.xml Mon Apr 29 13:08:52 2013 -0400 @@ -1,7 +1,8 @@ -<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.0.1"> +<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.0.5"> <description>Make a table from BLAST XML</description> + <version_command interpreter="python">blastxml_to_top_descr.py --version</version_command> <command interpreter="python"> - blastxml_to_top_descr.py $blastxml_file $tabular_file $topN + blastxml_to_top_descr.py "${blastxml_file}" "${tabular_file}" ${topN} </command> <inputs> <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/>