changeset 5:2921c0f29c85 draft

Uploaded v0.0.8 preview 1, attempt auto-install and unit test.
author peterjc
date Tue, 30 Apr 2013 13:04:41 -0400
parents d2b527ddb42d
children 54c3d14e5621
files test-data/blastp_sample.blast2go.tabular test-data/blastp_sample.xml tool-data/blast2go.loc.sample tools/blast2go/blast2go.py tools/blast2go/blast2go.txt tools/blast2go/blast2go.xml tools/blast2go/tool_dependencies.xml
diffstat 7 files changed, 385 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_sample.blast2go.tabular	Tue Apr 30 13:04:41 2013 -0400
@@ -0,0 +1,1 @@
+Sample	GO:0005488	tail tape measure protein
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_sample.xml	Tue Apr 30 13:04:41 2013 -0400
@@ -0,0 +1,293 @@
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">
+<BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>BLASTP 2.2.24+</BlastOutput_version>
+  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>nr</BlastOutput_db>
+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
+  <BlastOutput_query-def>Sample</BlastOutput_query-def>
+  <BlastOutput_query-len>516</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-30</Parameters_expect>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_query-ID>Query_1</Iteration_query-ID>
+      <Iteration_query-def>Sample</Iteration_query-def>
+      <Iteration_query-len>516</Iteration_query-len>
+      <Iteration_hits>
+        <Hit>
+          <Hit_num>1</Hit_num>
+          <Hit_id>gi|119953746|ref|YP_950551.1|</Hit_id>
+          <Hit_def>tail tape measure protein [Streptococcus phage SMP] &gt;gi|118430558|gb|ABK91882.1| tail tape measure protein [Streptococcus suis phage SMP]</Hit_def>
+          <Hit_accession>YP_950551</Hit_accession>
+          <Hit_len>659</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>949.117592429394</Hsp_bit-score>
+              <Hsp_score>2452</Hsp_score>
+              <Hsp_evalue>0</Hsp_evalue>
+              <Hsp_query-from>1</Hsp_query-from>
+              <Hsp_query-to>516</Hsp_query-to>
+              <Hsp_hit-from>27</Hsp_hit-from>
+              <Hsp_hit-to>542</Hsp_hit-to>
+              <Hsp_query-frame>0</Hsp_query-frame>
+              <Hsp_hit-frame>0</Hsp_hit-frame>
+              <Hsp_identity>500</Hsp_identity>
+              <Hsp_positive>500</Hsp_positive>
+              <Hsp_gaps>0</Hsp_gaps>
+              <Hsp_align-len>516</Hsp_align-len>
+              <Hsp_qseq>FHLLNSGGSALSVMFAKLVGIIAGISAPIWXXXXXXXXXXXXXXXXYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_qseq>
+              <Hsp_hseq>FHLLNSGGSALSVMFAKLVGIIAGISAPIWAVIGVIAALVAGFVLLYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_hseq>
+              <Hsp_midline>FHLLNSGGSALSVMFAKLVGIIAGISAPIW                YNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>2</Hit_num>
+          <Hit_id>gi|148986157|ref|ZP_01819143.1|</Hit_id>
+          <Hit_def>unknown phage protein [Streptococcus pneumoniae SP3-BS71] &gt;gi|147921871|gb|EDK72998.1| unknown phage protein [Streptococcus pneumoniae SP3-BS71]</Hit_def>
+          <Hit_accession>ZP_01819143</Hit_accession>
+          <Hit_len>1031</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>174.481245259597</Hsp_bit-score>
+              <Hsp_score>441</Hsp_score>
+              <Hsp_evalue>1.54640812741294e-41</Hsp_evalue>
+              <Hsp_query-from>49</Hsp_query-from>
+              <Hsp_query-to>300</Hsp_query-to>
+              <Hsp_hit-from>679</Hsp_hit-from>
+              <Hsp_hit-to>897</Hsp_hit-to>
+              <Hsp_query-frame>0</Hsp_query-frame>
+              <Hsp_hit-frame>0</Hsp_hit-frame>
+              <Hsp_identity>104</Hsp_identity>
+              <Hsp_positive>148</Hsp_positive>
+              <Hsp_gaps>33</Hsp_gaps>
+              <Hsp_align-len>252</Hsp_align-len>
+              <Hsp_qseq>TNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWS</Hsp_qseq>
+              <Hsp_hseq>TNEGFRDAVTTVWNAILEVINAVVSEISNFVMSIFGTVVTWWTENQELIRTSAETVWNAIYTVISTILDILGPLLQAGWDNIQLIITTTWEIIKIVVETAINVVLGVIQAVMQIITGDWSGAWETIKGVFSTVWQAIQSIVQT-------IFSAIQSYISNILNGISGT----VSNIWNSIKDTVSN----------------------VLNAISSTVSSVWEGIKSTISSAINGARDAVSSAIEAIKGLFN</Hsp_hseq>
+              <Hsp_midline>TNE FR  V   W AI   I+  V  + +FVM ++G +V WW ENQELIR +AETVWNAI TV+ T++  L P++Q  WD I  ++TT   +IK VV+T + VVLG+I+AVMQ+I GDWSGAWET+KGV  T+W+ I+S+VQ        IF    +++ +I + + GT    V  IW+ IK TVSN                      V NAIS+  S++W  I +T+ S +      + + +E IK +++</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>3</Hit_num>
+          <Hit_id>gi|77411259|ref|ZP_00787609.1|</Hit_id>
+          <Hit_def>tail tape meausure protein [Streptococcus agalactiae CJB111] &gt;gi|77162685|gb|EAO73646.1| tail tape meausure protein [Streptococcus agalactiae CJB111]</Hit_def>
+          <Hit_accession>ZP_00787609</Hit_accession>
+          <Hit_len>1039</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>165.621655013498</Hsp_bit-score>
+              <Hsp_score>418</Hsp_score>
+              <Hsp_evalue>7.61538823982138e-39</Hsp_evalue>
+              <Hsp_query-from>50</Hsp_query-from>
+              <Hsp_query-to>310</Hsp_query-to>
+              <Hsp_hit-from>655</Hsp_hit-from>
+              <Hsp_hit-to>904</Hsp_hit-to>
+              <Hsp_query-frame>0</Hsp_query-frame>
+              <Hsp_hit-frame>0</Hsp_hit-frame>
+              <Hsp_identity>107</Hsp_identity>
+              <Hsp_positive>158</Hsp_positive>
+              <Hsp_gaps>11</Hsp_gaps>
+              <Hsp_align-len>261</Hsp_align-len>
+              <Hsp_qseq>NEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVF</Hsp_qseq>
+              <Hsp_hseq>HEGFRTAVTEIWNAIYAFLSVIIQQISSFVMSIWGTLTTWWTENQQLILNAANTVWTAISTVIQTIMTILGPYLQASWENIKLIITTAWDIIKVVVETAINVVLGIIKAVMQIITGDWSGAWETIKQVVSTVWEAIKSLISIVLSAIAQ-------FISNSWNGIKGTMTNLL----NSIKSVVSNVWNSIKSTISSILSSIGSTVSSVWNGMKATISGVLSGISNTVSSVWNGVKSTITNAINGAKNAVSSAINAIKNLF</Hsp_hseq>
+              <Hsp_midline>+E FRT V   W AI + +S  ++ + SFVM +WG +  WW ENQ+LI   A TVW AI TV++T+MT L P +Q +W+ I  ++TT  ++IK VV+T + VVLGIIKAVMQ+I GDWSGAWET+K V  T+WE IKSL+ + +  + Q       F+ + W+ + GT+  ++    + IK+ VSN   ++   I +I++SI +T  +VWN +    S + + IS TV SV   +   I   +   K   S+A   IK +F</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>4</Hit_num>
+          <Hit_id>gi|76786754|ref|YP_329383.1|</Hit_id>
+          <Hit_def>prophage LambdaSa04, tail tape measure protein, TP901 family [Streptococcus agalactiae A909] &gt;gi|76561811|gb|ABA44395.1| prophage LambdaSa04, tail tape measure protein, TP901 family [Streptococcus agalactiae A909]</Hit_def>
+          <Hit_accession>YP_329383</Hit_accession>
+          <Hit_len>1039</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>159.073262222903</Hsp_bit-score>
+              <Hsp_score>401</Hsp_score>
+              <Hsp_evalue>6.55719737745379e-37</Hsp_evalue>
+              <Hsp_query-from>50</Hsp_query-from>
+              <Hsp_query-to>310</Hsp_query-to>
+              <Hsp_hit-from>655</Hsp_hit-from>
+              <Hsp_hit-to>904</Hsp_hit-to>
+              <Hsp_query-frame>0</Hsp_query-frame>
+              <Hsp_hit-frame>0</Hsp_hit-frame>
+              <Hsp_identity>103</Hsp_identity>
+              <Hsp_positive>156</Hsp_positive>
+              <Hsp_gaps>11</Hsp_gaps>
+              <Hsp_align-len>261</Hsp_align-len>
+              <Hsp_qseq>NEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVF</Hsp_qseq>
+              <Hsp_hseq>HEGFRTAVTEIWNAIYAFLTVIIQQISSFVMSIWGTLITWWTENQQLILNATNTVWTAISTVIQTIMTILAPYLQASWENIKLIITTAWDIIKVVVETAINVVLGIIKAVMQIITGDWSGAWETIKQVVSTVWEVIKSLISIVLSAIAQ-------FISNSWNGIKGTMTNLL----NSIKGVVSNVWNGIKSTISSILSSIGSTVSSIWNGMKATISGVLSGISSTVSFVWNGVKSTITNAINGAKNAVSSAINAIKNLF</Hsp_hseq>
+              <Hsp_midline>+E FRT V   W AI + ++  ++ + SFVM +WG ++ WW ENQ+LI     TVW AI TV++T+MT L P +Q +W+ I  ++TT  ++IK VV+T + VVLGIIKAVMQ+I GDWSGAWET+K V  T+WE IKSL+ + +  + Q       F+ + W+ + GT+  ++    + IK  VSN    +   I +I++SI +T  ++WN +    S + + IS+TV  V   +   I   +   K   S+A   IK +F</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>5</Hit_num>
+          <Hit_id>gi|153811333|ref|ZP_01964001.1|</Hit_id>
+          <Hit_def>hypothetical protein RUMOBE_01725 [Ruminococcus obeum ATCC 29174] &gt;gi|149832460|gb|EDM87544.1| hypothetical protein RUMOBE_01725 [Ruminococcus obeum ATCC 29174]</Hit_def>
+          <Hit_accession>ZP_01964001</Hit_accession>
+          <Hit_len>1228</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>157.147264343316</Hsp_bit-score>
+              <Hsp_score>396</Hsp_score>
+              <Hsp_evalue>2.33083876931167e-36</Hsp_evalue>
+              <Hsp_query-from>3</Hsp_query-from>
+              <Hsp_query-to>516</Hsp_query-to>
+              <Hsp_hit-from>573</Hsp_hit-from>
+              <Hsp_hit-to>1059</Hsp_hit-to>
+              <Hsp_query-frame>0</Hsp_query-frame>
+              <Hsp_hit-frame>0</Hsp_hit-frame>
+              <Hsp_identity>167</Hsp_identity>
+              <Hsp_positive>247</Hsp_positive>
+              <Hsp_gaps>113</Hsp_gaps>
+              <Hsp_align-len>557</Hsp_align-len>
+              <Hsp_qseq>LLNSGGSALSVMFAKLVGIIAGISAPIWXXXXXXXXXXXXXXXXYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQV---AIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLI-----------KQAISNAWEIIKTKT-----------------------SEIWNAITTFLSGIWEGIKTAASTAWEWIKTT-ISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIEN-IKSTVSNGWNNL---VSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLI-NGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_qseq>
+              <Hsp_hseq>LVKAGG--FSGVFTKALGLI---TSPAAIVVGVIAAITAVIIHLWNTNEDFRNTITAIWQKIKDAFTT---------------FAAGISERLSALGITFSDVTSAIKTIWDGFCNLLAPVLEAAFSTIAIALQTAFNVI-----------LGIWDVFSAVFSGDWSGAWEAIKGIFSSIWDGLKEYFSTIIGAVKGVADVF---LGWFGTNWETVWNGVKTFFEGIW--------NGISSFFEGI--------------WNGISTFCTTVWNGIVTNVTAFCTTVHDTISTIFNAVKDVVSNVWETIKNVVQVAIMFIVEVVKAAFELITVPFRFIWENCRDTIISVWETIKSAVQTAINFVKDNIITPVMNAISATITTVWNAIQTTFTTVINAIKSAVQTAWNFMKDNVVTPVMNAISTTISTVWNTIKTTFTTVINAIKSAVQTAWNFMKNSVITPVMNGIKTVITTVWNAIKTAVQTVVNA---IKTTVQTVF-NAVKTTVTTIWNAIKTGTSTAWN----AVKTAVTTPINAAKSAVTSAIN------GIKS</Hsp_hseq>
+              <Hsp_midline>L+ +GG   S +F K +G+I   ++P                  +NTNE+FR  + A W+ IK A +T                 A  +E    +  T   V +AI+T+ +     L P+++ A+  I   + T  NVI           LGI      + +GDWSGAWE +KG+  +IW+G+K        A+ G+  +F   L +  + W+TVW  +      IW        N I++ +E I              WN IST  + +W  I T V +  TT+   I T    +K V S  WE IK V    ++ IV +V   F+LI           +  I + WE IK+                         + +WNAI T  + +   IK+A  TAW ++K   ++ VM  I + I T WN IKT+ +  +N IKSA + AWN +K+++ T + N IK+ ++  WN +   V TV NA   I + V+T F NAV      I NAI  G     N     VK A    I+A   AV+ AI+      GIKS</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>6</Hit_num>
+          <Hit_id>gi|56962696|ref|YP_174422.1|</Hit_id>
+          <Hit_def>hypothetical protein ABC0922 [Bacillus clausii KSM-K16] &gt;gi|56908934|dbj|BAD63461.1| phage-related protein [Bacillus clausii KSM-K16]</Hit_def>
+          <Hit_accession>YP_174422</Hit_accession>
+          <Hit_len>593</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>146.746875793547</Hsp_bit-score>
+              <Hsp_score>369</Hsp_score>
+              <Hsp_evalue>3.12404663750498e-33</Hsp_evalue>
+              <Hsp_query-from>48</Hsp_query-from>
+              <Hsp_query-to>433</Hsp_query-to>
+              <Hsp_hit-from>123</Hsp_hit-from>
+              <Hsp_hit-to>465</Hsp_hit-to>
+              <Hsp_query-frame>0</Hsp_query-frame>
+              <Hsp_hit-frame>0</Hsp_hit-frame>
+              <Hsp_identity>112</Hsp_identity>
+              <Hsp_positive>187</Hsp_positive>
+              <Hsp_gaps>49</Hsp_gaps>
+              <Hsp_align-len>389</Hsp_align-len>
+              <Hsp_qseq>NTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGL---VQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSN</Hsp_qseq>
+              <Hsp_hseq>QTNETFRNGVIQAWEAIKTTMETVVATIVTFVSEKLAQIKAFWDEHGAAVMQAVTNIFNGIKSIIEPVMNGILAIMQFVWPFIVSLIQMVWGNIQGVISGALNIIMGLVKAFAGLFTGDFS-----------LMWEGIKQLFSGALEAIWNVVQLLLFGR--LLKIASSLFTGLMGVFSKMWGAISNLFLTALNGIRSFFSTIFTPIQ-------NVVMTVMGFIRNAISTG----LTTASNVVQTVLTAIRTVFLTVFNAVRNV-----------VTTAISFVQNFISTGISAARTAVTSALNAIKTTFTTIFNAVRSSVTTAMTNIKTAISN-------GIQSAWQ----AVLNFVGRFREAGKNIVNSIAEGITSAIGAVKNAISN</Hsp_hseq>
+              <Hsp_midline> TNE FR  V  AWEAIK+ + T V  +V+FV +   Q+ A+W+E+   + Q    ++N I++++E VM  ++ I+Q  W  I++++  V   I+ V+   L +++G++KA   +  GD+S            +WEGIK L   A++ +   VQ+   G   L  I  +++  +M V   +W  I      A+  +      I T IQ       N + TV   I  AIST     LTT    +QT L  I+TV+   +  ++ V           VT     ++  IS      +T  +   NAI T  + I+  ++++ +TA   IKT ISN        I++AW     ++ N +   + A +N  N+I   I++AI  +K+ +SN</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>7</Hit_num>
+          <Hit_id>gi|50914476|ref|YP_060448.1|</Hit_id>
+          <Hit_def>unknown phage protein [Streptococcus pyogenes MGAS10394] &gt;gi|40218580|gb|AAR83234.1| prophage pi2 protein [Streptococcus pyogenes] &gt;gi|50261625|gb|AAT72393.1| unknown [Streptococcus pyogenes] &gt;gi|50903550|gb|AAT87265.1| unknown phage protein [Streptococcus pyogenes MGAS10394]</Hit_def>
+          <Hit_accession>YP_060448</Hit_accession>
+          <Hit_len>1039</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>146.36167621763</Hsp_bit-score>
+              <Hsp_score>368</Hsp_score>
+              <Hsp_evalue>4.74132513340056e-33</Hsp_evalue>
+              <Hsp_query-from>50</Hsp_query-from>
+              <Hsp_query-to>227</Hsp_query-to>
+              <Hsp_hit-from>655</Hsp_hit-from>
+              <Hsp_hit-to>832</Hsp_hit-to>
+              <Hsp_query-frame>0</Hsp_query-frame>
+              <Hsp_hit-frame>0</Hsp_hit-frame>
+              <Hsp_identity>78</Hsp_identity>
+              <Hsp_positive>112</Hsp_positive>
+              <Hsp_gaps>0</Hsp_gaps>
+              <Hsp_align-len>178</Hsp_align-len>
+              <Hsp_qseq>NEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWD</Hsp_qseq>
+              <Hsp_hseq>NEGFRTAVIEIWNAIYAFISVIIQEISTFIMTIWGTLTTWWTENQALIQAAVETVWNAISTVIQTVMSLIGPYLEAAWANIQLIITTAWEIIKTVVETAITVVLGIIKAIMQAITGDWSGAWETIKGVLQRVWQAIQQIVTTILSAIGQFISNTWNGIKNTFSNILSAISGIVSSIWN</Hsp_hseq>
+              <Hsp_midline>NE FRT V   W AI + IS  ++ + +F+M +WG +  WW ENQ LI+   ETVWNAI TV++TVM+ + P ++ AW  I  ++TT   +IKTVV+T + VVLGIIKA+MQ I GDWSGAWET+KGV   +W+ I+ +V   +  + Q        +K+ +  +   I  +V  IW+</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>8</Hit_num>
+          <Hit_id>gi|29374987|ref|NP_814140.1|</Hit_id>
+          <Hit_def>tail protein [Enterococcus faecalis V583] &gt;gi|29342445|gb|AAO80211.1| tail protein [Enterococcus faecalis V583]</Hit_def>
+          <Hit_accession>NP_814140</Hit_accession>
+          <Hit_len>1049</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>139.0428842752</Hsp_bit-score>
+              <Hsp_score>349</Hsp_score>
+              <Hsp_evalue>6.84844401007043e-31</Hsp_evalue>
+              <Hsp_query-from>73</Hsp_query-from>
+              <Hsp_query-to>482</Hsp_query-to>
+              <Hsp_hit-from>545</Hsp_hit-from>
+              <Hsp_hit-to>920</Hsp_hit-to>
+              <Hsp_query-frame>0</Hsp_query-frame>
+              <Hsp_hit-frame>0</Hsp_hit-frame>
+              <Hsp_identity>110</Hsp_identity>
+              <Hsp_positive>196</Hsp_positive>
+              <Hsp_gaps>78</Hsp_gaps>
+              <Hsp_align-len>432</Hsp_align-len>
+              <Hsp_qseq>EAVVSFVMDLWGQMVAWWNENQELIRQ-------TAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVL----NVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSIS-----------NALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGF</Hsp_qseq>
+              <Hsp_hseq>DSIVKTASGLKGSLVKTWNDITAKVSEIWKKFTDAGKKTFDGFKKTVENVFNGIKNFLQTVWNVIYAVVGAIIVNTINIWKGIFDG--------FKAYFQYL-------WDLIKAIATGVWEKIGDTVTGIINGFIGVIKGIFDAFKTFFQQIWDAVVYSVTIAWNGIKNTVTSVSTAIKNFVTPIFNAIKTTITNVFNAIKNTATNVWNAIKTTISNVVQTILNF---------------------------------VTPIFNTMKNTITNIFNAIRNTASSVWNSIKTTISNIVTSVKNTVINIFNALKNSITNIFNAIRNTASTVWNSIKSTVSNIVSATVNTVKNLFNGMKNTVSSIWDGVRNTISNVVNAVKNTISNVWGGITGTVSN----IFNGVKNAIDGPMNAAKNLVKNVV----DAIKGF</Hsp_hseq>
+              <Hsp_midline>+++V     L G +V  WN+    + +         +  ++  +  VE V   +   +QT W++I AVV  ++    N+ K + D          KA  Q +       W+ +K +A  +WE I   V   I+G + + +      K+ +  +W  ++  V   W+ IK TV++  TA+   +  I  +I+TT   V+NAI   A+N+W AI TT+ +V+ TI  +                                 VT  F+ +K  I+N +  I+   S +WN+I T +S I   +K      +  +K +I+N+   I++   T WN+IK+++S           N  N +K+   + W+ +++ IS  +  +K+T+SN W  +  TV+N    I + V+   D  +NAA+N + N +    D I GF</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>9</Hit_num>
+          <Hit_id>gi|163941333|ref|YP_001646217.1|</Hit_id>
+          <Hit_def>prophage LambdaBa01, membrane protein, putative [Bacillus weihenstephanensis KBAB4] &gt;gi|163863530|gb|ABY44589.1| prophage LambdaBa01, membrane protein, putative [Bacillus weihenstephanensis KBAB4]</Hit_def>
+          <Hit_accession>YP_001646217</Hit_accession>
+          <Hit_len>725</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>138.657684699283</Hsp_bit-score>
+              <Hsp_score>348</Hsp_score>
+              <Hsp_evalue>8.15996781441799e-31</Hsp_evalue>
+              <Hsp_query-from>61</Hsp_query-from>
+              <Hsp_query-to>480</Hsp_query-to>
+              <Hsp_hit-from>142</Hsp_hit-from>
+              <Hsp_hit-to>560</Hsp_hit-to>
+              <Hsp_query-frame>0</Hsp_query-frame>
+              <Hsp_hit-frame>0</Hsp_hit-frame>
+              <Hsp_identity>118</Hsp_identity>
+              <Hsp_positive>203</Hsp_positive>
+              <Hsp_gaps>29</Hsp_gaps>
+              <Hsp_align-len>434</Hsp_align-len>
+              <Hsp_qseq>WEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIK---AVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKT----VWS-------AAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLIN</Hsp_qseq>
+              <Hsp_hseq>WDAIKQWTIDAWNAIGEFLVGIWDGIVQWASEAWNSISESTSAVWNSIKEFLIGIWNGIVEFVVT-WGT--AILETYVGIWTSIFNFCMEIWNGIVEYLTSVLQGIATFFTEIWTSISTFFQEIWNGLVAFITPVLQGIADFFAM-----------IWNGISTVIQTVWNFITQYLQAIWTAILYFATPLFESIKNFISECWNKISSTTSLVWETIKNFLVSCWNGLVSFVTPIFEKIKSWIISVWDTISSATMAVWNAVKNFLQACWNGLVSIVTPIFDAIKNWIVNVWNAISSTTSAVWNAIKSYLSSLWNSIVSTASSIFNSIKSAISTVWNMISSASSSVWNGIKSTLSSIWNGIKSTASSVWNGLKDAIMTPVRWVTSAVSGAFNGMKSAVLGVWDGIKSGIRTAINGIIRIINKFI-DGFNTPAELLN</Hsp_hseq>
+              <Hsp_midline>W+AIK     A  A+  F++ +W  +V W +E    I ++   VWN+I+  +  +   ++  V T W    A++ T + +  ++ +  +++  GI++   +V+Q I   ++  W ++      IW G+ + +   + G+   F             +W  I  V+  +W++I   +    TA+      +  SI+      WN IS+  S +W  I   ++S    +  ++    E IK+    VW        A W  +K    A    +V +VT  FD IK  I N W  I + TS +WNAI ++LS +W  I + AS+ +  IK+ IS V   I S   + WN IK+++S+  N IKS A + WN +K AI T +  + S VS  +N + S V      I S +RT  +  +     FI +  +   +L+N</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+      </Iteration_hits>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6589360</Statistics_db-num>
+          <Statistics_db-len>-2041834015</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>504129014857</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
--- a/tool-data/blast2go.loc.sample	Fri Feb 22 09:15:24 2013 -0500
+++ b/tool-data/blast2go.loc.sample	Tue Apr 30 13:04:41 2013 -0400
@@ -6,19 +6,22 @@
 # Column 3 - Filename, Galaxy will use this when calling the tool
 #
 # Probably the most important setting in the properties file is the
-# Blast2GO database to use. Currently b2g4pipe v2.3.5 ships with an
-# old configuration so consult http://blast2go.org for the latest
-# public database they host in Spain. We also strongly recommend
+# Blast2GO database to use. Currently b2g4pipe v2.5 ships with an
+# old configuration so consult http://www.blast2go.com for the latest
+# public database they host in Spain (or find this by running the GUI
+# version of Blast2GO via Java Web Start under the menu entry "Tools",
+# "General Settings", "DataAccess setting"). We also strongly recommend
 # configuring a local Blast2GO database.
 #
 # The property filenames can be fullied qualified paths like
 # /opt/b2g4pipe/Spain_2012_August.properties or provided they are
 # in the same folder as the Blast2GO JAR file, just the filename
 # like Spain_2012_August.properties instead. This is intended to
-# make migrating between versions of Blast2GO easier (as the
+# make migrating between future versions of Blast2GO easier (as the
 # property files change between versions), and simpler overall.
 #
-Local_2011_May	Local database (May 2011)	Local_2011_May.properties
-Spain_2010_May	Database in Spain (May 2010)	Spain_2010_May.properties
-Spain_2011_June	Database in Spain (June 2011)	Spain_2011_June.properties
-Spain_2012_August	Database in Spain (August 2012)	Spain_2012_August.properties
+#Local_2011_May	Local database (May 2011)	Local_2011_May.properties
+#Spain_2010_May	Database in Spain (May 2010)	Spain_2010_May.properties
+#Spain_2011_June	Database in Spain (June 2011)	Spain_2011_June.properties
+#Spain_2012_August	Database in Spain (August 2012)	Spain_2012_August.properties
+default	Default settings	b2gPipe.properties
--- a/tools/blast2go/blast2go.py	Fri Feb 22 09:15:24 2013 -0500
+++ b/tools/blast2go/blast2go.py	Tue Apr 30 13:04:41 2013 -0400
@@ -24,9 +24,8 @@
 import subprocess
 
 #You may need to edit this to match your local setup,
-#blast2go_jar = "/opt/b2g4pipe/blast2go.jar"
-blast2go_jar = "/opt/b2g4pipe_v2.5/blast2go.jar"
-
+blast2go_dir = os.environ.get("B2G4PIPE", "/opt/b2g4pipe_v2.5/")
+blast2go_jar = os.path.join(blast2go_dir, "blast2go.jar")
 
 def stop_err(msg, error_level=1):
     """Print error message to stdout and quit with given error level."""
--- a/tools/blast2go/blast2go.txt	Fri Feb 22 09:15:24 2013 -0500
+++ b/tools/blast2go/blast2go.txt	Tue Apr 30 13:04:41 2013 -0400
@@ -26,9 +26,18 @@
 http://www.blast2go.org/
 
 
+Automated Installation
+======================
 
-Installation
-============
+Installation via the Galaxy Tool Shed should take care of the Galaxy side of
+things, including the dependency on 'blast_datatypes' which defines the
+'blastxml' file format. However, you will also probably need to configure
+the Blast2GO property file(s), for example if you have a local Blast2GO
+database (which we recommend for speed).
+
+
+Manual Installation
+===================
 
 The main dependency is b2g4pipe which must be installed manually. Also we
 strongly recommend installing a local Blast2GO database as well (see the
@@ -37,16 +46,14 @@
 
 http://www.blast2go.com/data/blast2go/b2g4pipe_v2.5.zip
 
-You can change the path by editing the definition near the start of the Python
-script blast2go.py, but by default it expects the underlying tool to be here:
+You can change the path by setting the B2G4PIPE environment variable to
+the desired folder, but by default the script looks for the JAR file here:
 
 /opt/b2g4pipe_v2.5/blast2go.jar
 
-Installation of the Galaxy wrapper should work automatically via the Galaxy
-Tool Shed, including the dependency on 'blast_datatypes' for the 'blastxml'
-file format definition. To install the wrapper manually, first install
-'blast_datatypes', then copy or move the following files under the Galaxy
-tools folder, e.g. in a tools/blast2go/ folder:
+To install the wrapper manually, first install 'blast_datatypes', then
+copy or move the following files under the Galaxy tools folder, e.g. in a
+tools/blast2go/ folder:
 
 * blast2go.xml (the Galaxy tool definition)
 * blast2go.py (the Python wrapper script)
@@ -58,6 +65,15 @@
 
 <tool file="blast2go/blast2go.xml" />
 
+If you wish to run the unit tests, also add this to tool_conf.xml.sample
+and move/copy the test-data files under Galaxy's test-data folder. Then:
+
+$ ./run_functional_tests.sh -id blast2go
+
+
+Configuration
+=============
+
 As part of setting up b2g4pipe you will need to setup one or more Blast2GO
 property files which tell the tool which database to use etc. The example
 b2gPipe.properties provided with b2g4pipe is often out of date. The current
@@ -73,7 +89,9 @@
 
 (1) ID for the setup, e.g. Spain_2012_August
 (2) Description for the setup, e.g. Database in Spain (August 2012)
-(3) Properties filename for the setup, e.g. /opt/b2g4pipe/Spain_2012_August.properties
+(3) Properties filename for the setup, e.g. Spain_2012_August.properties
+    relative to the main JAR file, or with a full path
+    e.g. /opt/b2g4pipe/Spain_2012_August.properties
 
 Avoid including "Blast2GO" in the description (column 2) as this text will be
 included in the automatically assigned output dataset name. The blast2go.loc
@@ -105,6 +123,11 @@
           - Now uses the switch -annot instead of -a (this change breaks
             support for b2g4pipe v2.3.5 unfortunately)
        - Catch a few error messages and treat them explicitly as errors.
+v0.0.7 - Update output description in XML file (b2g4pipe v2.3.5 included
+         the sequence description, b2g4pipe v2.5 omits this).
+v0.0.8 - Automated installation via the Galaxy Tool Shed.
+       - Added unit test.
+       - Explain how to load the tabular file into the Blast2GO GUI.
 
 
 Developers
@@ -113,10 +136,10 @@
 This script and related tools are being developed on the following hg branch:
 http://bitbucket.org/peterjc/galaxy-central/src/tools
 
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball I use
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use
 the following command from the Galaxy root folder:
 
-$ tar -czf blast2go.tar.gz tools/blast2go/blast2go.xml tools/blast2go/blast2go.py tools/blast2go/blast2go.txt tools/blast2go/repository_dependencies.xml tool-data/blast2go.loc.sample
+$ tar -czf blast2go.tar.gz tools/blast2go/blast2go.xml tools/blast2go/blast2go.py tools/blast2go/blast2go.txt tools/blast2go/repository_dependencies.xml tools/blast2go/tool_dependencies.xml tool-data/blast2go.loc.sample test-data/blastp_sample.xml test-data/blastp_sample.blast2go.tabular
 
 Check this worked:
 
@@ -125,7 +148,10 @@
 tools/blast2go/blast2go.py
 tools/blast2go/blast2go.txt
 tools/blast2go/repository_dependencies.xml
+tools/blast2go/tool_dependencies.xml
 tool-data/blast2go.loc.sample
+test-data/blastp_sample.xml
+test-data/blastp_sample.blast2go.tabular
 
 
 Licence (MIT/BSD style)
--- a/tools/blast2go/blast2go.xml	Fri Feb 22 09:15:24 2013 -0500
+++ b/tools/blast2go/blast2go.xml	Tue Apr 30 13:04:41 2013 -0400
@@ -1,5 +1,8 @@
-<tool id="blast2go" name="Blast2GO" version="0.0.6">
+<tool id="blast2go" name="Blast2GO" version="0.0.8">
     <description>Maps BLAST results to GO annotation terms</description>
+    <requirements>
+        <requirement type="package" version="2.5">b2g4pipe</requirement>
+    </requirements>
     <command interpreter="python">
         blast2go.py "${xml}" "${prop.fields.path}" "${tab}"
     </command>
@@ -21,9 +24,12 @@
     <outputs>
         <data name="tab" format="tabular" label="Blast2GO ${prop.fields.name}" />
     </outputs>
-    <requirements>
-    </requirements>
     <tests>
+        <test>
+            <param name="xml" value="blastp_sample.xml" ftype="blastxml"/>
+            <param name="prop" value="default"/>
+            <output name="tab" file="blastp_sample.blast2go.tabular" ftype="tabular"/>
+        </test>
     </tests>
     <help>
 .. class:: warningmark
@@ -36,8 +42,8 @@
 
 **What it does**
 
-This runs b2g4Pipe, the command line (no GUI) version of Blast2GO designed
-for use in pipelines.
+This runs b2g4Pipe v2.5, which is the command line (no GUI) version of
+Blast2GO designed for use in pipelines.
 
 It takes as input BLAST XML results against a protein database, typically
 the NCBI non-redundant (NR) database. This tool will accept concatenated
@@ -51,17 +57,23 @@
 The output from this tool is a tabular file containing three columns, with
 the order taken from query order in the original BLAST XML file:
 
-====== ====================================
+====== ====================
 Column Description
------- ------------------------------------
-     1 ID and description of query sequence
+------ --------------------
+     1 ID of query sequence
      2 GO term
      3 GO description
-====== ====================================
+====== ====================
 
 Note that if no GO terms are assigned to a sequence (e.g. if it had no
 BLAST matches), then it will not be present in the output file.
 
+This tabular file is called an "Annotation File" in the Blast2GO GUI.
+If you download the tabular file, and rename it to use the extension
+".annot", then it can be opened with the Blast2GO GUI via the "File",
+"Load Annotation (.annot)" menu (keyboard shortcut ALT+L). You can
+then run some of the interactive analyses offered in the GUI tool.
+
 
 **Advanced Settings**
 
@@ -89,7 +101,7 @@
 Bioinformatics 21:3674-3676, 2005.
 http://dx.doi.org/10.1093/bioinformatics/bti610
 
-http://www.blast2go.org/
+http://www.blast2go.com/
 
     </help>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blast2go/tool_dependencies.xml	Tue Apr 30 13:04:41 2013 -0400
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="b2g4pipe" version="2.5">
+        <install version="1.0">
+            <actions>
+                <!-- The ZIP file decompresses to give a folder b2g4pipe -->
+                <action type="download_by_url">http://www.blast2go.com/data/blast2go/b2g4pipe_v2.5.zip</action>
+                <action type="move_directory_files"><source_directory>b2g4pipe</source_directory><destination_directory>$INSTALL_DIR/</destination_directory></action>
+                <!-- Set B2G4PIPE so the Python wrapper (blast2go.py) can locate blast2go.jar -->
+                <action type="set_environment"><environment_variable name="B2G4PIPE" action="set_to">$INSTALL_DIR</environment_variable></action>
+            </actions>
+        </install>
+        <readme>
+Downloads b2g4pipe v2.5
+        </readme>
+    </package>
+</tool_dependency>
+