changeset 71:0c4ac210068b draft

handle reverse matches
author Jan Kanis <jan.code@jankanis.nl>
date Wed, 18 Jun 2014 14:12:00 +0200
parents fa8a93bdefd7
children 0ef071bba164
files blast2html.py test-data/blast xml example1.html
diffstat 2 files changed, 50 insertions(+), 42 deletions(-) [+]
line wrap: on
line diff
--- a/blast2html.py	Wed Jun 18 12:25:37 2014 +0200
+++ b/blast2html.py	Wed Jun 18 14:12:00 2014 +0200
@@ -87,29 +87,37 @@
     
     step = 60
 
-    def split(txt):
-        return [txt[i:i+step] for i in range(0, len(txt), step)]
-
     qfrom = int(hsp['Hsp_query-from'])
     qto = int(hsp['Hsp_query-to'])
+    qframe = int(hsp['Hsp_query-frame'])
     hfrom = int(hsp['Hsp_hit-from'])
     hto = int(hsp['Hsp_hit-to'])
+    hframe = int(hsp['Hsp_hit-frame'])
     qseq = hsp.Hsp_qseq.text
     midline = hsp.Hsp_midline.text
     hseq = hsp.Hsp_hseq.text
+
+    if not qframe in [1, -1]:
+        warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}".format(nodeid(hsp), qframe))
+        qframe = -1 if qframe < 0 else 1
+    if not hframe in [1, -1]:
+        warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}".format(nodeid(hsp), hframe))
+        hframe = -1 if hframe < 0 else 1
     
-    offset = 0
+    def split(txt):
+        return [txt[i:i+step] for i in range(0, len(txt), step)]
+
     for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), step)):
         yield (
-            "Query  {:>7}  {}  {}\n".format(qfrom+offset, qs, qfrom+offset+len(qs)-1) +
+            "Query  {:>7}  {}  {}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) +
             "       {:7}  {}\n".format('', mid) +
-            "Subject{:>7}  {}  {}".format(hfrom+offset, hs, hfrom+offset+len(hs)-1)
+            "Subject{:>7}  {}  {}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe)
         )
         
-    if qfrom+len(qseq)-1 != qto:
+    if qfrom+(len(qseq)-1)*qframe != qto:
         warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format(
             nodeid(hsp), qfrom, qto, len(qseq)))
-    if hfrom+len(hseq)-1 != hto:
+    if hfrom+(len(hseq)-1)*hframe != hto:
         warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format(
             nodeid(hsp), hfrom, hto, len(hseq)))
 
--- a/test-data/blast xml example1.html	Wed Jun 18 12:25:37 2014 +0200
+++ b/test-data/blast xml example1.html	Wed Jun 18 14:12:00 2014 +0200
@@ -6667,7 +6667,7 @@
 
                   <pre class=alignmentgraphic>Query        2  GTCCGTCG  9
                 ||||||||
-Subject    177  GTCCGTCG  184</pre>
+Subject    177  GTCCGTCG  170</pre>
                 </div>
                 <div class=hotspot id=hotspot1-26-3>
                   <p class=range>
@@ -6715,7 +6715,7 @@
 
                   <pre class=alignmentgraphic>Query        2  GTCCGTC  8
                 |||||||
-Subject   2048  GTCCGTC  2054</pre>
+Subject   2048  GTCCGTC  2042</pre>
                 </div>
                 <div class=hotspot id=hotspot1-26-5>
                   <p class=range>
@@ -6853,7 +6853,7 @@
 
                   <pre class=alignmentgraphic>Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641</pre>
+Subject   1634  CGTGAAGA  1627</pre>
                 </div>
                 <div class=hotspot id=hotspot1-28-3>
                   <p class=range>
@@ -6994,7 +6994,7 @@
 
                   <pre class=alignmentgraphic>Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641</pre>
+Subject   1634  CGTGAAGA  1627</pre>
                 </div>
                 <div class=hotspot id=hotspot1-29-3>
                   <p class=range>
@@ -7111,7 +7111,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    298  TCGTGAAGA  306</pre>
+Subject    298  TCGTGAAGA  290</pre>
                 </div>
                 <div class=hotspot id=hotspot1-31-2>
                   <p class=range>
@@ -7672,7 +7672,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-37-2>
                   <p class=range>
@@ -7813,7 +7813,7 @@
 
                   <pre class=alignmentgraphic>Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575</pre>
+Subject   1569  TGAAGAG  1563</pre>
                 </div>
 
               </div>
@@ -7924,7 +7924,7 @@
 
                   <pre class=alignmentgraphic>Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575</pre>
+Subject   1569  TGAAGAG  1563</pre>
                 </div>
 
               </div>
@@ -8392,7 +8392,7 @@
 
                   <pre class=alignmentgraphic>Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641</pre>
+Subject   1634  CGTGAAGA  1627</pre>
                 </div>
                 <div class=hotspot id=hotspot1-45-3>
                   <p class=range>
@@ -8551,7 +8551,7 @@
 
                   <pre class=alignmentgraphic>Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641</pre>
+Subject   1634  CGTGAAGA  1627</pre>
                 </div>
                 <div class=hotspot id=hotspot1-46-3>
                   <p class=range>
@@ -8716,7 +8716,7 @@
 
                   <pre class=alignmentgraphic>Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575</pre>
+Subject   1569  TGAAGAG  1563</pre>
                 </div>
 
               </div>
@@ -8809,7 +8809,7 @@
 
                   <pre class=alignmentgraphic>Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575</pre>
+Subject   1569  TGAAGAG  1563</pre>
                 </div>
 
               </div>
@@ -8878,7 +8878,7 @@
 
                   <pre class=alignmentgraphic>Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1634  CGTGAAGA  1641</pre>
+Subject   1634  CGTGAAGA  1627</pre>
                 </div>
                 <div class=hotspot id=hotspot1-49-3>
                   <p class=range>
@@ -9043,7 +9043,7 @@
 
                   <pre class=alignmentgraphic>Query       10  TGAAGAG  16
                 |||||||
-Subject   1569  TGAAGAG  1575</pre>
+Subject   1569  TGAAGAG  1563</pre>
                 </div>
 
               </div>
@@ -9088,7 +9088,7 @@
 
                   <pre class=alignmentgraphic>Query        4  CCGTCGTGA  12
                 |||||||||
-Subject     19  CCGTCGTGA  27</pre>
+Subject     19  CCGTCGTGA  11</pre>
                 </div>
 
               </div>
@@ -9178,7 +9178,7 @@
 
                   <pre class=alignmentgraphic>Query        4  CCGTCGTGA  12
                 |||||||||
-Subject     19  CCGTCGTGA  27</pre>
+Subject     19  CCGTCGTGA  11</pre>
                 </div>
 
               </div>
@@ -9268,7 +9268,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-55-2>
                   <p class=range>
@@ -9385,7 +9385,7 @@
 
                   <pre class=alignmentgraphic>Query        8  CGTGAAGA  15
                 ||||||||
-Subject   1587  CGTGAAGA  1594</pre>
+Subject   1587  CGTGAAGA  1580</pre>
                 </div>
                 <div class=hotspot id=hotspot1-56-3>
                   <p class=range>
@@ -9550,7 +9550,7 @@
 
                   <pre class=alignmentgraphic>Query       10  TGAAGAG  16
                 |||||||
-Subject   1533  TGAAGAG  1539</pre>
+Subject   1533  TGAAGAG  1527</pre>
                 </div>
 
               </div>
@@ -9814,7 +9814,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-61-2>
                   <p class=range>
@@ -9907,7 +9907,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-62-2>
                   <p class=range>
@@ -10000,7 +10000,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-63-2>
                   <p class=range>
@@ -10093,7 +10093,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-64-2>
                   <p class=range>
@@ -10186,7 +10186,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-65-2>
                   <p class=range>
@@ -10279,7 +10279,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-66-2>
                   <p class=range>
@@ -10348,7 +10348,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-67-2>
                   <p class=range>
@@ -10441,7 +10441,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-68-2>
                   <p class=range>
@@ -10534,7 +10534,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-69-2>
                   <p class=range>
@@ -10627,7 +10627,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-70-2>
                   <p class=range>
@@ -10768,7 +10768,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-71-2>
                   <p class=range>
@@ -11263,7 +11263,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-76-2>
                   <p class=range>
@@ -11380,7 +11380,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-77-2>
                   <p class=range>
@@ -11473,7 +11473,7 @@
 
                   <pre class=alignmentgraphic>Query        7  TCGTGAAGA  15
                 |||||||||
-Subject    305  TCGTGAAGA  313</pre>
+Subject    305  TCGTGAAGA  297</pre>
                 </div>
                 <div class=hotspot id=hotspot1-78-2>
                   <p class=range>
@@ -12961,7 +12961,7 @@
 
                   <pre class=alignmentgraphic>Query        5  CGTCGTGA  12
                 ||||||||
-Subject     52  CGTCGTGA  59</pre>
+Subject     52  CGTCGTGA  45</pre>
                 </div>
 
               </div>