annotate swalign.py @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
1 import os
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
2 import ctypes
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
3 import string
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
4
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
5 script_dir = os.path.dirname(os.path.realpath(__file__))
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
6 swalign = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'libswalign.so'))
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
7
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
8 REVCOMP_TABLE = string.maketrans('acgtrymkbdhvACGTRYMKBDHV', 'tgcayrkmvhdbTGCAYRKMVHDB')
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
9
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
10
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
11 # C struct for ctypes
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
12 class SeqPairC(ctypes.Structure):
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
13 _fields_ = [
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
14 ('a', ctypes.c_char_p),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
15 ('alen', ctypes.c_uint),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
16 ('b', ctypes.c_char_p),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
17 ('blen', ctypes.c_uint),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
18 ]
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
19
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
20
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
21 # C struct for ctypes
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
22 class AlignC(ctypes.Structure):
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
23 _fields_ = [
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
24 ('seqs', ctypes.POINTER(SeqPairC)),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
25 ('start_a', ctypes.c_int),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
26 ('start_b', ctypes.c_int),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
27 ('end_a', ctypes.c_int),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
28 ('end_b', ctypes.c_int),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
29 ('matches', ctypes.c_int),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
30 ('score', ctypes.c_double),
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
31 ]
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
32
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
33
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
34 # The Python version
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
35 class Align(object):
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
36 def __init__(self, align_c):
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
37 self.target = align_c.seqs.contents.a
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
38 self.query = align_c.seqs.contents.b
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
39 # Where the first base of the target aligns on the query, in query coordinates (or 1, if <= 0).
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
40 self.start_target = align_c.start_a
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
41 # Where the first base of the query aligns on the target, in target coordinates (or 1, if <= 0).
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
42 self.start_query = align_c.start_b
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
43 # Where the last base of the target aligns on the query, in query coordinates.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
44 self.end_target = align_c.end_a
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
45 # Where the last base of the query aligns on the target, in target coordinates.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
46 self.end_query = align_c.end_b
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
47 self.matches = align_c.matches
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
48 self.score = align_c.score
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
49
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
50 # Provide this common function.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
51 def __str__(self):
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
52 """Print a human-readable representation of the alignment."""
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
53 start_query = str(self.start_query)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
54 start_target = str(self.start_target)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
55 start_width = str(max(len(start_query), len(start_target)))
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
56 line_format = '{:'+start_width+'} {} {}'
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
57 output = line_format.format(start_target, self.target, self.end_target) + '\n'
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
58 output += line_format.format(start_query, self.query, self.end_query)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
59 return output
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
60
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
61
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
62 # Initialize functions (define types).
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
63 swalign.smith_waterman.restype = ctypes.POINTER(AlignC)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
64 swalign.revcomp.restype = ctypes.c_char_p
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
65
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
66
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
67 def smith_waterman(target, query):
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
68 seq_pair = SeqPairC(target, len(target), query, len(query))
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
69 align_c = swalign.smith_waterman(ctypes.pointer(seq_pair), 1).contents
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
70 return Align(align_c)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
71
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
72
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
73 def smith_waterman_duplex(target, query):
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
74 """Smith-Waterman align query to target in both orientations and return the best.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
75 Convenience function that calls smith_waterman() twice, and returns the
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
76 alignment with the highest score."""
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
77 align = smith_waterman(target, query)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
78 query_rc = revcomp(query)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
79 align_rc = smith_waterman(target, query_rc)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
80 if align_rc.score > align.score:
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
81 return align_rc
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
82 else:
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
83 return align
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
84
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
85
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
86 def revcomp(seq):
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
87 """Return the reverse complement of the input sequence.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
88 Leaves the input string unaltered."""
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
89 return seq.translate(REVCOMP_TABLE)[::-1]
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
90
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
91
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
92 def revcomp_inplace(seq):
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
93 """Convert the input sequence to its reverse complement.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
94 WARNING: This will alter the input string in-place!"""
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
95 swalign.revcomp(seq)