comparison swalign.py @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 import os
2 import ctypes
3 import string
4
5 script_dir = os.path.dirname(os.path.realpath(__file__))
6 swalign = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'libswalign.so'))
7
8 REVCOMP_TABLE = string.maketrans('acgtrymkbdhvACGTRYMKBDHV', 'tgcayrkmvhdbTGCAYRKMVHDB')
9
10
11 # C struct for ctypes
12 class SeqPairC(ctypes.Structure):
13 _fields_ = [
14 ('a', ctypes.c_char_p),
15 ('alen', ctypes.c_uint),
16 ('b', ctypes.c_char_p),
17 ('blen', ctypes.c_uint),
18 ]
19
20
21 # C struct for ctypes
22 class AlignC(ctypes.Structure):
23 _fields_ = [
24 ('seqs', ctypes.POINTER(SeqPairC)),
25 ('start_a', ctypes.c_int),
26 ('start_b', ctypes.c_int),
27 ('end_a', ctypes.c_int),
28 ('end_b', ctypes.c_int),
29 ('matches', ctypes.c_int),
30 ('score', ctypes.c_double),
31 ]
32
33
34 # The Python version
35 class Align(object):
36 def __init__(self, align_c):
37 self.target = align_c.seqs.contents.a
38 self.query = align_c.seqs.contents.b
39 # Where the first base of the target aligns on the query, in query coordinates (or 1, if <= 0).
40 self.start_target = align_c.start_a
41 # Where the first base of the query aligns on the target, in target coordinates (or 1, if <= 0).
42 self.start_query = align_c.start_b
43 # Where the last base of the target aligns on the query, in query coordinates.
44 self.end_target = align_c.end_a
45 # Where the last base of the query aligns on the target, in target coordinates.
46 self.end_query = align_c.end_b
47 self.matches = align_c.matches
48 self.score = align_c.score
49
50 # Provide this common function.
51 def __str__(self):
52 """Print a human-readable representation of the alignment."""
53 start_query = str(self.start_query)
54 start_target = str(self.start_target)
55 start_width = str(max(len(start_query), len(start_target)))
56 line_format = '{:'+start_width+'} {} {}'
57 output = line_format.format(start_target, self.target, self.end_target) + '\n'
58 output += line_format.format(start_query, self.query, self.end_query)
59 return output
60
61
62 # Initialize functions (define types).
63 swalign.smith_waterman.restype = ctypes.POINTER(AlignC)
64 swalign.revcomp.restype = ctypes.c_char_p
65
66
67 def smith_waterman(target, query):
68 seq_pair = SeqPairC(target, len(target), query, len(query))
69 align_c = swalign.smith_waterman(ctypes.pointer(seq_pair), 1).contents
70 return Align(align_c)
71
72
73 def smith_waterman_duplex(target, query):
74 """Smith-Waterman align query to target in both orientations and return the best.
75 Convenience function that calls smith_waterman() twice, and returns the
76 alignment with the highest score."""
77 align = smith_waterman(target, query)
78 query_rc = revcomp(query)
79 align_rc = smith_waterman(target, query_rc)
80 if align_rc.score > align.score:
81 return align_rc
82 else:
83 return align
84
85
86 def revcomp(seq):
87 """Return the reverse complement of the input sequence.
88 Leaves the input string unaltered."""
89 return seq.translate(REVCOMP_TABLE)[::-1]
90
91
92 def revcomp_inplace(seq):
93 """Convert the input sequence to its reverse complement.
94 WARNING: This will alter the input string in-place!"""
95 swalign.revcomp(seq)