Mercurial > repos > nick > duplex
comparison swalign.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
| author | nick |
|---|---|
| date | Thu, 02 Feb 2017 18:44:31 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 17:836fa4fe9494 | 18:e4d75f9efb90 |
|---|---|
| 1 import os | |
| 2 import ctypes | |
| 3 import string | |
| 4 | |
| 5 script_dir = os.path.dirname(os.path.realpath(__file__)) | |
| 6 swalign = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'libswalign.so')) | |
| 7 | |
| 8 REVCOMP_TABLE = string.maketrans('acgtrymkbdhvACGTRYMKBDHV', 'tgcayrkmvhdbTGCAYRKMVHDB') | |
| 9 | |
| 10 | |
| 11 # C struct for ctypes | |
| 12 class SeqPairC(ctypes.Structure): | |
| 13 _fields_ = [ | |
| 14 ('a', ctypes.c_char_p), | |
| 15 ('alen', ctypes.c_uint), | |
| 16 ('b', ctypes.c_char_p), | |
| 17 ('blen', ctypes.c_uint), | |
| 18 ] | |
| 19 | |
| 20 | |
| 21 # C struct for ctypes | |
| 22 class AlignC(ctypes.Structure): | |
| 23 _fields_ = [ | |
| 24 ('seqs', ctypes.POINTER(SeqPairC)), | |
| 25 ('start_a', ctypes.c_int), | |
| 26 ('start_b', ctypes.c_int), | |
| 27 ('end_a', ctypes.c_int), | |
| 28 ('end_b', ctypes.c_int), | |
| 29 ('matches', ctypes.c_int), | |
| 30 ('score', ctypes.c_double), | |
| 31 ] | |
| 32 | |
| 33 | |
| 34 # The Python version | |
| 35 class Align(object): | |
| 36 def __init__(self, align_c): | |
| 37 self.target = align_c.seqs.contents.a | |
| 38 self.query = align_c.seqs.contents.b | |
| 39 # Where the first base of the target aligns on the query, in query coordinates (or 1, if <= 0). | |
| 40 self.start_target = align_c.start_a | |
| 41 # Where the first base of the query aligns on the target, in target coordinates (or 1, if <= 0). | |
| 42 self.start_query = align_c.start_b | |
| 43 # Where the last base of the target aligns on the query, in query coordinates. | |
| 44 self.end_target = align_c.end_a | |
| 45 # Where the last base of the query aligns on the target, in target coordinates. | |
| 46 self.end_query = align_c.end_b | |
| 47 self.matches = align_c.matches | |
| 48 self.score = align_c.score | |
| 49 | |
| 50 # Provide this common function. | |
| 51 def __str__(self): | |
| 52 """Print a human-readable representation of the alignment.""" | |
| 53 start_query = str(self.start_query) | |
| 54 start_target = str(self.start_target) | |
| 55 start_width = str(max(len(start_query), len(start_target))) | |
| 56 line_format = '{:'+start_width+'} {} {}' | |
| 57 output = line_format.format(start_target, self.target, self.end_target) + '\n' | |
| 58 output += line_format.format(start_query, self.query, self.end_query) | |
| 59 return output | |
| 60 | |
| 61 | |
| 62 # Initialize functions (define types). | |
| 63 swalign.smith_waterman.restype = ctypes.POINTER(AlignC) | |
| 64 swalign.revcomp.restype = ctypes.c_char_p | |
| 65 | |
| 66 | |
| 67 def smith_waterman(target, query): | |
| 68 seq_pair = SeqPairC(target, len(target), query, len(query)) | |
| 69 align_c = swalign.smith_waterman(ctypes.pointer(seq_pair), 1).contents | |
| 70 return Align(align_c) | |
| 71 | |
| 72 | |
| 73 def smith_waterman_duplex(target, query): | |
| 74 """Smith-Waterman align query to target in both orientations and return the best. | |
| 75 Convenience function that calls smith_waterman() twice, and returns the | |
| 76 alignment with the highest score.""" | |
| 77 align = smith_waterman(target, query) | |
| 78 query_rc = revcomp(query) | |
| 79 align_rc = smith_waterman(target, query_rc) | |
| 80 if align_rc.score > align.score: | |
| 81 return align_rc | |
| 82 else: | |
| 83 return align | |
| 84 | |
| 85 | |
| 86 def revcomp(seq): | |
| 87 """Return the reverse complement of the input sequence. | |
| 88 Leaves the input string unaltered.""" | |
| 89 return seq.translate(REVCOMP_TABLE)[::-1] | |
| 90 | |
| 91 | |
| 92 def revcomp_inplace(seq): | |
| 93 """Convert the input sequence to its reverse complement. | |
| 94 WARNING: This will alter the input string in-place!""" | |
| 95 swalign.revcomp(seq) |
