annotate seqtools.py @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
1 import os
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
2 import ctypes
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
3
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
# Locate the compiled helper library next to this module (realpath resolves
# symlinks, so the library is looked up beside the real file) and load it once
# at import time.
script_dir = os.path.dirname(os.path.realpath(__file__))
seqtools = ctypes.CDLL(os.path.join(script_dir, 'libseqtools.so'))

# ctypes assumes a foreign function returns a C int unless told otherwise.
# These two are declared as returning `char *`, so results come back as byte
# strings instead of integers.
for _c_string_func in (seqtools.get_revcomp, seqtools.transfer_gaps):
    _c_string_func.restype = ctypes.c_char_p
del _c_string_func
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
8
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
9
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
def get_revcomp(seq):
    """Call libseqtools' `get_revcomp` on `seq`.

    Judging by its name the C function computes the reverse complement of the
    sequence; the C source is not visible from this module, so confirm there.

    Args:
        seq: The sequence, as a byte string or a text string. Text is encoded
            as ASCII first: no argtypes are declared for the C function, so
            ctypes would otherwise pass a Python 3 `str` as a wide-character
            (`wchar_t *`) string rather than the `char *` a C string function
            expects.

    Returns:
        The value ctypes builds from the returned `char *`: a byte string
        (`str` on Python 2, `bytes` on Python 3), or None for a NULL pointer.

    Raises:
        UnicodeEncodeError: If `seq` is text containing non-ASCII characters.
    """
    if isinstance(seq, type(u'')):
        seq = seq.encode('ascii')
    return seqtools.get_revcomp(seq)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
12
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
13
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
def get_diffs_frac_simple(consensus, family):
    """Call libseqtools' `get_diffs_frac_simple` on a consensus and a family.

    Args:
        consensus: The consensus sequence, as a byte or text string.
        family: A sized iterable of sequences (byte or text strings).

    Text strings are encoded as ASCII before being handed to C, because
    `ctypes.c_char_p` only accepts byte strings on Python 3.

    Returns:
        A ctypes array of `len(family)` C doubles, read through the pointer the
        C function returns (presumably one value per family member -- confirm
        against the C source). The array refers to memory owned by the C
        library.

    Raises:
        UnicodeEncodeError: If any text input contains non-ASCII characters.
        ValueError: If the C function returns a NULL pointer (raised by ctypes
            when the pointer is dereferenced).
    """
    text_type = type(u'')

    def as_bytes(value):
        return value.encode('ascii') if isinstance(value, text_type) else value

    n_seqs = len(family)
    # Keep the encoded byte strings in locals so the buffers the C pointers
    # refer to stay alive for the duration of the call.
    raw_consensus = as_bytes(consensus)
    raw_family = [as_bytes(seq) for seq in family]
    c_consensus = ctypes.c_char_p(raw_consensus)
    c_family = (ctypes.c_char_p * n_seqs)(*raw_family)
    # The result type depends on the family size, so it is set on every call.
    seqtools.get_diffs_frac_simple.restype = ctypes.POINTER(ctypes.c_double * n_seqs)
    diffs = seqtools.get_diffs_frac_simple(c_consensus, c_family, n_seqs)
    return diffs.contents
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
22
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
23
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
def get_diffs_frac_binned(consensus, family, bins):
    """Call libseqtools' `get_diffs_frac_binned` on a consensus and a family.

    Args:
        consensus: The consensus sequence, as a byte or text string.
        family: An iterable of sequences (byte or text strings). All of them
            must have the same length.
        bins: The number of bins, as an int; it sizes each returned array.

    Text strings are encoded as ASCII before being handed to C, because
    `ctypes.c_char_p` only accepts byte strings on Python 3.

    Returns:
        A list with one ctypes array of `bins` C doubles per family member,
        read through the pointers the C function returns. Returns None if the
        family's sequences are not all the same length, and an empty list if
        the family is empty.

    Raises:
        UnicodeEncodeError: If any text input contains non-ASCII characters.
        ValueError: If the C function returns a NULL pointer (raised by ctypes
            when the pointer is dereferenced).
    """
    text_type = type(u'')

    def as_bytes(value):
        return value.encode('ascii') if isinstance(value, text_type) else value

    raw_family = [as_bytes(seq) for seq in family]
    if not raw_family:
        # With no sequences there is no sequence length to give the C function,
        # and nothing to compare against the consensus.
        return []
    # Compare every length to the first one. A truthiness test on the running
    # length would treat an empty first sequence as "length not yet known".
    seq_len = len(raw_family[0])
    if any(len(seq) != seq_len for seq in raw_family):
        return None
    n_seqs = len(raw_family)
    # Keep the encoded consensus in a local so its buffer outlives the call.
    raw_consensus = as_bytes(consensus)
    c_consensus = ctypes.c_char_p(raw_consensus)
    c_family = (ctypes.c_char_p * n_seqs)(*raw_family)
    # The C function returns a pointer to `n_seqs` pointers, each to `bins`
    # doubles. Both sizes vary per call, so the result type is set every time.
    bin_array_pointer = ctypes.POINTER(ctypes.c_double * bins)
    seqtools.get_diffs_frac_binned.restype = ctypes.POINTER(bin_array_pointer * n_seqs)
    diffs_binned_c = seqtools.get_diffs_frac_binned(c_consensus, c_family, n_seqs, seq_len, bins)
    return [diffs_c.contents for diffs_c in diffs_binned_c.contents]
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
42
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
43
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
def transfer_gaps(aligned, seq, gap_char_in='-', gap_char_out='-'):
    """Call libseqtools' `transfer_gaps` on an aligned/unaligned sequence pair.

    Judging by the names, this copies the gaps found in `aligned` into `seq`;
    the C source is not visible from this module, so confirm there.

    Args:
        aligned: The gapped sequence, as a byte or text string.
        seq: The ungapped sequence, as a byte or text string.
        gap_char_in: Single character passed to C as the input gap character.
        gap_char_out: Single character passed to C as the output gap character.

    Text arguments are encoded as ASCII first. On Python 3 `ctypes.c_char`
    rejects `str` (including the '-' defaults), and an unconverted `str`
    argument would be passed to C as a wide-character string.

    Returns:
        The value ctypes builds from the returned `char *`: a byte string
        (`str` on Python 2, `bytes` on Python 3), or None for a NULL pointer.

    Raises:
        UnicodeEncodeError: If any text argument contains non-ASCII characters.
        TypeError: If a gap character is not exactly one character long
            (raised by `ctypes.c_char`).
    """
    text_type = type(u'')

    def as_bytes(value):
        return value.encode('ascii') if isinstance(value, text_type) else value

    gap_char_in_c = ctypes.c_char(as_bytes(gap_char_in))
    gap_char_out_c = ctypes.c_char(as_bytes(gap_char_out))
    return seqtools.transfer_gaps(as_bytes(aligned), as_bytes(seq), gap_char_in_c, gap_char_out_c)
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
48
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
49
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
def transfer_gaps_multi(seqs, aligned, gap_char_in='-', gap_char_out='-'):
    """Call libseqtools' `transfer_gaps_multi` on parallel lists of sequences.

    Note the parameter order: this function takes `(seqs, aligned)`, whereas
    `transfer_gaps` takes `(aligned, seq)`. The C function itself is called
    with the aligned array first.

    Args:
        seqs: A sized iterable of ungapped sequences (byte or text strings).
        aligned: A sized iterable of gapped sequences, the same number as
            `seqs`.
        gap_char_in: Single character passed to C as the input gap character.
        gap_char_out: Single character passed to C as the output gap character.

    Text arguments are encoded as ASCII first, because on Python 3
    `ctypes.c_char` and `ctypes.c_char_p` only accept byte strings.

    Returns:
        A list of `len(seqs)` values read from the `char *` array the C
        function returns: byte strings (`str` on Python 2, `bytes` on
        Python 3), with None for any NULL entry.

    Raises:
        AssertionError: If `seqs` and `aligned` differ in length.
        UnicodeEncodeError: If any text argument contains non-ASCII characters.
        TypeError: If a gap character is not exactly one character long
            (raised by `ctypes.c_char`).
        ValueError: If the C function returns a NULL pointer (raised by ctypes
            when the pointer is dereferenced).
    """
    text_type = type(u'')

    def as_bytes(value):
        return value.encode('ascii') if isinstance(value, text_type) else value

    gap_char_in_c = ctypes.c_char(as_bytes(gap_char_in))
    gap_char_out_c = ctypes.c_char(as_bytes(gap_char_out))
    n_seqs = len(seqs)
    if n_seqs != len(aligned):
        # Raised explicitly rather than with an `assert` statement so the check
        # still runs under `python -O`; exception type and message are the same.
        raise AssertionError('Error: Unequal number of gapped and ungapped sequences.')
    # Keep the encoded byte strings in locals so the buffers the C pointers
    # refer to stay alive for the duration of the call.
    raw_seqs = [as_bytes(seq) for seq in seqs]
    raw_aligned = [as_bytes(seq) for seq in aligned]
    seqs_c = (ctypes.c_char_p * n_seqs)(*raw_seqs)
    aligned_c = (ctypes.c_char_p * n_seqs)(*raw_aligned)
    # The result type depends on the number of sequences, so it is set on
    # every call.
    seqtools.transfer_gaps_multi.restype = ctypes.POINTER(ctypes.c_char_p * n_seqs)
    output_c = seqtools.transfer_gaps_multi(n_seqs, aligned_c, seqs_c, gap_char_in_c, gap_char_out_c)
    return list(output_c.contents)