annotate seqtools.py @ 4:af383638de66 draft

planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
author nick
date Mon, 23 Nov 2015 18:44:23 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
1 import os
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
2 import ctypes
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
3
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
# Directory containing this module; the compiled C library is looked up next to it.
script_dir = os.path.dirname(os.path.realpath(__file__))
# Handle to the C implementation, loaded through ctypes.
seqtools = ctypes.CDLL(os.path.join(script_dir, 'seqtoolsc.so'))
# ctypes assumes an int return value unless told otherwise; these two functions
# are declared as returning C strings (char *).
seqtools.transfer_gaps.restype = ctypes.c_char_p
seqtools.get_revcomp.restype = ctypes.c_char_p
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
8
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
9
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
def get_revcomp(seq):
    """Pass `seq` to the C library's get_revcomp() and return its result.

    The C function's restype is declared as c_char_p at module load, so the
    value handed back is the string ctypes builds from the returned char
    pointer (presumably the reverse complement of `seq` -- confirm against
    the C source).
    """
    revcomp = seqtools.get_revcomp(seq)
    return revcomp
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
12
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
13
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
def get_diffs_frac_simple(consensus, family):
    """Call the C get_diffs_frac_simple() on a consensus and a family of sequences.

    Args:
        consensus: The consensus sequence, passed to C as a char pointer.
        family: A sized collection of sequences; each is passed as a char
            pointer in a C array.

    Returns:
        A ctypes array of len(family) c_double values, read from the pointer
        the C function returns (presumably one difference fraction per
        sequence -- confirm against the C source).
    """
    consensus_c = ctypes.c_char_p(consensus)
    n_seqs = len(family)
    # Build the char* array in one step from the family's sequences.
    family_c = (ctypes.c_char_p * n_seqs)(*family)
    # The result array's length depends on the family size, so the return type
    # has to be declared on every call.
    result_type = ctypes.POINTER(ctypes.c_double * n_seqs)
    seqtools.get_diffs_frac_simple.restype = result_type
    result_ptr = seqtools.get_diffs_frac_simple(consensus_c, family_c, n_seqs)
    return result_ptr.contents
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
22
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
23
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
def get_diffs_frac_binned(consensus, family, bins):
    """Call the C get_diffs_frac_binned() on a consensus and a family of sequences.

    Args:
        consensus: The consensus sequence, passed to C as a char pointer.
        family: A sized collection of sequences. They must all have the same
            length, because a single length is passed to the C function.
        bins: Number of c_double values in each per-sequence result array.

    Returns:
        None if the sequences in `family` are not all the same length.
        An empty list if `family` is empty.
        Otherwise a list with one ctypes array of `bins` c_double values per
        sequence, in family order (presumably per-bin difference fractions --
        confirm against the C source).
    """
    n_seqs = len(family)
    if n_seqs == 0:
        # Nothing to compare, and there is no sequence length to hand to C.
        return []
    c_consensus = ctypes.c_char_p(consensus)
    c_family = (ctypes.c_char_p * n_seqs)()
    seq_len = None
    for i, seq in enumerate(family):
        # Compare against None explicitly: a truthiness test would treat a
        # recorded length of 0 as "not set yet" and skip the mismatch check.
        if seq_len is None:
            seq_len = len(seq)
        elif len(seq) != seq_len:
            return None
        c_family[i] = ctypes.c_char_p(seq)
    # The C function returns a pointer to an array of n_seqs pointers, each to
    # an array of `bins` doubles. Both sizes vary per call, so the return type
    # is declared here rather than at import time.
    bin_array_pointer = ctypes.POINTER(ctypes.c_double * bins)
    seqtools.get_diffs_frac_binned.restype = ctypes.POINTER(bin_array_pointer * n_seqs)
    diffs_binned_c = seqtools.get_diffs_frac_binned(c_consensus, c_family, n_seqs, seq_len, bins)
    # Dereference each inner pointer to expose the per-sequence double arrays.
    return [diffs_c.contents for diffs_c in diffs_binned_c.contents]
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
42
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
43
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
def transfer_gaps(aligned, seq, gap_char_in='-', gap_char_out='-'):
    """Call the C transfer_gaps() with an aligned sequence and a second sequence.

    Args:
        aligned: First argument to the C function (presumably the gapped,
            aligned sequence -- confirm against the C source).
        seq: Second argument to the C function.
        gap_char_in: Single character passed to C as the input gap character.
        gap_char_out: Single character passed to C as the output gap character.

    Returns:
        The string ctypes builds from the char pointer returned by C (the
        restype is declared as c_char_p at module load).
    """
    gap_in, gap_out = ctypes.c_char(gap_char_in), ctypes.c_char(gap_char_out)
    return seqtools.transfer_gaps(aligned, seq, gap_in, gap_out)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
48
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
49
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
def transfer_gaps_multi(seqs, aligned, gap_char_in='-', gap_char_out='-'):
    """Call the C transfer_gaps_multi() on parallel lists of sequences.

    Note the argument order: this wrapper takes `seqs` first, but the C
    function is called with the `aligned` array before the `seqs` array.

    Args:
        seqs: Sized collection of sequences (per the error message, the
            ungapped ones).
        aligned: Sized collection of sequences (per the error message, the
            gapped ones); must have as many elements as `seqs`.
        gap_char_in: Single character passed to C as the input gap character.
        gap_char_out: Single character passed to C as the output gap character.

    Returns:
        A list of len(seqs) strings, read from the array of char pointers the
        C function returns. This wrapper does not free that array.

    Raises:
        AssertionError: If `seqs` and `aligned` differ in length.
    """
    gap_char_in_c = ctypes.c_char(gap_char_in)
    gap_char_out_c = ctypes.c_char(gap_char_out)
    n_seqs = len(seqs)
    # Raise explicitly instead of using `assert`: assert statements are removed
    # under `python -O`, which would let mismatched array lengths reach the C
    # code. AssertionError is kept so existing callers see the same exception.
    if n_seqs != len(aligned):
        raise AssertionError('Error: Unequal number of gapped and ungapped sequences.')
    seqs_c = (ctypes.c_char_p * n_seqs)()
    for i, seq in enumerate(seqs):
        seqs_c[i] = ctypes.c_char_p(seq)
    aligned_c = (ctypes.c_char_p * n_seqs)()
    for i, seq in enumerate(aligned):
        aligned_c[i] = ctypes.c_char_p(seq)
    # The result array's length depends on n_seqs, so the return type has to be
    # declared on every call.
    seqtools.transfer_gaps_multi.restype = ctypes.POINTER(ctypes.c_char_p * n_seqs)
    output_c = seqtools.transfer_gaps_multi(n_seqs, aligned_c, seqs_c, gap_char_in_c, gap_char_out_c)
    # Iterating a c_char_p array yields Python string objects, so the list does
    # not hold ctypes pointers.
    return list(output_c.contents)