annotate consensus.py @ 4:af383638de66 draft

planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
author nick
date Mon, 23 Nov 2015 18:44:23 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
1 import os
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
2 import ctypes
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
3
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
4 script_dir = os.path.dirname(os.path.realpath(__file__))
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
5 consensus = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'consensusc.so'))
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
6 consensus.get_consensus.restype = ctypes.c_char_p
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
7 consensus.get_consensus_duplex.restype = ctypes.c_char_p
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
8 consensus.build_consensus_duplex_simple.restype = ctypes.c_char_p
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
9
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
10
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
11 # N.B.: The quality scores must be aligned with their accompanying sequences.
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
12 def get_consensus(align, quals=[], cons_thres=-1.0, qual_thres=' ', gapped=False):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
13 cons_thres_c = ctypes.c_double(cons_thres)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
14 qual_thres_c = ctypes.c_char(qual_thres)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
15 n_seqs = len(align)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
16 if gapped:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
17 gapped_c = 1
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
18 else:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
19 gapped_c = 0
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
20 assert not quals or len(quals) == n_seqs, 'Different number of sequences and quals.'
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
21 seq_len = None
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
22 for seq in (align + quals):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
23 if seq_len is None:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
24 seq_len = len(seq)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
25 else:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
26 assert seq_len == len(seq), 'All sequences in the alignment must be the same length.'
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
27 align_c = (ctypes.c_char_p * n_seqs)()
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
28 for i, seq in enumerate(align):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
29 align_c[i] = ctypes.c_char_p(seq)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
30 quals_c = (ctypes.c_char_p * n_seqs)()
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
31 for i, qual in enumerate(quals):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
32 quals_c[i] = ctypes.c_char_p(qual)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
33 if not quals:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
34 quals_c = 0
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
35 return consensus.get_consensus(align_c, quals_c, n_seqs, seq_len, cons_thres_c, qual_thres_c,
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
36 gapped_c)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
37
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
38
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
39 # N.B.: The quality scores must be aligned with their accompanying sequences.
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
40 def get_consensus_duplex(align1, align2, quals1=[], quals2=[], cons_thres=-1.0, qual_thres=' ',
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
41 method='iupac'):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
42 assert method in ('iupac', 'freq')
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
43 cons_thres_c = ctypes.c_double(cons_thres)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
44 qual_thres_c = ctypes.c_char(qual_thres)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
45 n_seqs1 = len(align1)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
46 n_seqs2 = len(align2)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
47 assert (not quals1 and not quals2) or (quals1 and quals2)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
48 assert not quals1 or len(quals1) == n_seqs1
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
49 assert not quals2 or len(quals2) == n_seqs2
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
50 seq_len = None
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
51 for seq in (align1 + align2 + quals1 + quals2):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
52 if seq_len is None:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
53 seq_len = len(seq)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
54 else:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
55 assert seq_len == len(seq), 'All sequences in the alignment must be the same length.'
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
56 align1_c = (ctypes.c_char_p * n_seqs1)()
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
57 for i, seq in enumerate(align1):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
58 align1_c[i] = ctypes.c_char_p(seq)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
59 align2_c = (ctypes.c_char_p * n_seqs1)()
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
60 for i, seq in enumerate(align2):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
61 align2_c[i] = ctypes.c_char_p(seq)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
62 quals1_c = (ctypes.c_char_p * n_seqs1)()
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
63 for i, seq in enumerate(quals1):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
64 quals1_c[i] = ctypes.c_char_p(seq)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
65 quals2_c = (ctypes.c_char_p * n_seqs1)()
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
66 for i, seq in enumerate(quals2):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
67 quals2_c[i] = ctypes.c_char_p(seq)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
68 if not quals1:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
69 quals1_c = 0
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
70 if not quals2:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
71 quals2_c = 0
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
72 return consensus.get_consensus_duplex(align1_c, align2_c, quals1_c, quals2_c, n_seqs1, n_seqs2,
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
73 seq_len, cons_thres_c, qual_thres_c, method)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
74
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
75
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
76 def build_consensus_duplex_simple(cons1, cons2, gapped=False):
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
77 assert len(cons1) == len(cons2)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
78 cons1_c = ctypes.c_char_p(cons1)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
79 cons2_c = ctypes.c_char_p(cons2)
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
80 if gapped:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
81 gapped_c = 1
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
82 else:
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
83 gapped_c = 0
af383638de66 planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff changeset
84 return consensus.build_consensus_duplex_simple(cons1_c, cons2_c, gapped_c)