Mercurial > repos > nick > duplex
annotate consensus.py @ 4:af383638de66 draft
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
author | nick |
---|---|
date | Mon, 23 Nov 2015 18:44:23 -0500 |
parents | |
children |
rev | line source |
---|---|
4
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
1 import os |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
2 import ctypes |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
3 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
4 script_dir = os.path.dirname(os.path.realpath(__file__)) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
5 consensus = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'consensusc.so')) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
6 consensus.get_consensus.restype = ctypes.c_char_p |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
7 consensus.get_consensus_duplex.restype = ctypes.c_char_p |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
8 consensus.build_consensus_duplex_simple.restype = ctypes.c_char_p |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
9 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
10 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
11 # N.B.: The quality scores must be aligned with their accompanying sequences. |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
12 def get_consensus(align, quals=[], cons_thres=-1.0, qual_thres=' ', gapped=False): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
13 cons_thres_c = ctypes.c_double(cons_thres) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
14 qual_thres_c = ctypes.c_char(qual_thres) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
15 n_seqs = len(align) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
16 if gapped: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
17 gapped_c = 1 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
18 else: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
19 gapped_c = 0 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
20 assert not quals or len(quals) == n_seqs, 'Different number of sequences and quals.' |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
21 seq_len = None |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
22 for seq in (align + quals): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
23 if seq_len is None: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
24 seq_len = len(seq) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
25 else: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
26 assert seq_len == len(seq), 'All sequences in the alignment must be the same length.' |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
27 align_c = (ctypes.c_char_p * n_seqs)() |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
28 for i, seq in enumerate(align): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
29 align_c[i] = ctypes.c_char_p(seq) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
30 quals_c = (ctypes.c_char_p * n_seqs)() |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
31 for i, qual in enumerate(quals): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
32 quals_c[i] = ctypes.c_char_p(qual) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
33 if not quals: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
34 quals_c = 0 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
35 return consensus.get_consensus(align_c, quals_c, n_seqs, seq_len, cons_thres_c, qual_thres_c, |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
36 gapped_c) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
37 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
38 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
39 # N.B.: The quality scores must be aligned with their accompanying sequences. |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
40 def get_consensus_duplex(align1, align2, quals1=[], quals2=[], cons_thres=-1.0, qual_thres=' ', |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
41 method='iupac'): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
42 assert method in ('iupac', 'freq') |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
43 cons_thres_c = ctypes.c_double(cons_thres) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
44 qual_thres_c = ctypes.c_char(qual_thres) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
45 n_seqs1 = len(align1) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
46 n_seqs2 = len(align2) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
47 assert (not quals1 and not quals2) or (quals1 and quals2) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
48 assert not quals1 or len(quals1) == n_seqs1 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
49 assert not quals2 or len(quals2) == n_seqs2 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
50 seq_len = None |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
51 for seq in (align1 + align2 + quals1 + quals2): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
52 if seq_len is None: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
53 seq_len = len(seq) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
54 else: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
55 assert seq_len == len(seq), 'All sequences in the alignment must be the same length.' |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
56 align1_c = (ctypes.c_char_p * n_seqs1)() |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
57 for i, seq in enumerate(align1): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
58 align1_c[i] = ctypes.c_char_p(seq) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
59 align2_c = (ctypes.c_char_p * n_seqs1)() |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
60 for i, seq in enumerate(align2): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
61 align2_c[i] = ctypes.c_char_p(seq) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
62 quals1_c = (ctypes.c_char_p * n_seqs1)() |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
63 for i, seq in enumerate(quals1): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
64 quals1_c[i] = ctypes.c_char_p(seq) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
65 quals2_c = (ctypes.c_char_p * n_seqs1)() |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
66 for i, seq in enumerate(quals2): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
67 quals2_c[i] = ctypes.c_char_p(seq) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
68 if not quals1: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
69 quals1_c = 0 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
70 if not quals2: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
71 quals2_c = 0 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
72 return consensus.get_consensus_duplex(align1_c, align2_c, quals1_c, quals2_c, n_seqs1, n_seqs2, |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
73 seq_len, cons_thres_c, qual_thres_c, method) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
74 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
75 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
76 def build_consensus_duplex_simple(cons1, cons2, gapped=False): |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
77 assert len(cons1) == len(cons2) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
78 cons1_c = ctypes.c_char_p(cons1) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
79 cons2_c = ctypes.c_char_p(cons2) |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
80 if gapped: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
81 gapped_c = 1 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
82 else: |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
83 gapped_c = 0 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
84 return consensus.build_consensus_duplex_simple(cons1_c, cons2_c, gapped_c) |