Mercurial > repos > nick > duplex
diff seqtools.py @ 4:af383638de66 draft
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
| author | nick |
|---|---|
| date | Mon, 23 Nov 2015 18:44:23 -0500 |
| parents | |
| children |
line wrap: on
line diff
"""Thin ctypes wrappers around the native ``seqtoolsc.so`` library.

The shared object is loaded at import time from the directory that contains
this file. Every wrapper converts its Python arguments to C types, calls the
same-named C function and converts the result back into Python/ctypes
objects.

Sequences and gap characters may be given as byte strings or as ASCII text;
text is encoded to bytes before it is passed to C. Strings returned by the
library come back as byte strings (``str`` on Python 2, ``bytes`` on
Python 3).
"""
import os
import ctypes

script_dir = os.path.dirname(os.path.realpath(__file__))
seqtools = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'seqtoolsc.so'))
# These two return type never changes, so declare them once; the functions
# returning arrays get their restype set per call because the array length
# is part of the ctypes type.
seqtools.get_revcomp.restype = ctypes.c_char_p
seqtools.transfer_gaps.restype = ctypes.c_char_p


def _as_bytes(value):
    """Return `value` as a byte string suitable for a C ``char``/``char *``.

    Byte strings and None are passed through untouched; anything else is
    assumed to be text and is ASCII-encoded. Without this, Python 3 ``str``
    arguments are rejected by ``c_char_p``/``c_char`` or marshalled as
    ``wchar_t *`` when passed to the foreign function directly.
    """
    if value is None or isinstance(value, bytes):
        return value
    return value.encode('ascii')


def _c_string_array(strings):
    """Build a ctypes ``char *[len(strings)]`` array from `strings`."""
    array = (ctypes.c_char_p * len(strings))()
    for i, string in enumerate(strings):
        array[i] = ctypes.c_char_p(_as_bytes(string))
    return array


def get_revcomp(seq):
    """Call the C ``get_revcomp`` on `seq` and return the resulting string.

    NOTE(review): presumably the reverse complement of `seq`; the actual
    semantics live in the C library.
    """
    return seqtools.get_revcomp(_as_bytes(seq))


def get_diffs_frac_simple(consensus, family):
    """Call the C ``get_diffs_frac_simple`` for `consensus` and `family`.

    `family` is a sequence of strings. Returns a ctypes array holding
    ``len(family)`` C doubles, one per family member.
    """
    c_consensus = ctypes.c_char_p(_as_bytes(consensus))
    c_family = _c_string_array(family)
    # The array length is part of the ctypes type, so restype depends on the
    # size of this particular family.
    seqtools.get_diffs_frac_simple.restype = ctypes.POINTER(ctypes.c_double * len(c_family))
    diffs = seqtools.get_diffs_frac_simple(c_consensus, c_family, len(c_family))
    return diffs.contents


def get_diffs_frac_binned(consensus, family, bins):
    """Call the C ``get_diffs_frac_binned`` for `consensus` and `family`.

    All sequences in `family` must have the same length; if they do not,
    None is returned and the C function is not called. Otherwise returns a
    list with one ctypes array of `bins` C doubles per family member.
    """
    seq_len = None
    for seq in family:
        # Compare against None rather than truthiness: a zero-length first
        # sequence is a valid reference length and must not be treated as
        # "not set yet".
        if seq_len is None:
            seq_len = len(seq)
        elif seq_len != len(seq):
            return None
    c_consensus = ctypes.c_char_p(_as_bytes(consensus))
    c_family = _c_string_array(family)
    double_array_pointer = ctypes.POINTER(ctypes.c_double * bins)
    seqtools.get_diffs_frac_binned.restype = ctypes.POINTER(double_array_pointer * len(c_family))
    # NOTE(review): for an empty family seq_len is still None here and is
    # forwarded to C as-is.
    diffs_binned_c = seqtools.get_diffs_frac_binned(c_consensus, c_family, len(c_family), seq_len, bins)
    # The C function returns a pointer to an array of pointers; dereference
    # each one to expose the per-sequence array of doubles.
    return [diffs_c.contents for diffs_c in diffs_binned_c.contents]


def transfer_gaps(aligned, seq, gap_char_in='-', gap_char_out='-'):
    """Call the C ``transfer_gaps`` and return the resulting string.

    `gap_char_in` and `gap_char_out` must each be a single character.

    NOTE(review): presumably copies the gaps found in `aligned` (marked by
    `gap_char_in`) into `seq`, writing them as `gap_char_out`; confirm
    against the C source.
    """
    gap_char_in_c = ctypes.c_char(_as_bytes(gap_char_in))
    gap_char_out_c = ctypes.c_char(_as_bytes(gap_char_out))
    return seqtools.transfer_gaps(_as_bytes(aligned), _as_bytes(seq), gap_char_in_c, gap_char_out_c)


def transfer_gaps_multi(seqs, aligned, gap_char_in='-', gap_char_out='-'):
    """Call the C ``transfer_gaps_multi`` on parallel lists of sequences.

    `seqs` and `aligned` must contain the same number of strings. Returns a
    list of ``len(seqs)`` strings as produced by the C function.

    Raises AssertionError if the two lists differ in length.
    """
    gap_char_in_c = ctypes.c_char(_as_bytes(gap_char_in))
    gap_char_out_c = ctypes.c_char(_as_bytes(gap_char_out))
    n_seqs = len(seqs)
    # Raised explicitly instead of via `assert` so the check survives
    # `python -O`; a mismatch would make the C code index past the end of
    # the shorter array.
    if n_seqs != len(aligned):
        raise AssertionError('Error: Unequal number of gapped and ungapped sequences.')
    seqs_c = _c_string_array(seqs)
    aligned_c = _c_string_array(aligned)
    seqtools.transfer_gaps_multi.restype = ctypes.POINTER(ctypes.c_char_p * n_seqs)
    output_c = seqtools.transfer_gaps_multi(n_seqs, aligned_c, seqs_c, gap_char_in_c, gap_char_out_c)
    return list(output_c.contents)
