diff seqtools.py @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seqtools.py	Thu Feb 02 18:44:31 2017 -0500
@@ -0,0 +1,66 @@
+import os
+import ctypes
+
+script_dir = os.path.dirname(os.path.realpath(__file__))
+seqtools = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'libseqtools.so'))
+seqtools.get_revcomp.restype = ctypes.c_char_p
+seqtools.transfer_gaps.restype = ctypes.c_char_p
+
+
+def get_revcomp(seq):
+  return seqtools.get_revcomp(seq)
+
+
+def get_diffs_frac_simple(consensus, family):
+  c_consensus = ctypes.c_char_p(consensus)
+  c_family = (ctypes.c_char_p * len(family))()
+  for i, seq in enumerate(family):
+    c_family[i] = ctypes.c_char_p(seq)
+  seqtools.get_diffs_frac_simple.restype = ctypes.POINTER(ctypes.c_double * len(c_family))
+  diffs = seqtools.get_diffs_frac_simple(c_consensus, c_family, len(c_family))
+  return diffs.contents
+
+
+def get_diffs_frac_binned(consensus, family, bins):
+  seq_len = None
+  c_consensus = ctypes.c_char_p(consensus)
+  c_family = (ctypes.c_char_p * len(family))()
+  for i, seq in enumerate(family):
+    if seq_len:
+      if seq_len != len(seq):
+        return None
+    else:
+      seq_len = len(seq)
+    c_family[i] = ctypes.c_char_p(seq)
+  double_array_pointer = ctypes.POINTER(ctypes.c_double * bins)
+  seqtools.get_diffs_frac_binned.restype = ctypes.POINTER(double_array_pointer * len(c_family))
+  diffs_binned_c = seqtools.get_diffs_frac_binned(c_consensus, c_family, len(c_family), seq_len, bins)
+  diffs_binned = []
+  for diffs_c in diffs_binned_c.contents:
+    diffs_binned.append(diffs_c.contents)
+  return diffs_binned
+
+
+def transfer_gaps(aligned, seq, gap_char_in='-', gap_char_out='-'):
+  gap_char_in_c = ctypes.c_char(gap_char_in)
+  gap_char_out_c = ctypes.c_char(gap_char_out)
+  return seqtools.transfer_gaps(aligned, seq, gap_char_in_c, gap_char_out_c)
+
+
+def transfer_gaps_multi(seqs, aligned, gap_char_in='-', gap_char_out='-'):
+  gap_char_in_c = ctypes.c_char(gap_char_in)
+  gap_char_out_c = ctypes.c_char(gap_char_out)
+  n_seqs = len(seqs)
+  assert n_seqs == len(aligned), 'Error: Unequal number of gapped and ungapped sequences.'
+  seqs_c = (ctypes.c_char_p * n_seqs)()
+  for i, seq in enumerate(seqs):
+    seqs_c[i] = ctypes.c_char_p(seq)
+  aligned_c = (ctypes.c_char_p * n_seqs)()
+  for i, seq in enumerate(aligned):
+    aligned_c[i] = ctypes.c_char_p(seq)
+  seqtools.transfer_gaps_multi.restype = ctypes.POINTER(ctypes.c_char_p * n_seqs)
+  output_c = seqtools.transfer_gaps_multi(n_seqs, aligned_c, seqs_c, gap_char_in_c, gap_char_out_c)
+  output = []
+  for seq in output_c.contents:
+    output.append(seq)
+  return output