annotate methratio.patch @ 10:4d37a7026737 draft default tip

Uploaded
author eugen
date Thu, 16 Aug 2012 04:58:24 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
4d37a7026737 Uploaded
eugen
parents:
diff changeset
1 --- methratio.py.orig 2012-04-10 20:54:39.000000000 +0200
4d37a7026737 Uploaded
eugen
parents:
diff changeset
2 +++ methratio.py 2012-08-16 10:54:42.000000000 +0200
4d37a7026737 Uploaded
eugen
parents:
diff changeset
3 @@ -1,4 +1,5 @@
4d37a7026737 Uploaded
eugen
parents:
diff changeset
4 -import sys, time, os, array, optparse
4d37a7026737 Uploaded
eugen
parents:
diff changeset
5 +#!/usr/bin/python
4d37a7026737 Uploaded
eugen
parents:
diff changeset
6 +import sys, time, os, array, optparse, re
4d37a7026737 Uploaded
eugen
parents:
diff changeset
7 usage = "usage: %prog [options] BSMAP_MAPPING_FILES"
4d37a7026737 Uploaded
eugen
parents:
diff changeset
8 parser = optparse.OptionParser(usage=usage)
4d37a7026737 Uploaded
eugen
parents:
diff changeset
9
4d37a7026737 Uploaded
eugen
parents:
diff changeset
10 @@ -128,26 +129,32 @@
4d37a7026737 Uploaded
eugen
parents:
diff changeset
11
4d37a7026737 Uploaded
eugen
parents:
diff changeset
12 disp('writing %s ...' % options.outfile)
4d37a7026737 Uploaded
eugen
parents:
diff changeset
13 ss = {'C': '+', 'G': '-'}
4d37a7026737 Uploaded
eugen
parents:
diff changeset
14 +trint = {'CG[ACGT]$':'CPG','C[ACT]G$':'CHG', r"C[ACT][ACT]":'CHH', r"[ACGT]CG":'CPG',r"C[AGT]G":'CHG',r"[AGT][AGT]G":'CHH' }
4d37a7026737 Uploaded
eugen
parents:
diff changeset
15 fout = open(options.outfile, 'w')
4d37a7026737 Uploaded
eugen
parents:
diff changeset
16 -z95, z95sq = 1.96, 1.96 * 1.96
4d37a7026737 Uploaded
eugen
parents:
diff changeset
17 -fout.write('chr\tpos\tstrand\tcontext\tratio\ttotal_C\tmethy_C\tCI_lower\tCI_upper\n')
4d37a7026737 Uploaded
eugen
parents:
diff changeset
18 +
4d37a7026737 Uploaded
eugen
parents:
diff changeset
19 +fout.write('chr\tpos\tstrand\tcontext\coverage\tmethylated_C\tperc_methy_C\n')
4d37a7026737 Uploaded
eugen
parents:
diff changeset
20 nc, nd, dep0 = 0, 0, options.min_depth
4d37a7026737 Uploaded
eugen
parents:
diff changeset
21 for cr in sorted(depth.keys()):
4d37a7026737 Uploaded
eugen
parents:
diff changeset
22 depthcr, methcr, refcr = depth[cr], meth[cr], ref[cr]
4d37a7026737 Uploaded
eugen
parents:
diff changeset
23 for i, d in enumerate(depthcr):
4d37a7026737 Uploaded
eugen
parents:
diff changeset
24 + if i < 2: continue
4d37a7026737 Uploaded
eugen
parents:
diff changeset
25 if d < dep0: continue
4d37a7026737 Uploaded
eugen
parents:
diff changeset
26 nc += 1
4d37a7026737 Uploaded
eugen
parents:
diff changeset
27 nd += d
4d37a7026737 Uploaded
eugen
parents:
diff changeset
28 m = methcr[i]
4d37a7026737 Uploaded
eugen
parents:
diff changeset
29 if m == 0 and not options.meth0: continue
4d37a7026737 Uploaded
eugen
parents:
diff changeset
30 ratio = float(m) / d
4d37a7026737 Uploaded
eugen
parents:
diff changeset
31 - seq = refcr[i-2:i+3]
4d37a7026737 Uploaded
eugen
parents:
diff changeset
32 strand = ss[refcr[i]]
4d37a7026737 Uploaded
eugen
parents:
diff changeset
33 - pmid = ratio + z95sq / (2 * d)
4d37a7026737 Uploaded
eugen
parents:
diff changeset
34 - sd = z95 * ((ratio*(1-ratio)/d + z95sq/(4*d*d)) ** 0.5)
4d37a7026737 Uploaded
eugen
parents:
diff changeset
35 - norminator = 1 + z95sq / d
4d37a7026737 Uploaded
eugen
parents:
diff changeset
36 - CIl, CIu = (pmid - sd) / norminator, (pmid + sd) / norminator
4d37a7026737 Uploaded
eugen
parents:
diff changeset
37 - fout.write('%s\t%d\t%c\t%s\t%.3f\t%d\t%d\t%.3f\t%.3f\n' % (cr, i+1, strand, seq, ratio, d, m, CIl, CIu))
4d37a7026737 Uploaded
eugen
parents:
diff changeset
38 + if strand == '-':
4d37a7026737 Uploaded
eugen
parents:
diff changeset
39 + seq = refcr[i-2:i+1]
4d37a7026737 Uploaded
eugen
parents:
diff changeset
40 + else:
4d37a7026737 Uploaded
eugen
parents:
diff changeset
41 + seq = refcr[i:i+3]
4d37a7026737 Uploaded
eugen
parents:
diff changeset
42 +
4d37a7026737 Uploaded
eugen
parents:
diff changeset
43 + for k, v in trint.items():
4d37a7026737 Uploaded
eugen
parents:
diff changeset
44 + if re.match(k,seq):
4d37a7026737 Uploaded
eugen
parents:
diff changeset
45 + context=v
4d37a7026737 Uploaded
eugen
parents:
diff changeset
46 +
4d37a7026737 Uploaded
eugen
parents:
diff changeset
47 + fout.write('%s\t%d\t%c\t%s\t%d\t%d\t%.2f\n' % (cr, i+1, strand, context, d, m, m*100/d))
4d37a7026737 Uploaded
eugen
parents:
diff changeset
48
4d37a7026737 Uploaded
eugen
parents:
diff changeset
49 fout.close()
4d37a7026737 Uploaded
eugen
parents:
diff changeset
50 disp('done.')