annotate bigwig_outlier_bed_slow1.py @ 0:94fe59a127a0 draft default tip

planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author fubar
date Mon, 01 Jul 2024 03:31:33 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
1 import os
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
2 import pyBigWig
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
3
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
4
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
5 class findOut():
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
6
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
7 def __init__(self, bwname="test.bw", bedname="test.bed", sd_lo=3, sd_hi=3, bedwin=10):
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
8 self.bedwin = bedwin
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
9 self.bwf = pyBigWig.open(bwname, "r")
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
10 self.chrlist = self.bwf.chroms()
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
11 self.bedf = open(bedname, "w")
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
12 self.sd_lo = sd_lo
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
13 self.sd_hi = sd_hi
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
14 self.makeBed(self.chrlist)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
15
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
16 def makeBed(self, chrlist):
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
17 bed = []
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
18 for chr in chrlist:
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
19 print(chr)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
20 gm = self.bwf.stats(chr, 0, type="mean")[0]
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
21 gstd = self.bwf.stats(chr, 0, type="std")[0]
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
22 cutlow = gm - (self.sd_lo * gstd)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
23 cuthi = gm + (self.sd_hi * gstd)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
24 chr_len = self.chrlist[chr]
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
25 nb = chr_len // self.bedwin
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
26 means = self.bwf.stats(chr, 0, chr_len, type="mean", nBins=nb)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
27 inlo = False
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
28 inhi = False
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
29 reg_start = None
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
30 reg_end = None
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
31 reg_means = []
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
32 print('got %d means, gm=%f, lo=%f, hi=%f' % (len(means), gm, cutlow, cuthi))
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
33 for i, m in enumerate(means):
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
34 bend = min(chr_len, (i + 1) * self.bedwin)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
35 featname = "%s_%d" % (chr,i)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
36 if m and (m < cutlow or m > cuthi):
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
37 if inlo:
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
38 if m < cutlow: # extend
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
39 reg_end = bend
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
40 reg_means.append(m)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
41 else: # high so close
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
42 rm = sum(reg_means) / len(reg_means)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
43 bed.append(
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
44 "%s\t%d\t%d\t%s\t%.3f\n"
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
45 % (chr, reg_start, reg_end, featname, rm)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
46 )
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
47 inlo = False
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
48 reg_means = []
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
49 elif inhi:
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
50 if m > cuthi: # extend
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
51 reg_end = bend
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
52 reg_means.append(m)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
53 else:
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
54 rm = sum(reg_means) / len(reg_means)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
55 bed.append(
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
56 "%s\t%d\t%d\t%s\t%.3f\n"
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
57 % (chr, reg_start, reg_end, featname, rm)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
58 )
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
59 inhi = False
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
60 reg_means = []
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
61 elif m < cutlow: # start new low region
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
62 inlo = True
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
63 reg_start = i * self.bedwin
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
64 reg_end = bend
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
65 reg_means = [m]
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
66 elif m > cuthi:
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
67 inhi = True
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
68 reg_start = i * self.bedwin
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
69 reg_end = bend
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
70 reg_means = [m]
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
71 else: # not out of range - write current extended bed region
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
72 if inhi or inlo:
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
73 inhi = False
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
74 inlo = False
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
75 rm = sum(reg_means) / len(reg_means)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
76 bed.append(
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
77 "%s\t%d\t%d\t%s\t%.3f\n"
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
78 % (chr, reg_start, reg_end, featname, rm)
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
79 )
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
80 reg_means = []
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
81 reg_start = None
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
82 reg_end = None
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
83 self.bedf.write(''.join(bed))
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
84 self.bedf.close()
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
85
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
86
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
87 if __name__ == "__main__":
94fe59a127a0 planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
fubar
parents:
diff changeset
88 fo = findOut(bwname="test.bw", bedname="test.bed", sd_lo=2, sd_hi=2, bedwin=100)