Mercurial > repos > boris > hetbox
comparison hetbox.py @ 4:1c21e9f3104b draft default tip
updated script comments and remove "evaluation" column of the report
author | boris |
---|---|
date | Tue, 25 Jun 2013 00:51:35 -0400 |
parents | 57c5ea9c3c5c |
children |
comparison
equal
deleted
inserted
replaced
3:479f860eb8a9 | 4:1c21e9f3104b |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | |
2 # Code by Boris Rebolledo-Jaramillo | 3 # Code by Boris Rebolledo-Jaramillo |
3 # (boris-at-bx.psu.edu) | 4 # (boris-at-bx.psu.edu) |
4 # Edited by Nick Stoler | 5 # Edited by Nick Stoler |
5 # (nick-at-bx.psu.edu) | 6 # (nick-at-bx.psu.edu) |
6 # New in this version: | 7 # New in this version: |
7 # - Add in proper header line if not present | 8 # - Add in proper header line if not present |
9 | |
8 | 10 |
9 import os | 11 import os |
10 import sys | 12 import sys |
11 import array | 13 import array |
12 import numpy | 14 import numpy |
103 'outpch':"*",'main':"Distribution of minor allele frequencies", | 105 'outpch':"*",'main':"Distribution of minor allele frequencies", |
104 'cex.lab':"1.5"} | 106 'cex.lab':"1.5"} |
105 p = graphics.boxplot(formula, axes=0,ylim=ylimit, lty=1,**kwargs1) | 107 p = graphics.boxplot(formula, axes=0,ylim=ylimit, lty=1,**kwargs1) |
106 | 108 |
107 table = base.table(DATA.rx2('SAMPLE')) | 109 table = base.table(DATA.rx2('SAMPLE')) |
108 #graphics.text(-1, 1, 'N:', font=2) | 110 graphics.text(0, -1, 'N:', font=2) |
109 for i in range(1, base.length(table)[0]+1, 1): | 111 for i in range(1, base.length(table)[0]+1, 1): |
110 graphics.text(i, -1, table[i-1], font=2) | 112 graphics.text(i, -1, table[i-1], font=2) |
111 | 113 |
112 graphlabels = base.names(table) | 114 graphlabels = base.names(table) |
113 kwargs3 = {'pos':"-2", 'las':"2", 'cex.axis':"1"} | 115 kwargs3 = {'pos':"-2", 'las':"2", 'cex.axis':"1"} |
114 graphics.axis(1, at=range(1, len(graphlabels)+1, 1),labels=graphlabels, **kwargs3) | 116 graphics.axis(1, at=range(1, len(graphlabels)+1, 1),labels=graphlabels, **kwargs3) |
115 graphics.axis(2,at=(range(0,60,10)),pos=0,font=2) | 117 graphics.axis(2,at=(range(0,60,10)),pos=0,font=2) |
116 grdevices.dev_off() | 118 grdevices.dev_off() |
117 | 119 |
118 if not report: | 120 if not report: |
119 sys.exit(0) | 121 sys.exit(0) |
120 | 122 |
121 | |
122 #################################### | |
123 # GENERATE REPORT | |
124 # report should be something like: | |
125 # SAMPLE NoHET MEDIAN MAD TEST | |
126 # s1 7 10% n p/w/f | |
127 # n <= 5 pass | |
128 # 6 <= n <=10 warn | |
129 # n >= 11 fail | |
130 # MAD <= 2.0 fail | |
131 # MAD > 2.0 pass | |
132 ################################### | |
133 | 123 |
134 SAMPLES=[] | 124 SAMPLES=[] |
135 for i in range(len(table)): | 125 for i in range(len(table)): |
136 SAMPLES.append(base.names(table)[i]) | 126 SAMPLES.append(base.names(table)[i]) |
137 | 127 |
141 MEDIAN = numpy.median(VALUES) | 131 MEDIAN = numpy.median(VALUES) |
142 MAD = numpy.median([abs(i - MEDIAN) for i in VALUES]) # Median absolute distance (robust spread statistic) | 132 MAD = numpy.median([abs(i - MEDIAN) for i in VALUES]) # Median absolute distance (robust spread statistic) |
143 return [NoHET,MEDIAN, MAD] | 133 return [NoHET,MEDIAN, MAD] |
144 | 134 |
145 boxreport = open(report, "w+") | 135 boxreport = open(report, "w+") |
146 boxreport.write("SAMPLE\tTOTAL.SITES\tMEDIAN.FREQ.\tMAD.FREQ\tEVAL\n") | 136 boxreport.write("#sample\tNo.sites\tmedian.freq\tMAD.freq\n") |
137 | |
147 for sample in SAMPLES: | 138 for sample in SAMPLES: |
148 ENTRY = [sample] + boxstats(infile,sample) | 139 ENTRY = [sample] + boxstats(infile,sample) |
149 if ENTRY[1] <= 5: | 140 boxreport.write ('%s\t%d\t%.1f\t%.1f\n' % tuple([ENTRY[i] for i in [0,1,2,3]])) |
150 ENTRY.append('pass') | |
151 elif 6 <= ENTRY[1] <=10: | |
152 ENTRY.append('warn') | |
153 elif ENTRY[1] >= 11: | |
154 ENTRY.append('fail') | |
155 if ENTRY[3] <=2.0: | |
156 ENTRY.append('fail') | |
157 elif ENTRY[3] >2.0: | |
158 ENTRY.append('pass') | |
159 if len(set(ENTRY[4:6])) == 2: | |
160 ENTRY.append('warn') | |
161 else: | |
162 ENTRY.append(list(set(ENTRY[4:6]))[0]) | |
163 boxreport.write ('%s\t%d\t%.1f\t%.1f\t%s\n' % tuple([ENTRY[i] for i in [0,1,2,3,6]])) | |
164 | |
165 boxreport.close() | 141 boxreport.close() |
166 | 142 |
167 | 143 |
168 | 144 |
169 | 145 |