comparison hetbox.py @ 4:1c21e9f3104b draft default tip

updated script comments and remove "evaluation" column of the report
author boris
date Tue, 25 Jun 2013 00:51:35 -0400
parents 57c5ea9c3c5c
children
comparison
equal deleted inserted replaced
3:479f860eb8a9 4:1c21e9f3104b
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2
2 # Code by Boris Rebolledo-Jaramillo 3 # Code by Boris Rebolledo-Jaramillo
3 # (boris-at-bx.psu.edu) 4 # (boris-at-bx.psu.edu)
4 # Edited by Nick Stoler 5 # Edited by Nick Stoler
5 # (nick-at-bx.psu.edu) 6 # (nick-at-bx.psu.edu)
6 # New in this version: 7 # New in this version:
7 # - Add in proper header line if not present 8 # - Add in proper header line if not present
9
8 10
9 import os 11 import os
10 import sys 12 import sys
11 import array 13 import array
12 import numpy 14 import numpy
103 'outpch':"*",'main':"Distribution of minor allele frequencies", 105 'outpch':"*",'main':"Distribution of minor allele frequencies",
104 'cex.lab':"1.5"} 106 'cex.lab':"1.5"}
105 p = graphics.boxplot(formula, axes=0,ylim=ylimit, lty=1,**kwargs1) 107 p = graphics.boxplot(formula, axes=0,ylim=ylimit, lty=1,**kwargs1)
106 108
107 table = base.table(DATA.rx2('SAMPLE')) 109 table = base.table(DATA.rx2('SAMPLE'))
108 #graphics.text(-1, 1, 'N:', font=2) 110 graphics.text(0, -1, 'N:', font=2)
109 for i in range(1, base.length(table)[0]+1, 1): 111 for i in range(1, base.length(table)[0]+1, 1):
110 graphics.text(i, -1, table[i-1], font=2) 112 graphics.text(i, -1, table[i-1], font=2)
111 113
112 graphlabels = base.names(table) 114 graphlabels = base.names(table)
113 kwargs3 = {'pos':"-2", 'las':"2", 'cex.axis':"1"} 115 kwargs3 = {'pos':"-2", 'las':"2", 'cex.axis':"1"}
114 graphics.axis(1, at=range(1, len(graphlabels)+1, 1),labels=graphlabels, **kwargs3) 116 graphics.axis(1, at=range(1, len(graphlabels)+1, 1),labels=graphlabels, **kwargs3)
115 graphics.axis(2,at=(range(0,60,10)),pos=0,font=2) 117 graphics.axis(2,at=(range(0,60,10)),pos=0,font=2)
116 grdevices.dev_off() 118 grdevices.dev_off()
117 119
118 if not report: 120 if not report:
119 sys.exit(0) 121 sys.exit(0)
120 122
121
122 ####################################
123 # GENERATE REPORT
124 # report should be something like:
125 # SAMPLE NoHET MEDIAN MAD TEST
126 # s1 7 10% n p/w/f
127 # n <= 5 pass
128 # 6 <= n <=10 warn
129 # n >= 11 fail
130 # MAD <= 2.0 fail
131 # MAD > 2.0 pass
132 ###################################
133 123
134 SAMPLES=[] 124 SAMPLES=[]
135 for i in range(len(table)): 125 for i in range(len(table)):
136 SAMPLES.append(base.names(table)[i]) 126 SAMPLES.append(base.names(table)[i])
137 127
141 MEDIAN = numpy.median(VALUES) 131 MEDIAN = numpy.median(VALUES)
142 MAD = numpy.median([abs(i - MEDIAN) for i in VALUES]) # Median absolute distance (robust spread statistic) 132 MAD = numpy.median([abs(i - MEDIAN) for i in VALUES]) # Median absolute distance (robust spread statistic)
143 return [NoHET,MEDIAN, MAD] 133 return [NoHET,MEDIAN, MAD]
144 134
145 boxreport = open(report, "w+") 135 boxreport = open(report, "w+")
146 boxreport.write("SAMPLE\tTOTAL.SITES\tMEDIAN.FREQ.\tMAD.FREQ\tEVAL\n") 136 boxreport.write("#sample\tNo.sites\tmedian.freq\tMAD.freq\n")
137
147 for sample in SAMPLES: 138 for sample in SAMPLES:
148 ENTRY = [sample] + boxstats(infile,sample) 139 ENTRY = [sample] + boxstats(infile,sample)
149 if ENTRY[1] <= 5: 140 boxreport.write ('%s\t%d\t%.1f\t%.1f\n' % tuple([ENTRY[i] for i in [0,1,2,3]]))
150 ENTRY.append('pass')
151 elif 6 <= ENTRY[1] <=10:
152 ENTRY.append('warn')
153 elif ENTRY[1] >= 11:
154 ENTRY.append('fail')
155 if ENTRY[3] <=2.0:
156 ENTRY.append('fail')
157 elif ENTRY[3] >2.0:
158 ENTRY.append('pass')
159 if len(set(ENTRY[4:6])) == 2:
160 ENTRY.append('warn')
161 else:
162 ENTRY.append(list(set(ENTRY[4:6]))[0])
163 boxreport.write ('%s\t%d\t%.1f\t%.1f\t%s\n' % tuple([ENTRY[i] for i in [0,1,2,3,6]]))
164
165 boxreport.close() 141 boxreport.close()
166 142
167 143
168 144
169 145