minor_variant_boxplot: hetbox.py comparison

comparison hetbox.py @ 2:3a1ce69571e5 draft

Allow commented lines, de-kludge data frame handling

author	nick
date	Tue, 28 May 2013 16:55:39 -0400
parents	128db16c9399
children	dfa2e75da6aa

comparison

equal deleted inserted replaced

-:a85dd9968af9
+:3a1ce69571e5
 #!/usr/bin/env python
+# New in this version:
+# - handle commented-out header lines
+# - did everything through the Rpy2 interface instead of inline R code
 import os,sys,numpy
 from rpy2.robjects import Formula
 from rpy2.robjects.packages import importr
 from rpy2 import robjects
 if len(args) >= 3:
 report = args[2]
 elif make_report:
 fail('Error: No output report filename provided (as argument 3).')
+# Check input file
 if not os.path.exists(infile):
 fail('Error: Input file '+infile+' could not be found.')
+with open(infile, 'r') as lines:
+line = lines.readline()
+if not line:
+fail('Error: Input file seems to be empty')
+line = line.strip().lstrip('#') # rm whitespace, comment chars
+labels = line.split("\t")
+if 'SAMPLE' not in labels or labels[11] != 'MINOR.FREQ.PERC.':
+fail('Error: Input file does not seem to have a proper header line.')
 utils     = importr('utils')
 graphics  = importr('graphics')
 base      = importr('base')
 grdevices = importr('grDevices')
 grdevices.png(file=outfile, width=1024, height=768)
 # Read file into a data frame
 DATA    = utils.read_delim(infile)
+# Remove comment from header, if
+labels = robjects.r.names(DATA)
+if labels[0][0:2] == 'X.':
+labels[0] = labels[0][2:]
+#robjects.r.assign('data', DATA)
+#robjects.r('data$MINOR.FREQ.PERC. = data$MINOR.FREQ.PERC. * 100')
+#DATA = robjects.r('data')
+#index = data.names.index('MINOR.FREQ.PERC.')
 # Multiply minor allele frequencies by 100 to get percentage
-# Doing this in R, since I can't find a way to modify a column in a data frame
+#  .rx2() looks up a column by its label and returns it as a vector
-# in rpy2
+#  .ro turns the returned object into one that can be operated on per-element
-robjects.r.assign('data', DATA)
+minor_freq = DATA.rx2('MINOR.FREQ.PERC.').ro * 100
-robjects.r('data$MINOR.FREQ.PERC. = data$MINOR.FREQ.PERC. * 100')
+samples    = DATA.rx2('SAMPLE')
-DATA = robjects.r('data')
-#index = data.names.index('MINOR.FREQ.PERC.')
-##  .ro turns the returned object into one that can be operated on per-element
-#DATA.rx2('MINOR.FREQ.PERC.') = DATA.rx('MINOR.FREQ.PERC.').ro * 100
 # Formula() creates a Python object representing the R object returned by x ~ y
-formula = Formula('MINOR.FREQ.PERC.~SAMPLE')
+formula = Formula('minor_freq ~ samples')
 # The "environment" in .getenvironment() is the entire R workspace in which the
 # Formula object exists. The R workspace meaning all the defined variables.
 # Here, the .getenvironment() method is being used to set some variables in the
 # R workspace
-#   Note: .rx2() looks up a column by its label and returns it as a vector
+formula.getenvironment()['minor_freq'] = minor_freq
-formula.getenvironment()['MINOR.FREQ.PERC.'] = DATA.rx2('MINOR.FREQ.PERC.')
+formula.getenvironment()['samples']    = samples
-formula.getenvironment()['SAMPLE']           = DATA.rx2('SAMPLE')
 # create boxplot - fill kwargs1 with the options for the boxplot function
-kwargs1 = {'ylab':"Minor allele frequency (%)",'col':"gray", 'xaxt':"n", 'outpch':"*",'main':"Distribution of minor allele frequencies >= 2%", 'cex.lab':"1.5"}
+kwargs1 = {'ylab':"Minor allele frequency (%)", 'col':"gray", 'xaxt':"n", 'outpch':"*",'main':"Distribution of minor allele frequencies >= 2%", 'cex.lab':"1.5"}
 p = graphics.boxplot(formula, **kwargs1)
-tab    = base.table(DATA.rx2('SAMPLE'))
+table  = base.table(DATA.rx2('SAMPLE'))
-graphics.text(0.5,1,'N:',font=2)
+graphics.text(0.5, 1, 'N:', font=2)
-for i in range(1,base.length(tab)[0]+1,1):
+for i in range(1, base.length(table)[0]+1, 1):
-graphics.text(i,1,tab[i-1], font=2)
+graphics.text(i, 1, table[i-1], font=2)
-graphlabels = base.names(tab)
+graphlabels = base.names(table)
 kwargs3 = {'pos':"0", 'las':"2", 'cex.axis':"1"}
-graphics.axis(1, at=range(1,len(graphlabels)+1,1),labels=graphlabels, **kwargs3)
+graphics.axis(1, at=range(1, len(graphlabels)+1, 1), labels=graphlabels, **kwargs3)
 grdevices.dev_off()
 if not make_report:
 sys.exit(0)
 ####################################
 # GENERATE REPORT
 # report should be something like:
 # SAMPLE NoHET MEDIAN MAD TEST

Mercurial > repos > nick > minor_variant_boxplot

comparison hetbox.py @ 2:3a1ce69571e5 draft