Mercurial > repos > nick > minor_variant_boxplot
changeset 6:99cda2ff12b8 draft
Current version - maybe ready to release
author | nick |
---|---|
date | Tue, 04 Jun 2013 00:06:36 -0400 |
parents | d38113d9c4d6 |
children | a777a76205cc |
files | hetbox.py |
diffstat | 1 files changed, 25 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/hetbox.py Thu May 30 15:41:55 2013 -0400 +++ b/hetbox.py Tue Jun 04 00:06:36 2013 -0400 @@ -1,7 +1,6 @@ #!/usr/bin/env python # New in this version: -# - option to generate report is triggered simply by including report filename -# as third argument +# - Add in proper header line if not present import os,sys,numpy from rpy2.robjects import Formula @@ -12,6 +11,8 @@ sys.stderr.write(message+'\n') sys.exit(1) +COLUMN_LABELS = ['SAMPLE', 'CHR', 'POS', 'A', 'C', 'G', 'T', 'CVRG', 'ALLELES', 'MAJOR', 'MINOR', 'MINOR.FREQ.PERC.'] #, 'STRAND.BIAS'] + args = sys.argv[1:] if len(args) >= 1: infile = args[0] @@ -27,6 +28,7 @@ report = '' # Check input file +add_header = False if not os.path.exists(infile): fail('Error: Input file '+infile+' could not be found.') with open(infile, 'r') as lines: @@ -36,7 +38,10 @@ line = line.strip().lstrip('#') # rm whitespace, comment chars labels = line.split("\t") if 'SAMPLE' not in labels or labels[11] != 'MINOR.FREQ.PERC.': - fail('Error: Input file does not seem to have a proper header line.') + sys.stderr.write("Error: Input file does not seem to have a proper header " + +"line.\nAdding an artificial header..") + add_header = True + utils = importr('utils') @@ -48,15 +53,22 @@ grdevices.png(file=outfile, width=1024, height=768) # Read file into a data frame -DATA = utils.read_delim(infile) -# Remove comment from header, if -labels = robjects.r.names(DATA) -if labels[0][0:2] == 'X.': - labels[0] = labels[0][2:] -#robjects.r.assign('data', DATA) -#robjects.r('data$MINOR.FREQ.PERC. = data$MINOR.FREQ.PERC. * 100') -#DATA = robjects.r('data') -#index = data.names.index('MINOR.FREQ.PERC.') +if add_header: + # add header line manually if not present + DATA = utils.read_delim(infile, header=False) + labels = robjects.r.names(DATA) + for i in range(len(labels)): + try: + labels[i] = COLUMN_LABELS[i] + except IndexError, e: + fail("Error in input file: Too many columns (does not match hardcoded " + +"column labels).") +else: + DATA = utils.read_delim(infile) + # Remove comment from header, if present + labels = robjects.r.names(DATA) + if labels[0][0:2] == 'X.': + labels[0] = labels[0][2:] # Multiply minor allele frequencies by 100 to get percentage # .rx2() looks up a column by its label and returns it as a vector # .ro turns the returned object into one that can be operated on per-element @@ -72,7 +84,7 @@ formula.getenvironment()['samples'] = samples # create boxplot - fill kwargs1 with the options for the boxplot function -kwargs1 = {'ylab':"Minor allele frequency (%)", 'col':"gray", 'xaxt':"n", 'outpch':"*",'main':"Distribution of minor allele frequencies >= 2%", 'cex.lab':"1.5"} +kwargs1 = {'ylab':"Minor allele frequency (%)", 'col':"gray", 'xaxt':"n", 'outpch':"*",'main':"Distribution of minor allele frequencies", 'cex.lab':"1.5"} p = graphics.boxplot(formula, **kwargs1) table = base.table(DATA.rx2('SAMPLE'))