minor_variant_boxplot: hetbox.py comparison

comparison hetbox.py @ 6:99cda2ff12b8 draft

Current version - maybe ready to release

author	nick
date	Tue, 04 Jun 2013 00:06:36 -0400
parents	dfa2e75da6aa
children

comparison

Legend: equal (unmarked), deleted (-), inserted (+), replaced (a - line followed by a + line)

-:d38113d9c4d6
+:99cda2ff12b8
 #!/usr/bin/env python
 # New in this version:
-# - option to generate report is triggered simply by including report filename
+# - Add in proper header line if not present
-#   as third argument
 import os,sys,numpy
 from rpy2.robjects import Formula
 from rpy2.robjects.packages import importr
 from rpy2 import robjects
 def fail(message):
 sys.stderr.write(message+'\n')
 sys.exit(1)
+COLUMN_LABELS = ['SAMPLE', 'CHR',  'POS', 'A', 'C', 'G', 'T', 'CVRG', 'ALLELES', 'MAJOR', 'MINOR', 'MINOR.FREQ.PERC.'] #, 'STRAND.BIAS']
 args = sys.argv[1:]
 if len(args) >= 1:
 infile = args[0]
 else:
 report = args[2]
 else:
 report = ''
 # Check input file
+add_header = False
 if not os.path.exists(infile):
 fail('Error: Input file '+infile+' could not be found.')
 with open(infile, 'r') as lines:
 line = lines.readline()
 if not line:
 fail('Error: Input file seems to be empty')
 line = line.strip().lstrip('#') # rm whitespace, comment chars
 labels = line.split("\t")
 if 'SAMPLE' not in labels or labels[11] != 'MINOR.FREQ.PERC.':
-fail('Error: Input file does not seem to have a proper header line.')
+sys.stderr.write("Error: Input file does not seem to have a proper header "
++"line.\nAdding an artificial header..")
+add_header = True
 utils     = importr('utils')
 graphics  = importr('graphics')
 base      = importr('base')
 rprint    = robjects.globalenv.get("print")
 grdevices = importr('grDevices')
 grdevices.png(file=outfile, width=1024, height=768)
 # Read file into a data frame
-DATA    = utils.read_delim(infile)
+if add_header:
-# Remove comment from header, if
+# add header line manually if not present
-labels = robjects.r.names(DATA)
+DATA = utils.read_delim(infile, header=False)
-if labels[0][0:2] == 'X.':
+labels = robjects.r.names(DATA)
-labels[0] = labels[0][2:]
+for i in range(len(labels)):
-#robjects.r.assign('data', DATA)
+try:
-#robjects.r('data$MINOR.FREQ.PERC. = data$MINOR.FREQ.PERC. * 100')
+labels[i] = COLUMN_LABELS[i]
-#DATA = robjects.r('data')
+except IndexError, e:
-#index = data.names.index('MINOR.FREQ.PERC.')
+fail("Error in input file: Too many columns (does not match hardcoded "
++"column labels).")
+else:
+DATA = utils.read_delim(infile)
+# Remove comment from header, if present
+labels = robjects.r.names(DATA)
+if labels[0][0:2] == 'X.':
+labels[0] = labels[0][2:]
 # Multiply minor allele frequencies by 100 to get percentage
 #  .rx2() looks up a column by its label and returns it as a vector
 #  .ro turns the returned object into one that can be operated on per-element
 minor_freq = DATA.rx2('MINOR.FREQ.PERC.').ro * 100
 samples    = DATA.rx2('SAMPLE')
 # R workspace
 formula.getenvironment()['minor_freq'] = minor_freq
 formula.getenvironment()['samples']    = samples
 # create boxplot - fill kwargs1 with the options for the boxplot function
-kwargs1 = {'ylab':"Minor allele frequency (%)", 'col':"gray", 'xaxt':"n", 'outpch':"*",'main':"Distribution of minor allele frequencies >= 2%", 'cex.lab':"1.5"}
+kwargs1 = {'ylab':"Minor allele frequency (%)", 'col':"gray", 'xaxt':"n", 'outpch':"*",'main':"Distribution of minor allele frequencies", 'cex.lab':"1.5"}
 p = graphics.boxplot(formula, **kwargs1)
 table  = base.table(DATA.rx2('SAMPLE'))
 graphics.text(0.5, 1, 'N:', font=2)
 for i in range(1, base.length(table)[0]+1, 1):

Mercurial > repos > nick > minor_variant_boxplot

comparison hetbox.py @ 6:99cda2ff12b8 draft