changeset 3:4627d99aa105

New script version - change in header
author nick
date Tue, 28 May 2013 13:05:05 -0400
parents d83368b907f7
children 900d91d653cb
files allele-counts.py
diffstat 1 files changed, 9 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/allele-counts.py	Fri May 24 10:54:13 2013 -0400
+++ b/allele-counts.py	Tue May 28 13:05:05 2013 -0400
@@ -2,8 +2,9 @@
 # This parses the output of Dan's "Naive Variant Detector" (previously,
 # "BAM Coverage"). It was forked from the code of "bam-coverage.py".
 #
-# New in this version: default to stdin and stdout, override by using -i and -o
-#   to specify filenames
+# New in this version:
+#   Made header line customizable
+#     - separate from the internal column names (COLUMNS), which are used as dict keys
 #
 # TODO:
 # - test handling of -c 0 (and -f 0?)
@@ -12,8 +13,8 @@
 import sys
 from optparse import OptionParser
 
-COLUMNS = ['sample', 'chr', 'pos', 'A', 'C', 'G', 'T', 'coverage', 'alleles',
-  'major', 'minor', 'freq'] #, 'bias']
+COLUMNS = ['sample', 'chr', 'pos', 'A', 'C', 'G', 'T', 'coverage', 'alleles', 'major', 'minor', 'freq'] #, 'bias']
+COLUMN_LABELS = ['SAMPLE', 'CHR',  'POS', 'A', 'C', 'G', 'T', 'CVRG', 'ALLELES', 'MAJOR', 'MINOR', 'MINOR.FREQ.PERC.'] #, 'STRAND.BIAS']
 CANONICAL_VARIANTS = ['A', 'C', 'G', 'T']
 USAGE = """Usage: cat variants.vcf | %prog [options] > alleles.csv
        %prog [options] -i variants.vcf -o alleles.csv"""
@@ -104,8 +105,10 @@
     except IOError, e:
       fail('Error: The given output filename '+outfile+' could not be opened.')
 
+  if len(COLUMNS) != len(COLUMN_LABELS):
+    fail('Error: Internal column names do not match column labels.')
   if print_header:
-    outfile_handle.write('#'+'\t'.join(COLUMNS)+"\n")
+    outfile_handle.write('\t'.join(COLUMN_LABELS)+"\n")
 
   # main loop: process and print one line at a time
   sample_names = []
@@ -270,7 +273,7 @@
       sample['major']  = '.'
     try:
       sample['minor']  = ranked_bases[1][0]
-      sample['freq']   = ranked_bases[1][1] / float(coverage)
+      sample['freq']   = round(ranked_bases[1][1]/float(coverage), 5)
     except IndexError, e:
       sample['minor']  = '.'
       sample['freq']   = 0.0