Mercurial > repos > nick > allele_counts_1
changeset 3:4627d99aa105
New script version - change in header
author | nick |
---|---|
date | Tue, 28 May 2013 13:05:05 -0400 |
parents | d83368b907f7 |
children | 900d91d653cb |
files | allele-counts.py |
diffstat | 1 files changed, 9 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/allele-counts.py Fri May 24 10:54:13 2013 -0400
+++ b/allele-counts.py Tue May 28 13:05:05 2013 -0400
@@ -2,8 +2,9 @@
 # This parses the output of Dan's "Naive Variant Detector" (previously,
 # "BAM Coverage"). It was forked from the code of "bam-coverage.py".
 #
-# New in this version: default to stdin and stdout, override by using -i and -o
-# to specify filenames
+# New in this version:
+# Made header line customizable
+# - separate from internal column labels, which are used as dict keys
 #
 # TODO:
 # - test handling of -c 0 (and -f 0?)
@@ -12,8 +13,8 @@
 import sys
 from optparse import OptionParser
 
-COLUMNS = ['sample', 'chr', 'pos', 'A', 'C', 'G', 'T', 'coverage', 'alleles',
-  'major', 'minor', 'freq'] #, 'bias']
+COLUMNS = ['sample', 'chr', 'pos', 'A', 'C', 'G', 'T', 'coverage', 'alleles', 'major', 'minor', 'freq'] #, 'bias']
+COLUMN_LABELS = ['SAMPLE', 'CHR', 'POS', 'A', 'C', 'G', 'T', 'CVRG', 'ALLELES', 'MAJOR', 'MINOR', 'MINOR.FREQ.PERC.'] #, 'STRAND.BIAS']
 CANONICAL_VARIANTS = ['A', 'C', 'G', 'T']
 USAGE = """Usage: cat variants.vcf | %prog [options] > alleles.csv
        %prog [options] -i variants.vcf -o alleles.csv"""
@@ -104,8 +105,10 @@
     except IOError, e:
       fail('Error: The given output filename '+outfile+' could not be opened.')
 
+  if len(COLUMNS) != len(COLUMN_LABELS):
+    fail('Error: Internal column names do not match column labels.')
   if print_header:
-    outfile_handle.write('#'+'\t'.join(COLUMNS)+"\n")
+    outfile_handle.write('\t'.join(COLUMN_LABELS)+"\n")
 
   # main loop: process and print one line at a time
   sample_names = []
@@ -270,7 +273,7 @@
       sample['major'] = '.'
     try:
       sample['minor'] = ranked_bases[1][0]
-      sample['freq'] = ranked_bases[1][1] / float(coverage)
+      sample['freq'] = round(ranked_bases[1][1]/float(coverage), 5)
     except IndexError, e:
       sample['minor'] = '.'
       sample['freq'] = 0.0
```