comparison allele-counts.py @ 14:e5d39283fe4d

allele-counts.py: Document output columns.
author nicksto <nmapsy@gmail.com>
date Wed, 09 Dec 2015 11:03:33 -0500
parents df1fb577db0d
children 44c3abd1b767
comparison
equal deleted inserted replaced
13:df1fb577db0d 14:e5d39283fe4d
21 COLUMNS = ['sample', 'chr', 'pos', 'A', 'C', 'G', 'T', 'coverage', 'alleles', 21 COLUMNS = ['sample', 'chr', 'pos', 'A', 'C', 'G', 'T', 'coverage', 'alleles',
22 'major', 'minor', 'freq', 'bias'] 22 'major', 'minor', 'freq', 'bias']
23 COLUMN_LABELS = ['SAMPLE', 'CHR', 'POS', 'A', 'C', 'G', 'T', 'CVRG', 'ALLELES', 23 COLUMN_LABELS = ['SAMPLE', 'CHR', 'POS', 'A', 'C', 'G', 'T', 'CVRG', 'ALLELES',
24 'MAJOR', 'MINOR', 'MAF', 'BIAS'] 24 'MAJOR', 'MINOR', 'MAF', 'BIAS']
25 CANONICAL_VARIANTS = ['A', 'C', 'G', 'T'] 25 CANONICAL_VARIANTS = ['A', 'C', 'G', 'T']
26 USAGE = """Usage: %prog [options] -i variants.vcf -o alleles.csv 26 USAGE = """Usage: %prog [options] -i variants.vcf -o alleles.tsv
27 cat variants.vcf | %prog [options] > alleles.csv""" 27 cat variants.vcf | %prog [options] > alleles.tsv"""
28 OPT_DEFAULTS = {'infile':'-', 'outfile':'-', 'freq_thres':1.0, 'covg_thres':100, 28 OPT_DEFAULTS = {'infile':'-', 'outfile':'-', 'freq_thres':1.0, 'covg_thres':100,
29 'print_header':False, 'stdin':False, 'stranded':False, 'no_filter':False, 29 'print_header':False, 'stdin':False, 'stranded':False, 'no_filter':False,
30 'debug_loc':'', 'seed':''} 30 'debug_loc':'', 'seed':''}
31 DESCRIPTION = """This will parse the VCF output of the "Naive Variant Caller" 31 DESCRIPTION = """This will parse the VCF output of the "Naive Variant Caller"
32 (aka "BAM Coverage") Galaxy tool. For each position reported, it counts the 32 (aka "BAM Coverage") Galaxy tool. For each position reported, it counts the
33 number of reads of each base, determines the major allele, minor allele (second 33 number of reads of each base, determines the major allele, minor allele (second
34 most frequent variant), and number of alleles above a threshold. So currently 34 most frequent variant), and number of alleles above a threshold. So currently
35 it only considers SNVs (ACGT), including in the coverage figure. By default it 35 it only considers SNVs (ACGT), including in the coverage figure. By default it
36 reads from stdin and prints to stdout.""" 36 reads from stdin and prints to stdout.
37 Prints a tab-delimited set of statistics to stdout.
38 To print output column labels, run "$ echo -n | ./allele-counts.py -H".
39 The columns are: 1:SAMPLE 2:CHR 3:POS 4:A 5:C 6:G 7:T 8:CVRG 9:ALLELES 10:MAJOR
40 11:MINOR 12:MAF 13:BIAS,
41 unless the --stranded option is used, in which case they are:
42 1:SAMPLE 2:CHR 3:POS 4:+A 5:+C 6:+G 7:+T 8:-A 9:-C 10:-G 11:-T 12:CVRG
43 13:ALLELES 14:MAJOR 15:MINOR 16:MAF 17:BIAS.
44 """
37 EPILOG = """Requirements: 45 EPILOG = """Requirements:
38 The input VCF must report the variants for each strand. 46 The input VCF must report the variants for each strand.
39 The variants should be case-sensitive (e.g. all capital base letters). 47 The variants should be case-sensitive (e.g. all capital base letters).
40 Strand bias: Both strands must show the same bases passing the frequency 48 Strand bias: Both strands must show the same bases passing the frequency
41 threshold (but not necessarily in the same order). If the site fails this test, 49 threshold (but not necessarily in the same order). If the site fails this test,