Mercurial > repos > nick > allele_counts_1
changeset 17:44c3abd1b767 draft
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
author | nick |
---|---|
date | Tue, 31 Mar 2020 09:00:51 +0000 |
parents | 25e8f4cf2a81 |
children | a1e7b592c9a8 |
files | 0todo.txt README.md allele-counts.py allele-counts.xml tests/artificial-nofilt.csv.out tests/artificial-samples.csv.out tests/artificial.csv.out tests/real-mit-s.csv.out tests/real-mit.csv.out tests/real-nofilt.csv.out tests/real.csv.out tests/run-tests.py |
diffstat | 12 files changed, 286 insertions(+), 179 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0todo.txt Tue Mar 31 09:00:51 2020 +0000 @@ -0,0 +1,2 @@ +test handling of -c 0 (and -f 0?) +should it technically handle data lines that start with a '#'? \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Tue Mar 31 09:00:51 2020 +0000 @@ -0,0 +1,4 @@ +variant-annotator +================= + +A Galaxy tool for parsing variant counts from a VCF and computing statistics
--- a/allele-counts.py Wed Dec 09 11:31:13 2015 -0500 +++ b/allele-counts.py Tue Mar 31 09:00:51 2020 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 """ Run with -h option or see DESCRIPTION for description. This script's functionality is being obsoleted by the new, and much more sanely @@ -11,7 +11,6 @@ Naive Variant Caller variant count parsing one-liner: $ cat variants.vcf | grep -v '^#' | cut -f 10 | cut -d ':' -f 4 | tr ',=' '\t:' """ -from __future__ import division import os import sys import errno @@ -49,6 +48,7 @@ threshold (but not necessarily in the same order). If the site fails this test, the number of alleles is reported as 0.""" + def get_options(defaults, usage, description='', epilog=''): """Get options, print usage text.""" @@ -124,7 +124,6 @@ if len(coords) > 2: print_sample = coords[2] # set infile_handle to either stdin or the input file - global infile_handle if infile == OPT_DEFAULTS.get('infile'): infile_handle = sys.stdin sys.stderr.write("Reading from standard input..\n") @@ -135,7 +134,6 @@ fail('Error: Input VCF file '+infile+' not found.') # set outfile_handle to either stdout or the output file - global outfile_handle if outfile == OPT_DEFAULTS.get('outfile'): outfile_handle = sys.stdout else: @@ -186,23 +184,18 @@ sys.stderr.write("Error: Sample '"+print_sample+"' not found.\n") sys.exit(1) - site_summary = summarize_site(site_data, sample_names, CANONICAL_VARIANTS, freq_thres, covg_thres, stranded, debug=debug) if debug and site_summary[0]['print']: - print line.split('\t')[9].split(':')[-1] + print(line.split('\t')[9].split(':')[-1]) try: print_site(outfile_handle, site_summary, COLUMNS) except IOError as ioe: if ioe.errno == errno.EPIPE: - cleanup() sys.exit(0) - # close any open filehandles - cleanup() - # keeps Galaxy from giving an error if there were messages on stderr sys.exit(0) @@ -238,7 +231,7 @@ if len(fields) < 9: fail("Error in input VCF: wrong number of fields in data line. " - +"Failed on line:\n"+line) + "Failed on line:\n"+line) site['chr'] = fields[0] site['pos'] = fields[1] @@ -246,35 +239,38 @@ if len(samples) < len(sample_names): fail("Error in input VCF: missing sample fields in data line. " - +"Failed on line:\n"+line) + "Failed on line:\n"+line) elif len(samples) > len(sample_names): fail("Error in input VCF: more sample fields in data line than in header. " - +"Failed on line:\n"+line) + "Failed on line:\n"+line) sample_counts = {} for i in range(len(samples)): - + variant_counts = {} counts = samples[i].split(':')[-1] counts = counts.split(',') for count in counts: - if not count: + if not count or count == '.': continue fields = count.split('=') if len(fields) != 2: fail("Error in input VCF: Incorrect variant data format (must contain " - +"a single '='). Failed on line:\n"+line) + "a single '='). Failed on data \"{}\" in line:\n{}" + .format(count, line)) (variant, reads) = fields if variant[1:] not in canonical: continue - if variant[0] != '-' and variant[0] != '+': - fail("Error in input VCF: variant data not strand-specific. " - +"Failed on line:\n"+line) + if not variant.startswith('-') and not variant.startswith('+'): + fail("Error in input VCF: variant data not strand-specific. Failed on " + "data \"{}\" on line:\n{}".format(variant, line)) try: variant_counts[variant] = int(float(reads)) except ValueError: - fail("Error in input VCF: Variant count not a valid number. Failed on variant count string '"+reads+"'\nIn the following line:\n"+line) + fail("Error in input VCF: Variant count not a valid number. Failed on " + "variant count string \"{}\"\nIn the following line:\n{}" + .format(reads, line)) sample_counts[sample_names[i]] = variant_counts @@ -338,7 +334,7 @@ sample[strand+base_count[0]] = base_count[1] # fill in any zeros for base in canonical: - if not sample.has_key(strand+base): + if strand+base not in sample: sample[strand+base] = 0 sample['alleles'] = count_alleles(variants, freq_thres, debug=debug) @@ -351,7 +347,7 @@ ranked_bases[1] = ranked_bases[2] ranked_bases[2] = tmp_base - if debug: print "ranked +-: "+str(ranked_bases) + if debug: print("ranked +-: "+str(ranked_bases)) sample['coverage'] = coverage try: @@ -396,7 +392,7 @@ if strand in strands: summed_counts[base] = stranded_counts[variant] + summed_counts.get(base, 0) - return summed_counts.items() + return list(summed_counts.items()) def process_read_counts(variant_counts, freq_thres=0, sort=False, debug=False): @@ -423,10 +419,10 @@ variant_counts.sort(reverse=True, key=lambda variant: variant[1]) if debug: - print 'coverage: '+str(coverage)+', freq_thres: '+str(freq_thres) + print('coverage: '+str(coverage)+', freq_thres: '+str(freq_thres)) for variant in variant_counts: - print (variant[0]+': '+str(variant[1])+'/'+str(float(coverage))+' = '+ - str(variant[1]/coverage)) + print((variant[0]+': '+str(variant[1])+'/'+str(float(coverage))+' = '+ + str(variant[1]/coverage))) # remove bases below the frequency threshold if freq_thres > 0: @@ -452,8 +448,8 @@ sort=False, debug=debug) if debug: - print '+ '+str(alleles_plus) - print '- '+str(alleles_minus) + print('+ '+str(alleles_plus)) + print('- '+str(alleles_minus)) # Check if each strand reports the same set of alleles. # Sorting by base is to compare lists without regard to order (as sets). @@ -492,17 +488,9 @@ def fail(message): - cleanup() sys.stderr.write(message+'\n') sys.exit(1) -def cleanup(): - if isinstance(infile_handle, file): - infile_handle.close() - if isinstance(outfile_handle, file): - outfile_handle.close() - - if __name__ == "__main__": main() \ No newline at end of file
--- a/allele-counts.xml Wed Dec 09 11:31:13 2015 -0500 +++ b/allele-counts.xml Tue Mar 31 09:00:51 2020 +0000 @@ -1,6 +1,10 @@ -<tool id="allele_counts_1" version="1.2" name="Variant Annotator"> +<tool id="allele_counts_1" version="1.3" name="Variant Annotator"> <description> process variant counts</description> - <command interpreter="python">allele-counts.py -i $input -o $output -f $freq -c $covg $header $stranded $nofilt + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + <command>allele-counts.py -i $input -o $output -f $freq -c $covg $header $stranded $nofilt #if $seed: -r $seed #end if @@ -15,12 +19,8 @@ <param name="seed" type="text" value="" label="PRNG seed" /> </inputs> <outputs> - <data name="output" format="tabular"/> + <data name="output" format="tabular" /> </outputs> - <stdio> - <exit_code range="1:" err_level="fatal"/> - <exit_code range=":-1" err_level="fatal"/> - </stdio> <tests> <test> @@ -114,4 +114,40 @@ </help> + <citations> + <citation type="bibtex"> + @article{Blankenberg2014, + author = {Blankenberg, Daniel and {Von Kuster}, Gregory and Bouvier, Emil and Baker, Dannon and Afgan, Enis and Stoler, Nicholas and Taylor, James and Nekrutenko, Anton}, + doi = {10.1186/gb4161}, + issn = {1465-6906}, + journal = {Genome Biology}, + keywords = {galaxy}, + number = {2}, + pages = {403}, + title = {{Dissemination of scientific software with Galaxy ToolShed}}, + url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb4161}, + volume = {15}, + year = {2014} + } + </citation> + <citation type="bibtex"> + @article{Dickins2014, + archivePrefix = {arXiv}, + arxivId = {15334406}, + author = {Dickins, Benjamin and Rebolledo-Jaramillo, Boris and Su, Marcia Shu Wei and Paul, Ian M and Blankenberg, Daniel and Stoler, Nicholas and Makova, Kateryna D and Nekrutenko, Anton}, + doi = {10.2144/000114146}, + eprint = {15334406}, + isbn = {5049880467}, + issn = {19409818}, + journal = {BioTechniques}, + number = {3}, + pages = {134--141}, + pmid = {24641477}, + title = {{Controlling for contamination in re-sequencing studies with a reproducible web-based phylogenetic approach}}, + volume = {56}, + year = {2014} + } + </citation> + </citations> + </tool>
--- a/tests/artificial-nofilt.csv.out Wed Dec 09 11:31:13 2015 -0500 +++ b/tests/artificial-nofilt.csv.out Tue Mar 31 09:00:51 2020 +0000 @@ -1,27 +1,27 @@ -#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC. -THYROID chr1 0 30 0 0 0 30 1 A . 0.0 -THYROID chr1 10 30 0 2 0 32 2 A G 0.0625 -THYROID chr1 20 31 0 1 0 32 0 A G 0.03125 -THYROID chr1 30 21 0 4 0 25 2 A G 0.16 -THYROID chr1 40 22 0 3 0 25 0 A G 0.12 -THYROID chr1 50 3 0 0 0 3 1 A . 0.0 -THYROID chr1 60 2 0 2 0 4 2 A G 0.5 -THYROID chr1 70 1 0 3 0 4 0 G A 0.25 -THYROID chr1 80 104 0 3 0 107 0 A G 0.02804 -THYROID chr1 90 100 2 11 0 113 3 A G 0.09735 -THYROID chr1 100 100 1 11 0 112 0 A G 0.09821 -THYROID chr1 120 0 0 0 0 0 0 . . 0.0 -THYROID chr1 130 0 0 2 0 2 1 G . 0.0 -THYROID chr1 140 0 0 1 0 1 0 G . 0.0 -THYROID chr1 150 0 0 4 0 4 1 G . 0.0 -THYROID chr1 160 0 0 3 0 3 0 G . 0.0 -THYROID chr1 260 106 0 14 0 120 2 A G 0.11667 -THYROID chr1 300 2 0 2 76 80 3 T G 0.025 -THYROID chr1 310 12 0 12 76 100 3 T G 0.12 -THYROID chr1 320 12 0 12 56 80 3 T A 0.15 -THYROID chr1 330 7 0 7 66 80 3 T G 0.0875 -THYROID chr1 340 1 0 1 98 100 0 T G 0.01 -THYROID chr1 350 11 0 11 78 100 0 T A 0.11 -THYROID chr1 400 32 0 8 0 40 2 A G 0.2 -THYROID chr1 410 1 0 2 97 100 0 T G 0.02 -THYROID chr1 420 104 0 0 0 104 1 A . 0.0 +#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS +THYROID chr1 0 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 10 30 0 2 0 32 2 A G 0.0625 0.0 +THYROID chr1 20 31 0 1 0 32 0 A G 0.03125 2.0 +THYROID chr1 30 21 0 4 0 25 2 A G 0.16 0.08013 +THYROID chr1 40 22 0 3 0 25 0 A G 0.12 1.78571 +THYROID chr1 50 3 0 0 0 3 1 A . 0.0 . +THYROID chr1 60 2 0 2 0 4 2 A G 0.5 0.0 +THYROID chr1 70 1 0 3 0 4 0 G A 0.25 2.0 +THYROID chr1 80 104 0 3 0 107 0 A G 0.02804 1.01905 +THYROID chr1 90 100 2 11 0 113 3 A G 0.09735 0.16381 +THYROID chr1 100 100 1 11 0 112 0 A G 0.09821 0.16381 +THYROID chr1 120 0 0 0 0 0 0 . . 0.0 . +THYROID chr1 130 0 0 2 0 2 1 G . 0.0 . +THYROID chr1 140 0 0 1 0 1 0 G . 0.0 . +THYROID chr1 150 0 0 4 0 4 1 G . 0.0 . +THYROID chr1 160 0 0 3 0 3 0 G . 0.0 . +THYROID chr1 260 106 0 14 0 120 2 A G 0.11667 2.4 +THYROID chr1 300 2 0 2 76 80 3 T A 0.025 0.0 +THYROID chr1 310 12 0 12 76 100 3 T A 0.12 0.0 +THYROID chr1 320 12 0 12 56 80 3 T G 0.15 0.64394 +THYROID chr1 330 7 0 7 66 80 3 T G 0.0875 1.06247 +THYROID chr1 340 1 0 1 98 100 0 T A 0.01 1.22222 +THYROID chr1 350 11 0 11 78 100 0 T A 0.11 1.25352 +THYROID chr1 400 32 0 8 0 40 2 A G 0.2 0.0 +THYROID chr1 410 1 0 2 97 100 0 T G 0.02 5.5 +THYROID chr1 420 104 0 0 0 104 1 A . 0.0 .
--- a/tests/artificial-samples.csv.out Wed Dec 09 11:31:13 2015 -0500 +++ b/tests/artificial-samples.csv.out Tue Mar 31 09:00:51 2020 +0000 @@ -1,13 +1,13 @@ -BRAIN chr1 0 30 0 0 0 30 1 A . 0.0 -ARTERY chr1 0 0 0 30 0 30 1 G . 0.0 -THYROID chr1 0 0 30 0 0 30 1 C . 0.0 -BRAIN chr1 10 30 0 0 0 30 1 A . 0.0 -ARTERY chr1 10 30 0 2 0 32 1 A G 0.0625 -THYROID chr1 10 31 0 1 0 32 1 A G 0.03125 -BRAIN chr1 20 30 0 2 0 32 1 A G 0.0625 -ARTERY chr1 20 34 0 6 0 40 2 A G 0.15 -THYROID chr1 20 30 0 2 0 32 0 A G 0.0625 -BRAIN chr1 30 30 0 0 0 30 1 A . 0.0 -BRAIN chr1 40 0 0 0 30 30 1 T . 0.0 -ARTERY chr1 40 1 0 2 97 100 0 T G 0.02 -THYROID chr1 40 0 69 0 31 100 0 C T 0.31 +BRAIN chr1 0 30 0 0 0 30 1 A . 0.0 . +ARTERY chr1 0 0 0 30 0 30 1 G . 0.0 . +THYROID chr1 0 0 30 0 0 30 1 C . 0.0 . +BRAIN chr1 10 30 0 0 0 30 1 A . 0.0 . +ARTERY chr1 10 30 0 2 0 32 1 A G 0.0625 0.0 +THYROID chr1 10 31 0 1 0 32 1 A G 0.03125 2.0 +BRAIN chr1 20 30 0 2 0 32 1 A G 0.0625 0.0 +ARTERY chr1 20 34 0 6 0 40 2 A G 0.15 0.0 +THYROID chr1 20 30 0 2 0 32 0 A G 0.0625 1.88235 +BRAIN chr1 30 30 0 0 0 30 1 A . 0.0 . +BRAIN chr1 40 0 0 0 30 30 1 T . 0.0 . +ARTERY chr1 40 1 0 2 97 100 0 T G 0.02 5.5 +THYROID chr1 40 0 69 0 31 100 0 C T 0.31 1.00096
--- a/tests/artificial.csv.out Wed Dec 09 11:31:13 2015 -0500 +++ b/tests/artificial.csv.out Tue Mar 31 09:00:51 2020 +0000 @@ -1,35 +1,35 @@ -THYROID chr1 0 30 0 0 0 30 1 A . 0.0 -THYROID chr1 10 30 0 2 0 32 1 A G 0.0625 -THYROID chr1 20 31 0 1 0 32 1 A G 0.03125 -THYROID chr1 30 21 0 4 0 25 2 A G 0.16 -THYROID chr1 40 22 0 3 0 25 0 A G 0.12 -THYROID chr1 50 30 0 0 0 30 1 A . 0.0 -THYROID chr1 60 31 0 0 0 31 1 A . 0.0 -THYROID chr1 70 21 0 0 0 21 1 A . 0.0 -THYROID chr1 80 22 0 0 0 22 1 A . 0.0 -THYROID chr1 82 30 0 2 0 32 1 A G 0.0625 -THYROID chr1 84 31 0 1 0 32 1 A G 0.03125 -THYROID chr1 86 21 0 4 0 25 2 A G 0.16 -THYROID chr1 88 22 0 3 0 25 0 A G 0.12 -THYROID chr1 90 30 0 0 0 30 1 A . 0.0 -THYROID chr1 100 31 0 0 0 31 1 A . 0.0 -THYROID chr1 110 21 0 0 0 21 1 A . 0.0 -THYROID chr1 120 22 0 0 0 22 1 A . 0.0 -THYROID chr1 210 20 0 0 0 20 1 A . 0.0 -THYROID chr1 220 22 0 0 0 22 1 A . 0.0 -THYROID chr1 230 182 0 18 0 200 1 A G 0.09 -THYROID chr1 240 180 0 20 0 200 2 A G 0.1 -THYROID chr1 250 178 0 22 0 200 2 A G 0.11 -THYROID chr1 260 106 0 14 0 120 0 A G 0.11667 -THYROID chr1 300 2 0 2 76 80 1 T G 0.025 -THYROID chr1 310 12 0 12 76 100 3 T G 0.12 -THYROID chr1 320 12 0 12 56 80 3 T A 0.15 -THYROID chr1 330 7 0 7 66 80 0 T G 0.0875 -THYROID chr1 340 1 0 1 98 100 1 T G 0.01 -THYROID chr1 350 11 0 11 78 100 0 T A 0.11 -THYROID chr1 400 32 0 8 0 40 2 A G 0.2 -THYROID chr1 410 1 0 2 97 100 0 T G 0.02 -THYROID chr1 420 104 0 0 0 104 1 A . 0.0 -THYROID chr1 430 30 0 0 0 30 1 A . 0.0 -THYROID chr1 440 30 0 0 0 30 1 A . 0.0 -THYROID 27 1234567890 29 0 0 0 29 1 A . 0.0 +THYROID chr1 0 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 10 30 0 2 0 32 1 A G 0.0625 0.0 +THYROID chr1 20 31 0 1 0 32 1 A G 0.03125 2.0 +THYROID chr1 30 21 0 4 0 25 2 A G 0.16 0.08013 +THYROID chr1 40 22 0 3 0 25 0 A G 0.12 1.78571 +THYROID chr1 50 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 60 31 0 0 0 31 1 A . 0.0 . +THYROID chr1 70 21 0 0 0 21 1 A . 0.0 . +THYROID chr1 80 22 0 0 0 22 1 A . 0.0 . +THYROID chr1 82 30 0 2 0 32 1 A G 0.0625 0.0 +THYROID chr1 84 31 0 1 0 32 1 A G 0.03125 2.0 +THYROID chr1 86 21 0 4 0 25 2 A G 0.16 0.08013 +THYROID chr1 88 22 0 3 0 25 0 A G 0.12 1.78571 +THYROID chr1 90 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 100 31 0 0 0 31 1 A . 0.0 . +THYROID chr1 110 21 0 0 0 21 1 A . 0.0 . +THYROID chr1 120 22 0 0 0 22 1 A . 0.0 . +THYROID chr1 210 20 0 0 0 20 1 A . 0.0 . +THYROID chr1 220 22 0 0 0 22 1 A . 0.0 . +THYROID chr1 230 182 0 18 0 200 1 A G 0.09 0.0 +THYROID chr1 240 180 0 20 0 200 2 A G 0.1 0.0 +THYROID chr1 250 178 0 22 0 200 2 A G 0.11 0.0 +THYROID chr1 260 106 0 14 0 120 0 A G 0.11667 2.4 +THYROID chr1 300 2 0 2 76 80 1 T A 0.025 0.0 +THYROID chr1 310 12 0 12 76 100 3 T A 0.12 0.0 +THYROID chr1 320 12 0 12 56 80 3 T G 0.15 0.64394 +THYROID chr1 330 7 0 7 66 80 0 T G 0.0875 1.06247 +THYROID chr1 340 1 0 1 98 100 1 T A 0.01 1.22222 +THYROID chr1 350 11 0 11 78 100 0 T A 0.11 1.25352 +THYROID chr1 400 32 0 8 0 40 2 A G 0.2 0.0 +THYROID chr1 410 1 0 2 97 100 0 T G 0.02 5.5 +THYROID chr1 420 104 0 0 0 104 1 A . 0.0 . +THYROID chr1 430 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 440 30 0 0 0 30 1 A . 0.0 . +THYROID 27 1234567890 29 0 0 0 29 1 A . 0.0 .
--- a/tests/real-mit-s.csv.out Wed Dec 09 11:31:13 2015 -0500 +++ b/tests/real-mit-s.csv.out Tue Mar 31 09:00:51 2020 +0000 @@ -1,12 +1,12 @@ -#SAMPLE CHR POS +A +C +G +T -A -C -G -T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC. -S1 chrM 2000 1 9095 1 0 7 5808 0 1 14913 1 C A 0.00054 -S3 chrM 2000 0 7933 0 4 10 5242 1 2 13192 1 C A 0.00076 -S1 chrM 3000 17399 7 22 8 10567 35 22 4 28064 0 A G 0.00157 -S2 chrM 3000 12535 3 24 2 7937 13 12 2 20528 1 A G 0.00175 -S3 chrM 3000 18981 7 29 6 11286 33 17 4 30363 0 A G 0.00152 -S4 chrM 3000 9254 1 15 2 6240 16 14 1 15543 0 A G 0.00187 -S1 chrM 4000 6134 2 1 3 6124 1 1 1 12267 1 A T 0.00033 -S1 chrM 7000 0 17 1 6216 4 9 2 7529 13778 0 T C 0.00189 -S2 chrM 7000 0 7 2 5104 0 9 4 6288 11414 1 T C 0.0014 -S3 chrM 7000 0 9 0 6446 4 4 10 7506 13979 1 T C 0.00093 -S3 chrM 8000 3 1 5023 1 1 0 5043 2 10074 1 G A 0.0004 +#SAMPLE CHR POS +A +C +G +T -A -C -G -T CVRG ALLELES MAJOR MINOR MAF BIAS +S1 chrM 2000 1 9095 1 0 7 5808 0 1 14913 1 C A 0.00054 2.03879 +S3 chrM 2000 0 7933 0 4 10 5242 1 2 13192 1 C A 0.00076 2.51047 +S1 chrM 3000 17399 7 22 8 10567 35 22 4 28064 0 A G 0.00157 0.51868 +S2 chrM 3000 12535 3 24 2 7937 13 12 2 20528 1 A G 0.00175 0.22864 +S3 chrM 3000 18981 7 29 6 11286 33 17 4 30363 0 A G 0.00152 0.01416 +S4 chrM 3000 9254 1 15 2 6240 16 14 1 15543 0 A G 0.00187 0.33202 +S1 chrM 4000 6134 2 1 3 6124 1 1 1 12267 1 A T 0.00033 0.99804 +S1 chrM 7000 0 17 1 6216 4 9 2 7529 13778 0 T C 0.00189 0.81221 +S2 chrM 7000 0 7 2 5104 0 9 4 6288 11414 1 T C 0.0014 0.04254 +S3 chrM 7000 0 9 0 6446 4 4 10 7506 13979 1 T C 0.00093 0.92561 +S3 chrM 8000 3 1 5023 1 1 0 5043 2 10074 1 G A 0.0004 1.00358
--- a/tests/real-mit.csv.out Wed Dec 09 11:31:13 2015 -0500 +++ b/tests/real-mit.csv.out Tue Mar 31 09:00:51 2020 +0000 @@ -1,12 +1,12 @@ -#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC. -S1 chrM 2000 8 14903 1 1 14913 1 C A 0.00054 -S3 chrM 2000 10 13175 1 6 13192 1 C A 0.00076 -S1 chrM 3000 27966 42 44 12 28064 0 A G 0.00157 -S2 chrM 3000 20472 16 36 4 20528 1 A G 0.00175 -S3 chrM 3000 30267 40 46 10 30363 0 A G 0.00152 -S4 chrM 3000 15494 17 29 3 15543 0 A G 0.00187 -S1 chrM 4000 12258 3 2 4 12267 1 A T 0.00033 -S1 chrM 7000 4 26 3 13745 13778 0 T C 0.00189 -S2 chrM 7000 0 16 6 11392 11414 1 T C 0.0014 -S3 chrM 7000 4 13 10 13952 13979 1 T C 0.00093 -S3 chrM 8000 4 1 10066 3 10074 1 G A 0.0004 +#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS +S1 chrM 2000 8 14903 1 1 14913 1 C A 0.00054 2.03879 +S3 chrM 2000 10 13175 1 6 13192 1 C A 0.00076 2.51047 +S1 chrM 3000 27966 42 44 12 28064 0 A G 0.00157 0.51868 +S2 chrM 3000 20472 16 36 4 20528 1 A G 0.00175 0.22864 +S3 chrM 3000 30267 40 46 10 30363 0 A G 0.00152 0.01416 +S4 chrM 3000 15494 17 29 3 15543 0 A G 0.00187 0.33202 +S1 chrM 4000 12258 3 2 4 12267 1 A T 0.00033 0.99804 +S1 chrM 7000 4 26 3 13745 13778 0 T C 0.00189 0.81221 +S2 chrM 7000 0 16 6 11392 11414 1 T C 0.0014 0.04254 +S3 chrM 7000 4 13 10 13952 13979 1 T C 0.00093 0.92561 +S3 chrM 8000 4 1 10066 3 10074 1 G A 0.0004 1.00358
--- a/tests/real-nofilt.csv.out Wed Dec 09 11:31:13 2015 -0500 +++ b/tests/real-nofilt.csv.out Tue Mar 31 09:00:51 2020 +0000 @@ -1,15 +1,15 @@ -#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC. -THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 -THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 -THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 -THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 -THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 -THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 -THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 -THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 -THYROID chr1 246729215 1 0 1 88 90 0 T G 0.01111 -THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087 -THYROID chr1 246729378 16 7 0 0 23 0 A C 0.30435 -THYROID chr1 246729392 29 0 10 0 39 0 A G 0.25641 -THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 -THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 +#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS +THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 . +THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 . +THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 . +THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 . +THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 1.36364 +THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 2.14286 +THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 . +THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 1.22996 +THYROID chr1 246729215 1 0 1 88 90 0 T A 0.01111 1.08537 +THYROID chr1 246729216 1 0 1 90 92 0 T A 0.01087 1.10976 +THYROID chr1 246729378 16 7 0 0 23 0 A C 0.30435 . +THYROID chr1 246729392 29 0 10 0 39 0 A G 0.25641 . +THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 . +THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 1.79167
--- a/tests/real.csv.out Wed Dec 09 11:31:13 2015 -0500 +++ b/tests/real.csv.out Tue Mar 31 09:00:51 2020 +0000 @@ -1,11 +1,11 @@ -THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 -THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 -THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 -THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 -THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 -THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 -THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 -THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 -THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087 -THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 -THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 +THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 . +THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 . +THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 . +THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 . +THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 1.36364 +THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 2.14286 +THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 . +THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 1.22996 +THYROID chr1 246729216 1 0 1 90 92 0 T A 0.01087 1.10976 +THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 . +THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 1.79167
--- a/tests/run-tests.py Wed Dec 09 11:31:13 2015 -0500 +++ b/tests/run-tests.py Tue Mar 31 09:00:51 2020 +0000 @@ -1,8 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import os import sys import subprocess +SCRIPT_NAME = 'allele-counts.py' DATASETS = [ 'artificial', 'artificial-samples', @@ -16,15 +17,52 @@ OUT_EXT = '.csv.out' ARGS_KEY = '##comment="ARGS=' +XML = { + 'tests_start':' <tests>', + 'test_start': ' <test>', + 'input': ' <param name="input" value="tests/%s" />', + 'param': ' <param name="%s" value="%s" />', + 'output': ' <output name="output" file="tests/%s" />', + 'test_end': ' </test>', + 'tests_end': ' </tests>', +} +PARAMS = { + '-f':'freq', + '-c':'covg', + '-H':'header', + '-s':'stranded', + '-n':'nofilt', + '-r':'seed', +} +PARAM_ARG = { + '-f':True, + '-c':True, + '-H':False, + '-s':False, + '-n':False, + '-r':True, +} + def main(): - test_dir = os.path.dirname(os.path.relpath(sys.argv[0])) - if test_dir: - test_dir += os.sep + do_print_xml = False + if len(sys.argv) > 1: + if sys.argv[1] == '-x': + do_print_xml = True + else: + sys.stderr.write("Error: unrecognized option '"+sys.argv[1]+"'\n") + sys.exit(1) + + test_dir = os.path.dirname(os.path.realpath(__file__)) + script_dir = os.path.relpath(os.path.dirname(test_dir)) + test_dir = os.path.relpath(test_dir) + + if do_print_xml: + print(XML.get('tests_start')) for dataset in DATASETS: - infile = test_dir+dataset+IN_EXT - outfile = test_dir+dataset+OUT_EXT + infile = os.path.join(test_dir, dataset+IN_EXT) + outfile = os.path.join(test_dir, dataset+OUT_EXT) if not os.path.exists(infile): sys.stderr.write("Error: file not found: "+infile+"\n") @@ -34,11 +72,50 @@ continue options = read_options(infile) - script_cmd = 'allele-counts.py '+options+' -i '+infile - bash_cmd = 'diff '+outfile+' <('+script_cmd+')' - # print infile+":" - print script_cmd - subprocess.call(['bash', '-c', bash_cmd]) + if do_print_xml: + print_xml(infile, outfile, options, XML, PARAMS, PARAM_ARG) + else: + run_tests(infile, outfile, options, script_dir) + + if do_print_xml: + print(XML.get('tests_end')) + + +def run_tests(infile, outfile, options, script_dir): + script_cmd = os.path.join(script_dir, SCRIPT_NAME)+' '+options+' -i '+infile + bash_cmd = 'diff '+outfile+' <('+script_cmd+')' + print(script_cmd) + subprocess.call(['bash', '-c', bash_cmd]) + + +def print_xml(infile, outfile, options_str, xml, params, param_arg): + infile = os.path.basename(infile) + outfile = os.path.basename(outfile) + + options = options_str.split() # on whitespace + + print(xml.get('test_start')) + print(xml.get('input') % infile) + + # read in options one at a time, print <param> line + i = 0 + while i < len(options): + opt = options[i] + if opt not in params or opt not in param_arg: + sys.stderr.write("Error: unknown option '"+opt+"' in ARGS list in file "+infile+"\n") + sys.exit(1) + # takes argument + if param_arg[opt]: + i+=1 + arg = options[i] + print(xml.get('param') % (params[opt], arg)) + # no argument (boolean) + else: + print(xml.get('param') % (params[opt], 'true')) + i+=1 + + print(xml.get('output') % outfile) + print(xml.get('test_end')) def read_options(infile):