changeset 0:d4a8f440a518 draft

Adding ctat_mutations tool.
author trinity_ctat
date Tue, 30 Oct 2018 12:28:26 -0400
parents
children 785fa145418f
files ctat_mutations.xml test-data/reads_1.fastq.gz test-data/reads_2.fastq.gz test-data/varcalling.outdir/annotated_min_filtered.vcf.gz test-data/varcalling.outdir/cancer.tab test-data/varcalling.outdir/cancer.vcf test-data/varcalling.outdir/misc/recalibrated.bai test-data/varcalling.outdir/misc/recalibrated.bam test-data/varcalling.outdir/mutation_inspector.json test-data/varcalling.outdir/variants.vcf test-data/varcalling.outdir/variants.vcf.idx test-data/varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz tool-data/ctat_genome_resource_libs.loc.sample tool_data_table_conf.xml.sample
diffstat 14 files changed, 597 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_mutations.xml	Tue Oct 30 12:28:26 2018 -0400
@@ -0,0 +1,67 @@
+<tool id="ctat_mutations" name="ctat_mutations" version="1.0.0" profile="17.05">
+    <!--
+        Galaxy wrapper around the ctat_mutations script. It takes a pair of FASTQ
+        datasets, a tissue type and a CTAT genome resource library, runs the
+        pipeline with GATK as both the variant calling and variant filtering mode,
+        and collects selected files from varcalling.outdir as Galaxy datasets.
+    -->
+    <description>Mutation Pipeline for calling SNPs and variants</description>
+    <requirements>
+        <requirement type="package" version="2.0.1">ctat-mutations</requirement>
+    </requirements>
+    <!--
+        The thread count is taken from GALAXY_SLOTS so the job uses the number of
+        cores Galaxy allocated; 8 is kept as the fallback when the variable is unset.
+        The leading backslash stops Cheetah from expanding the shell variable.
+        NOTE(review): the script is invoked through the relative path
+        ctat-mutations/ctat_mutations, which presumes that directory exists in the
+        job working directory. Confirm against how the package is installed.
+    -->
+    <command detect_errors="default">
+      <![CDATA[
+            python ctat-mutations/ctat_mutations \
+                   --plot \
+                   --out_dir varcalling.outdir \
+                   --threads \${GALAXY_SLOTS:-8} \
+                   --variant_filtering_mode GATK \
+                   --left "$left" \
+                   --right "$right" \
+                   --genome_lib_dir "${genome_resource_lib.fields.path}" \
+                   --variant_call_mode GATK \
+                   --tissue_type "$tissue_type" \
+                   --email "$cravat_email"
+      ]]>
+    </command>
+    <inputs>
+      <!--
+          Paired-end reads.
+          NOTE(review): only the "fastq" format is accepted here while the bundled
+          test inputs are .fastq.gz files. Confirm whether compressed FASTQ
+          datatypes should be accepted as well.
+      -->
+      <param format="fastq" name="left" type="data" label="Left/Forward strand reads" help="Left read"/>
+      <param format="fastq" name="right" type="data" label="Right/Reverse strand reads" help="Right read"/>
+      <!-- Tissue types come from the cravat_tissues data table, sorted on column 1. -->
+      <param name="tissue_type" type="select" label="Select a pathology" help="If you don't know, just choose 'General Purpose'">
+        <options from_data_table="cravat_tissues">
+          <filter type="sort_by" column="1"/>
+          <validator type="no_options" message="No tissue types are available"/>
+        </options>
+      </param>
+      <!-- Genome libraries come from the ctat_genome_resource_libs data table; its "path" field is passed to the command. -->
+      <param name="genome_resource_lib" type="select" label="Select a reference genome">
+        <options from_data_table="ctat_genome_resource_libs">
+          <filter type="sort_by" column="2"/>
+          <validator type="no_options" message="No indexes are available"/>
+        </options>
+      </param>
+      <!--
+          The command template reads $cravat_email, so the parameter has to be
+          declared here; without it the template cannot be filled. The sanitizer
+          lets "@" and the usual address punctuation through unchanged, because
+          Galaxy's default text sanitizing would rewrite "@".
+      -->
+      <param name="cravat_email" type="text" label="Email address for CRAVAT" help="Passed to ctat_mutations through its email option.">
+        <sanitizer>
+          <valid initial="string.letters,string.digits">
+            <add value="@"/>
+            <add value="."/>
+            <add value="_"/>
+            <add value="-"/>
+            <add value="+"/>
+          </valid>
+        </sanitizer>
+        <validator type="empty_field" message="An email address is required"/>
+      </param>
+      <!-- Empty section whose title is used only to show the GATK license notice in the tool form. -->
+      <section name="adv" title="This service uses the GATK4. GATK4 is licensed by the Broad Institute and is made available to academic users of this service for non-commercial use only. The full text of the license is available here: https://www.broadinstitute.org/gatk/about/license.html. For more information about GATK and full documentation, please visit the GATK website: https://www.broadinstitute.org." expanded="false">
+      </section>
+    </inputs>
+    <!--
+        Each output is picked up from a fixed path below varcalling.outdir. All
+        labels carry the tool name and input description so that datasets from
+        different runs can be told apart in the history.
+        NOTE(review): the "cravat" output is declared as txt although the collected
+        file name ends in .vcf.gz. Confirm the intended datatype.
+    -->
+    <outputs>
+      <data format="tabular" name="cancertab" label="${tool.name} on ${on_string}: Cancer Tab" from_work_dir="varcalling.outdir/cancer.tab"/>
+      <data format="vcf" name="cancerVariants" label="${tool.name} on ${on_string}: Cancer VCF" from_work_dir="varcalling.outdir/cancer.vcf"/>
+      <data format="vcf" name="allVariants" label="${tool.name} on ${on_string}: All Variants VCF" from_work_dir="varcalling.outdir/variants.vcf"/>
+      <data format="bam" name="bamfile" label="${tool.name} on ${on_string}: Bam used in haplotype calling" from_work_dir="varcalling.outdir/misc/recalibrated.bam"/>
+      <data format="txt" name="cravat" label="${tool.name} on ${on_string}: Annotated (lightly filtered) VCF file" from_work_dir="varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz"/>
+    </outputs>
+    <tests>
+        <!--
+            The expected VCF header records the command lines, absolute paths and
+            run dates of the run that produced it, so VCF, BAM and compressed VCF
+            outputs are compared by size instead of byte for byte. The tabular
+            summary carries no such fields and is still compared exactly.
+            NOTE(review): no value is given for genome_resource_lib, so the test
+            relies on whatever the data table offers. Confirm that this is intended.
+        -->
+        <test>
+           <param name="left" value="reads_1.fastq.gz"/>
+           <param name="right" value="reads_2.fastq.gz"/>
+           <param name="tissue_type" value="Other"/>
+           <param name="cravat_email" value="galaxy-test@example.org"/>
+           <output name="cancertab" file="varcalling.outdir/cancer.tab"/>
+           <output name="cancerVariants" file="varcalling.outdir/cancer.vcf" compare="sim_size"/>
+           <output name="allVariants" file="varcalling.outdir/variants.vcf" compare="sim_size"/>
+           <output name="bamfile" file="varcalling.outdir/misc/recalibrated.bam" compare="sim_size"/>
+           <output name="cravat" file="varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz" compare="sim_size"/>
+        </test>
+    </tests>
+    <help>
+
+.. class:: warningmark
+
+Mutation detection in RNA-Seq highlights the GATK Best Practices in RNA-Seq variant calling, several sources of variant annotation, and filtering based on CRAVAT.
+
+    </help>
+    <citations>
+    </citations>
+</tool>
Binary file test-data/reads_1.fastq.gz has changed
Binary file test-data/reads_2.fastq.gz has changed
Binary file test-data/varcalling.outdir/annotated_min_filtered.vcf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/varcalling.outdir/cancer.tab	Tue Oct 30 12:28:26 2018 -0400
@@ -0,0 +1,14 @@
+CHROM	POS	REF	ALT	GENE	DP	QUAL	MQ	SAO	NSF	NSM	NSN	TUMOR	TISSUE	COSMIC_ID	KGPROD	RS	PMC	CHASM_PVALUE	CHASM_FDR	VEST_PVALUE	VEST_FDR
+chr5	474989	A	G	LOC100288152,SLC9A3	4	96.03	60	NA	NA	NA	NA	carcinoma_--_NS	urinary_tract	COSM4006021	NA	NA	NA	0.1114	0.2	0.96802	1
+chr5	181224474	G	A	TRIM41	45	349.77	60	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	0.0694	0.15	0.48052	1
+chr8	143923759	G	A	PLEC	66	838.77	60	NA	NA	NA	NA	carcinoma_--_adenocarcinoma	large_intestine	COSM3750086	NA	NA	NA	0.0344	0.1	0.84202	1
+chr12	56420869	G	A	TIMELESS	48	384.77	60	NA	NA	NA	NA	carcinoma_--_adenocarcinoma	large_intestine	COSM3753397	NA	NA	NA	0.0744	0.15	0.18439	0.95
+chr17	7673767	C	T	TP53	61	1848.77	60	NA	NA	NA	NA	Ewings_sarcoma-peripheral_primitive_neuroectodermal_tumour_--_NS	bone	COSM3717625	NA	NA	NA	0	0.05	0.01447	0.15
+chr17	7676154	G	C	TP53	80	2161.77	60	NA	NA	NA	NA	haematopoietic_neoplasm_--_acute_myeloid_leukaemia	haematopoietic_and_lymphoid_tissue	COSM3766193	NA	NA	NA	0.087	0.15	0.52717	1
+chr17	43071077	T	C	BRCA1	4	92.03	60	NA	NA	NA	NA	haematopoietic_neoplasm_--_acute_myeloid_leukaemia	haematopoietic_and_lymphoid_tissue	COSM3755560	NA	NA	NA	0.0372	0.1	0.3446	1
+chr17	43091983	T	C	BRCA1	4	84.03	60	NA	NA	NA	NA	haemangioblastoma_--_NS	soft_tissue	COSM3755561	NA	NA	NA	0.0002	0.05	0.64447	1
+chr17	43092919	G	A	BRCA1	2	33.74	60	NA	NA	NA	NA	carcinoma_--_NS	prostate	COSM3755564	NA	NA	NA	0.0004	0.05	0.33539	1
+chr17	43093454	C	T	BRCA1	11	425.77	60	NA	NA	NA	NA	rhabdomyosarcoma_--_embryonal	soft_tissue	COSM4989394	NA	NA	NA	0.0014	0.05	0.51068	1
+chr19	39177761	G	C	PAK4	106	1134.77	60	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	0.0004	0.05	0.01093	0.15
+chr19	47271515	T	C	CCDC9	12	336.77	60	NA	NA	NA	NA	haematopoietic_neoplasm_--_acute_myeloid_leukaemia	haematopoietic_and_lymphoid_tissue	COSM3721172	NA	NA	NA	0.093	0.15	0.97622	1
+chr20	46687147	C	T	TP53RK	26	423.77	60	NA	NA	NA	NA	carcinoma_--_ductal_carcinoma	pancreas	COSM3758608	NA	NA	NA	0.0834	0.15	0.88584	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/varcalling.outdir/cancer.vcf	Tue Oct 30 12:28:26 2018 -0400
@@ -0,0 +1,87 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##FILTER=<ID=FS,Description="FS > 30.0">
+##FILTER=<ID=LowQual,Description="Low quality">
+##FILTER=<ID=QD,Description="QD < 2.0">
+##FILTER=<ID=SnpCluster,Description="SNPs found in clusters">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##GATKCommandLine=<ID=HaplotypeCaller,CommandLine="HaplotypeCaller  --recover-dangling-heads true --dont-use-soft-clipped-bases true --standard-min-confidence-threshold-for-calling 20.0 --output /broad/hptmp/bankapur/full_mut/varcalling.outdir/variants.vcf --input /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/recalibrated.bam --reference /seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ref_genome.fa  --emit-ref-confidence NONE --gvcf-gq-bands 1 --gvcf-gq-bands 2 --gvcf-gq-bands 3 --gvcf-gq-bands 4 --gvcf-gq-bands 5 --gvcf-gq-bands 6 --gvcf-gq-bands 7 --gvcf-gq-bands 8 --gvcf-gq-bands 9 --gvcf-gq-bands 10 --gvcf-gq-bands 11 --gvcf-gq-bands 12 --gvcf-gq-bands 13 --gvcf-gq-bands 14 --gvcf-gq-bands 15 --gvcf-gq-bands 16 --gvcf-gq-bands 17 --gvcf-gq-bands 18 --gvcf-gq-bands 19 --gvcf-gq-bands 20 --gvcf-gq-bands 21 --gvcf-gq-bands 22 --gvcf-gq-bands 23 --gvcf-gq-bands 24 --gvcf-gq-bands 25 --gvcf-gq-bands 26 --gvcf-gq-bands 27 --gvcf-gq-bands 28 --gvcf-gq-bands 29 --gvcf-gq-bands 30 --gvcf-gq-bands 31 --gvcf-gq-bands 32 --gvcf-gq-bands 33 --gvcf-gq-bands 34 --gvcf-gq-bands 35 --gvcf-gq-bands 36 --gvcf-gq-bands 37 --gvcf-gq-bands 38 --gvcf-gq-bands 39 --gvcf-gq-bands 40 --gvcf-gq-bands 41 --gvcf-gq-bands 42 --gvcf-gq-bands 43 --gvcf-gq-bands 44 --gvcf-gq-bands 45 --gvcf-gq-bands 46 --gvcf-gq-bands 47 --gvcf-gq-bands 48 --gvcf-gq-bands 49 --gvcf-gq-bands 50 --gvcf-gq-bands 51 --gvcf-gq-bands 52 --gvcf-gq-bands 53 --gvcf-gq-bands 54 --gvcf-gq-bands 55 --gvcf-gq-bands 56 --gvcf-gq-bands 57 --gvcf-gq-bands 58 --gvcf-gq-bands 59 --gvcf-gq-bands 60 --gvcf-gq-bands 70 --gvcf-gq-bands 80 --gvcf-gq-bands 90 --gvcf-gq-bands 99 --indel-size-to-eliminate-in-ref-model 10 --use-alleles-trigger false --disable-optimizations false --just-determine-active-regions false --dont-genotype false --max-mnp-distance 0 --dont-trim-active-regions false --max-disc-ar-extension 25 --max-gga-ar-extension 300 --padding-around-indels 
150 --padding-around-snps 20 --kmer-size 10 --kmer-size 25 --dont-increase-kmer-sizes-for-cycles false --allow-non-unique-kmers-in-ref false --num-pruning-samples 1 --do-not-recover-dangling-branches false --min-dangling-branch-length 4 --consensus false --max-num-haplotypes-in-population 128 --error-correct-kmers false --min-pruning 2 --debug-graph-transformations false --kmer-length-for-read-error-correction 25 --min-observations-for-kmer-to-be-solid 20 --likelihood-calculation-engine PairHMM --base-quality-score-threshold 18 --pair-hmm-gap-continuation-penalty 10 --pair-hmm-implementation FASTEST_AVAILABLE --pcr-indel-model CONSERVATIVE --phred-scaled-global-read-mismapping-rate 45 --native-pair-hmm-threads 4 --native-pair-hmm-use-double-precision false --debug false --use-filtered-reads-for-annotations false --bam-writer-type CALLED_HAPLOTYPES --capture-assembly-failure-bam false --error-correct-reads false --do-not-run-physical-phasing false --min-base-quality-score 10 --smith-waterman JAVA --use-new-qual-calculator false --annotate-with-num-discovered-alleles false --heterozygosity 0.001 --indel-heterozygosity 1.25E-4 --heterozygosity-stdev 0.01 --max-alternate-alleles 6 --max-genotype-count 1024 --sample-ploidy 2 --num-reference-samples-if-no-call 0 --genotyping-mode DISCOVERY --genotype-filtered-alleles false --contamination-fraction-to-filter 0.0 --output-mode EMIT_VARIANTS_ONLY --all-site-pls false --min-assembly-region-size 50 --max-assembly-region-size 300 --assembly-region-padding 100 --max-reads-per-alignment-start 50 --active-probability-threshold 0.002 --max-prob-propagation-distance 50 --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-bam-md5 false --create-output-variant-index true --create-output-variant-md5 false --lenient 
false --add-output-sam-program-record true --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays  --disable-tool-default-read-filters false --minimum-mapping-quality 20 --disable-tool-default-annotations false --enable-all-annotations false",Version=4.0.10.0,Date="October 29, 2018 1:03:15 PM EDT">
+##GATKCommandLine=<ID=VariantFiltration,CommandLine="VariantFiltration  --output /broad/hptmp/bankapur/full_mut/varcalling.outdir/variants_initial_filtering.vcf --filter-expression FS > 30.0 --filter-expression QD < 2.0 --filter-name FS --filter-name QD --cluster-size 3 --cluster-window-size 35 --variant /broad/hptmp/bankapur/full_mut/varcalling.outdir/variants.vcf --reference /seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ref_genome.fa  --mask-extension 0 --mask-name Mask --filter-not-in-mask false --missing-values-evaluate-as-failing false --invalidate-previous-filters false --invert-filter-expression false --invert-genotype-filter-expression false --set-filtered-genotype-to-no-call false --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-bam-md5 false --create-output-variant-index true --create-output-variant-md5 false --lenient false --add-output-sam-program-record true --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays  --disable-tool-default-read-filters false",Version=4.0.10.0,Date="October 29, 2018 1:15:13 PM EDT">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes  for each ALT allele  in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency  for each ALT allele  in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC)  for each ALT allele  in the same order as listed">
+##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF)  for each ALT allele  in the same order as listed">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
+##contig=<ID=chr1,length=248956422>
+##contig=<ID=chr2,length=242193529>
+##contig=<ID=chr3,length=198295559>
+##contig=<ID=chr4,length=190214555>
+##contig=<ID=chr5,length=181538259>
+##contig=<ID=chr6,length=170805979>
+##contig=<ID=chr7,length=159345973>
+##contig=<ID=chr8,length=145138636>
+##contig=<ID=chr9,length=138394717>
+##contig=<ID=chr10,length=133797422>
+##contig=<ID=chr11,length=135086622>
+##contig=<ID=chr12,length=133275309>
+##contig=<ID=chr13,length=114364328>
+##contig=<ID=chr14,length=107043718>
+##contig=<ID=chr15,length=101991189>
+##contig=<ID=chr16,length=90338345>
+##contig=<ID=chr17,length=83257441>
+##contig=<ID=chr18,length=80373285>
+##contig=<ID=chr19,length=58617616>
+##contig=<ID=chr20,length=64444167>
+##contig=<ID=chr21,length=46709983>
+##contig=<ID=chr22,length=50818468>
+##contig=<ID=chrX,length=156040895>
+##contig=<ID=chrY,length=57227415>
+##contig=<ID=chrM,length=16569>
+##source=HaplotypeCaller
+##source=VariantFiltration
+##SnpEffVersion="4.1k (build 2015-09-07), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -nostats -noLof -no-downstream -no-upstream hg38 "
+##INFO=<ID=ANN,Number=.,Type=String,Description="Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'">
+##INFO=<ID=GENE,Number=.,Type=String,Description="The name of the gene/s in the genomic region of the SNP as annotated by SNPeff">
+##INFO=<ID=COSMIC_ID,Number=.,Type=String,Description="COSMIC mutation id (unique).">
+##INFO=<ID=TISSUE,Number=.,Type=String,Description="The primary tissue/cancer and subtype from which the sample originated.">
+##INFO=<ID=TUMOR,Number=.,Type=String,Description="The histological classification of the sample.">
+##INFO=<ID=FATHMM,Number=.,Type=String,Description="FATHMM (Functional Analysis through Hidden Markov Models). 'Pathogenic':Cancer or damaging  'Neutral':Passanger or Tolerated.">
+##INFO=<ID=SOMATIC,Number=.,Type=String,Description="Information on whether the sample was reported to be Confirmed Somatic. 'Confirmed somatic':if the mutation has been confimed to be somatic in the experiment by sequencing both the tumour and a matched normal from the same patient  'Previously Observed':when the mutation has been reported as somatic previously but not in current paper  'variant of unknown origin':when the mutation is known to be somatic but the tumour was sequenced without a matched normal">
+##bcftools_annotateVersion=1.9+htslib-1.9
+##bcftools_annotateCommand=annotate --output-type z --annotations /seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ctat_mutation_lib/cosmic.vcf.gz --columns INFO/COSMIC_ID,INFO/TISSUE,INFO/TUMOR,INFO/FATHMM,INFO/SOMATIC --output /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/variants_annotated.vcf.gz /broad/hptmp/bankapur/full_mut/varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz; Date=Mon Oct 29 13:22:17 2018
+##INFO=<ID=CHASM_PVALUE,Number=.,Type=Float,Description="CHASM cancer driver p-value (missense) generated by CRAVAT">
+##INFO=<ID=CHASM_FDR,Number=.,Type=Float,Description="CHASM cancer driver FDR (missense)">
+##INFO=<ID=VEST_PVALUE,Number=.,Type=Float,Description="VEST pathogenicity p-value (non-silent)">
+##INFO=<ID=VEST_FDR,Number=.,Type=Float,Description="VEST pathogenicity FDR (non-silent)">
+##bcftools_annotateCommand=annotate --annotations /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/Variant_result_updated.tsv.gz -h /seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ctat_mutation_lib/header/cravat_annotation.txt --columns CHROM,POS,CHASM_PVALUE,CHASM_FDR,VEST_PVALUE,VEST_FDR --output-type z --output /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/variants_initial_filtering_clean_snp_RNAedit_cosmic_filtered_cravat_annotated_coding.vcf.gz /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/variants_initial_filtering_clean_snp_RNAedit_cosmic_filtered.vcf; Date=Mon Oct 29 13:23:53 2018
+##bcftools_annotateCommand=annotate --annotations /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/Variant_non_coding_result_updated.tsv.gz -h /seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ctat_mutation_lib/header/cravat_annotation.txt --columns CHROM,POS,CHASM_PVALUE,CHASM_FDR,VEST_PVALUE,VEST_FDR --output-type z --output /broad/hptmp/bankapur/full_mut/varcalling.outdir/annotated_min_filtered.vcf.gz /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/variants_initial_filtering_clean_snp_RNAedit_cosmic_filtered_cravat_annotated_coding.vcf.gz; Date=Mon Oct 29 13:23:53 2018
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	reads_1_fastq
+chr5	474989	.	A	G	96.03	PASS	GENE=LOC100288152,SLC9A3;AC=2;AF=1;AN=2;DP=4;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=60;QD=24.01;SOR=1.609;ANN=G|missense_variant|MODERATE|SLC9A3|SLC9A3|transcript|NM_004174.2|Coding|16/17|c.2395T>C|p.Cys799Arg|2507/2777|2395/2505|799/834||,G|missense_variant|MODERATE|SLC9A3|SLC9A3|transcript|NM_001284351.1|Coding|16/17|c.2368T>C|p.Cys790Arg|2480/2750|2368/2478|790/825||,G|intron_variant|MODIFIER|LOC100288152|LOC100288152|transcript|NR_125375.1|Noncoding|1/6|n.165-148A>G||||||;COSMIC_ID=COSM4006021;TISSUE=urinary_tract;TUMOR=carcinoma_--_NS;FATHMM=NEUTRAL;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.1114;CHASM_FDR=0.2;VEST_PVALUE=0.96802;VEST_FDR=1	GT:AD:DP:GQ:PL	1/1:0,4:4:12:124,12,0
+chr5	181224474	.	G	A	349.77	PASS	GENE=TRIM41;AC=1;AF=0.5;AN=2;BaseQRankSum=0.32;DP=45;ExcessHet=3.0103;FS=1.237;MLEAC=1;MLEAF=0.5;MQ=60;MQRankSum=0;QD=7.77;ReadPosRankSum=0.504;SOR=0.945;ANN=A|missense_variant|MODERATE|TRIM41|TRIM41|transcript|NM_033549.4|Coding|1/6|c.475G>A|p.Glu159Lys|1212/3663|475/1893|159/630||,A|missense_variant|MODERATE|TRIM41|TRIM41|transcript|NM_201627.2|Coding|1/8|c.475G>A|p.Glu159Lys|1212/2723|475/1557|159/518||,A|non_coding_exon_variant|MODIFIER|TRIM41|TRIM41|transcript|NR_045218.1|Noncoding|1/7|n.1212G>A||||||;CHASM_PVALUE=0.0694;CHASM_FDR=0.15;VEST_PVALUE=0.48052;VEST_FDR=1	GT:AD:DP:GQ:PL	0/1:28,17:45:99:378,0,717
+chr8	143923759	.	G	A	838.77	PASS	GENE=PLEC;AC=1;AF=0.5;AN=2;BaseQRankSum=-0.63;DP=66;ExcessHet=3.0103;FS=0.94;MLEAC=1;MLEAF=0.5;MQ=60;MQRankSum=0;QD=12.71;ReadPosRankSum=0.567;SOR=0.82;ANN=A|missense_variant|MODERATE|PLEC|PLEC|transcript|NM_201380.3|Coding|31/32|c.6581C>T|p.Ala2194Val|6751/15255|6581/14055|2194/4684||,A|missense_variant|MODERATE|PLEC|PLEC|transcript|NM_201384.2|Coding|31/32|c.6170C>T|p.Ala2057Val|6299/14803|6170/13644|2057/4547||,A|missense_variant|MODERATE|PLEC|PLEC|transcript|NM_201383.2|Coding|31/32|c.6182C>T|p.Ala2061Val|6191/14695|6182/13656|2061/4551||,A|missense_variant|MODERATE|PLEC|PLEC|transcript|NM_201382.3|Coding|31/32|c.6170C>T|p.Ala2057Val|6238/14742|6170/13644|2057/4547||,A|missense_variant|MODERATE|PLEC|PLEC|transcript|NM_201381.2|Coding|31/32|c.6074C>T|p.Ala2025Val|6148/14652|6074/13548|2025/4515||,A|missense_variant|MODERATE|PLEC|PLEC|transcript|NM_201379.2|Coding|31/32|c.6104C>T|p.Ala2035Val|6253/14757|6104/13578|2035/4525||,A|missense_variant|MODERATE|PLEC|PLEC|transcript|NM_201378.3|Coding|31/32|c.6128C>T|p.Ala2043Val|6185/14689|6128/13602|2043/4533||,A|missense_variant|MODERATE|PLEC|PLEC|transcript|NM_000445.4|Coding|32/33|c.6251C>T|p.Ala2084Val|6300/14804|6251/13725|2084/4574||;COSMIC_ID=COSM3750086;TISSUE=large_intestine;TUMOR=carcinoma_--_adenocarcinoma;FATHMM=NEUTRAL;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.0344;CHASM_FDR=0.1;VEST_PVALUE=0.84202;VEST_FDR=1	GT:AD:DP:GQ:PL	0/1:31,35:66:99:867,0,736
+chr12	56420869	.	G	A	384.77	PASS	GENE=TIMELESS;AC=1;AF=0.5;AN=2;BaseQRankSum=2.348;DP=48;ExcessHet=3.0103;FS=4.262;MLEAC=1;MLEAF=0.5;MQ=60;MQRankSum=0;QD=8.02;ReadPosRankSum=0.096;SOR=0.674;ANN=A|missense_variant|MODERATE|TIMELESS|TIMELESS|transcript|NM_003920.3|Coding|25/29|c.3053C>T|p.Pro1018Leu|3221/5138|3053/3627|1018/1208||;COSMIC_ID=COSM3753397;TISSUE=large_intestine;TUMOR=carcinoma_--_adenocarcinoma;FATHMM=PATHOGENIC;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.0744;CHASM_FDR=0.15;VEST_PVALUE=0.18439;VEST_FDR=0.95	GT:AD:DP:GQ:PL	0/1:30,18:48:99:413,0,759
+chr17	7673767	.	C	T	1848.77	PASS	GENE=TP53;AC=2;AF=1;AN=2;DP=61;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=60;QD=30.31;SOR=1.609;ANN=T|missense_variant|MODERATE|TP53|TP53|transcript|NM_000546.5|Coding|8/11|c.853G>A|p.Glu285Lys|1055/2591|853/1182|285/393||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126115.1|Coding|4/7|c.457G>A|p.Glu153Lys|735/2271|457/786|153/261||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126116.1|Coding|4/8|c.457G>A|p.Glu153Lys|735/2404|457/630|153/209||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126117.1|Coding|4/8|c.457G>A|p.Glu153Lys|735/2331|457/645|153/214||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276697.1|Coding|4/7|c.376G>A|p.Glu126Lys|735/2271|376/705|126/234||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276698.1|Coding|4/8|c.376G>A|p.Glu126Lys|735/2404|376/549|126/182||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276699.1|Coding|4/8|c.376G>A|p.Glu126Lys|735/2331|376/564|126/187||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126112.2|Coding|8/11|c.853G>A|p.Glu285Lys|1052/2588|853/1182|285/393||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126113.2|Coding|8/12|c.853G>A|p.Glu285Lys|1055/2651|853/1041|285/346||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126114.2|Coding|8/12|c.853G>A|p.Glu285Lys|1055/2724|853/1026|285/341||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126118.1|Coding|7/10|c.736G>A|p.Glu246Lys|1172/2708|736/1065|246/354||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276695.1|Coding|8/12|c.736G>A|p.Glu246Lys|1055/2651|736/924|246/307||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276696.1|Coding|8/12|c.736G>A|p.Glu246Lys|1055/2724|736/909|246/302||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276760.1|Coding|8/11|c.736G>A|p.Glu246Lys|1055/2591|736/1065|246/354||,T|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276761.1|Coding|8/11|c.736G>A|p.Glu246Lys|1052/2588|736/1065|246/354||;C
OSMIC_ID=COSM3717625;TISSUE=bone;TUMOR=Ewings_sarcoma-peripheral_primitive_neuroectodermal_tumour_--_NS;FATHMM=PATHOGENIC;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0;CHASM_FDR=0.05;VEST_PVALUE=0.01447;VEST_FDR=0.15	GT:AD:DP:GQ:PL	1/1:0,61:61:99:1877,183,0
+chr17	7676154	.	G	C	2161.77	PASS	GENE=TP53;AC=2;AF=1;AN=2;BaseQRankSum=3.225;DP=80;ExcessHet=3.0103;FS=14.289;MLEAC=2;MLEAF=1;MQ=60;MQRankSum=0;QD=27.02;ReadPosRankSum=-2.023;SOR=0.642;ANN=C|missense_variant|MODERATE|TP53|TP53|transcript|NM_000546.5|Coding|4/11|c.215C>G|p.Pro72Arg|417/2591|215/1182|72/393||,C|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126112.2|Coding|4/11|c.215C>G|p.Pro72Arg|414/2588|215/1182|72/393||,C|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126113.2|Coding|4/12|c.215C>G|p.Pro72Arg|417/2651|215/1041|72/346||,C|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126114.2|Coding|4/12|c.215C>G|p.Pro72Arg|417/2724|215/1026|72/341||,C|missense_variant|MODERATE|TP53|TP53|transcript|NM_001126118.1|Coding|3/10|c.98C>G|p.Pro33Arg|534/2708|98/1065|33/354||,C|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276695.1|Coding|4/12|c.98C>G|p.Pro33Arg|417/2651|98/924|33/307||,C|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276696.1|Coding|4/12|c.98C>G|p.Pro33Arg|417/2724|98/909|33/302||,C|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276760.1|Coding|4/11|c.98C>G|p.Pro33Arg|417/2591|98/1065|33/354||,C|missense_variant|MODERATE|TP53|TP53|transcript|NM_001276761.1|Coding|4/11|c.98C>G|p.Pro33Arg|414/2588|98/1065|33/354||;COSMIC_ID=COSM3766193;TISSUE=haematopoietic_and_lymphoid_tissue;TUMOR=haematopoietic_neoplasm_--_acute_myeloid_leukaemia;FATHMM=NEUTRAL;SOMATIC=Reported_in_another_cancer_sample_as_somatic;CHASM_PVALUE=0.087;CHASM_FDR=0.15;VEST_PVALUE=0.52717;VEST_FDR=1	GT:AD:DP:GQ:PL	1/1:4,76:80:99:2190,183,0
+chr17	43071077	.	T	C	92.03	PASS	GENE=BRCA1;AC=2;AF=1;AN=2;DP=4;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=60;QD=23.01;SOR=3.258;ANN=C|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007300.3|Coding|16/24|c.4900A>G|p.Ser1634Gly|5132/7270|4900/5655|1634/1884||,C|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007298.3|Coding|14/22|c.1525A>G|p.Ser509Gly|1544/3682|1525/2280|509/759||,C|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007297.3|Coding|14/22|c.4696A>G|p.Ser1566Gly|4977/7115|4696/5451|1566/1816||,C|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007299.3|Coding|15/22|c.1525A>G|p.Ser509Gly|1719/3783|1525/2100|509/699||,C|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007294.3|Coding|15/23|c.4837A>G|p.Ser1613Gly|5069/7207|4837/5592|1613/1863||,C|non_coding_exon_variant|MODIFIER|BRCA1|BRCA1|transcript|NR_027676.1|Noncoding|15/23|n.4973A>G||||||;COSMIC_ID=COSM3755560;TISSUE=haematopoietic_and_lymphoid_tissue;TUMOR=haematopoietic_neoplasm_--_acute_myeloid_leukaemia;FATHMM=NEUTRAL;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.0372;CHASM_FDR=0.1;VEST_PVALUE=0.3446;VEST_FDR=1	GT:AD:DP:GQ:PL	1/1:0,4:4:12:120,12,0
+chr17	43091983	.	T	C	84.03	PASS	GENE=BRCA1;AC=2;AF=1;AN=2;DP=4;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=60;QD=21.01;SOR=0.693;ANN=C|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007300.3|Coding|10/24|c.3548A>G|p.Lys1183Arg|3780/7270|3548/5655|1183/1884||,C|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007297.3|Coding|9/22|c.3407A>G|p.Lys1136Arg|3688/7115|3407/5451|1136/1816||,C|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007294.3|Coding|10/23|c.3548A>G|p.Lys1183Arg|3780/7207|3548/5592|1183/1863||,C|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007298.3|Coding|9/21|c.788-951A>G||||||,C|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007299.3|Coding|10/21|c.788-951A>G||||||,C|non_coding_exon_variant|MODIFIER|BRCA1|BRCA1|transcript|NR_027676.1|Noncoding|10/23|n.3684A>G||||||;COSMIC_ID=COSM3755561;TISSUE=soft_tissue;TUMOR=haemangioblastoma_--_NS;FATHMM=NEUTRAL;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.0002;CHASM_FDR=0.05;VEST_PVALUE=0.64447;VEST_FDR=1	GT:AD:DP:GQ:PL	1/1:0,4:4:12:112,12,0
+chr17	43092919	.	G	A	33.74	PASS	GENE=BRCA1;AC=2;AF=1;AN=2;DP=2;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=60;QD=16.87;SOR=2.303;ANN=A|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007300.3|Coding|10/24|c.2612C>T|p.Pro871Leu|2844/7270|2612/5655|871/1884||,A|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007297.3|Coding|9/22|c.2471C>T|p.Pro824Leu|2752/7115|2471/5451|824/1816||,A|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007294.3|Coding|10/23|c.2612C>T|p.Pro871Leu|2844/7207|2612/5592|871/1863||,A|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007298.3|Coding|9/21|c.787+1825C>T||||||,A|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007299.3|Coding|10/21|c.787+1825C>T||||||,A|non_coding_exon_variant|MODIFIER|BRCA1|BRCA1|transcript|NR_027676.1|Noncoding|10/23|n.2748C>T||||||;COSMIC_ID=COSM3755564;TISSUE=prostate;TUMOR=carcinoma_--_NS;FATHMM=NEUTRAL;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.0004;CHASM_FDR=0.05;VEST_PVALUE=0.33539;VEST_FDR=1	GT:AD:DP:GQ:PL	1/1:0,2:2:6:61,6,0
+chr17	43093454	.	C	T	425.77	PASS	GENE=BRCA1;AC=2;AF=1;AN=2;DP=11;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=60;QD=31.78;SOR=1.27;ANN=T|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007300.3|Coding|10/24|c.2077G>A|p.Asp693Asn|2309/7270|2077/5655|693/1884||,T|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007297.3|Coding|9/22|c.1936G>A|p.Asp646Asn|2217/7115|1936/5451|646/1816||,T|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007294.3|Coding|10/23|c.2077G>A|p.Asp693Asn|2309/7207|2077/5592|693/1863||,T|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007298.3|Coding|9/21|c.787+1290G>A||||||,T|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007299.3|Coding|10/21|c.787+1290G>A||||||,T|non_coding_exon_variant|MODIFIER|BRCA1|BRCA1|transcript|NR_027676.1|Noncoding|10/23|n.2213G>A||||||;COSMIC_ID=COSM4989394;TISSUE=soft_tissue;TUMOR=rhabdomyosarcoma_--_embryonal;FATHMM=NEUTRAL;SOMATIC=Reported_in_another_cancer_sample_as_somatic;CHASM_PVALUE=0.0014;CHASM_FDR=0.05;VEST_PVALUE=0.51068;VEST_FDR=1	GT:AD:DP:GQ:PL	1/1:0,11:11:33:454,33,0
+chr19	39177761	.	G	C	1134.77	PASS	GENE=PAK4;AC=1;AF=0.5;AN=2;BaseQRankSum=2.258;DP=106;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=60;MQRankSum=0;QD=10.71;ReadPosRankSum=1.397;SOR=0.654;ANN=C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_001014831.2|Coding|10/11|c.1572G>C|p.Met524Ile|2033/3064|1572/1776|524/591||,C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_001014832.1|Coding|8/9|c.1572G>C|p.Met524Ile|1734/2765|1572/1776|524/591||,C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_001014834.2|Coding|7/8|c.1113G>C|p.Met371Ile|1275/2306|1113/1317|371/438||,C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_001014835.1|Coding|8/9|c.1113G>C|p.Met371Ile|1348/2379|1113/1317|371/438||,C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_005884.3|Coding|9/10|c.1572G>C|p.Met524Ile|1807/2838|1572/1776|524/591||;CHASM_PVALUE=0.0004;CHASM_FDR=0.05;VEST_PVALUE=0.01093;VEST_FDR=0.15	GT:AD:DP:GQ:PL	0/1:58,48:106:99:1163,0,1435
+chr19	47271515	.	T	C	336.77	PASS	GENE=CCDC9;AC=2;AF=1;AN=2;DP=12;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=60;QD=28.06;SOR=2.67;ANN=C|missense_variant|MODERATE|CCDC9|CCDC9|transcript|NM_015603.2|Coding|12/12|c.1433T>C|p.Leu478Pro|1640/2078|1433/1596|478/531||;COSMIC_ID=COSM3721172;TISSUE=haematopoietic_and_lymphoid_tissue;TUMOR=haematopoietic_neoplasm_--_acute_myeloid_leukaemia;FATHMM=NEUTRAL;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.093;CHASM_FDR=0.15;VEST_PVALUE=0.97622;VEST_FDR=1	GT:AD:DP:GQ:PL	1/1:0,12:12:36:365,36,0
+chr20	46687147	.	C	T	423.77	PASS	GENE=TP53RK;AC=1;AF=0.5;AN=2;BaseQRankSum=-0.26;DP=26;ExcessHet=3.0103;FS=1.657;MLEAC=1;MLEAF=0.5;MQ=60;MQRankSum=0;QD=16.3;ReadPosRankSum=1.395;SOR=1.179;ANN=T|missense_variant|MODERATE|TP53RK|TP53RK|transcript|NM_033550.3|Coding|2/2|c.368G>A|p.Arg123Gln|591/3373|368/762|123/253||;COSMIC_ID=COSM3758608;TISSUE=pancreas;TUMOR=carcinoma_--_ductal_carcinoma;FATHMM=PATHOGENIC;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.0834;CHASM_FDR=0.15;VEST_PVALUE=0.88584;VEST_FDR=1	GT:AD:DP:GQ:PL	0/1:8,18:26:99:452,0,165
Binary file test-data/varcalling.outdir/misc/recalibrated.bai has changed
Binary file test-data/varcalling.outdir/misc/recalibrated.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/varcalling.outdir/mutation_inspector.json	Tue Oct 30 12:28:26 2018 -0400
@@ -0,0 +1,321 @@
+{
+  "BAM": "/broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/recalibrated.bam", 
+  "BAM_INDEX": "/broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/recalibrated.bai", 
+  "BED": "/seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ctat_mutation_lib/refGene.sort.bed", 
+  "BED_INDEX": "/seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ctat_mutation_lib/refGene.sort.bed.idx", 
+  "SAMPLE": "varcalling.outdir", 
+  "SNV": [
+    {
+      "ALT": "G", 
+      "CHASM_FDR": "0.2", 
+      "CHASM_PVALUE": "0.1114", 
+      "CHROM": "chr5", 
+      "COSMIC_ID": "COSM4006021", 
+      "DP": "4", 
+      "GENE": "LOC100288152,SLC9A3", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "474989", 
+      "QUAL": "96.03", 
+      "REF": "A", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "urinary_tract", 
+      "TUMOR": "carcinoma_--_NS", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.96802"
+    }, 
+    {
+      "ALT": "A", 
+      "CHASM_FDR": "0.15", 
+      "CHASM_PVALUE": "0.0694", 
+      "CHROM": "chr5", 
+      "COSMIC_ID": "NA", 
+      "DP": "45", 
+      "GENE": "TRIM41", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "181224474", 
+      "QUAL": "349.77", 
+      "REF": "G", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "NA", 
+      "TUMOR": "NA", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.48052"
+    }, 
+    {
+      "ALT": "A", 
+      "CHASM_FDR": "0.1", 
+      "CHASM_PVALUE": "0.0344", 
+      "CHROM": "chr8", 
+      "COSMIC_ID": "COSM3750086", 
+      "DP": "66", 
+      "GENE": "PLEC", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "143923759", 
+      "QUAL": "838.77", 
+      "REF": "G", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "large_intestine", 
+      "TUMOR": "carcinoma_--_adenocarcinoma", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.84202"
+    }, 
+    {
+      "ALT": "A", 
+      "CHASM_FDR": "0.15", 
+      "CHASM_PVALUE": "0.0744", 
+      "CHROM": "chr12", 
+      "COSMIC_ID": "COSM3753397", 
+      "DP": "48", 
+      "GENE": "TIMELESS", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "56420869", 
+      "QUAL": "384.77", 
+      "REF": "G", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "large_intestine", 
+      "TUMOR": "carcinoma_--_adenocarcinoma", 
+      "VEST_FDR": "0.95", 
+      "VEST_PVALUE": "0.18439"
+    }, 
+    {
+      "ALT": "T", 
+      "CHASM_FDR": "0.05", 
+      "CHASM_PVALUE": "0", 
+      "CHROM": "chr17", 
+      "COSMIC_ID": "COSM3717625", 
+      "DP": "61", 
+      "GENE": "TP53", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "7673767", 
+      "QUAL": "1848.77", 
+      "REF": "C", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "bone", 
+      "TUMOR": "Ewings_sarcoma-peripheral_primitive_neuroectodermal_tumour_--_NS", 
+      "VEST_FDR": "0.15", 
+      "VEST_PVALUE": "0.01447"
+    }, 
+    {
+      "ALT": "C", 
+      "CHASM_FDR": "0.15", 
+      "CHASM_PVALUE": "0.087", 
+      "CHROM": "chr17", 
+      "COSMIC_ID": "COSM3766193", 
+      "DP": "80", 
+      "GENE": "TP53", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "7676154", 
+      "QUAL": "2161.77", 
+      "REF": "G", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "haematopoietic_and_lymphoid_tissue", 
+      "TUMOR": "haematopoietic_neoplasm_--_acute_myeloid_leukaemia", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.52717"
+    }, 
+    {
+      "ALT": "C", 
+      "CHASM_FDR": "0.1", 
+      "CHASM_PVALUE": "0.0372", 
+      "CHROM": "chr17", 
+      "COSMIC_ID": "COSM3755560", 
+      "DP": "4", 
+      "GENE": "BRCA1", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "43071077", 
+      "QUAL": "92.03", 
+      "REF": "T", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "haematopoietic_and_lymphoid_tissue", 
+      "TUMOR": "haematopoietic_neoplasm_--_acute_myeloid_leukaemia", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.3446"
+    }, 
+    {
+      "ALT": "C", 
+      "CHASM_FDR": "0.05", 
+      "CHASM_PVALUE": "0.0002", 
+      "CHROM": "chr17", 
+      "COSMIC_ID": "COSM3755561", 
+      "DP": "4", 
+      "GENE": "BRCA1", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "43091983", 
+      "QUAL": "84.03", 
+      "REF": "T", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "soft_tissue", 
+      "TUMOR": "haemangioblastoma_--_NS", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.64447"
+    }, 
+    {
+      "ALT": "A", 
+      "CHASM_FDR": "0.05", 
+      "CHASM_PVALUE": "0.0004", 
+      "CHROM": "chr17", 
+      "COSMIC_ID": "COSM3755564", 
+      "DP": "2", 
+      "GENE": "BRCA1", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "43092919", 
+      "QUAL": "33.74", 
+      "REF": "G", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "prostate", 
+      "TUMOR": "carcinoma_--_NS", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.33539"
+    }, 
+    {
+      "ALT": "T", 
+      "CHASM_FDR": "0.05", 
+      "CHASM_PVALUE": "0.0014", 
+      "CHROM": "chr17", 
+      "COSMIC_ID": "COSM4989394", 
+      "DP": "11", 
+      "GENE": "BRCA1", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "43093454", 
+      "QUAL": "425.77", 
+      "REF": "C", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "soft_tissue", 
+      "TUMOR": "rhabdomyosarcoma_--_embryonal", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.51068"
+    }, 
+    {
+      "ALT": "C", 
+      "CHASM_FDR": "0.05", 
+      "CHASM_PVALUE": "0.0004", 
+      "CHROM": "chr19", 
+      "COSMIC_ID": "NA", 
+      "DP": "106", 
+      "GENE": "PAK4", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "39177761", 
+      "QUAL": "1134.77", 
+      "REF": "G", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "NA", 
+      "TUMOR": "NA", 
+      "VEST_FDR": "0.15", 
+      "VEST_PVALUE": "0.01093"
+    }, 
+    {
+      "ALT": "C", 
+      "CHASM_FDR": "0.15", 
+      "CHASM_PVALUE": "0.093", 
+      "CHROM": "chr19", 
+      "COSMIC_ID": "COSM3721172", 
+      "DP": "12", 
+      "GENE": "CCDC9", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "47271515", 
+      "QUAL": "336.77", 
+      "REF": "T", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "haematopoietic_and_lymphoid_tissue", 
+      "TUMOR": "haematopoietic_neoplasm_--_acute_myeloid_leukaemia", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.97622"
+    }, 
+    {
+      "ALT": "T", 
+      "CHASM_FDR": "0.15", 
+      "CHASM_PVALUE": "0.0834", 
+      "CHROM": "chr20", 
+      "COSMIC_ID": "COSM3758608", 
+      "DP": "26", 
+      "GENE": "TP53RK", 
+      "KGPROD": "NA", 
+      "MQ": "60", 
+      "NSF": "NA", 
+      "NSM": "NA", 
+      "NSN": "NA", 
+      "PMC": "NA", 
+      "POS": "46687147", 
+      "QUAL": "423.77", 
+      "REF": "C", 
+      "RS": "NA", 
+      "SAO": "NA", 
+      "TISSUE": "pancreas", 
+      "TUMOR": "carcinoma_--_ductal_carcinoma", 
+      "VEST_FDR": "1", 
+      "VEST_PVALUE": "0.88584"
+    }
+  ]
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/varcalling.outdir/variants.vcf	Tue Oct 30 12:28:26 2018 -0400
@@ -0,0 +1,79 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=LowQual,Description="Low quality">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##GATKCommandLine=<ID=HaplotypeCaller,CommandLine="HaplotypeCaller  --recover-dangling-heads true --dont-use-soft-clipped-bases true --standard-min-confidence-threshold-for-calling 20.0 --output /broad/hptmp/bankapur/full_mut/varcalling.outdir/variants.vcf --input /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/recalibrated.bam --reference /seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ref_genome.fa  --emit-ref-confidence NONE --gvcf-gq-bands 1 --gvcf-gq-bands 2 --gvcf-gq-bands 3 --gvcf-gq-bands 4 --gvcf-gq-bands 5 --gvcf-gq-bands 6 --gvcf-gq-bands 7 --gvcf-gq-bands 8 --gvcf-gq-bands 9 --gvcf-gq-bands 10 --gvcf-gq-bands 11 --gvcf-gq-bands 12 --gvcf-gq-bands 13 --gvcf-gq-bands 14 --gvcf-gq-bands 15 --gvcf-gq-bands 16 --gvcf-gq-bands 17 --gvcf-gq-bands 18 --gvcf-gq-bands 19 --gvcf-gq-bands 20 --gvcf-gq-bands 21 --gvcf-gq-bands 22 --gvcf-gq-bands 23 --gvcf-gq-bands 24 --gvcf-gq-bands 25 --gvcf-gq-bands 26 --gvcf-gq-bands 27 --gvcf-gq-bands 28 --gvcf-gq-bands 29 --gvcf-gq-bands 30 --gvcf-gq-bands 31 --gvcf-gq-bands 32 --gvcf-gq-bands 33 --gvcf-gq-bands 34 --gvcf-gq-bands 35 --gvcf-gq-bands 36 --gvcf-gq-bands 37 --gvcf-gq-bands 38 --gvcf-gq-bands 39 --gvcf-gq-bands 40 --gvcf-gq-bands 41 --gvcf-gq-bands 42 --gvcf-gq-bands 43 --gvcf-gq-bands 44 --gvcf-gq-bands 45 --gvcf-gq-bands 46 --gvcf-gq-bands 47 --gvcf-gq-bands 48 --gvcf-gq-bands 49 --gvcf-gq-bands 50 --gvcf-gq-bands 51 --gvcf-gq-bands 52 --gvcf-gq-bands 53 --gvcf-gq-bands 54 --gvcf-gq-bands 55 --gvcf-gq-bands 56 --gvcf-gq-bands 57 --gvcf-gq-bands 58 --gvcf-gq-bands 59 --gvcf-gq-bands 60 --gvcf-gq-bands 70 --gvcf-gq-bands 80 --gvcf-gq-bands 90 --gvcf-gq-bands 99 --indel-size-to-eliminate-in-ref-model 10 --use-alleles-trigger false --disable-optimizations false --just-determine-active-regions false --dont-genotype false --max-mnp-distance 0 --dont-trim-active-regions false --max-disc-ar-extension 25 --max-gga-ar-extension 300 --padding-around-indels 
150 --padding-around-snps 20 --kmer-size 10 --kmer-size 25 --dont-increase-kmer-sizes-for-cycles false --allow-non-unique-kmers-in-ref false --num-pruning-samples 1 --do-not-recover-dangling-branches false --min-dangling-branch-length 4 --consensus false --max-num-haplotypes-in-population 128 --error-correct-kmers false --min-pruning 2 --debug-graph-transformations false --kmer-length-for-read-error-correction 25 --min-observations-for-kmer-to-be-solid 20 --likelihood-calculation-engine PairHMM --base-quality-score-threshold 18 --pair-hmm-gap-continuation-penalty 10 --pair-hmm-implementation FASTEST_AVAILABLE --pcr-indel-model CONSERVATIVE --phred-scaled-global-read-mismapping-rate 45 --native-pair-hmm-threads 4 --native-pair-hmm-use-double-precision false --debug false --use-filtered-reads-for-annotations false --bam-writer-type CALLED_HAPLOTYPES --capture-assembly-failure-bam false --error-correct-reads false --do-not-run-physical-phasing false --min-base-quality-score 10 --smith-waterman JAVA --use-new-qual-calculator false --annotate-with-num-discovered-alleles false --heterozygosity 0.001 --indel-heterozygosity 1.25E-4 --heterozygosity-stdev 0.01 --max-alternate-alleles 6 --max-genotype-count 1024 --sample-ploidy 2 --num-reference-samples-if-no-call 0 --genotyping-mode DISCOVERY --genotype-filtered-alleles false --contamination-fraction-to-filter 0.0 --output-mode EMIT_VARIANTS_ONLY --all-site-pls false --min-assembly-region-size 50 --max-assembly-region-size 300 --assembly-region-padding 100 --max-reads-per-alignment-start 50 --active-probability-threshold 0.002 --max-prob-propagation-distance 50 --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-bam-md5 false --create-output-variant-index true --create-output-variant-md5 false --lenient 
false --add-output-sam-program-record true --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays  --disable-tool-default-read-filters false --minimum-mapping-quality 20 --disable-tool-default-annotations false --enable-all-annotations false",Version=4.0.10.0,Date="October 29, 2018 1:03:15 PM EDT">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
+##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
+##contig=<ID=chr1,length=248956422>
+##contig=<ID=chr2,length=242193529>
+##contig=<ID=chr3,length=198295559>
+##contig=<ID=chr4,length=190214555>
+##contig=<ID=chr5,length=181538259>
+##contig=<ID=chr6,length=170805979>
+##contig=<ID=chr7,length=159345973>
+##contig=<ID=chr8,length=145138636>
+##contig=<ID=chr9,length=138394717>
+##contig=<ID=chr10,length=133797422>
+##contig=<ID=chr11,length=135086622>
+##contig=<ID=chr12,length=133275309>
+##contig=<ID=chr13,length=114364328>
+##contig=<ID=chr14,length=107043718>
+##contig=<ID=chr15,length=101991189>
+##contig=<ID=chr16,length=90338345>
+##contig=<ID=chr17,length=83257441>
+##contig=<ID=chr18,length=80373285>
+##contig=<ID=chr19,length=58617616>
+##contig=<ID=chr20,length=64444167>
+##contig=<ID=chr21,length=46709983>
+##contig=<ID=chr22,length=50818468>
+##contig=<ID=chrX,length=156040895>
+##contig=<ID=chrY,length=57227415>
+##contig=<ID=chrM,length=16569>
+##source=HaplotypeCaller
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	reads_1_fastq
+chr5	474989	.	A	G	96.03	.	AC=2;AF=1.00;AN=2;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=24.01;SOR=1.609	GT:AD:DP:GQ:PL	1/1:0,4:4:12:124,12,0
+chr5	475231	.	A	T	66.28	.	AC=2;AF=1.00;AN=2;DP=3;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=22.09;SOR=1.179	GT:AD:DP:GQ:PL	1/1:0,3:3:9:94,9,0
+chr5	181223744	.	T	C	173.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=1.544;DP=14;ExcessHet=3.0103;FS=2.363;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=12.41;ReadPosRankSum=-0.756;SOR=1.863	GT:AD:DP:GQ:PL	0/1:6,8:14:99:202,0,133
+chr5	181224474	.	G	A	349.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=0.320;DP=45;ExcessHet=3.0103;FS=1.237;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=7.77;ReadPosRankSum=0.504;SOR=0.945	GT:AD:DP:GQ:PL	0/1:28,17:45:99:378,0,717
+chr8	143923488	.	C	T	706.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=-2.260;DP=78;ExcessHet=3.0103;FS=4.540;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=9.06;ReadPosRankSum=0.137;SOR=0.307	GT:AD:DP:GQ:PL	0/1:45,33:78:99:735,0,1132
+chr8	143923759	.	G	A	838.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=-0.630;DP=66;ExcessHet=3.0103;FS=0.940;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=12.71;ReadPosRankSum=0.567;SOR=0.820	GT:AD:DP:GQ:PL	0/1:31,35:66:99:867,0,736
+chr8	143924001	.	C	T	729.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=0.212;DP=60;ExcessHet=3.0103;FS=4.714;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=12.16;ReadPosRankSum=0.323;SOR=1.525	GT:AD:DP:GQ:PL	0/1:38,22:60:99:758,0,1811
+chr8	143924022	.	A	G	587.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=-2.271;DP=58;ExcessHet=3.0103;FS=14.916;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=10.13;ReadPosRankSum=0.674;SOR=2.948	GT:AD:DP:GQ:PL	0/1:40,18:58:99:616,0,1892
+chr12	56420869	.	G	A	384.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=2.348;DP=48;ExcessHet=3.0103;FS=4.262;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=8.02;ReadPosRankSum=0.096;SOR=0.674	GT:AD:DP:GQ:PL	0/1:30,18:48:99:413,0,759
+chr12	56420872	.	A	G	695.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=-0.308;DP=46;ExcessHet=3.0103;FS=8.803;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=15.13;ReadPosRankSum=0.656;SOR=1.232	GT:AD:DP:GQ:PL	0/1:18,28:46:99:724,0,415
+chr12	56422138	.	C	T	72.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=1.718;DP=8;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=9.10;ReadPosRankSum=-0.816;SOR=0.693	GT:AD:DP:GQ:PL	0/1:4,4:8:96:101,0,96
+chr17	7673767	.	C	T	1848.77	.	AC=2;AF=1.00;AN=2;DP=61;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.31;SOR=1.609	GT:AD:DP:GQ:PL	1/1:0,61:61:99:1877,183,0
+chr17	7675327	.	C	T	32.74	.	AC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=16.37;SOR=0.693	GT:AD:DP:GQ:PL	1/1:0,2:2:6:60,6,0
+chr17	7676154	.	G	C	2161.77	.	AC=2;AF=1.00;AN=2;BaseQRankSum=3.225;DP=80;ExcessHet=3.0103;FS=14.289;MLEAC=2;MLEAF=1.00;MQ=60.00;MQRankSum=0.000;QD=27.02;ReadPosRankSum=-2.023;SOR=0.642	GT:AD:DP:GQ:PL	1/1:4,76:80:99:2190,183,0
+chr17	43071077	.	T	C	92.03	.	AC=2;AF=1.00;AN=2;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=23.01;SOR=3.258	GT:AD:DP:GQ:PL	1/1:0,4:4:12:120,12,0
+chr17	43082453	.	A	G	37.74	.	AC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=18.87;SOR=2.303	GT:AD:DP:GQ:PL	1/1:0,2:2:6:65,6,0
+chr17	43091983	.	T	C	84.03	.	AC=2;AF=1.00;AN=2;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=21.01;SOR=0.693	GT:AD:DP:GQ:PL	1/1:0,4:4:12:112,12,0
+chr17	43092919	.	G	A	33.74	.	AC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=16.87;SOR=2.303	GT:AD:DP:GQ:PL	1/1:0,2:2:6:61,6,0
+chr17	43093220	.	A	G	278.77	.	AC=2;AF=1.00;AN=2;DP=10;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=27.88;SOR=4.804	GT:AD:DP:GQ:PL	1/1:0,10:10:30:307,30,0
+chr17	43093449	.	G	A	425.77	.	AC=2;AF=1.00;AN=2;DP=9;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=32.87;SOR=1.402	GT:AD:DP:GQ:PL	1/1:0,9:9:33:454,33,0
+chr17	43093454	.	C	T	425.77	.	AC=2;AF=1.00;AN=2;DP=11;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=31.78;SOR=1.270	GT:AD:DP:GQ:PL	1/1:0,11:11:33:454,33,0
+chr19	39177761	.	G	C	1134.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=2.258;DP=106;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=10.71;ReadPosRankSum=1.397;SOR=0.654	GT:AD:DP:GQ:PL	0/1:58,48:106:99:1163,0,1435
+chr19	39178960	.	A	ATG	226.77	.	AC=2;AF=1.00;AN=2;DP=12;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=32.40;SOR=4.174	GT:AD:DP:GQ:PL	1/1:0,7:7:21:264,21,0
+chr19	39179002	.	T	C	31.74	.	AC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=15.87;SOR=2.303	GT:AD:DP:GQ:PL	1/1:0,2:2:6:59,6,0
+chr19	47271315	.	C	T	513.77	.	AC=2;AF=1.00;AN=2;BaseQRankSum=0.932;DP=21;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQRankSum=0.000;QD=25.69;ReadPosRankSum=-1.045;SOR=0.264	GT:AD:DP:GQ:PL	1/1:1,19:20:30:542,30,0
+chr19	47271515	.	T	C	336.77	.	AC=2;AF=1.00;AN=2;DP=12;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=28.06;SOR=2.670	GT:AD:DP:GQ:PL	1/1:0,12:12:36:365,36,0
+chr19	47272198	.	G	T	21.77	.	AC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=10.88;SOR=0.693	GT:AD:DP:GQ:PL	1/1:0,2:2:6:49,6,0
+chr20	46687147	.	C	T	423.77	.	AC=1;AF=0.500;AN=2;BaseQRankSum=-0.260;DP=26;ExcessHet=3.0103;FS=1.657;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=16.30;ReadPosRankSum=1.395;SOR=1.179	GT:AD:DP:GQ:PL	0/1:8,18:26:99:452,0,165
Binary file test-data/varcalling.outdir/variants.vcf.idx has changed
Binary file test-data/varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ctat_genome_resource_libs.loc.sample	Tue Oct 30 12:28:26 2018 -0400
@@ -0,0 +1,15 @@
+# This file lists the locations of CTAT Genome Resource Libraries
+# Usually there will only be one library, but it is conceivable
+# that there could be multiple libraries.
+# This file format is as follows
+# (white space characters are TAB characters):
+#
+#<value>    <name>  <path>
+# value is a unique id
+# name is the display name
+# path is the directory where the genome resource lib files are stored
+#
+#ctat_genome_resource_libs.loc could look like:
+#
+#GRCh38_v27_CTAT_lib_Feb092018	CTAT_GenomeResourceLib_GRCh38_v27_CTAT_lib_Feb092018	/path/to/ctat/genome/resource/lib/directory
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Oct 30 12:28:26 2018 -0400
@@ -0,0 +1,14 @@
+<tables>
+    <!-- Galaxy tool data table declarations. Each table below declares the
+         columns value, name and path, uses "#" as its comment character,
+         sets allow_duplicate_entries="False", and points at a .loc file
+         under tool-data/. -->
+    <!-- CTAT genome resource libraries, read from
+         tool-data/ctat_genome_resource_libs.loc. -->
+    <table name="ctat_genome_resource_libs" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_genome_resource_libs.loc" />
+    </table>
+    <!-- Read from tool-data/ctat_centrifuge_indexes.loc.
+         NOTE(review): confirm that a matching .loc.sample ships with this
+         repository and that a tool here actually uses this table. -->
+    <table name="ctat_centrifuge_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_centrifuge_indexes.loc" />
+    </table>
+    <!-- Read from tool-data/ctat_lncrna_annotations.loc.
+         NOTE(review): confirm that a matching .loc.sample ships with this
+         repository and that a tool here actually uses this table. -->
+    <table name="ctat_lncrna_annotations" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_lncrna_annotations.loc" />
+    </table>
+</tables>