Mercurial > repos > jjohnson > defuse
diff defuse_results_to_vcf.py @ 27:d57fcac025e2
Add more info fields to defuse_results_to_vcf.py
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Wed, 14 Aug 2013 16:44:18 -0500 |
parents | 2ecf82136986 |
children | f51a95bdc38e |
line wrap: on
line diff
--- a/defuse_results_to_vcf.py Fri Aug 09 11:36:24 2013 -0500 +++ b/defuse_results_to_vcf.py Wed Aug 14 16:44:18 2013 -0500 @@ -104,8 +104,17 @@ ##INFO=<ID=SPLICESCORE,Number=1,Type=Integer,Description="number of nucleotides similar to GTAG at fusion splice"> ##INFO=<ID=GENE,Number=2,Type=String,Description="Gene Names at each breakend"> ##INFO=<ID=GENEID,Number=2,Type=String,Description="Gene IDs at each breakend"> +##INFO=<ID=GENELOC,Number=2,Type=String,Description="location of breakpoint releative to genes"> +##INFO=<ID=EXPR,Number=2,Type=Integer,Description="expression of genes as number of concordant pairs aligned to exons"> ##INFO=<ID=ORF,Number=0,Type=Flag,Description="fusion combines genes in a way that preserves a reading frame"> ##INFO=<ID=EXONBND,Number=0,Type=Flag,Description="fusion splice at exon boundaries"> +##INFO=<ID=INTERCHROM,Number=0,Type=Flag,Description="fusion produced by an interchromosomal translocation"> +##INFO=<ID=READTHROUGH,Number=0,Type=Flag,Description="fusion involving adjacent potentially resulting from co-transcription rather than genome rearrangement"> +##INFO=<ID=ADJACENT,Number=0,Type=Flag,Description="fusion between adjacent genes"> +##INFO=<ID=ALTSPLICE,Number=0,Type=Flag,Description="fusion likely the product of alternative splicing between adjacent genes"> +##INFO=<ID=DELETION,Number=0,Type=Flag,Description="fusion produced by a genomic deletion"> +##INFO=<ID=EVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic eversion"> +##INFO=<ID=INVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic inversion"> #CHROM POS ID REF ALT QUAL FILTER INFO\ """ @@ -193,6 +202,12 @@ gene_name1 = fields[columns.index('gene_name1')] gene_name2 = fields[columns.index('gene_name2')] gene_name_info = 'GENE=%s,%s' % (gene_name1,gene_name2) + gene_location1 = fields[columns.index('gene_location1')] + gene_location2 = fields[columns.index('gene_location2')] + gene_loc = 'GENELOC=%s,%s' % (gene_location1,gene_location2) + expression1 = int(fields[columns.index('expression1')]) + expression2 = int(fields[columns.index('expression2')]) + expr = 'EXPR=%d,%d' % (expression1,expression2) genomic_break_pos1 = int(fields[columns.index('genomic_break_pos1')]) genomic_break_pos2 = int(fields[columns.index('genomic_break_pos2')]) breakpoint_homology = int(fields[columns.index('breakpoint_homology')]) @@ -200,6 +215,12 @@ orf = fields[columns.index('orf')] == 'Y' exonboundaries = fields[columns.index('exonboundaries')] == 'Y' read_through = fields[columns.index('read_through')] == 'Y' + interchromosomal = fields[columns.index('interchromosomal')] == 'Y' + adjacent = fields[columns.index('adjacent')] == 'Y' + altsplice = fields[columns.index('altsplice')] == 'Y' + deletion = fields[columns.index('deletion')] == 'Y' + eversion = fields[columns.index('eversion')] == 'Y' + inversion = fields[columns.index('inversion')] == 'Y' span_count = int(fields[columns.index('span_count')]) splitr_count = int(fields[columns.index('splitr_count')]) splice_score = int(fields[columns.index('splice_score')]) @@ -215,11 +236,25 @@ alt1 = "%s%s%s:%d%s" % (ref1,b2,gene_chromosome2,genomic_break_pos2,b2) alt2 = "%s%s:%d%s%s" % (b1,gene_chromosome1,genomic_break_pos1,b1,ref2) #TODO evaluate what should be included in the INFO field - info = ['DP=%d' % (span_count + splitr_count),'SPLITCNT=%d' % splitr_count,'SPANCNT=%d' % span_count,gene_name_info,gene_info,homlen,'SPLICESCORE=%d' % splice_score] + info = ['DP=%d' % (span_count + splitr_count),'SPLITCNT=%d' % splitr_count,'SPANCNT=%d' % span_count,gene_name_info,gene_info,gene_loc,expr,homlen,'SPLICESCORE=%d' % splice_score] if orf: info.append('ORF') if exonboundaries: info.append('EXONBND') + if interchromosomal: + info.append('INTERCHROM') + if read_through: + info.append('READTHROUGH') + if adjacent: + info.append('ADJACENT') + if altsplice: + info.append('ALTSPLICE') + if deletion: + info.append('DELETION') + if eversion: + info.append('EVERSION') + if inversion: + info.append('INVERSION') info1 = [svtype,'MATEID=%s' % mate_id2] + info info2 = [svtype,'MATEID=%s' % mate_id1] + info qual = int(float(fields[columns.index('probability')]) * 255) if columns.index('probability') else '.'