diff defuse_results_to_vcf.py @ 27:d57fcac025e2

Add more info fields to defuse_results_to_vcf.py
author Jim Johnson <jj@umn.edu>
date Wed, 14 Aug 2013 16:44:18 -0500
parents 2ecf82136986
children f51a95bdc38e
line wrap: on
line diff
--- a/defuse_results_to_vcf.py	Fri Aug 09 11:36:24 2013 -0500
+++ b/defuse_results_to_vcf.py	Wed Aug 14 16:44:18 2013 -0500
@@ -104,8 +104,17 @@
 ##INFO=<ID=SPLICESCORE,Number=1,Type=Integer,Description="number of nucleotides similar to GTAG at fusion splice">
 ##INFO=<ID=GENE,Number=2,Type=String,Description="Gene Names at each breakend">
 ##INFO=<ID=GENEID,Number=2,Type=String,Description="Gene IDs at each breakend">
+##INFO=<ID=GENELOC,Number=2,Type=String,Description="location of breakpoint releative to genes">
+##INFO=<ID=EXPR,Number=2,Type=Integer,Description="expression of genes as number of concordant pairs aligned to exons">
 ##INFO=<ID=ORF,Number=0,Type=Flag,Description="fusion combines genes in a way that preserves a reading frame">
 ##INFO=<ID=EXONBND,Number=0,Type=Flag,Description="fusion splice at exon boundaries">
+##INFO=<ID=INTERCHROM,Number=0,Type=Flag,Description="fusion produced by an interchromosomal translocation">
+##INFO=<ID=READTHROUGH,Number=0,Type=Flag,Description="fusion involving adjacent potentially resulting from co-transcription rather than genome rearrangement">
+##INFO=<ID=ADJACENT,Number=0,Type=Flag,Description="fusion between adjacent genes">
+##INFO=<ID=ALTSPLICE,Number=0,Type=Flag,Description="fusion likely the product of alternative splicing between adjacent genes">
+##INFO=<ID=DELETION,Number=0,Type=Flag,Description="fusion produced by a genomic deletion">
+##INFO=<ID=EVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic eversion">
+##INFO=<ID=INVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic inversion">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO\
 """
 
@@ -193,6 +202,12 @@
       gene_name1 = fields[columns.index('gene_name1')]
       gene_name2 = fields[columns.index('gene_name2')]
       gene_name_info = 'GENE=%s,%s' % (gene_name1,gene_name2)
+      gene_location1 = fields[columns.index('gene_location1')]
+      gene_location2 = fields[columns.index('gene_location2')]
+      gene_loc = 'GENELOC=%s,%s' % (gene_location1,gene_location2)
+      expression1 = int(fields[columns.index('expression1')])
+      expression2 = int(fields[columns.index('expression2')])
+      expr = 'EXPR=%d,%d' % (expression1,expression2)
       genomic_break_pos1 = int(fields[columns.index('genomic_break_pos1')])
       genomic_break_pos2 = int(fields[columns.index('genomic_break_pos2')])
       breakpoint_homology = int(fields[columns.index('breakpoint_homology')])
@@ -200,6 +215,12 @@
       orf = fields[columns.index('orf')] == 'Y'
       exonboundaries = fields[columns.index('exonboundaries')] == 'Y'
       read_through = fields[columns.index('read_through')] == 'Y'
+      interchromosomal = fields[columns.index('interchromosomal')] == 'Y'
+      adjacent = fields[columns.index('adjacent')] == 'Y'
+      altsplice = fields[columns.index('altsplice')] == 'Y'
+      deletion = fields[columns.index('deletion')] == 'Y'
+      eversion = fields[columns.index('eversion')] == 'Y'
+      inversion = fields[columns.index('inversion')] == 'Y'
       span_count = int(fields[columns.index('span_count')])
       splitr_count = int(fields[columns.index('splitr_count')])
       splice_score = int(fields[columns.index('splice_score')])
@@ -215,11 +236,25 @@
       alt1 = "%s%s%s:%d%s" %  (ref1,b2,gene_chromosome2,genomic_break_pos2,b2) 
       alt2 = "%s%s:%d%s%s" %  (b1,gene_chromosome1,genomic_break_pos1,b1,ref2) 
       #TODO evaluate what should be included in the INFO field
-      info = ['DP=%d' % (span_count + splitr_count),'SPLITCNT=%d' % splitr_count,'SPANCNT=%d' % span_count,gene_name_info,gene_info,homlen,'SPLICESCORE=%d' % splice_score]
+      info = ['DP=%d' % (span_count + splitr_count),'SPLITCNT=%d' % splitr_count,'SPANCNT=%d' % span_count,gene_name_info,gene_info,gene_loc,expr,homlen,'SPLICESCORE=%d' % splice_score]
       if orf:
         info.append('ORF')
       if exonboundaries:
         info.append('EXONBND')
+      if interchromosomal:
+        info.append('INTERCHROM')
+      if read_through:
+        info.append('READTHROUGH')
+      if adjacent:
+        info.append('ADJACENT')
+      if altsplice:
+        info.append('ALTSPLICE')
+      if deletion:
+        info.append('DELETION')
+      if eversion:
+        info.append('EVERSION')
+      if inversion:
+        info.append('INVERSION')
       info1 = [svtype,'MATEID=%s' % mate_id2] + info
       info2 = [svtype,'MATEID=%s' % mate_id1] + info
       qual = int(float(fields[columns.index('probability')]) * 255) if columns.index('probability') else '.'