defuse: defuse_results_to_vcf.py comparison

comparison defuse_results_to_vcf.py @ 27:d57fcac025e2

Add more info fields to defuse_results_to_vcf.py

author	Jim Johnson <jj@umn.edu>
date	Wed, 14 Aug 2013 16:44:18 -0500
parents	2ecf82136986
children	f51a95bdc38e

comparison

equal deleted inserted replaced

-:8f0775c43739
+:d57fcac025e2
 ##INFO=<ID=SPANCNT,Number=1,Type=Integer,Description="number of spanning reads supporting the fusion">
 ##INFO=<ID=HOMLEN,Number=1,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
 ##INFO=<ID=SPLICESCORE,Number=1,Type=Integer,Description="number of nucleotides similar to GTAG at fusion splice">
 ##INFO=<ID=GENE,Number=2,Type=String,Description="Gene Names at each breakend">
 ##INFO=<ID=GENEID,Number=2,Type=String,Description="Gene IDs at each breakend">
+##INFO=<ID=GENELOC,Number=2,Type=String,Description="location of breakpoint relative to genes">
+##INFO=<ID=EXPR,Number=2,Type=Integer,Description="expression of genes as number of concordant pairs aligned to exons">
 ##INFO=<ID=ORF,Number=0,Type=Flag,Description="fusion combines genes in a way that preserves a reading frame">
 ##INFO=<ID=EXONBND,Number=0,Type=Flag,Description="fusion splice at exon boundaries">
+##INFO=<ID=INTERCHROM,Number=0,Type=Flag,Description="fusion produced by an interchromosomal translocation">
+##INFO=<ID=READTHROUGH,Number=0,Type=Flag,Description="fusion involving adjacent genes potentially resulting from co-transcription rather than genome rearrangement">
+##INFO=<ID=ADJACENT,Number=0,Type=Flag,Description="fusion between adjacent genes">
+##INFO=<ID=ALTSPLICE,Number=0,Type=Flag,Description="fusion likely the product of alternative splicing between adjacent genes">
+##INFO=<ID=DELETION,Number=0,Type=Flag,Description="fusion produced by a genomic deletion">
+##INFO=<ID=EVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic eversion">
+##INFO=<ID=INVERSION,Number=0,Type=Flag,Description="fusion produced by a genomic inversion">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO\
 """
 def cmp_alphanumeric(s1,s2):
 if s1 == s2:
 gene2 = fields[columns.index('gene2')]
 gene_info = 'GENEID=%s,%s' % (gene1,gene2)
 gene_name1 = fields[columns.index('gene_name1')]
 gene_name2 = fields[columns.index('gene_name2')]
 gene_name_info = 'GENE=%s,%s' % (gene_name1,gene_name2)
+gene_location1 = fields[columns.index('gene_location1')]
+gene_location2 = fields[columns.index('gene_location2')]
+gene_loc = 'GENELOC=%s,%s' % (gene_location1,gene_location2)
+expression1 = int(fields[columns.index('expression1')])
+expression2 = int(fields[columns.index('expression2')])
+expr = 'EXPR=%d,%d' % (expression1,expression2)
 genomic_break_pos1 = int(fields[columns.index('genomic_break_pos1')])
 genomic_break_pos2 = int(fields[columns.index('genomic_break_pos2')])
 breakpoint_homology = int(fields[columns.index('breakpoint_homology')])
 homlen = 'HOMLEN=%s' % breakpoint_homology
 orf = fields[columns.index('orf')] == 'Y'
 exonboundaries = fields[columns.index('exonboundaries')] == 'Y'
 read_through = fields[columns.index('read_through')] == 'Y'
+interchromosomal = fields[columns.index('interchromosomal')] == 'Y'
+adjacent = fields[columns.index('adjacent')] == 'Y'
+altsplice = fields[columns.index('altsplice')] == 'Y'
+deletion = fields[columns.index('deletion')] == 'Y'
+eversion = fields[columns.index('eversion')] == 'Y'
+inversion = fields[columns.index('inversion')] == 'Y'
 span_count = int(fields[columns.index('span_count')])
 splitr_count = int(fields[columns.index('splitr_count')])
 splice_score = int(fields[columns.index('splice_score')])
 probability = fields[columns.index('probability')] if columns.index('probability') else '.'
 splitr_sequence = fields[columns.index('splitr_sequence')]
 b1 = '[' if genomic_strand1 == '+' else ']'
 b2 = '[' if genomic_strand2 == '+' else ']'
 alt1 = "%s%s%s:%d%s" %  (ref1,b2,gene_chromosome2,genomic_break_pos2,b2)
 alt2 = "%s%s:%d%s%s" %  (b1,gene_chromosome1,genomic_break_pos1,b1,ref2)
 #TODO evaluate what should be included in the INFO field
-info = ['DP=%d' % (span_count + splitr_count),'SPLITCNT=%d' % splitr_count,'SPANCNT=%d' % span_count,gene_name_info,gene_info,homlen,'SPLICESCORE=%d' % splice_score]
+info = ['DP=%d' % (span_count + splitr_count),'SPLITCNT=%d' % splitr_count,'SPANCNT=%d' % span_count,gene_name_info,gene_info,gene_loc,expr,homlen,'SPLICESCORE=%d' % splice_score]
 if orf:
 info.append('ORF')
 if exonboundaries:
 info.append('EXONBND')
+if interchromosomal:
+info.append('INTERCHROM')
+if read_through:
+info.append('READTHROUGH')
+if adjacent:
+info.append('ADJACENT')
+if altsplice:
+info.append('ALTSPLICE')
+if deletion:
+info.append('DELETION')
+if eversion:
+info.append('EVERSION')
+if inversion:
+info.append('INVERSION')
 info1 = [svtype,'MATEID=%s' % mate_id2] + info
 info2 = [svtype,'MATEID=%s' % mate_id1] + info
 qual = int(float(fields[columns.index('probability')]) * 255) if columns.index('probability') else '.'
 vcf1 = '%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s'% (gene_chromosome1,genomic_break_pos1, mate_id1, ref1, alt1, qual, filt, ';'.join(info1) )
 vcf2 = '%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s'% (gene_chromosome2,genomic_break_pos2, mate_id2, ref2, alt2, qual, filt, ';'.join(info2) )

Mercurial > repos > jjohnson > defuse

comparison defuse_results_to_vcf.py @ 27:d57fcac025e2