comparison ensembl_variant_report.py @ 8:fd612f8119a2 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/ensembl_variant_report commit f003aece24587f68b883449f278f1e601110e2ec-dirty
author jjohnson
date Tue, 17 Jul 2018 12:21:36 -0400
parents d5cb252c68da
children 0ef485da6ba6
comparison
equal deleted inserted replaced
7:d5cb252c68da 8:fd612f8119a2
94 ref = fields[ri] 94 ref = fields[ri]
95 alts = fields[ai] 95 alts = fields[ai]
96 dp = int(fields[di]) 96 dp = int(fields[di])
97 dpr = [int(x) for x in fields[fi].split(',')] 97 dpr = [int(x) for x in fields[fi].split(',')]
98 for i,alt in enumerate(alts.split(',')): 98 for i,alt in enumerate(alts.split(',')):
99 freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None 99 freq = float(dpr[i+1])/float(dp) if dp and dpr else \
100 float(dpr[i+1])/float(sum(dpr)) if dpr else None
100 yield (transcript,pos,ref,alt,dp,freq) 101 yield (transcript,pos,ref,alt,dp,freq)
101 102
102 def parse_snpeff_vcf(): 103 def parse_snpeff_vcf():
103 for linenum,line in enumerate(inputFile): 104 for linenum,line in enumerate(inputFile):
104 if line.startswith('##'): 105 if line.startswith('##'):
129 (alt,eff,impact,gene_name,gene_id,feature_type,transcript,biotype,exon,c_hgvs,p_hgvs,cdna,cds,aa,distance,info) = effect.split('|') 130 (alt,eff,impact,gene_name,gene_id,feature_type,transcript,biotype,exon,c_hgvs,p_hgvs,cdna,cds,aa,distance,info) = effect.split('|')
130 elif key == 'EFF': 131 elif key == 'EFF':
131 (eff, effs) = effect.rstrip(')').split('(') 132 (eff, effs) = effect.rstrip(')').split('(')
132 (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11] 133 (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11]
133 i = alt_list.index(alt) if alt in alt_list else 0 134 i = alt_list.index(alt) if alt in alt_list else 0
134 freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None 135 freq = float(dpr[i+1])/float(dp) if dp and dpr else \
136 float(dpr[i+1])/float(sum(dpr)) if dpr else None
135 yield (transcript,pos,ref,alt,dp,freq) 137 yield (transcript,pos,ref,alt,dp,freq)
136 138
137 139
138 #Process gene model 140 #Process gene model
139 ens_ref = None 141 ens_ref = None