comparison ensembl_variant_report.py @ 9:0ef485da6ba6 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/ensembl_variant_report commit 6b6e5c13531bf909c4c70b7f8f9e28b4206d9068-dirty
author jjohnson
date Mon, 18 Mar 2019 21:44:39 -0400
parents fd612f8119a2
children d367a472e8a1
comparison
equal deleted inserted replaced
8:fd612f8119a2 9:0ef485da6ba6
114 alt_list = alts.split(',') 114 alt_list = alts.split(',')
115 pos = int(pos) 115 pos = int(pos)
116 qual = float(qual) 116 qual = float(qual)
117 dp = None 117 dp = None
118 dpr = None 118 dpr = None
119 af = None
119 for info_item in info.split(';'): 120 for info_item in info.split(';'):
120 if info_item.find('=') < 0: continue 121 if info_item.find('=') < 0: continue
121 (key, val) = info_item.split('=', 1) 122 (key, val) = info_item.split('=', 1)
122 if key == 'DP': 123 if key == 'DP':
123 dp = int(val) 124 dp = int(val)
124 if key == 'DPR': 125 if key == 'DPR' or key == 'AD':
125 dpr = [int(x) for x in val.split(',')] 126 dpr = [int(x) for x in val.split(',')]
127 if key == 'AF':
128 af = [float(x) for x in val.split(',')]
126 if key in ['EFF','ANN']: 129 if key in ['EFF','ANN']:
127 for effect in val.split(','): 130 for effect in val.split(','):
128 if options.debug: print >> sys.stderr, "\n%s" % (effect.split('|')) 131 if options.debug: print >> sys.stderr, "\n%s" % (effect.split('|'))
129 if key == 'ANN': 132 if key == 'ANN':
130 (alt,eff,impact,gene_name,gene_id,feature_type,transcript,biotype,exon,c_hgvs,p_hgvs,cdna,cds,aa,distance,info) = effect.split('|') 133 (alt,eff,impact,gene_name,gene_id,feature_type,transcript,biotype,exon,c_hgvs,p_hgvs,cdna,cds,aa,distance,info) = effect.split('|')
131 elif key == 'EFF': 134 elif key == 'EFF':
132 (eff, effs) = effect.rstrip(')').split('(') 135 (eff, effs) = effect.rstrip(')').split('(')
133 (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11] 136 (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11]
134 i = alt_list.index(alt) if alt in alt_list else 0 137 i = alt_list.index(alt) if alt in alt_list else 0
135 freq = float(dpr[i+1])/float(dp) if dp and dpr else \ 138 if af:
136 float(dpr[i+1])/float(sum(dpr)) if dpr else None 139 freq = af[i]
137 yield (transcript,pos,ref,alt,dp,freq) 140 elif dpr:
141 freq = float(dpr[i+1])/float(dp) if dp else \
142 float(dpr[i+1])/float(sum(dpr))
143 else:
144 freq = None
145 if freq:
146 yield (transcript,pos,ref,alt,dp,freq)
138 147
139 148
140 #Process gene model 149 #Process gene model
141 ens_ref = None 150 ens_ref = None
142 if options.gene_model != None: 151 if options.gene_model != None:
179 pos0 = pos1 - 1 # zero based position 188 pos0 = pos1 - 1 # zero based position
180 spos = pos0 if tx.gene.strand else pos0 + len(ref) - 1 189 spos = pos0 if tx.gene.strand else pos0 + len(ref) - 1
181 alt_seq = alt if tx.gene.strand else reverse_complement(alt) 190 alt_seq = alt if tx.gene.strand else reverse_complement(alt)
182 ref_seq = ref if tx.gene.strand else reverse_complement(ref) 191 ref_seq = ref if tx.gene.strand else reverse_complement(ref)
183 cds_pos = ens_ref.genome_to_cds_pos(tid, spos) 192 cds_pos = ens_ref.genome_to_cds_pos(tid, spos)
193 if cds_pos is None:
194 continue
184 if options.debug: print >> sys.stderr, "cds_pos: %s" % (str(cds_pos)) 195 if options.debug: print >> sys.stderr, "cds_pos: %s" % (str(cds_pos))
185 alt_cds = cds[:cds_pos] + alt_seq + cds[cds_pos+len(ref):] if cds_pos+len(ref) < len(cds) else '' 196 alt_cds = cds[:cds_pos] + alt_seq + cds[cds_pos+len(ref):] if cds_pos+len(ref) < len(cds) else ''
186 offset = 0 197 offset = 0
187 if tx.gene.strand: 198 if tx.gene.strand:
188 for i in range(min(len(ref),len(alt))): 199 for i in range(min(len(ref),len(alt))):