comparison defuse_trinity_analysis.py @ 39:90127ee1eae5

Fix defuse_trinity_analysis.py
author Jim Johnson <jj@umn.edu>
date Thu, 12 Feb 2015 06:54:38 -0600
parents 4353f776dfa3
children ed07bcc39f6e
comparison
equal deleted inserted replaced
38:9c815a3721b3 39:90127ee1eae5
177 fusion_type = 'inter' if fusion['gene_chromosome1'] != fusion['gene_chromosome2'] else 'intra' if abs(fusion['genomic_break_pos1'] - fusion['genomic_break_pos2']) > options.ticdist else 'TIC' 177 fusion_type = 'inter' if fusion['gene_chromosome1'] != fusion['gene_chromosome2'] else 'intra' if abs(fusion['genomic_break_pos1'] - fusion['genomic_break_pos2']) > options.ticdist else 'TIC'
178 fusion['fusion_type'] = fusion_type 178 fusion['fusion_type'] = fusion_type
179 fusion['transcripts'] = [] 179 fusion['transcripts'] = []
180 fusion['Transcript'] = 'No' 180 fusion['Transcript'] = 'No'
181 fusion['Protein'] = 'No' 181 fusion['Protein'] = 'No'
182 print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fwd_seq,rev_seq,fusion_type,fusion['gene_name1'],fusion['gene_name2']) 182 #print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fwd_seq,rev_seq,fusion_type,fusion['gene_name1'],fusion['gene_name2'])
183 inputFile.close() 183 inputFile.close()
184 184
185 ## Process Trinity data and compare to deFuse 185 ## Process Trinity data and compare to deFuse
186 matched_transcripts = dict() 186 matched_transcripts = dict()
187 matched_orfs = dict() 187 matched_orfs = dict()
196 if fusion['fwd_seq'] in seq or fusion['rev_seq'] in seq: 196 if fusion['fwd_seq'] in seq or fusion['rev_seq'] in seq:
197 fusions_with_transcripts.add(i) 197 fusions_with_transcripts.add(i)
198 matched_transcripts[name] = seq 198 matched_transcripts[name] = seq
199 fusion['transcripts'].append(name) 199 fusion['transcripts'].append(name)
200 fusion['Transcript'] = 'Yes' 200 fusion['Transcript'] = 'Yes'
201 print >> sys.stdout, "fusions_with_transcripts: %d %s\n matched_transcripts: %d" % (len(fusions_with_transcripts),fusions_with_transcripts,len(matched_transcripts)) 201 #print >> sys.stdout, "fusions_with_transcripts: %d %s\n matched_transcripts: %d" % (len(fusions_with_transcripts),fusions_with_transcripts,len(matched_transcripts))
202 for i,fusion in enumerate(fusions): 202 print >> sys.stdout, "fusions_with_transcripts: %d unique_transcripts: %d" % (len(fusions_with_transcripts),len(matched_transcripts))
203 print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fusion['fwd_seq'],fusion['rev_seq'],fusion['fusion_type'],fusion['gene_name1'],fusion['gene_name2'], fusion['transcripts']) 203 #for i,fusion in enumerate(fusions):
204 # print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fusion['fwd_seq'],fusion['rev_seq'],fusion['fusion_type'],fusion['gene_name1'],fusion['gene_name2'], fusion['transcripts'])
204 ## Process ORFs and compare to matched deFuse and Trinity data. 205 ## Process ORFs and compare to matched deFuse and Trinity data.
205 ## Proteins must be at least 100 aa long, starting at the first "M" and must end with an "*". 206 ## Proteins must be at least 100 aa long, starting at the first "M" and must end with an "*".
206 if options.peptides: 207 if options.peptides:
207 with open(options.peptides) as fp: 208 with open(options.peptides) as fp:
208 for name, seq in read_fasta(fp): 209 for name, seq in read_fasta(fp):
219 if pep_len - start < options.min_pep_len: 220 if pep_len - start < options.min_pep_len:
220 continue 221 continue
221 fusions_with_orfs.add(i) 222 fusions_with_orfs.add(i)
222 matched_orfs[name] = seq 223 matched_orfs[name] = seq
223 fusion['Protein'] = 'Yes' 224 fusion['Protein'] = 'Yes'
225 """
224 # fwd or reverse 226 # fwd or reverse
225 tx_seq = matched_transcripts(tx_id) 227 tx_seq = matched_transcripts(tx_id)
226 pos = tx_seq.find(fusion['fwd_seq']) 228 pos = tx_seq.find(fusion['fwd_seq'])
227 if pos < 0: 229 if pos < 0:
228 pos = tx_seq.find(fusion['rev_seq']) 230 pos = tx_seq.find(fusion['rev_seq'])
229 # locate fusion in transcript 231 # locate fusion in transcript
230 # locate fusion in ORF 232 # locate fusion in ORF
231 fusion['prior_pep_seq'] = '' 233 fusion['prior_pep_seq'] = ''
232 fusion['novel_pep_seq'] = '' 234 fusion['novel_pep_seq'] = ''
233 print >> sys.stdout, "fusions_with_orfs: %d %s\n matched_orfs: %d" % (len(fusions_with_orfs),fusions_with_orfs,len(matched_orfs)) 235 """
236 #print >> sys.stdout, "fusions_with_orfs: %d %s\n matched_orfs: %d" % (len(fusions_with_orfs),fusions_with_orfs,len(matched_orfs))
237 print >> sys.stdout, "fusions_with_orfs: %d unique_orfs: %d" % (len(fusions_with_orfs),len(matched_orfs))
234 ## Write reports 238 ## Write reports
235 report_fields = ['gene_name1','gene_name2','span_count','probability','gene_chromosome1','gene_location1','gene_chromosome2','gene_location2','fusion_type','Transcript','Protein'] 239 report_fields = ['gene_name1','gene_name2','span_count','probability','gene_chromosome1','gene_location1','gene_chromosome2','gene_location2','fusion_type','Transcript','Protein']
236 report_colnames = {'gene_name1':'Gene 1','gene_name2':'Gene 2','span_count':'Span cnt','probability':'Probability','gene_chromosome1':'From Chr','gene_location1':'Fusion point','gene_chromosome2':'To Chr','gene_location2':'Fusion point','fusion_type':'Type','Transcript':'Transcript?','Protein':'Protein?' } 240 report_colnames = {'gene_name1':'Gene 1','gene_name2':'Gene 2','span_count':'Span cnt','probability':'Probability','gene_chromosome1':'From Chr','gene_location1':'Fusion point','gene_chromosome2':'To Chr','gene_location2':'Fusion point','fusion_type':'Type','Transcript':'Transcript?','Protein':'Protein?' }
237 print >> outputFile,"%s\t%s" % ('#','\t'.join([report_colnames[x] for x in report_fields])) 241 print >> outputFile,"%s\t%s" % ('#','\t'.join([report_colnames[x] for x in report_fields]))
238 for i,fusion in enumerate(fusions): 242 for i,fusion in enumerate(fusions):