Mercurial > repos > jjohnson > defuse
comparison defuse_trinity_analysis.py @ 39:90127ee1eae5
Fix defuse_trinity_analysis.py
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 12 Feb 2015 06:54:38 -0600 |
parents | 4353f776dfa3 |
children | ed07bcc39f6e |
comparison
equal
deleted
inserted
replaced
38:9c815a3721b3 | 39:90127ee1eae5 |
---|---|
177 fusion_type = 'inter' if fusion['gene_chromosome1'] != fusion['gene_chromosome2'] else 'intra' if abs(fusion['genomic_break_pos1'] - fusion['genomic_break_pos2']) > options.ticdist else 'TIC' | 177 fusion_type = 'inter' if fusion['gene_chromosome1'] != fusion['gene_chromosome2'] else 'intra' if abs(fusion['genomic_break_pos1'] - fusion['genomic_break_pos2']) > options.ticdist else 'TIC' |
178 fusion['fusion_type'] = fusion_type | 178 fusion['fusion_type'] = fusion_type |
179 fusion['transcripts'] = [] | 179 fusion['transcripts'] = [] |
180 fusion['Transcript'] = 'No' | 180 fusion['Transcript'] = 'No' |
181 fusion['Protein'] = 'No' | 181 fusion['Protein'] = 'No' |
182 print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fwd_seq,rev_seq,fusion_type,fusion['gene_name1'],fusion['gene_name2']) | 182 #print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fwd_seq,rev_seq,fusion_type,fusion['gene_name1'],fusion['gene_name2']) |
183 inputFile.close() | 183 inputFile.close() |
184 | 184 |
185 ## Process Trinity data and compare to deFuse | 185 ## Process Trinity data and compare to deFuse |
186 matched_transcripts = dict() | 186 matched_transcripts = dict() |
187 matched_orfs = dict() | 187 matched_orfs = dict() |
196 if fusion['fwd_seq'] in seq or fusion['rev_seq'] in seq: | 196 if fusion['fwd_seq'] in seq or fusion['rev_seq'] in seq: |
197 fusions_with_transcripts.add(i) | 197 fusions_with_transcripts.add(i) |
198 matched_transcripts[name] = seq | 198 matched_transcripts[name] = seq |
199 fusion['transcripts'].append(name) | 199 fusion['transcripts'].append(name) |
200 fusion['Transcript'] = 'Yes' | 200 fusion['Transcript'] = 'Yes' |
201 print >> sys.stdout, "fusions_with_transcripts: %d %s\n matched_transcripts: %d" % (len(fusions_with_transcripts),fusions_with_transcripts,len(matched_transcripts)) | 201 #print >> sys.stdout, "fusions_with_transcripts: %d %s\n matched_transcripts: %d" % (len(fusions_with_transcripts),fusions_with_transcripts,len(matched_transcripts)) |
202 for i,fusion in enumerate(fusions): | 202 print >> sys.stdout, "fusions_with_transcripts: %d unique_transcripts: %d" % (len(fusions_with_transcripts),len(matched_transcripts)) |
203 print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fusion['fwd_seq'],fusion['rev_seq'],fusion['fusion_type'],fusion['gene_name1'],fusion['gene_name2'], fusion['transcripts']) | 203 #for i,fusion in enumerate(fusions): |
204 # print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fusion['fwd_seq'],fusion['rev_seq'],fusion['fusion_type'],fusion['gene_name1'],fusion['gene_name2'], fusion['transcripts']) | |
204 ## Process ORFs and compare to matched deFuse and Trinity data. | 205 ## Process ORFs and compare to matched deFuse and Trinity data. |
205 ## Proteins must be at least 100 aa long, starting at the first "M" and must end with an "*". | 206 ## Proteins must be at least 100 aa long, starting at the first "M" and must end with an "*". |
206 if options.peptides: | 207 if options.peptides: |
207 with open(options.peptides) as fp: | 208 with open(options.peptides) as fp: |
208 for name, seq in read_fasta(fp): | 209 for name, seq in read_fasta(fp): |
219 if pep_len - start < options.min_pep_len: | 220 if pep_len - start < options.min_pep_len: |
220 continue | 221 continue |
221 fusions_with_orfs.add(i) | 222 fusions_with_orfs.add(i) |
222 matched_orfs[name] = seq | 223 matched_orfs[name] = seq |
223 fusion['Protein'] = 'Yes' | 224 fusion['Protein'] = 'Yes' |
225 """ | |
224 # fwd or reverse | 226 # fwd or reverse |
225 tx_seq = matched_transcripts(tx_id) | 227 tx_seq = matched_transcripts(tx_id) |
226 pos = tx_seq.find(fusion['fwd_seq']) | 228 pos = tx_seq.find(fusion['fwd_seq']) |
227 if pos < 0: | 229 if pos < 0: |
228 pos = tx_seq.find(fusion['rev_seq']) | 230 pos = tx_seq.find(fusion['rev_seq']) |
229 # locate fusion in transcript | 231 # locate fusion in transcript |
230 # locate fusion in ORF | 232 # locate fusion in ORF |
231 fusion['prior_pep_seq'] = '' | 233 fusion['prior_pep_seq'] = '' |
232 fusion['novel_pep_seq'] = '' | 234 fusion['novel_pep_seq'] = '' |
233 print >> sys.stdout, "fusions_with_orfs: %d %s\n matched_orfs: %d" % (len(fusions_with_orfs),fusions_with_orfs,len(matched_orfs)) | 235 """ |
236 #print >> sys.stdout, "fusions_with_orfs: %d %s\n matched_orfs: %d" % (len(fusions_with_orfs),fusions_with_orfs,len(matched_orfs)) | |
237 print >> sys.stdout, "fusions_with_orfs: %d unique_orfs: %d" % (len(fusions_with_orfs),len(matched_orfs)) | |
234 ## Write reports | 238 ## Write reports |
235 report_fields = ['gene_name1','gene_name2','span_count','probability','gene_chromosome1','gene_location1','gene_chromosome2','gene_location2','fusion_type','Transcript','Protein'] | 239 report_fields = ['gene_name1','gene_name2','span_count','probability','gene_chromosome1','gene_location1','gene_chromosome2','gene_location2','fusion_type','Transcript','Protein'] |
236 report_colnames = {'gene_name1':'Gene 1','gene_name2':'Gene 2','span_count':'Span cnt','probability':'Probability','gene_chromosome1':'From Chr','gene_location1':'Fusion point','gene_chromosome2':'To Chr','gene_location2':'Fusion point','fusion_type':'Type','Transcript':'Transcript?','Protein':'Protein?' } | 240 report_colnames = {'gene_name1':'Gene 1','gene_name2':'Gene 2','span_count':'Span cnt','probability':'Probability','gene_chromosome1':'From Chr','gene_location1':'Fusion point','gene_chromosome2':'To Chr','gene_location2':'Fusion point','fusion_type':'Type','Transcript':'Transcript?','Protein':'Protein?' } |
237 print >> outputFile,"%s\t%s" % ('#','\t'.join([report_colnames[x] for x in report_fields])) | 241 print >> outputFile,"%s\t%s" % ('#','\t'.join([report_colnames[x] for x in report_fields])) |
238 for i,fusion in enumerate(fusions): | 242 for i,fusion in enumerate(fusions): |