# HG changeset patch # User Jim Johnson # Date 1423745678 21600 # Node ID 90127ee1eae5bb18c9c1212709f01cef1ae236ce # Parent 9c815a3721b3c84b1c6c2c8141a7bba423f8ed31 Fix defuse_trinity_analysis.py diff -r 9c815a3721b3 -r 90127ee1eae5 defuse_trinity_analysis.py --- a/defuse_trinity_analysis.py Wed Feb 11 08:24:09 2015 -0600 +++ b/defuse_trinity_analysis.py Thu Feb 12 06:54:38 2015 -0600 @@ -179,7 +179,7 @@ fusion['transcripts'] = [] fusion['Transcript'] = 'No' fusion['Protein'] = 'No' - print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fwd_seq,rev_seq,fusion_type,fusion['gene_name1'],fusion['gene_name2']) + #print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fwd_seq,rev_seq,fusion_type,fusion['gene_name1'],fusion['gene_name2']) inputFile.close() ## Process Trinity data and compare to deFuse @@ -198,9 +198,10 @@ matched_transcripts[name] = seq fusion['transcripts'].append(name) fusion['Transcript'] = 'Yes' - print >> sys.stdout, "fusions_with_transcripts: %d %s\n matched_transcripts: %d" % (len(fusions_with_transcripts),fusions_with_transcripts,len(matched_transcripts)) - for i,fusion in enumerate(fusions): - print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fusion['fwd_seq'],fusion['rev_seq'],fusion['fusion_type'],fusion['gene_name1'],fusion['gene_name2'], fusion['transcripts']) + #print >> sys.stdout, "fusions_with_transcripts: %d %s\n matched_transcripts: %d" % (len(fusions_with_transcripts),fusions_with_transcripts,len(matched_transcripts)) + print >> sys.stdout, "fusions_with_transcripts: %d unique_transcripts: %d" % (len(fusions_with_transcripts),len(matched_transcripts)) + #for i,fusion in enumerate(fusions): + # print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fusion['fwd_seq'],fusion['rev_seq'],fusion['fusion_type'],fusion['gene_name1'],fusion['gene_name2'], fusion['transcripts']) ## Process ORFs and compare to matched deFuse and Trinity data. ## Proteins must be at least 100 aa long, starting at the first "M" and must end with an "*". if options.peptides: @@ -221,6 +222,7 @@ fusions_with_orfs.add(i) matched_orfs[name] = seq fusion['Protein'] = 'Yes' + """ # fwd or reverse tx_seq = matched_transcripts(tx_id) pos = tx_seq.find(fusion['fwd_seq']) @@ -230,7 +232,9 @@ # locate fusion in ORF fusion['prior_pep_seq'] = '' fusion['novel_pep_seq'] = '' - print >> sys.stdout, "fusions_with_orfs: %d %s\n matched_orfs: %d" % (len(fusions_with_orfs),fusions_with_orfs,len(matched_orfs)) + """ + #print >> sys.stdout, "fusions_with_orfs: %d %s\n matched_orfs: %d" % (len(fusions_with_orfs),fusions_with_orfs,len(matched_orfs)) + print >> sys.stdout, "fusions_with_orfs: %d unique_orfs: %d" % (len(fusions_with_orfs),len(matched_orfs)) ## Write reports report_fields = ['gene_name1','gene_name2','span_count','probability','gene_chromosome1','gene_location1','gene_chromosome2','gene_location2','fusion_type','Transcript','Protein'] report_colnames = {'gene_name1':'Gene 1','gene_name2':'Gene 2','span_count':'Span cnt','probability':'Probability','gene_chromosome1':'From Chr','gene_location1':'Fusion point','gene_chromosome2':'To Chr','gene_location2':'Fusion point','fusion_type':'Type','Transcript':'Transcript?','Protein':'Protein?' }