# HG changeset patch
# User Jim Johnson <jj@umn.edu>
# Date 1423745678 21600
# Node ID 90127ee1eae5bb18c9c1212709f01cef1ae236ce
# Parent  9c815a3721b3c84b1c6c2c8141a7bba423f8ed31
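Fix defuse_trinity_analysis.py: quiet debug output, disable unfinished ORF code

- Comment out the per-fusion debug listings written to stdout.
- Report only counts for fusions with transcripts/ORFs and for unique
  matched transcripts/ORFs, instead of dumping the full index sets.
- Disable the unfinished "locate fusion in ORF" section by wrapping it in a
  string literal.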

diff -r 9c815a3721b3 -r 90127ee1eae5 defuse_trinity_analysis.py
--- a/defuse_trinity_analysis.py	Wed Feb 11 08:24:09 2015 -0600
+++ b/defuse_trinity_analysis.py	Thu Feb 12 06:54:38 2015 -0600
@@ -179,7 +179,7 @@
       fusion['transcripts'] = []
       fusion['Transcript'] = 'No'
       fusion['Protein'] = 'No'
-      print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fwd_seq,rev_seq,fusion_type,fusion['gene_name1'],fusion['gene_name2'])
+      #print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fwd_seq,rev_seq,fusion_type,fusion['gene_name1'],fusion['gene_name2'])
   inputFile.close()
 
   ## Process Trinity data and compare to deFuse
@@ -198,9 +198,10 @@
             matched_transcripts[name] = seq
             fusion['transcripts'].append(name)
             fusion['Transcript'] = 'Yes'
-    print >> sys.stdout, "fusions_with_transcripts: %d  %s\n matched_transcripts: %d" % (len(fusions_with_transcripts),fusions_with_transcripts,len(matched_transcripts))
-    for i,fusion in enumerate(fusions):
-      print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fusion['fwd_seq'],fusion['rev_seq'],fusion['fusion_type'],fusion['gene_name1'],fusion['gene_name2'], fusion['transcripts'])
+    #print >> sys.stdout, "fusions_with_transcripts: %d  %s\n matched_transcripts: %d" % (len(fusions_with_transcripts),fusions_with_transcripts,len(matched_transcripts))
+    print >> sys.stdout, "fusions_with_transcripts: %d unique_transcripts: %d" % (len(fusions_with_transcripts),len(matched_transcripts))
+    #for i,fusion in enumerate(fusions):
+    #  print >> sys.stdout, "%4d\t%6s\t%s\t%s\t%s\t%s\t%s\t%s" % (i,fusion['cluster_id'],fusion['fwd_seq'],fusion['rev_seq'],fusion['fusion_type'],fusion['gene_name1'],fusion['gene_name2'], fusion['transcripts'])
     ## Process ORFs and compare to matched deFuse and Trinity data.
     ## Proteins must be at least 100 aa long, starting at the first "M" and must end with an "*".
     if options.peptides: 
@@ -221,6 +222,7 @@
                   fusions_with_orfs.add(i)
                   matched_orfs[name] = seq
                   fusion['Protein'] = 'Yes'
+                  """
                   # fwd or reverse
                   tx_seq = matched_transcripts(tx_id)
                   pos = tx_seq.find(fusion['fwd_seq'])
@@ -230,7 +232,9 @@
                   # locate fusion in ORF
                   fusion['prior_pep_seq'] = ''
                   fusion['novel_pep_seq'] = ''
-      print >> sys.stdout, "fusions_with_orfs: %d  %s\n matched_orfs: %d" % (len(fusions_with_orfs),fusions_with_orfs,len(matched_orfs))
+                  """
+      #print >> sys.stdout, "fusions_with_orfs: %d  %s\n matched_orfs: %d" % (len(fusions_with_orfs),fusions_with_orfs,len(matched_orfs))
+      print >> sys.stdout, "fusions_with_orfs: %d  unique_orfs: %d" % (len(fusions_with_orfs),len(matched_orfs))
   ## Write reports
   report_fields = ['gene_name1','gene_name2','span_count','probability','gene_chromosome1','gene_location1','gene_chromosome2','gene_location2','fusion_type','Transcript','Protein']
   report_colnames = {'gene_name1':'Gene 1','gene_name2':'Gene 2','span_count':'Span cnt','probability':'Probability','gene_chromosome1':'From Chr','gene_location1':'Fusion point','gene_chromosome2':'To Chr','gene_location2':'Fusion point','fusion_type':'Type','Transcript':'Transcript?','Protein':'Protein?' }