diff kmer_analysis.py @ 19:385fc64fa988 draft default tip

Uploaded
author rlegendre
date Fri, 12 Jun 2015 11:32:59 -0400
parents a121cce43f90
children
line wrap: on
line diff
--- a/kmer_analysis.py	Tue Jun 09 09:06:17 2015 -0400
+++ b/kmer_analysis.py	Fri Jun 12 11:32:59 2015 -0400
@@ -210,34 +210,37 @@
             except IOError :
                 print tmpdir+"/assoCov_"+chrom+".txt doesn't exist"     
 
-            
-            ## if a gene without intron :
-            if GFF[gene]['exon_number'] == 1:
-                    
-                ## get coverage for each gene   
-                if GFF[gene]['strand'] == "+":
-                    for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1):
-                        cov.append(int((data[i].rstrip()).split("\t")[0]))
+            try:
+                ## if a gene without intron :
+                if GFF[gene]['exon_number'] == 1:
+                        
+                    ## get coverage for each gene   
+                    if GFF[gene]['strand'] == "+":
+                        for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1):
+                            cov.append(int((data[i].rstrip()).split("\t")[0]))
+                    else :
+                        for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1):
+                            cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-","")))
+                        cov.reverse()
                 else :
-                    for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1):
-                        cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-","")))
-                    cov.reverse()
-            else :
-                ## For each gene, get coverage and sum of exon size
-                if GFF[gene]['strand'] == "+":
-                        
-                    for exon in range(1,GFF[gene]['exon_number']+1) :
-                        for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
-                            #if i <= GFF[gene]['stop'] :
-                            cov.append(int((data[i].rstrip()).split("\t")[0]))  
-                else :
-                          
-                    for exon in range(1,GFF[gene]['exon_number']+1) :
-                        for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
-                            #if i <= GFF[gene]['start'] :
-                            cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-","")))
-                    cov.reverse()        
+                    ## For each gene, get coverage and sum of exon size
+                    if GFF[gene]['strand'] == "+":
                             
+                        for exon in range(1,GFF[gene]['exon_number']+1) :
+                            for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
+                                #if i <= GFF[gene]['stop'] :
+                                cov.append(int((data[i].rstrip()).split("\t")[0]))  
+                    else :
+                              
+                        for exon in range(1,GFF[gene]['exon_number']+1) :
+                            for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
+                                #if i <= GFF[gene]['start'] :
+                                cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-","")))
+                        cov.reverse()        
+            except :
+                #print gene+" could not be analysed."
+                #del GFF[gene]
+                continue          
             len_cov = len(cov)
             prop = [0,0,0]
             for nuc in range(0,len_cov-2,3) :