diff kmer_analysis.py @ 13:7c944fd9907e draft

release 2
author rlegendre
date Thu, 09 Apr 2015 11:35:48 -0400
parents 707807fee542
children a121cce43f90
line wrap: on
line diff
--- a/kmer_analysis.py	Fri Jan 23 03:31:37 2015 -0500
+++ b/kmer_analysis.py	Thu Apr 09 11:35:48 2015 -0400
@@ -80,6 +80,10 @@
     '''
     global total_mapped_read
     
+    ## tags by default
+    multi_tag = "XS:i:"
+    tag = "IH:i:1"
+    
     ## init kmer dict
     KMER = OrderedDict({})
     
@@ -103,10 +107,10 @@
                             multi_tag = "XS:i:"
                         elif 'bwa' in  line:
                             multi_tag = "XT:A:R"
-                        #elif 'TopHat' in  line:
-                        #    multi_tag = "NH:i:1"
+                        elif 'TopHat' in  line:
+                            tag = "NH:i:1"
                         else :
-                            stop_err("No PG tag find in "+samfile+". Please use bowtie or bwa for mapping")  
+                            stop_err("No PG tag find in "+samfile+". Please use bowtie, bwa or Tophat for mapping")  
                         
                     # get footprint
                     elif re.search('^[^@].+', line) :
@@ -121,7 +125,7 @@
                         read_pos = int(line.split('\t')[3])
                         read_sens = int(line.split('\t')[1])
                         #len_read = len(line.split('\t')[9])
-                        if len_read == kmer and multi_tag not in line:
+                        if len_read == kmer and (tag in line or multi_tag not in line):
                         ###if line.split('\t')[5] == '28M' :
                             total_mapped_read +=1
                             #if it's a forward read
@@ -399,7 +403,7 @@
         #GFF = ribo_functions.store_gtf(options.gff)
         ## check gff reading
         if not GFF['order'] :
-            stop_err( 'Incorrect GFF file' + str( e ) )
+            stop_err( 'Incorrect GFF file' )
         
         ## split bam
         split_bam(options.bamfile,tmpdir)
@@ -416,12 +420,12 @@
         ## compute analysis with other kmer
         for keys in kmer.iterkeys() :
             if keys != 28 :
-                ## remove all txt files in tmp directory
-                if os.system("rm "+tmpdir+"/*.txt") != 0 :
-                    stop_err( 'Error during tmp directory cleaning : ' + str( e ) )
-                    
+ 
                 ## If not enought reads in this kmer :
                 if kmer[keys] > 100 :
+                    ## remove all txt files in tmp directory
+                    if os.system("rm "+tmpdir+"/*.txt") != 0 :
+                        stop_err( 'Error during tmp directory cleaning')
                     ## compute coverage and distribution kmer
                     tmp = get_first_base(tmpdir, keys)
                     ## compute phasing