diff metagene_readthrough.py @ 13:7c944fd9907e draft

release 2
author rlegendre
date Thu, 09 Apr 2015 11:35:48 -0400
parents 707807fee542
children 344bacf6acb8
line wrap: on
line diff
--- a/metagene_readthrough.py	Fri Jan 23 03:31:37 2015 -0500
+++ b/metagene_readthrough.py	Thu Apr 09 11:35:48 2015 -0400
@@ -665,44 +665,6 @@
         stop_err( 'Error during computing analysis : ' + str( e ) ) 
 
 
-def cleaning_bam(bam):
-    '''
-        Remove reads unmapped, non uniquely mapped and reads with length lower than 25 and upper than 32, and mapping quality upper than 12
-    '''
-    try :
-        header = subprocess.check_output(['samtools', 'view', '-H', bam], stderr= subprocess.PIPE)
-        #header = results.split('\n')
-        
-        # check mapper for define multiple tag
-        if re.search('bowtie', header):
-            multi_tag = "XS:i:"
-        elif re.search('bwa', header):
-            multi_tag = "XT:A:M"
-        else :
-            stop_err("No PG tag find in"+bam+". Please use bowtie or bwa for mapping")
-        
-        tmp_sam = tempfile.mktemp()
-        cmd = "samtools view %s > %s" % (bam, tmp_sam) 
-        proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE)
-        returncode = proc.wait()
-        
-        
-        with open(tempfile.mktemp(),'w') as out :
-            out.write(header)
-            with open(tmp_sam,'r') as sam :
-                for line in sam :
-                    if multi_tag not in line and line.split('\t')[1] != '4' and int(line.split('\t')[4]) > 12 :
-                        if len(line.split('\t')[9]) < 32 and len(line.split('\t')[9]) > 25 :
-                            out.write(line)
-        bamfile = tempfile.mktemp()+'.bam'   
-        cmd = "samtools view -hSb %s > %s" % (out.name,bamfile)
-        proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE)
-        returncode = proc.wait()
-        
-        return bamfile
-    
-    except Exception,e:
-        stop_err( 'Error during cleaning bam : ' + str( e ) ) 
     
 def write_html_page(html,subfolder) :
         
@@ -1004,8 +966,8 @@
         #GFF = ribo_functions.store_gtf(options.gff)
         ## check gff reading
         if not GFF['order'] :
-            stop_err( 'Incorrect GFF file' + str( e ) )
-        clean_file = cleaning_bam(options.bamfile)
+            stop_err( 'Incorrect GFF file' )
+        clean_file = ribo_functions.cleaning_bam(options.bamfile)
         compute_analysis(clean_file, GFF, options.fasta, options.gff, subfolder, options.extend)
         if os.path.exists( clean_file ):
             os.remove( clean_file )