comparison metagene_readthrough.py @ 13:7c944fd9907e draft

release 2
author rlegendre
date Thu, 09 Apr 2015 11:35:48 -0400
parents 707807fee542
children 344bacf6acb8
comparison
equal deleted inserted replaced
12:ee3a3435ce43 13:7c944fd9907e
663 file_extension.close() 663 file_extension.close()
664 except Exception,e: 664 except Exception,e:
665 stop_err( 'Error during computing analysis : ' + str( e ) ) 665 stop_err( 'Error during computing analysis : ' + str( e ) )
666 666
667 667
668 def cleaning_bam(bam):
669 '''
670 Remove reads unmapped, non uniquely mapped and reads with length lower than 25 and upper than 32, and mapping quality upper than 12
671 '''
672 try :
673 header = subprocess.check_output(['samtools', 'view', '-H', bam], stderr= subprocess.PIPE)
674 #header = results.split('\n')
675
676 # check mapper for define multiple tag
677 if re.search('bowtie', header):
678 multi_tag = "XS:i:"
679 elif re.search('bwa', header):
680 multi_tag = "XT:A:M"
681 else :
682 stop_err("No PG tag find in"+bam+". Please use bowtie or bwa for mapping")
683
684 tmp_sam = tempfile.mktemp()
685 cmd = "samtools view %s > %s" % (bam, tmp_sam)
686 proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE)
687 returncode = proc.wait()
688
689
690 with open(tempfile.mktemp(),'w') as out :
691 out.write(header)
692 with open(tmp_sam,'r') as sam :
693 for line in sam :
694 if multi_tag not in line and line.split('\t')[1] != '4' and int(line.split('\t')[4]) > 12 :
695 if len(line.split('\t')[9]) < 32 and len(line.split('\t')[9]) > 25 :
696 out.write(line)
697 bamfile = tempfile.mktemp()+'.bam'
698 cmd = "samtools view -hSb %s > %s" % (out.name,bamfile)
699 proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE)
700 returncode = proc.wait()
701
702 return bamfile
703
704 except Exception,e:
705 stop_err( 'Error during cleaning bam : ' + str( e ) )
706 668
707 def write_html_page(html,subfolder) : 669 def write_html_page(html,subfolder) :
708 670
709 671
710 try : 672 try :
1002 964
1003 #GFF = store_gff(options.gff) 965 #GFF = store_gff(options.gff)
1004 #GFF = ribo_functions.store_gtf(options.gff) 966 #GFF = ribo_functions.store_gtf(options.gff)
1005 ## check gff reading 967 ## check gff reading
1006 if not GFF['order'] : 968 if not GFF['order'] :
1007 stop_err( 'Incorrect GFF file' + str( e ) ) 969 stop_err( 'Incorrect GFF file' )
1008 clean_file = cleaning_bam(options.bamfile) 970 clean_file = ribo_functions.cleaning_bam(options.bamfile)
1009 compute_analysis(clean_file, GFF, options.fasta, options.gff, subfolder, options.extend) 971 compute_analysis(clean_file, GFF, options.fasta, options.gff, subfolder, options.extend)
1010 if os.path.exists( clean_file ): 972 if os.path.exists( clean_file ):
1011 os.remove( clean_file ) 973 os.remove( clean_file )
1012 974
1013 write_html_page(html_file,subfolder) 975 write_html_page(html_file,subfolder)