Mercurial > repos > rlegendre > ribo_tools
diff metagene_readthrough.py @ 13:7c944fd9907e draft
release 2
| author | rlegendre |
|---|---|
| date | Thu, 09 Apr 2015 11:35:48 -0400 |
| parents | 707807fee542 |
| children | 344bacf6acb8 |
line wrap: on
line diff
--- a/metagene_readthrough.py Fri Jan 23 03:31:37 2015 -0500 +++ b/metagene_readthrough.py Thu Apr 09 11:35:48 2015 -0400 @@ -665,44 +665,6 @@ stop_err( 'Error during computing analysis : ' + str( e ) ) -def cleaning_bam(bam): - ''' - Remove reads unmapped, non uniquely mapped and reads with length lower than 25 and upper than 32, and mapping quality upper than 12 - ''' - try : - header = subprocess.check_output(['samtools', 'view', '-H', bam], stderr= subprocess.PIPE) - #header = results.split('\n') - - # check mapper for define multiple tag - if re.search('bowtie', header): - multi_tag = "XS:i:" - elif re.search('bwa', header): - multi_tag = "XT:A:M" - else : - stop_err("No PG tag find in"+bam+". Please use bowtie or bwa for mapping") - - tmp_sam = tempfile.mktemp() - cmd = "samtools view %s > %s" % (bam, tmp_sam) - proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE) - returncode = proc.wait() - - - with open(tempfile.mktemp(),'w') as out : - out.write(header) - with open(tmp_sam,'r') as sam : - for line in sam : - if multi_tag not in line and line.split('\t')[1] != '4' and int(line.split('\t')[4]) > 12 : - if len(line.split('\t')[9]) < 32 and len(line.split('\t')[9]) > 25 : - out.write(line) - bamfile = tempfile.mktemp()+'.bam' - cmd = "samtools view -hSb %s > %s" % (out.name,bamfile) - proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE) - returncode = proc.wait() - - return bamfile - - except Exception,e: - stop_err( 'Error during cleaning bam : ' + str( e ) ) def write_html_page(html,subfolder) : @@ -1004,8 +966,8 @@ #GFF = ribo_functions.store_gtf(options.gff) ## check gff reading if not GFF['order'] : - stop_err( 'Incorrect GFF file' + str( e ) ) - clean_file = cleaning_bam(options.bamfile) + stop_err( 'Incorrect GFF file' ) + clean_file = ribo_functions.cleaning_bam(options.bamfile) compute_analysis(clean_file, GFF, options.fasta, options.gff, subfolder, options.extend) if os.path.exists( clean_file ): os.remove( clean_file )
