Mercurial > repos > rlegendre > ribo_tools
diff kmer_analysis.py @ 13:7c944fd9907e draft
release 2
author | rlegendre |
---|---|
date | Thu, 09 Apr 2015 11:35:48 -0400 |
parents | 707807fee542 |
children | a121cce43f90 |
line wrap: on
line diff
--- a/kmer_analysis.py Fri Jan 23 03:31:37 2015 -0500 +++ b/kmer_analysis.py Thu Apr 09 11:35:48 2015 -0400 @@ -80,6 +80,10 @@ ''' global total_mapped_read + ## tags by default + multi_tag = "XS:i:" + tag = "IH:i:1" + ## init kmer dict KMER = OrderedDict({}) @@ -103,10 +107,10 @@ multi_tag = "XS:i:" elif 'bwa' in line: multi_tag = "XT:A:R" - #elif 'TopHat' in line: - # multi_tag = "NH:i:1" + elif 'TopHat' in line: + tag = "NH:i:1" else : - stop_err("No PG tag find in "+samfile+". Please use bowtie or bwa for mapping") + stop_err("No PG tag find in "+samfile+". Please use bowtie, bwa or Tophat for mapping") # get footprint elif re.search('^[^@].+', line) : @@ -121,7 +125,7 @@ read_pos = int(line.split('\t')[3]) read_sens = int(line.split('\t')[1]) #len_read = len(line.split('\t')[9]) - if len_read == kmer and multi_tag not in line: + if len_read == kmer and (tag in line or multi_tag not in line): ###if line.split('\t')[5] == '28M' : total_mapped_read +=1 #if it's a forward read @@ -399,7 +403,7 @@ #GFF = ribo_functions.store_gtf(options.gff) ## check gff reading if not GFF['order'] : - stop_err( 'Incorrect GFF file' + str( e ) ) + stop_err( 'Incorrect GFF file' ) ## split bam split_bam(options.bamfile,tmpdir) @@ -416,12 +420,12 @@ ## compute analysis with other kmer for keys in kmer.iterkeys() : if keys != 28 : - ## remove all txt files in tmp directory - if os.system("rm "+tmpdir+"/*.txt") != 0 : - stop_err( 'Error during tmp directory cleaning : ' + str( e ) ) - + ## If not enought reads in this kmer : if kmer[keys] > 100 : + ## remove all txt files in tmp directory + if os.system("rm "+tmpdir+"/*.txt") != 0 : + stop_err( 'Error during tmp directory cleaning') ## compute coverage and distribution kmer tmp = get_first_base(tmpdir, keys) ## compute phasing