ribo_tools: kmer_analysis.py comparison

comparison kmer_analysis.py @ 13:7c944fd9907e draft

release 2

author	rlegendre
date	Thu, 09 Apr 2015 11:35:48 -0400
parents	707807fee542
children	a121cce43f90

comparison

equal deleted inserted replaced

-:ee3a3435ce43
+:7c944fd9907e
 '''
 write footprint coverage file for each sam file in tmpdir and get kmer distribution
 '''
 global total_mapped_read
+## tags by default
+multi_tag = "XS:i:"
+tag = "IH:i:1"
 ## init kmer dict
 KMER = OrderedDict({})
 try:
 file_array = (commands.getoutput('ls '+tmpdir)).split('\n')
 elif '@PG\tID' in line :
 if 'bowtie' in line:
 multi_tag = "XS:i:"
 elif 'bwa' in  line:
 multi_tag = "XT:A:R"
-#elif 'TopHat' in  line:
+elif 'TopHat' in  line:
-#    multi_tag = "NH:i:1"
+tag = "NH:i:1"
 else :
-stop_err("No PG tag find in "+samfile+". Please use bowtie or bwa for mapping")
+stop_err("No PG tag find in "+samfile+". Please use bowtie, bwa or Tophat for mapping")
 # get footprint
 elif re.search('^[^@].+', line) :
 len_read = len(line.split('\t')[9])
 ##full kmer dict
 #print line.rstrip()
 read_pos = int(line.split('\t')[3])
 read_sens = int(line.split('\t')[1])
 #len_read = len(line.split('\t')[9])
-if len_read == kmer and multi_tag not in line:
+if len_read == kmer and (tag in line or multi_tag not in line):
 ###if line.split('\t')[5] == '28M' :
 total_mapped_read +=1
 #if it's a forward read
 if read_sens == 0 :
 #get 5' base
 stop_err( 'Please check your annotation file is in correct format, GFF or GTF' )
 #GFF = store_gff(options.gff)
 #GFF = ribo_functions.store_gtf(options.gff)
 ## check gff reading
 if not GFF['order'] :
-stop_err( 'Incorrect GFF file' + str( e ) )
+stop_err( 'Incorrect GFF file' )
 ## split bam
 split_bam(options.bamfile,tmpdir)
 ###################################
 ## First analysis with 28mer :
 results = {}
 results[28] = whole_phasing
 ## compute analysis with other kmer
 for keys in kmer.iterkeys() :
 if keys != 28 :
-## remove all txt files in tmp directory
-if os.system("rm "+tmpdir+"/*.txt") != 0 :
-stop_err( 'Error during tmp directory cleaning : ' + str( e ) )
 ## If not enough reads in this kmer :
 if kmer[keys] > 100 :
+## remove all txt files in tmp directory
+if os.system("rm "+tmpdir+"/*.txt") != 0 :
+stop_err( 'Error during tmp directory cleaning')
 ## compute coverage and distribution kmer
 tmp = get_first_base(tmpdir, keys)
 ## compute phasing
 whole_phasing = frame_analysis(tmpdir,GFF)

Mercurial > repos > rlegendre > ribo_tools

comparison kmer_analysis.py @ 13:7c944fd9907e draft