ribo_tools: ribo_functions.py comparison

comparison ribo_functions.py @ 17:c87c40e642af draft

Uploaded

author	rlegendre
date	Fri, 29 May 2015 09:17:29 -0400
parents	7c944fd9907e
children	a121cce43f90

comparison

equal deleted inserted replaced

-:fcfdb2607cb8
+:c87c40e642af
 except Exception, e:
 stop_err( 'Error during bam file splitting : ' + str( e ) )
-def get_first_base(tmpdir,kmer):
+def get_first_base(tmpdir, kmer, frame):
 '''
 write footprint coverage file for each sam file in tmpdir
 '''
 global total_mapped_read
+## tags by default
+multi_tag = "XS:i:"
+tag = "IH:i:1"
 try:
+### compute position of P-site according to frame (P-site -> +15 starting from 5' end of footprint)
+p_site_pos = 16-frame
 file_array = (commands.getoutput('ls '+tmpdir)).split('\n')
 ##write coverage for each sam file in tmpdir
 for samfile in file_array:
 with open(tmpdir+'/'+samfile, 'r') as sam :
 ##get chromosome name
-chrom = samfile.split(".")[0]
+chrom = samfile.split(".sam")[0]
 for line in sam:
 #initialize dictionnary
-if re.search('@SQ', line) :
+if '@SQ' in line :
 size = int(line.split('LN:')[1])
 genomeF = [0]*size
 genomeR = [0]*size
 # define multiple reads keys from mapper
-elif re.search('@PG', line) :
+elif '@PG\tID' in line :
-if re.search('bowtie', line):
+if 'bowtie' in line:
 multi_tag = "XS:i:"
-elif re.search('bwa', line):
+elif 'bwa' in  line:
 multi_tag = "XT:A:R"
+elif 'TopHat' in  line:
+tag = "NH:i:1"
 else :
-stop_err("No PG tag find in"+samfile+". Please use bowtie or bwa for mapping")
+stop_err("No PG tag find in "+samfile+". Please use bowtie, bwa or Tophat for mapping")
 # get footprint
 elif re.search('^[^@].+', line) :
 #print line.rstrip()
 read_pos = int(line.split('\t')[3])
 read_sens = int(line.split('\t')[1])
-#len_read = len(line.split('\t')[9])
+len_read = len(line.split('\t')[9])
-if line.split('\t')[5] == kmer+'M' and multi_tag not in line:
+read_qual = int(line.split('\t')[4])
+if len_read == kmer and (tag in line or multi_tag not in line) and read_qual > 20 :
 ###if line.split('\t')[5] == '28M' :
 # print line.rstrip()
 total_mapped_read +=1
 #if it's a forward read
 if read_sens == 0 :
 #get P-site : start read + 12 nt
-read_pos += 12
+#read_pos += 15
+read_pos += p_site_pos
 genomeF[read_pos] += 1
 #if it's a reverse read
 elif read_sens == 16 :
 #get P-site
-read_pos += 15
+#read_pos += 12
+read_pos += (len_read-1) - p_site_pos
 genomeR[read_pos] += 1
 #try:
 #write coverage in files
 with open(tmpdir+'/assoCov_'+chrom+'.txt', 'w') as cov :
 for i in range(0,size):
 cov.write(str(genomeF[i])+'\t-'+str(genomeR[i])+'\n')
 #except Exception,e:
 #    stop_err( 'Error during coverage file writting : ' + str( e ) )
+del genomeF
+del genomeR
 sys.stdout.write("%d reads are used for frame analysis\n" % total_mapped_read)
 except Exception, e:
 stop_err( 'Error during footprinting : ' + str( e ) )
 def store_gff(gff):
 '''
 parse and store gff file in a dictionnary
 '''

Mercurial > repos > rlegendre > ribo_tools

comparison ribo_functions.py @ 17:c87c40e642af draft