comparison ribo_functions.py @ 17:c87c40e642af draft

Uploaded
author rlegendre
date Fri, 29 May 2015 09:17:29 -0400
parents 7c944fd9907e
children a121cce43f90
comparison
equal deleted inserted replaced
16:fcfdb2607cb8 17:c87c40e642af
57 except Exception, e: 57 except Exception, e:
58 stop_err( 'Error during bam file splitting : ' + str( e ) ) 58 stop_err( 'Error during bam file splitting : ' + str( e ) )
59 59
60 60
61 61
62 def get_first_base(tmpdir,kmer): 62 def get_first_base(tmpdir, kmer, frame):
63 ''' 63 '''
64 write footprint coverage file for each sam file in tmpdir 64 write footprint coverage file for each sam file in tmpdir
65 ''' 65 '''
66 global total_mapped_read 66 global total_mapped_read
67 ## tags by default
68 multi_tag = "XS:i:"
69 tag = "IH:i:1"
70
67 try: 71 try:
72 ### compute position of P-site according to frame (P-site -> +15 starting from 5' end of footprint)
73 p_site_pos = 16-frame
74
68 file_array = (commands.getoutput('ls '+tmpdir)).split('\n') 75 file_array = (commands.getoutput('ls '+tmpdir)).split('\n')
69 ##write coverage for each sam file in tmpdir 76 ##write coverage for each sam file in tmpdir
70 for samfile in file_array: 77 for samfile in file_array:
71 with open(tmpdir+'/'+samfile, 'r') as sam : 78 with open(tmpdir+'/'+samfile, 'r') as sam :
72 ##get chromosome name 79 ##get chromosome name
73 chrom = samfile.split(".")[0] 80 chrom = samfile.split(".sam")[0]
74 81
75 for line in sam: 82 for line in sam:
76 #initialize dictionnary 83 #initialize dictionnary
77 if re.search('@SQ', line) : 84 if '@SQ' in line :
78 size = int(line.split('LN:')[1]) 85 size = int(line.split('LN:')[1])
79 genomeF = [0]*size 86 genomeF = [0]*size
80 genomeR = [0]*size 87 genomeR = [0]*size
81 # define multiple reads keys from mapper 88 # define multiple reads keys from mapper
82 elif re.search('@PG', line) : 89 elif '@PG\tID' in line :
83 if re.search('bowtie', line): 90 if 'bowtie' in line:
84 multi_tag = "XS:i:" 91 multi_tag = "XS:i:"
85 elif re.search('bwa', line): 92 elif 'bwa' in line:
86 multi_tag = "XT:A:R" 93 multi_tag = "XT:A:R"
94 elif 'TopHat' in line:
95 tag = "NH:i:1"
87 else : 96 else :
88 stop_err("No PG tag find in"+samfile+". Please use bowtie or bwa for mapping") 97 stop_err("No PG tag find in "+samfile+". Please use bowtie, bwa or Tophat for mapping")
89 98
90 # get footprint 99 # get footprint
91 elif re.search('^[^@].+', line) : 100 elif re.search('^[^@].+', line) :
92 #print line.rstrip() 101 #print line.rstrip()
93 read_pos = int(line.split('\t')[3]) 102 read_pos = int(line.split('\t')[3])
94 read_sens = int(line.split('\t')[1]) 103 read_sens = int(line.split('\t')[1])
95 #len_read = len(line.split('\t')[9]) 104 len_read = len(line.split('\t')[9])
96 if line.split('\t')[5] == kmer+'M' and multi_tag not in line: 105 read_qual = int(line.split('\t')[4])
106 if len_read == kmer and (tag in line or multi_tag not in line) and read_qual > 20 :
97 ###if line.split('\t')[5] == '28M' : 107 ###if line.split('\t')[5] == '28M' :
98 # print line.rstrip() 108 # print line.rstrip()
99 total_mapped_read +=1 109 total_mapped_read +=1
100 #if it's a forward read 110 #if it's a forward read
101 if read_sens == 0 : 111 if read_sens == 0 :
102 #get P-site : start read + 12 nt 112 #get P-site : start read + 12 nt
103 read_pos += 12 113 #read_pos += 15
114 read_pos += p_site_pos
104 genomeF[read_pos] += 1 115 genomeF[read_pos] += 1
105 #if it's a reverse read 116 #if it's a reverse read
106 elif read_sens == 16 : 117 elif read_sens == 16 :
107 #get P-site 118 #get P-site
108 read_pos += 15 119 #read_pos += 12
120 read_pos += (len_read-1) - p_site_pos
109 genomeR[read_pos] += 1 121 genomeR[read_pos] += 1
110 122
111 #try: 123 #try:
112 #write coverage in files 124 #write coverage in files
113 with open(tmpdir+'/assoCov_'+chrom+'.txt', 'w') as cov : 125 with open(tmpdir+'/assoCov_'+chrom+'.txt', 'w') as cov :
114 for i in range(0,size): 126 for i in range(0,size):
115 cov.write(str(genomeF[i])+'\t-'+str(genomeR[i])+'\n') 127 cov.write(str(genomeF[i])+'\t-'+str(genomeR[i])+'\n')
116 #except Exception,e: 128 #except Exception,e:
117 # stop_err( 'Error during coverage file writting : ' + str( e ) ) 129 # stop_err( 'Error during coverage file writting : ' + str( e ) )
118 130 del genomeF
131 del genomeR
119 sys.stdout.write("%d reads are used for frame analysis\n" % total_mapped_read) 132 sys.stdout.write("%d reads are used for frame analysis\n" % total_mapped_read)
120 except Exception, e: 133 except Exception, e:
121 stop_err( 'Error during footprinting : ' + str( e ) ) 134 stop_err( 'Error during footprinting : ' + str( e ) )
135
136
122 137
123 def store_gff(gff): 138 def store_gff(gff):
124 ''' 139 '''
125 parse and store gff file in a dictionnary 140 parse and store gff file in a dictionnary
126 ''' 141 '''