Mercurial > repos > rlegendre > ribo_tools
comparison ribo_functions.py @ 17:c87c40e642af draft
Uploaded
author | rlegendre |
---|---|
date | Fri, 29 May 2015 09:17:29 -0400 |
parents | 7c944fd9907e |
children | a121cce43f90 |
comparison
equal
deleted
inserted
replaced
16:fcfdb2607cb8 | 17:c87c40e642af |
---|---|
57 except Exception, e: | 57 except Exception, e: |
58 stop_err( 'Error during bam file splitting : ' + str( e ) ) | 58 stop_err( 'Error during bam file splitting : ' + str( e ) ) |
59 | 59 |
60 | 60 |
61 | 61 |
62 def get_first_base(tmpdir,kmer): | 62 def get_first_base(tmpdir, kmer, frame): |
63 ''' | 63 ''' |
64 write footprint coverage file for each sam file in tmpdir | 64 write footprint coverage file for each sam file in tmpdir |
65 ''' | 65 ''' |
66 global total_mapped_read | 66 global total_mapped_read |
67 ## tags by default | |
68 multi_tag = "XS:i:" | |
69 tag = "IH:i:1" | |
70 | |
67 try: | 71 try: |
72 ### compute position of P-site according to frame (P-site -> +15 starting from 5' end of footprint) | |
73 p_site_pos = 16-frame | |
74 | |
68 file_array = (commands.getoutput('ls '+tmpdir)).split('\n') | 75 file_array = (commands.getoutput('ls '+tmpdir)).split('\n') |
69 ##write coverage for each sam file in tmpdir | 76 ##write coverage for each sam file in tmpdir |
70 for samfile in file_array: | 77 for samfile in file_array: |
71 with open(tmpdir+'/'+samfile, 'r') as sam : | 78 with open(tmpdir+'/'+samfile, 'r') as sam : |
72 ##get chromosome name | 79 ##get chromosome name |
73 chrom = samfile.split(".")[0] | 80 chrom = samfile.split(".sam")[0] |
74 | 81 |
75 for line in sam: | 82 for line in sam: |
76 #initialize dictionnary | 83 #initialize dictionnary |
77 if re.search('@SQ', line) : | 84 if '@SQ' in line : |
78 size = int(line.split('LN:')[1]) | 85 size = int(line.split('LN:')[1]) |
79 genomeF = [0]*size | 86 genomeF = [0]*size |
80 genomeR = [0]*size | 87 genomeR = [0]*size |
81 # define multiple reads keys from mapper | 88 # define multiple reads keys from mapper |
82 elif re.search('@PG', line) : | 89 elif '@PG\tID' in line : |
83 if re.search('bowtie', line): | 90 if 'bowtie' in line: |
84 multi_tag = "XS:i:" | 91 multi_tag = "XS:i:" |
85 elif re.search('bwa', line): | 92 elif 'bwa' in line: |
86 multi_tag = "XT:A:R" | 93 multi_tag = "XT:A:R" |
94 elif 'TopHat' in line: | |
95 tag = "NH:i:1" | |
87 else : | 96 else : |
88 stop_err("No PG tag find in"+samfile+". Please use bowtie or bwa for mapping") | 97 stop_err("No PG tag find in "+samfile+". Please use bowtie, bwa or Tophat for mapping") |
89 | 98 |
90 # get footprint | 99 # get footprint |
91 elif re.search('^[^@].+', line) : | 100 elif re.search('^[^@].+', line) : |
92 #print line.rstrip() | 101 #print line.rstrip() |
93 read_pos = int(line.split('\t')[3]) | 102 read_pos = int(line.split('\t')[3]) |
94 read_sens = int(line.split('\t')[1]) | 103 read_sens = int(line.split('\t')[1]) |
95 #len_read = len(line.split('\t')[9]) | 104 len_read = len(line.split('\t')[9]) |
96 if line.split('\t')[5] == kmer+'M' and multi_tag not in line: | 105 read_qual = int(line.split('\t')[4]) |
106 if len_read == kmer and (tag in line or multi_tag not in line) and read_qual > 20 : | |
97 ###if line.split('\t')[5] == '28M' : | 107 ###if line.split('\t')[5] == '28M' : |
98 # print line.rstrip() | 108 # print line.rstrip() |
99 total_mapped_read +=1 | 109 total_mapped_read +=1 |
100 #if it's a forward read | 110 #if it's a forward read |
101 if read_sens == 0 : | 111 if read_sens == 0 : |
102 #get P-site : start read + 12 nt | 112 #get P-site : start read + 12 nt |
103 read_pos += 12 | 113 #read_pos += 15 |
114 read_pos += p_site_pos | |
104 genomeF[read_pos] += 1 | 115 genomeF[read_pos] += 1 |
105 #if it's a reverse read | 116 #if it's a reverse read |
106 elif read_sens == 16 : | 117 elif read_sens == 16 : |
107 #get P-site | 118 #get P-site |
108 read_pos += 15 | 119 #read_pos += 12 |
120 read_pos += (len_read-1) - p_site_pos | |
109 genomeR[read_pos] += 1 | 121 genomeR[read_pos] += 1 |
110 | 122 |
111 #try: | 123 #try: |
112 #write coverage in files | 124 #write coverage in files |
113 with open(tmpdir+'/assoCov_'+chrom+'.txt', 'w') as cov : | 125 with open(tmpdir+'/assoCov_'+chrom+'.txt', 'w') as cov : |
114 for i in range(0,size): | 126 for i in range(0,size): |
115 cov.write(str(genomeF[i])+'\t-'+str(genomeR[i])+'\n') | 127 cov.write(str(genomeF[i])+'\t-'+str(genomeR[i])+'\n') |
116 #except Exception,e: | 128 #except Exception,e: |
117 # stop_err( 'Error during coverage file writting : ' + str( e ) ) | 129 # stop_err( 'Error during coverage file writting : ' + str( e ) ) |
118 | 130 del genomeF |
131 del genomeR | |
119 sys.stdout.write("%d reads are used for frame analysis\n" % total_mapped_read) | 132 sys.stdout.write("%d reads are used for frame analysis\n" % total_mapped_read) |
120 except Exception, e: | 133 except Exception, e: |
121 stop_err( 'Error during footprinting : ' + str( e ) ) | 134 stop_err( 'Error during footprinting : ' + str( e ) ) |
135 | |
136 | |
122 | 137 |
123 def store_gff(gff): | 138 def store_gff(gff): |
124 ''' | 139 ''' |
125 parse and store gff file in a dictionnary | 140 parse and store gff file in a dictionnary |
126 ''' | 141 ''' |