Mercurial > repos > charles_s_test > seqsero2
comparison libs/H_combination_output_analysis.py @ 0:6895de35a263 draft
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
author | charles_s_test |
---|---|
date | Thu, 19 Oct 2017 18:16:51 -0400 |
parents | |
children | 38ad1130d077 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6895de35a263 |
---|---|
1 #!/usr/bin/env python | |
2 # "H_combination_output_analysis.py target.fasta fliCdatabase.fasta fljBdatabase.fasta" | |
3 # must have ispcr and primers of fliC and fljB at the same directory | |
4 | |
5 | |
6 import os | |
7 from Bio import SeqIO | |
8 import sys | |
9 from Bio.Blast import NCBIXML | |
10 from Initial_Conditions import phase1 | |
11 from Initial_Conditions import phase2 | |
12 | |
13 Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb" | |
14 Blastxpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastx" | |
15 | |
16 target=sys.argv[1] | |
17 database_fliC=sys.argv[2] | |
18 database_fljB=sys.argv[3] | |
19 output=target.split('.')[0]+'_out.fasta' | |
20 | |
21 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015 | |
22 database_path="database"###01/27/2015,database_path=dirpath+"/database" | |
23 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fliC.txt '+target+'_fliC.fa') | |
24 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fljB.txt '+target+'_fljB.fa') | |
25 fliC=target+'_fliC.fa' | |
26 fljB=target+'_fljB.fa' | |
27 | |
28 if os.path.getsize(fliC)>10: | |
29 os.system(Makebltdb+' -in '+database_fliC+' -out '+database_fliC+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already | |
30 os.system(Blastxpth+' -seg=no -query '+fliC+' -db '+database_fliC+'_db '+'-out '+'FliC_Htype_'+target+'.xml '+'-outfmt 5') | |
31 print target | |
32 fliC_XML='FliC_Htype_'+target+'.xml' | |
33 fliC_handle=open(fliC_XML) | |
34 records=NCBIXML.parse(fliC_handle) | |
35 fliC_records=list(records) | |
36 E_thresh=1e-10 | |
37 hspbit=[] | |
38 alignmentlist=[] | |
39 for record in fliC_records: | |
40 for alignment in record.alignments: | |
41 hsp_bit_score=0 | |
42 startlist=[]#the percentage algorithm don't consider one situation, the new hsp cover old hsp | |
43 endlist=[]# | |
44 for hsp in alignment.hsps: | |
45 start=hsp.query_start# | |
46 end=hsp.query_end# | |
47 leng=abs(start-end)# | |
48 if hsp.expect<E_thresh:# | |
49 if start>end:# | |
50 temp=start# | |
51 start=end# | |
52 end=start# | |
53 if len(startlist)==0:# | |
54 hsp_bit_score=hsp_bit_score+hsp.bits# | |
55 startlist.append(start)# | |
56 endlist.append(end)# | |
57 else:# | |
58 for i in range(len(startlist)):# | |
59 if startlist[i]<start<endlist[i]:# | |
60 start=endlist[i]+1# | |
61 if startlist[i]<end<endlist[i]:#03112016 | |
62 end=startlist[i]-1# | |
63 if end<start:#the new hsp was included in old hsp# | |
64 percentage=0# | |
65 else: | |
66 percentage=float(end-start)/leng# | |
67 startlist.append(start)# | |
68 endlist.append(end)# | |
69 hsp_bit_score=hsp_bit_score+percentage*hsp.bits# | |
70 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
71 hspbit.append(hsp_bit_score) | |
72 alignmentlist.append(alignment) | |
73 scorelist=dict(zip(alignmentlist,hspbit)) | |
74 score=0 | |
75 serotype=[] | |
76 seroscore=[] | |
77 for Htype in scorelist: | |
78 if scorelist[Htype]>score: | |
79 First_Choice=Htype | |
80 score=scorelist[Htype] | |
81 if locals().has_key('First_Choice'): | |
82 serotype.append(First_Choice.split("__")[0]) | |
83 seroscore.append(score) | |
84 secscore=0 | |
85 for Htype in scorelist: | |
86 | |
87 if scorelist[Htype]>secscore and (Htype.split("__")[0] not in serotype): | |
88 Sec_Choice=Htype | |
89 secscore=scorelist[Htype] | |
90 if locals().has_key('Sec_Choice'): | |
91 serotype.append(Sec_Choice.split("__")[0]) | |
92 seroscore.append(secscore) | |
93 thirdscore=0 | |
94 for Htype in scorelist: | |
95 if scorelist[Htype]>thirdscore and (Htype.split("__")[0] not in serotype): | |
96 Third_Choice=Htype | |
97 thirdscore=scorelist[Htype] | |
98 if locals().has_key('Third_Choice'): | |
99 serotype.append(Third_Choice.split("__")[0]) | |
100 seroscore.append(thirdscore) | |
101 print serotype,seroscore | |
102 if score>100: | |
103 print '#',target,'$$$ Most possible H_fliC_type: ',First_Choice,'\n' | |
104 print '$$$ bit_score:',score,'\n' | |
105 if locals().has_key('secscore'): | |
106 if secscore>100: | |
107 print '#',target,'$$$ Second possible H_fliC_type: ',Sec_Choice,'\n' | |
108 print '$$$ Second bit_score:',secscore,'\n' | |
109 if locals().has_key('thirdscore'): | |
110 if thirdscore>100: | |
111 print '#',target,'$$$ Third possible H_fliC_type: ',Third_Choice,'\n' | |
112 print '$$$ Third bit_score:',thirdscore,'\n' | |
113 else: | |
114 print '$$$ No fliC in',target | |
115 else: | |
116 score=1 | |
117 print '$$$ No fliC (no file created) in',target | |
118 | |
119 | |
120 | |
121 if os.path.getsize(fljB)>10: | |
122 os.system(Makebltdb+' -in '+database_fljB+' -out '+database_fljB+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already | |
123 os.system(Blastxpth+' -query '+fljB+' -db '+database_fljB+'_db '+'-out '+'FljB_Htype_'+target+'.xml '+'-outfmt 5') | |
124 print target | |
125 fljB_XML='FljB_Htype_'+target+'.xml' | |
126 fljB_handle=open(fljB_XML) | |
127 records=NCBIXML.parse(fljB_handle) | |
128 fljB_records=list(records) | |
129 E_thresh=1e-10 | |
130 hspbit=[] | |
131 alignmentlist=[] | |
132 for record in fljB_records: | |
133 for alignment in record.alignments: | |
134 hsp_bit_score=0 | |
135 startlist=[]# | |
136 endlist=[]# | |
137 for hsp in alignment.hsps: | |
138 start=hsp.query_start# | |
139 end=hsp.query_end# | |
140 leng=abs(start-end)# | |
141 if hsp.expect<E_thresh:# | |
142 if start>end:# | |
143 temp=start# | |
144 start=end# | |
145 end=start# | |
146 if len(startlist)==0:# | |
147 hsp_bit_score=hsp_bit_score+hsp.bits# | |
148 startlist.append(start)# | |
149 endlist.append(end)# | |
150 else:# | |
151 for i in range(len(startlist)):# | |
152 if startlist[i]<start<endlist[i]:# | |
153 start=endlist[i]+1# | |
154 if startlist[i]<end<endlist[i]:#03112016 | |
155 end=startlist[i]-1# | |
156 if end<start:#the new hsp was included in old hsp# | |
157 percentage=0# | |
158 else: | |
159 percentage=float(end-start)/leng# | |
160 startlist.append(start)# | |
161 endlist.append(end)# | |
162 hsp_bit_score=hsp_bit_score+percentage*hsp.bits# | |
163 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
164 hspbit.append(hsp_bit_score) | |
165 alignmentlist.append(alignment) | |
166 fljB_scorelist=dict(zip(alignmentlist,hspbit)) | |
167 | |
168 fljB_score=0 | |
169 fljB_serotype=[] | |
170 fljB_seroscore=[] | |
171 for Htype in fljB_scorelist: | |
172 if fljB_scorelist[Htype]>fljB_score: | |
173 fljB_First_Choice=Htype | |
174 fljB_score=fljB_scorelist[Htype] | |
175 if locals().has_key('fljB_First_Choice'): | |
176 fljB_serotype.append(fljB_First_Choice.split("__")[0]) | |
177 fljB_seroscore.append(fljB_score) | |
178 fljB_secscore=0 | |
179 for Htype in fljB_scorelist: | |
180 if fljB_scorelist[Htype]>fljB_secscore and (Htype.split("__")[0] not in fljB_serotype): | |
181 fljB_Sec_Choice=Htype | |
182 fljB_secscore=fljB_scorelist[Htype] | |
183 if locals().has_key('fljB_Sec_Choice'): | |
184 fljB_serotype.append(fljB_Sec_Choice.split("__")[0]) | |
185 fljB_seroscore.append(fljB_secscore) | |
186 fljB_thirdscore=0 | |
187 for Htype in fljB_scorelist: | |
188 if fljB_scorelist[Htype]>fljB_thirdscore and (Htype.split("__")[0] not in fljB_serotype): | |
189 fljB_Third_Choice=Htype | |
190 fljB_thirdscore=fljB_scorelist[Htype] | |
191 if locals().has_key('fljB_Third_Choice'): | |
192 fljB_serotype.append(fljB_Third_Choice.split("__")[0]) | |
193 fljB_seroscore.append(fljB_thirdscore) | |
194 | |
195 if fljB_score>100: | |
196 print '#',target,'$$$ Most possible H_fljB_type: ',fljB_First_Choice,'\n' | |
197 print '$$$ Most bit_score:',fljB_score,'\n' | |
198 if locals().has_key('fljB_secscore'): | |
199 if fljB_secscore>100: | |
200 print '#',target,'$$$ Second possible H_fljB_type: ',fljB_Sec_Choice,'\n' | |
201 print '$$$ Second bit_score:',fljB_secscore,'\n' | |
202 if locals().has_key('fljB_thirdscore'): | |
203 if fljB_thirdscore>100: | |
204 print '#',target,'$$$ Third possible H_fljB_type: ',fljB_Third_Choice,'\n' | |
205 print '$$$ Third bit_score:',fljB_thirdscore,'\n' | |
206 else: | |
207 print '$$$ No fljB in',target | |
208 else: | |
209 fljB_score=1 | |
210 print '$$$ No fljB (no file created) in',target | |
211 | |
212 | |
213 if score>100 and fljB_score>100: | |
214 fliC_sero=dict(zip(serotype,seroscore)) | |
215 fljB_sero=dict(zip(fljB_serotype,fljB_seroscore)) | |
216 combination=[] | |
217 combination_score=[] | |
218 for seroname in fliC_sero: | |
219 for fljB_seroname in fljB_sero: | |
220 for i in range(len(phase1)): | |
221 if phase1[i]==seroname and phase2[i]==fljB_seroname: | |
222 name=seroname+"_"+fljB_seroname | |
223 score=fliC_sero[seroname]+fljB_sero[fljB_seroname] | |
224 combination.append(name) | |
225 combination_score.append(score) | |
226 combinationlist=dict(zip(combination,combination_score)) #we can do the filteration here | |
227 final_dict=sorted(combinationlist.iteritems(), key=lambda d:d[1], reverse = True) | |
228 print "$$_H:Order:",final_dict | |
229 elif score>100 and fljB_score<100: | |
230 print "$$_H:No fljB, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice | |
231 elif score<100 and fljB_score>100: | |
232 print "$$_H:No fliC, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice | |
233 elif score==1 and fljB_score>100: | |
234 print "$$_H:No fliC (file) existed, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice | |
235 elif score==1 and fljB_score<100: | |
236 print "$$_H:No fliC (file) existed, and no fljB" | |
237 elif score>100 and fljB_score==1: | |
238 print "$$_H:No fljB (file) existed, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice | |
239 elif score<100 and fljB_score==1: | |
240 print "$$_H:No fljB (file) existed, and no fliC" | |
241 else: | |
242 print "$$_H:No fliC and fljB" | |
243 | |
244 | |
245 ''' | |
246 E_thresh=1e-10 | |
247 hspbit=[] | |
248 alignmentlist=[] | |
249 for record in fliC_records: | |
250 for alignment in record.alignments: | |
251 hsp_bit_score=0 | |
252 for hsp in alignment.hsps: | |
253 if hsp.expect<E_thresh: | |
254 hsp_bit_score=hsp_bit_score+hsp.bits | |
255 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
256 hspbit.append(hsp_bit_score) | |
257 alignmentlist.append(alignment) | |
258 | |
259 scorelist=dict(zip(alignmentlist,hspbit)) | |
260 score=0 | |
261 for Htype in scorelist: | |
262 if scorelist[Htype]>score: | |
263 First_Choice=Htype | |
264 score=scorelist[Htype] | |
265 if score>100: | |
266 print '#',target,'Most possible H_fliC_type: ',First_Choice,'\n' | |
267 print '#bit_score:',score,'\n' | |
268 else: | |
269 print '#No fliC in',target | |
270 | |
271 | |
272 E_thresh=1e-10 | |
273 hspbit=[] | |
274 alignmentlist=[] | |
275 for record in fljB_records: | |
276 for alignment in record.alignments: | |
277 hsp_bit_score=0 | |
278 for hsp in alignment.hsps: | |
279 if hsp.expect<E_thresh: | |
280 hsp_bit_score=hsp_bit_score+hsp.bits | |
281 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
282 hspbit.append(hsp_bit_score) | |
283 alignmentlist.append(alignment) | |
284 | |
285 scorelist=dict(zip(alignmentlist,hspbit)) | |
286 fljB_score=0 | |
287 for Htype in scorelist: | |
288 if scorelist[Htype]>fljB_score: | |
289 First_Choice=Htype | |
290 fljB_score=scorelist[Htype] | |
291 if fljB_score>100: | |
292 print '#',target,'Most possible H_fljB_type: ',First_Choice,'\n' | |
293 print '#bit_score:',fljB_score,'\n' | |
294 else: | |
295 print '#No fljB in',target | |
296 ''' | |
297 |