comparison libs/H_combination_output_analysis.py @ 0:6895de35a263 draft

planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
author charles_s_test
date Thu, 19 Oct 2017 18:16:51 -0400
parents
children 38ad1130d077
comparison
equal deleted inserted replaced
-1:000000000000 0:6895de35a263
1 #!/usr/bin/env python
2 # "H_combination_output_analysis.py target.fasta fliCdatabase.fasta fljBdatabase.fasta"
3 # must have ispcr and primers of fliC and fljB at the same directory
4
5
6 import os
7 from Bio import SeqIO
8 import sys
9 from Bio.Blast import NCBIXML
10 from Initial_Conditions import phase1
11 from Initial_Conditions import phase2
12
13 Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb"
14 Blastxpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastx"
15
16 target=sys.argv[1]
17 database_fliC=sys.argv[2]
18 database_fljB=sys.argv[3]
19 output=target.split('.')[0]+'_out.fasta'
20
21 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015
22 database_path="database"###01/27/2015,database_path=dirpath+"/database"
23 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fliC.txt '+target+'_fliC.fa')
24 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fljB.txt '+target+'_fljB.fa')
25 fliC=target+'_fliC.fa'
26 fljB=target+'_fljB.fa'
27
28 if os.path.getsize(fliC)>10:
29 os.system(Makebltdb+' -in '+database_fliC+' -out '+database_fliC+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already
30 os.system(Blastxpth+' -seg=no -query '+fliC+' -db '+database_fliC+'_db '+'-out '+'FliC_Htype_'+target+'.xml '+'-outfmt 5')
31 print target
32 fliC_XML='FliC_Htype_'+target+'.xml'
33 fliC_handle=open(fliC_XML)
34 records=NCBIXML.parse(fliC_handle)
35 fliC_records=list(records)
36 E_thresh=1e-10
37 hspbit=[]
38 alignmentlist=[]
39 for record in fliC_records:
40 for alignment in record.alignments:
41 hsp_bit_score=0
42 startlist=[]#the percentage algorithm don't consider one situation, the new hsp cover old hsp
43 endlist=[]#
44 for hsp in alignment.hsps:
45 start=hsp.query_start#
46 end=hsp.query_end#
47 leng=abs(start-end)#
48 if hsp.expect<E_thresh:#
49 if start>end:#
50 temp=start#
51 start=end#
52 end=start#
53 if len(startlist)==0:#
54 hsp_bit_score=hsp_bit_score+hsp.bits#
55 startlist.append(start)#
56 endlist.append(end)#
57 else:#
58 for i in range(len(startlist)):#
59 if startlist[i]<start<endlist[i]:#
60 start=endlist[i]+1#
61 if startlist[i]<end<endlist[i]:#03112016
62 end=startlist[i]-1#
63 if end<start:#the new hsp was included in old hsp#
64 percentage=0#
65 else:
66 percentage=float(end-start)/leng#
67 startlist.append(start)#
68 endlist.append(end)#
69 hsp_bit_score=hsp_bit_score+percentage*hsp.bits#
70 alignment=alignment.hit_def+':'+str(hsp_bit_score)
71 hspbit.append(hsp_bit_score)
72 alignmentlist.append(alignment)
73 scorelist=dict(zip(alignmentlist,hspbit))
74 score=0
75 serotype=[]
76 seroscore=[]
77 for Htype in scorelist:
78 if scorelist[Htype]>score:
79 First_Choice=Htype
80 score=scorelist[Htype]
81 if locals().has_key('First_Choice'):
82 serotype.append(First_Choice.split("__")[0])
83 seroscore.append(score)
84 secscore=0
85 for Htype in scorelist:
86
87 if scorelist[Htype]>secscore and (Htype.split("__")[0] not in serotype):
88 Sec_Choice=Htype
89 secscore=scorelist[Htype]
90 if locals().has_key('Sec_Choice'):
91 serotype.append(Sec_Choice.split("__")[0])
92 seroscore.append(secscore)
93 thirdscore=0
94 for Htype in scorelist:
95 if scorelist[Htype]>thirdscore and (Htype.split("__")[0] not in serotype):
96 Third_Choice=Htype
97 thirdscore=scorelist[Htype]
98 if locals().has_key('Third_Choice'):
99 serotype.append(Third_Choice.split("__")[0])
100 seroscore.append(thirdscore)
101 print serotype,seroscore
102 if score>100:
103 print '#',target,'$$$ Most possible H_fliC_type: ',First_Choice,'\n'
104 print '$$$ bit_score:',score,'\n'
105 if locals().has_key('secscore'):
106 if secscore>100:
107 print '#',target,'$$$ Second possible H_fliC_type: ',Sec_Choice,'\n'
108 print '$$$ Second bit_score:',secscore,'\n'
109 if locals().has_key('thirdscore'):
110 if thirdscore>100:
111 print '#',target,'$$$ Third possible H_fliC_type: ',Third_Choice,'\n'
112 print '$$$ Third bit_score:',thirdscore,'\n'
113 else:
114 print '$$$ No fliC in',target
115 else:
116 score=1
117 print '$$$ No fliC (no file created) in',target
118
119
120
121 if os.path.getsize(fljB)>10:
122 os.system(Makebltdb+' -in '+database_fljB+' -out '+database_fljB+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already
123 os.system(Blastxpth+' -query '+fljB+' -db '+database_fljB+'_db '+'-out '+'FljB_Htype_'+target+'.xml '+'-outfmt 5')
124 print target
125 fljB_XML='FljB_Htype_'+target+'.xml'
126 fljB_handle=open(fljB_XML)
127 records=NCBIXML.parse(fljB_handle)
128 fljB_records=list(records)
129 E_thresh=1e-10
130 hspbit=[]
131 alignmentlist=[]
132 for record in fljB_records:
133 for alignment in record.alignments:
134 hsp_bit_score=0
135 startlist=[]#
136 endlist=[]#
137 for hsp in alignment.hsps:
138 start=hsp.query_start#
139 end=hsp.query_end#
140 leng=abs(start-end)#
141 if hsp.expect<E_thresh:#
142 if start>end:#
143 temp=start#
144 start=end#
145 end=start#
146 if len(startlist)==0:#
147 hsp_bit_score=hsp_bit_score+hsp.bits#
148 startlist.append(start)#
149 endlist.append(end)#
150 else:#
151 for i in range(len(startlist)):#
152 if startlist[i]<start<endlist[i]:#
153 start=endlist[i]+1#
154 if startlist[i]<end<endlist[i]:#03112016
155 end=startlist[i]-1#
156 if end<start:#the new hsp was included in old hsp#
157 percentage=0#
158 else:
159 percentage=float(end-start)/leng#
160 startlist.append(start)#
161 endlist.append(end)#
162 hsp_bit_score=hsp_bit_score+percentage*hsp.bits#
163 alignment=alignment.hit_def+':'+str(hsp_bit_score)
164 hspbit.append(hsp_bit_score)
165 alignmentlist.append(alignment)
166 fljB_scorelist=dict(zip(alignmentlist,hspbit))
167
168 fljB_score=0
169 fljB_serotype=[]
170 fljB_seroscore=[]
171 for Htype in fljB_scorelist:
172 if fljB_scorelist[Htype]>fljB_score:
173 fljB_First_Choice=Htype
174 fljB_score=fljB_scorelist[Htype]
175 if locals().has_key('fljB_First_Choice'):
176 fljB_serotype.append(fljB_First_Choice.split("__")[0])
177 fljB_seroscore.append(fljB_score)
178 fljB_secscore=0
179 for Htype in fljB_scorelist:
180 if fljB_scorelist[Htype]>fljB_secscore and (Htype.split("__")[0] not in fljB_serotype):
181 fljB_Sec_Choice=Htype
182 fljB_secscore=fljB_scorelist[Htype]
183 if locals().has_key('fljB_Sec_Choice'):
184 fljB_serotype.append(fljB_Sec_Choice.split("__")[0])
185 fljB_seroscore.append(fljB_secscore)
186 fljB_thirdscore=0
187 for Htype in fljB_scorelist:
188 if fljB_scorelist[Htype]>fljB_thirdscore and (Htype.split("__")[0] not in fljB_serotype):
189 fljB_Third_Choice=Htype
190 fljB_thirdscore=fljB_scorelist[Htype]
191 if locals().has_key('fljB_Third_Choice'):
192 fljB_serotype.append(fljB_Third_Choice.split("__")[0])
193 fljB_seroscore.append(fljB_thirdscore)
194
195 if fljB_score>100:
196 print '#',target,'$$$ Most possible H_fljB_type: ',fljB_First_Choice,'\n'
197 print '$$$ Most bit_score:',fljB_score,'\n'
198 if locals().has_key('fljB_secscore'):
199 if fljB_secscore>100:
200 print '#',target,'$$$ Second possible H_fljB_type: ',fljB_Sec_Choice,'\n'
201 print '$$$ Second bit_score:',fljB_secscore,'\n'
202 if locals().has_key('fljB_thirdscore'):
203 if fljB_thirdscore>100:
204 print '#',target,'$$$ Third possible H_fljB_type: ',fljB_Third_Choice,'\n'
205 print '$$$ Third bit_score:',fljB_thirdscore,'\n'
206 else:
207 print '$$$ No fljB in',target
208 else:
209 fljB_score=1
210 print '$$$ No fljB (no file created) in',target
211
212
213 if score>100 and fljB_score>100:
214 fliC_sero=dict(zip(serotype,seroscore))
215 fljB_sero=dict(zip(fljB_serotype,fljB_seroscore))
216 combination=[]
217 combination_score=[]
218 for seroname in fliC_sero:
219 for fljB_seroname in fljB_sero:
220 for i in range(len(phase1)):
221 if phase1[i]==seroname and phase2[i]==fljB_seroname:
222 name=seroname+"_"+fljB_seroname
223 score=fliC_sero[seroname]+fljB_sero[fljB_seroname]
224 combination.append(name)
225 combination_score.append(score)
226 combinationlist=dict(zip(combination,combination_score)) #we can do the filteration here
227 final_dict=sorted(combinationlist.iteritems(), key=lambda d:d[1], reverse = True)
228 print "$$_H:Order:",final_dict
229 elif score>100 and fljB_score<100:
230 print "$$_H:No fljB, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice
231 elif score<100 and fljB_score>100:
232 print "$$_H:No fliC, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice
233 elif score==1 and fljB_score>100:
234 print "$$_H:No fliC (file) existed, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice
235 elif score==1 and fljB_score<100:
236 print "$$_H:No fliC (file) existed, and no fljB"
237 elif score>100 and fljB_score==1:
238 print "$$_H:No fljB (file) existed, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice
239 elif score<100 and fljB_score==1:
240 print "$$_H:No fljB (file) existed, and no fliC"
241 else:
242 print "$$_H:No fliC and fljB"
243
244
245 '''
246 E_thresh=1e-10
247 hspbit=[]
248 alignmentlist=[]
249 for record in fliC_records:
250 for alignment in record.alignments:
251 hsp_bit_score=0
252 for hsp in alignment.hsps:
253 if hsp.expect<E_thresh:
254 hsp_bit_score=hsp_bit_score+hsp.bits
255 alignment=alignment.hit_def+':'+str(hsp_bit_score)
256 hspbit.append(hsp_bit_score)
257 alignmentlist.append(alignment)
258
259 scorelist=dict(zip(alignmentlist,hspbit))
260 score=0
261 for Htype in scorelist:
262 if scorelist[Htype]>score:
263 First_Choice=Htype
264 score=scorelist[Htype]
265 if score>100:
266 print '#',target,'Most possible H_fliC_type: ',First_Choice,'\n'
267 print '#bit_score:',score,'\n'
268 else:
269 print '#No fliC in',target
270
271
272 E_thresh=1e-10
273 hspbit=[]
274 alignmentlist=[]
275 for record in fljB_records:
276 for alignment in record.alignments:
277 hsp_bit_score=0
278 for hsp in alignment.hsps:
279 if hsp.expect<E_thresh:
280 hsp_bit_score=hsp_bit_score+hsp.bits
281 alignment=alignment.hit_def+':'+str(hsp_bit_score)
282 hspbit.append(hsp_bit_score)
283 alignmentlist.append(alignment)
284
285 scorelist=dict(zip(alignmentlist,hspbit))
286 fljB_score=0
287 for Htype in scorelist:
288 if scorelist[Htype]>fljB_score:
289 First_Choice=Htype
290 fljB_score=scorelist[Htype]
291 if fljB_score>100:
292 print '#',target,'Most possible H_fljB_type: ',First_Choice,'\n'
293 print '#bit_score:',fljB_score,'\n'
294 else:
295 print '#No fljB in',target
296 '''
297