annotate SeqSero/libs/run_auto_All_for_assemblies.py @ 0:b0e627ec0dda draft

Uploaded
author estrain
date Wed, 06 Dec 2017 15:17:29 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
1 #!/usr/bin/env python
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
2
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
3
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
4
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
5 import os
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
6 from Bio import SeqIO
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
7 import sys
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
8 import itertools
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
9 from Initial_Conditions import phase1
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
10 from Initial_Conditions import phase2
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
11 from Initial_Conditions import phaseO
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
12 from Initial_Conditions import sero
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
13 import time
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
14 import multiprocessing
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
15 import string
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
16
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
# Number of assemblies main() starts per batch.  It used to be read from the
# command line (m=string.atoi(sys.argv[1])); it is pinned to 1 for now because
# a single core is enough for this job.
m = 1
# Path of the input file, taken from the first command-line argument.
file_name = sys.argv[1]
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
20
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
def Combine(b, c):
    """Expand a formula with optional antigens into every concrete variant.

    Args:
        b: list of optional antigen factors (brackets already removed).
        c: list of antigen factors that are always present.

    Returns:
        A list of comma-joined strings.  The first entry is ``c`` joined in
        its given order (not sorted).  It is followed by one entry for every
        non-empty subset of ``b`` (smallest subsets first, in
        ``itertools.combinations`` order); each of those entries holds the
        factors of ``c`` plus the chosen subset, sorted alphabetically.

    Example:
        ``Combine(["g"], ["f", "t"])`` returns ``["f,t", "f,g,t"]``.
    """
    combinations = [",".join(c)]
    for size in range(1, len(b) + 1):
        for chosen in itertools.combinations(b, size):
            # Join first and split again so that the sort always works on
            # single factors, even if an element itself contains a comma.
            factors = ",".join(list(c) + list(chosen)).split(",")
            combinations.append(",".join(sorted(factors)))
    return combinations
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
39
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
40
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
def _run_and_log(args, out_path):
    """Run ``args`` with stdout captured in ``out_path``; return the output lines.

    The command is started without a shell, so every element of ``args``
    (including the input file name) reaches the child as one argument
    whatever characters it contains.  The captured output is also appended
    to ``data_log.txt``.
    """
    import subprocess

    with open(out_path, "w") as out:
        try:
            subprocess.call(args, stdout=out)
        except OSError as err:
            # The program could not be started; continue with empty output.
            sys.stderr.write("Could not run %s: %s\n" % (args[0], err))
    with open(out_path, "r") as captured:
        lines = captured.readlines()
    with open("data_log.txt", "a") as log:
        log.writelines(lines)
    return lines


def _remove_matching(patterns):
    """Delete every file in the working directory matching the glob patterns."""
    import glob

    for pattern in patterns:
        for path in glob.glob(pattern):
            try:
                os.remove(path)
            except OSError as err:
                sys.stderr.write("Could not remove %s: %s\n" % (path, err))


def _parse_o_output(lines):
    """Return ``(Otype, oafA)`` read from the O-antigen script output lines."""
    Otype = "?"
    oafA = ""
    O3_10 = ""
    O1_3_19 = ""
    for line in lines:
        if "$$$ Most" in line and "O_type" in line:
            Otype = line.split("O-")[1].split("_")[0].split(" ")[0]
            Otype = Otype.replace("\n", "").strip()
        elif "$$$ No" in line:
            Otype = "-"
            if "O-9" in line:
                Otype = "9"
        elif "$$$O5-" in line:
            oafA = "-"
        elif "$$$O3,10 more possible" in line:
            O3_10 = "+"
        elif "$$$O1,3,19 more possible" in line:
            O1_3_19 = "+"
    # Decide between O3,10 and O1,3,19 before the formula is assembled.
    if Otype == "1,3,19" or Otype == "3,10":
        if O3_10 == "+":
            Otype = "3,10"
        elif O1_3_19 == "+":
            Otype = "1,3,19"
        else:
            print("No_O3,10_O1,3,19_spe_sequences")
    return Otype, oafA


def _parse_h_output(lines):
    """Return ``(fliC, fljB)`` read from the H-antigen script output lines."""
    fliC = "?"
    fljB = "?"
    for line in lines:
        if "$$$ Most" in line and "fliC" in line:
            fliC = line.split("fliC_type: ")[1].split("_")[0].strip()
            if fliC == "g,m,p,s":
                fliC = "g,m,s"
        elif "$$$ No" in line and "fliC" in line:
            fliC = "-"
        elif "$$$ Most" in line and "fljB" in line:
            fljB = line.split("fljB_type: ")[1].split("_")[0].strip()
        elif "$$$ No" in line and "fljB" in line:
            fljB = "-"
    return fliC, fljB


def _expand_formula(formula):
    """Return every antigen string one scheme entry can stand for.

    An entry without brackets stands for itself.  An entry that is a single
    bracketed group, such as ``[1,5]``, stands for its content or for ``-``.
    Otherwise every comma-separated part containing ``[`` is optional and
    Combine() lists the variants, e.g. ``f,[g],t`` gives ``f,t`` and
    ``f,g,t``.
    """
    if formula.count("[") == 0:
        return [formula]
    if formula[0] == "[" and formula[-1] == "]" and formula.count("[") == 1:
        return [formula.replace("[", "").replace("]", ""), "-"]
    optional = []
    required = []
    for part in formula.split(","):
        if "[" in part:
            optional.append(part.replace("[", "").replace("]", ""))
        else:
            required.append(part)
    return Combine(optional, required)


def _sorted_factors(antigen):
    """Return the comma-separated factors of ``antigen`` in sorted order."""
    return ",".join(sorted(antigen.split(",")))


def Test(file1, z, q):
    """Predict the serotype of one input file and write ``Seqsero_result.txt``.

    The helper scripts located next to this file are run on the input, their
    ``$$$`` report lines are parsed into an O:H1:H2 formula, the formula is
    looked up in the phaseO/phase1/phase2/sero tables and the report is
    written to ``Seqsero_result.txt`` in the working directory.

    Side effects in the working directory: the input is renamed when its
    name contains spaces, ``:``, ``[`` or ``]``; helper output is appended
    to ``data_log.txt``; at the end ``temp_result_<q>*.txt``, ``result.txt``
    and every ``*.fasta``, ``*.xml`` and ``*.fa`` file are deleted.

    Args:
        file1: path of the input file.
        z: unused; kept so existing callers keep working.
        q: value used in the names of the temporary result files.
    """
    file2 = file1.replace(' ', '_').replace(":", "__").replace("[", "").replace("]", "")
    try:
        os.rename(file1, file2)
        real_file = file2
    except OSError:
        # Renaming failed; work on the file under its original name.
        real_file = file1
    dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    # Equivalent of "touch result.txt": create it if missing, refresh its time.
    with open("result.txt", "a"):
        os.utime("result.txt", None)
    database_path = "database"
    prefix = 'temp_result_' + str(q)

    o_lines = _run_and_log(
        ['python', dirpath + '/Otype_determine_analysis.py',
         database_path + '/Typhimurium_LT2_gnd_galF.fasta', real_file,
         database_path + '/new_Oserotype.fasta'],
        prefix + 'O.txt')
    Otype, oafA = _parse_o_output(o_lines)

    h_lines = _run_and_log(
        ['python', dirpath + '/H_combination_output_analysis.py', real_file,
         database_path + '/H_new_fliC_protein_database.fasta',
         database_path + '/H_new_fljB_protein_database.fasta'],
        prefix + 'H.txt')
    fliC, fljB = _parse_h_output(h_lines)

    # sdf is only looked up (and only reported) for the 9:g,m:- profile.
    sdf = ""
    if Otype == "9" and fliC == "g,m" and fljB == "-":
        sdf_lines = _run_and_log(
            ['python', dirpath + '/special_gene_test_assemblies.py',
             database_path + '/specific_genes.fasta', real_file, 'sdf'],
            prefix + 'sdf.txt')
        sdf = "-"
        for line in sdf_lines:
            if "$$$" in line and "got a hit" in line:
                sdf = "+"

    # Some table entries (e.g. r,[i]) are not in alphabetical order, so both
    # the sorted and the reported form of each H antigen are accepted.
    new_fliC = _sorted_factors(fliC)
    new_fljB = _sorted_factors(fljB)
    seronames = []
    for i in range(len(phase1)):
        if phaseO[i] != Otype:
            continue
        fliC_combine = _expand_formula(phase1[i])
        fljB_combine = _expand_formula(phase2[i])
        if (new_fliC in fliC_combine or fliC in fliC_combine) and (new_fljB in fljB_combine or fljB in fljB_combine):
            seronames.append(sero[i])
    if len(seronames) == 0:
        seronames = ["N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)"]

    star = ""
    star_line = ""
    if len(seronames) > 1:
        star = "*"
        star_line = "The predicted serotypes share the same general formula:\t" + Otype + ":" + fliC + ":" + fljB + "\n"

    # Same bytes as the Python 2 statement: print "\n","\n"
    sys.stdout.write("\n\n\n")

    predict_form = Otype + ":" + fliC + ":" + fljB
    predict_sero = (" or ").join(seronames)
    if predict_form == "9:g,m:-":
        predict_form = predict_form + "\nSdf prediction:" + sdf
        if sdf == "-":
            star = "*"
            star_line = "Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n"
            predict_sero = "See comments below"
    elif predict_form == "4:i:-":
        predict_sero = "potential monophasic variant of Typhimurium"
    elif predict_form == "4:r:-":
        predict_sero = "potential monophasic variant of Heidelberg"
    elif predict_form == "4:b:-":
        predict_sero = "potential monophasic variant of Paratyphi B"
    elif predict_form == "8:e,h:1,2":
        predict_sero = "Newport"
        star = "*"
        star_line = "Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare."

    claim = "The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes."
    if "N/A" in predict_sero:
        claim = ""
    if "Typhimurium" in predict_sero and oafA == "-":
        predict_sero = predict_sero.strip() + "(O5-)"
        star = "*"
        star_line = "Detected the deletion of O5-."

    with open("Seqsero_result.txt", "w") as new_file:
        new_file.write("Input files:\t" + file2 + "\n" + "O antigen prediction:\t" + "O-" + Otype + "\n" + "H1 antigen prediction(fliC):\t" + fliC + "\n" + "H2 antigen prediction(fljB):\t" + fljB + "\n" + "Predicted antigenic profile:\t" + predict_form + "\n" + "Predicted serotype(s):\t" + predict_sero + star + "\n" + star + star_line + claim + "\n")

    # Clean-up of the working directory, as before; note that the last
    # patterns also match the input file when it sits in this directory.
    _remove_matching([prefix + "*.txt"])
    _remove_matching(["result.txt"])
    _remove_matching(["*.fasta", "*.xml", "*.fa"])
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
218
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
219
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
def main():
    """Run Test() on the command-line input, ``m`` inputs per batch.

    A name ending in .fasta, .fna, .fa or .fsa is passed on unchanged.  For
    a name ending in .fastq or .fastq.gz only its first eight characters
    are passed on (NOTE(review): presumably a sample identifier - confirm).
    Any other name is ignored.  Each selected input is handled by Test() in
    its own process; a batch is waited for before the next one starts.
    """
    file_names = []
    fastq_names = []
    for file1 in [file_name]:
        if file1.endswith(('.fasta', '.fna', '.fa', '.fsa')):
            file_names.append(file1)
        if file1.endswith((".fastq.gz", ".fastq")):
            fastq_names.append(file1[:8])
    # De-duplicate the read-set names before appending them.
    file_names = file_names + list(set(fastq_names))

    for i in range(0, len(file_names), m):
        jobs = []
        # The last batch may hold fewer than m inputs.
        for index in range(i, min(i + m, len(file_names))):
            p = multiprocessing.Process(target=Test, args=(file_names[index], index + 1, index,))
            jobs.append(p)
            p.start()
        # Wait for the batch so that at most m processes run at a time.
        for p in jobs:
            p.join()
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
271
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
272
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
    main()
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
275
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
276
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
277
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
278
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
279
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
280
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
281
b0e627ec0dda Uploaded
estrain
parents:
diff changeset
282