comparison libs/run_auto_All_for_assemblies.py @ 10:53efef402c51 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Wed, 29 Nov 2017 08:34:19 -0500
parents 3d6680af0bec
children
comparison
equal deleted inserted replaced
9:acc5cd06a36a 10:53efef402c51
1 #!/usr/bin/env python 1 #!/usr/bin/env python2.7
2 2
3 3
4 4
5 import os 5 import os
6 from Bio import SeqIO 6 from Bio import SeqIO
49 try: 49 try:
50 os.rename(file1, file2) 50 os.rename(file1, file2)
51 real_file=file2 51 real_file=file2
52 except: 52 except:
53 real_file=file1 53 real_file=file1
54 #print("###The genome name:",file1 54 #print "###The genome name:",file1
55 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015 55 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015
56 os.system('touch result.txt') 56 os.system('touch result.txt')
57 database_path="database"###01/27/2015 57 database_path="database"###01/27/2015
58 os.system('python '+dirpath+'/Otype_determine_analysis.py '+database_path+'/Typhimurium_LT2_gnd_galF.fasta '+real_file+' '+database_path+'/new_Oserotype.fasta >temp_result_'+str(q)+'O.txt') 58 os.system('python2.7 '+dirpath+'/Otype_determine_analysis.py '+database_path+'/Typhimurium_LT2_gnd_galF.fasta '+real_file+' '+database_path+'/new_Oserotype.fasta >temp_result_'+str(q)+'O.txt')
59 os.system('cat temp_result_'+str(q)+'O.txt>>data_log.txt') 59 os.system('cat temp_result_'+str(q)+'O.txt>>data_log.txt')
60 handle=open('temp_result_'+str(q)+'O.txt',"r") 60 handle=open('temp_result_'+str(q)+'O.txt',"r")
61 handle=handle.readlines() 61 handle=handle.readlines()
62 for line in handle: 62 for line in handle:
63 if "$$$ Most" in line and "O_type" in line: 63 if "$$$ Most" in line and "O_type" in line:
79 if O3_10=="+": 79 if O3_10=="+":
80 Otype="3,10" 80 Otype="3,10"
81 elif O1_3_19=="+": 81 elif O1_3_19=="+":
82 Otype="1,3,19" 82 Otype="1,3,19"
83 else: 83 else:
84 print("No_O3,10_O1,3,19_spe_sequences") 84 print "No_O3,10_O1,3,19_spe_sequences"
85 os.system('python '+dirpath+'/H_combination_output_analysis.py '+real_file+' '+database_path+'/H_new_fliC_protein_database.fasta '+database_path+'/H_new_fljB_protein_database.fasta >temp_result_'+str(q)+'H.txt') 85 os.system('python2.7 '+dirpath+'/H_combination_output_analysis.py '+real_file+' '+database_path+'/H_new_fliC_protein_database.fasta '+database_path+'/H_new_fljB_protein_database.fasta >temp_result_'+str(q)+'H.txt')
86 os.system('cat temp_result_'+str(q)+'H.txt>>data_log.txt') 86 os.system('cat temp_result_'+str(q)+'H.txt>>data_log.txt')
87 handle2=open('temp_result_'+str(q)+'H.txt',"r") 87 handle2=open('temp_result_'+str(q)+'H.txt',"r")
88 handle2=handle2.readlines() 88 handle2=handle2.readlines()
89 suspect="no" #for the first choice doesn't hit core sequence 89 suspect="no" #for the first choice doesn't hit core sequence
90 for line in handle2: 90 for line in handle2:
101 fljB=line.split("fljB_type: ")[1].split("_")[0].strip() 101 fljB=line.split("fljB_type: ")[1].split("_")[0].strip()
102 elif "$$$ No" in line and "fljB" in line: 102 elif "$$$ No" in line and "fljB" in line:
103 fljB="-" 103 fljB="-"
104 #print line, 104 #print line,
105 if Otype=="9" and fliC=="g,m" and fljB=="-": 105 if Otype=="9" and fliC=="g,m" and fljB=="-":
106 os.system('python '+dirpath+'/special_gene_test_assemblies.py '+database_path+'/specific_genes.fasta '+real_file+' sdf >temp_result_'+str(q)+'sdf.txt') 106 os.system('python2.7 '+dirpath+'/special_gene_test_assemblies.py '+database_path+'/specific_genes.fasta '+real_file+' sdf >temp_result_'+str(q)+'sdf.txt')
107 os.system('cat temp_result_'+str(q)+'sdf.txt>>data_log.txt') 107 os.system('cat temp_result_'+str(q)+'sdf.txt>>data_log.txt')
108 handle3=open('temp_result_'+str(q)+'sdf.txt',"r") 108 handle3=open('temp_result_'+str(q)+'sdf.txt',"r")
109 sdf="" 109 sdf=""
110 for line in handle3: 110 for line in handle3:
111 if "$$$" in line and "got a hit" in line: 111 if "$$$" in line and "got a hit" in line:
164 star="" 164 star=""
165 star_line="" 165 star_line=""
166 if len(seronames)>1: 166 if len(seronames)>1:
167 star="*" 167 star="*"
168 star_line="The predicted serotypes share the same general formula:\t"+Otype+":"+fliC+":"+fljB+"\n"## 168 star_line="The predicted serotypes share the same general formula:\t"+Otype+":"+fliC+":"+fljB+"\n"##
169 #print("$$$The most possible formula is: (by the order O:H1:H2) ",Otype,":",fliC,":",fljB 169 #print "$$$The most possible formula is: (by the order O:H1:H2) ",Otype,":",fliC,":",fljB
170 #print("$$$The possible serotyes are:",seronames 170 #print "$$$The possible serotyes are:",seronames
171 m=0 171 m=0
172 for y in seronames: 172 for y in seronames:
173 if y in file1: 173 if y in file1:
174 #print("$$$ Is the judgement true? Answer:Yes!" #here we use file1, because we want ":", while file2 turned it to "__" 174 #print "$$$ Is the judgement true? Answer:Yes!" #here we use file1, because we want ":", while file2 turned it to "__"
175 answer="Yes" 175 answer="Yes"
176 m=1 176 m=1
177 if m==0: 177 if m==0:
178 #print("$$$ Is the judgement true? Answer: Need to check the records and file names" 178 #print "$$$ Is the judgement true? Answer: Need to check the records and file names"
179 answer="Not sure" 179 answer="Not sure"
180 print("\n","\n") 180 print "\n","\n"
181 predict_form=Otype+":"+fliC+":"+fljB 181 predict_form=Otype+":"+fliC+":"+fljB
182 predict_sero=(" or ").join(seronames) 182 predict_sero=(" or ").join(seronames)
183 if predict_form=="9:g,m:-":# 183 if predict_form=="9:g,m:-":#
184 predict_form=predict_form+"\nSdf prediction:"+sdf # 184 predict_form=predict_form+"\nSdf prediction:"+sdf #
185 if sdf=="-":# 185 if sdf=="-":#
248 for j in xrange(len(jobs)): 248 for j in xrange(len(jobs)):
249 jobs[j].join() 249 jobs[j].join()
250 txt_names.append(file_names[j+i].replace(' ','_').replace(":","__").replace("[","").replace("]","")+".txt") 250 txt_names.append(file_names[j+i].replace(' ','_').replace(":","__").replace("[","").replace("]","")+".txt")
251 print txt_names 251 print txt_names
252 for j in xrange(len(txt_names)): 252 for j in xrange(len(txt_names)):
253 print(i,"and",j) 253 print i,"and",j
254 print(i+j+1) 254 print i+j+1
255 file=open(txt_names[j],"r") 255 file=open(txt_names[j],"r")
256 handle=list(file) 256 handle=list(file)
257 b=handle[0].split("\t") 257 b=handle[0].split("\t")
258 print(b) 258 print b
259 sheet.write(i+j+1,0,b[0]) 259 sheet.write(i+j+1,0,b[0])
260 sheet.write(i+j+1,1,b[1]) 260 sheet.write(i+j+1,1,b[1])
261 sheet.write(i+j+1,2,b[2]) 261 sheet.write(i+j+1,2,b[2])
262 sheet.write(i+j+1,3,b[3]) 262 sheet.write(i+j+1,3,b[3])
263 sheet.write(i+j+1,4,b[4]) 263 sheet.write(i+j+1,4,b[4])
264 sheet.write(i+j+1,5,b[5]) 264 sheet.write(i+j+1,5,b[5])
265 sheet.write(i+j+1,6,b[6]) 265 sheet.write(i+j+1,6,b[6])
266 sheet.write(i+j+1,7,b[7]) 266 sheet.write(i+j+1,7,b[7])
267 267
268 print("End time,",time.time()) 268 print "End time,",time.time()
269 file3.save("Seqsero_result2.xls") 269 file3.save("Seqsero_result2.xls")
270 ''' 270 '''
271 271
272 272
273 if __name__ == '__main__': 273 if __name__ == '__main__':