comparison libs/BWA_analysis_O_new_dependent.py @ 7:3d6680af0bec draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 16:30:27 -0500
parents 38ad1130d077
children 53efef402c51
comparison
equal deleted inserted replaced
6:b6281a377a18 7:3d6680af0bec
62 for_core_id=forword_seq.split(".fastq")[0] 62 for_core_id=forword_seq.split(".fastq")[0]
63 re_core_id=reverse_seq.split(".fastq")[0] 63 re_core_id=reverse_seq.split(".fastq")[0]
64 for_fq=for_core_id+".fastq" 64 for_fq=for_core_id+".fastq"
65 rev_fq=re_core_id+".fastq" 65 rev_fq=re_core_id+".fastq"
66 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))#######03152016 66 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))#######03152016
67 print "check fastq id and make them in accordance with each other...please wait..." 67 print("check fastq id and make them in accordance with each other...please wait...")
68 os.system("python "+dirpath+"/compare_and_change_two_fastq_id.py "+for_fq+" "+rev_fq)#######03152016 68 os.system("python "+dirpath+"/compare_and_change_two_fastq_id.py "+for_fq+" "+rev_fq)#######03152016
69 for_sai=for_core_id+".sai" 69 for_sai=for_core_id+".sai"
70 rev_sai=re_core_id+".sai" 70 rev_sai=re_core_id+".sai"
71 sam=for_core_id+".sam" 71 sam=for_core_id+".sam"
72 bam=sam.replace(".sam",".bam") 72 bam=sam.replace(".sam",".bam")
115 name_list.append(line.split("\t")[2]) 115 name_list.append(line.split("\t")[2])
116 a,b=Uniq(name_list) 116 a,b=Uniq(name_list)
117 c=dict(zip(a,b)) 117 c=dict(zip(a,b))
118 final_O=sorted(c.iteritems(), key=lambda d:d[1], reverse = True) #order from frequency high to low, but tuple while not list 118 final_O=sorted(c.iteritems(), key=lambda d:d[1], reverse = True) #order from frequency high to low, but tuple while not list
119 Sero_list_O=[] 119 Sero_list_O=[]
120 print "Final_Otype_list:" 120 print("Final_Otype_list:")
121 print final_O 121 print(final_O)
122 num_1=0#new inserted 122 num_1=0#new inserted
123 O9_wbav=0 123 O9_wbav=0
124 O310_wzx=0 124 O310_wzx=0
125 O946_wzy=0 125 O946_wzy=0
126 if len(final_O)>0: #new inserted 126 if len(final_O)>0: #new inserted
138 O94627=x[1] 138 O94627=x[1]
139 O_list=[] 139 O_list=[]
140 O_choice="" 140 O_choice=""
141 141
142 142
143 print "$$$Genome:",sra_name 143 print("$$$Genome:",sra_name)
144 if len(final_O)==0: 144 if len(final_O)==0:
145 print "$$$No Otype, due to no hit" 145 print("$$$No Otype, due to no hit")
146 else: 146 else:
147 if final_O[0][1]<8: 147 if final_O[0][1]<8:
148 print "$$$No Otype, due to the hit reads number is small." 148 print("$$$No Otype, due to the hit reads number is small.")
149 else: 149 else:
150 for x in final_O: 150 for x in final_O:
151 if x[1]>5: 151 if x[1]>5:
152 O_list.append(x[0]) 152 O_list.append(x[0])
153 qq=1# 153 qq=1#
154 for x in final_O:# 154 for x in final_O:#
155 if "sdf" in x[0] and x[1]>3:# 155 if "sdf" in x[0] and x[1]>3:#
156 qq=0# 156 qq=0#
157 print "$$$",x[0],"got a hit, reads:",x[1]# 157 print("$$$",x[0],"got a hit, reads:",x[1])#
158 if qq!=0:# 158 if qq!=0:#
159 print "$$$No sdf exists"# 159 print("$$$No sdf exists")#
160 160
161 if "O-9,46_wbaV" in O_list and float(O9_wbaV)/float(num_1) > 0.1: 161 if "O-9,46_wbaV" in O_list and float(O9_wbaV)/float(num_1) > 0.1:
162 if "O-9,46_wzy" in O_list and float(O946_wzy)/float(num_1) > 0.1: 162 if "O-9,46_wzy" in O_list and float(O946_wzy)/float(num_1) > 0.1:
163 O_choice="O-9,46" 163 O_choice="O-9,46"
164 print "$$$Most possilble Otype: O-9,46" 164 print("$$$Most possilble Otype: O-9,46")
165 elif "O-9,46,27_partial_wzy" in O_list and float(O94627)/float(num_1) > 0.1: 165 elif "O-9,46,27_partial_wzy" in O_list and float(O94627)/float(num_1) > 0.1:
166 O_choice="O-9,46,27" 166 O_choice="O-9,46,27"
167 print "$$$Most possilble Otype: O-9,46,27" 167 print("$$$Most possilble Otype: O-9,46,27")
168 else: 168 else:
169 O_choice="O-9" 169 O_choice="O-9"
170 if file_mode=="3": 170 if file_mode=="3":
171 rev_fq="" 171 rev_fq=""
172 rev_sai="" 172 rev_sai=""
174 else: 174 else:
175 assembly(sra_name,O_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode) 175 assembly(sra_name,O_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode)
176 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1: 176 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1:
177 if "O-3,10_not_in_1,3,19" in O_list and float(O310_no_1319)/float(num_1) > 0.1: 177 if "O-3,10_not_in_1,3,19" in O_list and float(O310_no_1319)/float(num_1) > 0.1:
178 O_choice="O-3,10" 178 O_choice="O-3,10"
179 print "$$$Most possilble Otype: O-3,10" 179 print("$$$Most possilble Otype: O-3,10")
180 else: 180 else:
181 O_choice="O-1,3,19" 181 O_choice="O-1,3,19"
182 print "$$$Most possilble Otype: O-1,3,19" 182 print("$$$Most possilble Otype: O-1,3,19")
183 else: 183 else:
184 try: 184 try:
185 O_choice=final_O[0][0].split("_")[0] 185 O_choice=final_O[0][0].split("_")[0]
186 if O_choice=="O-1,3,19": 186 if O_choice=="O-1,3,19":
187 O_choice=final_O[1][0].split("_")[0] 187 O_choice=final_O[1][0].split("_")[0]
188 print "$$$Most possilble Otype: ",O_choice 188 print("$$$Most possilble Otype: ",O_choice)
189 except: 189 except:
190 print "$$$No suitable Otype, or failure of mapping (please check the quality of raw reads)" 190 print("$$$No suitable Otype, or failure of mapping (please check the quality of raw reads)")
191 191
192 192
193 def assembly(sra_name,potential_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode): 193 def assembly(sra_name,potential_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode):
194 database="ParaA_rfb.fasta" 194 database="ParaA_rfb.fasta"
195 os.system(BwaPath+" index database/"+database)###01/28/2015 195 os.system(BwaPath+" index database/"+database)###01/28/2015
244 O9_bigger+=1 244 O9_bigger+=1
245 if O9_score<O2_score: 245 if O9_score<O2_score:
246 O2_bigger+=1 246 O2_bigger+=1
247 except: 247 except:
248 continue 248 continue
249 print "$$$Genome:",sra_name 249 print("$$$Genome:",sra_name)
250 if O9_bigger>O2_bigger: 250 if O9_bigger>O2_bigger:
251 print "$$$Most possible Otype is O-9" 251 print("$$$Most possible Otype is O-9")
252 elif O9_bigger<O2_bigger: 252 elif O9_bigger<O2_bigger:
253 print "$$$Most possible Otype is O-2" 253 print("$$$Most possible Otype is O-2")
254 else: 254 else:
255 print "$$$No suitable one, because can't distinct it's O-9 or O-2, but ",potential_choice," has a more possibility." 255 print("$$$No suitable one, because can't distinct it's O-9 or O-2, but ",potential_choice," has a more possibility.")
256 print "O-9 number is:",O9_bigger 256 print("O-9 number is:",O9_bigger)
257 print "O-2 number is:",O2_bigger 257 print("O-2 number is:",O2_bigger)
258 258
259 os.system("rm "+sam+"_title.txt")###01/28/2015 259 os.system("rm "+sam+"_title.txt")###01/28/2015
260 os.system("rm "+sam+"_seq.txt")###01/28/2015 260 os.system("rm "+sam+"_seq.txt")###01/28/2015
261 os.system("rm "+sam+".fasta")###01/28/2015 261 os.system("rm "+sam+".fasta")###01/28/2015
262 os.system("rm "+database2+"_db.*")###01/28/2015 262 os.system("rm "+database2+"_db.*")###01/28/2015