Mercurial > repos > charles_s_test > seqsero2
comparison libs/BWA_analysis_O_new_dependent.py @ 0:6895de35a263 draft
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
author | charles_s_test |
---|---|
date | Thu, 19 Oct 2017 18:16:51 -0400 |
parents | |
children | 0d65b71ff8df |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6895de35a263 |
---|---|
1 #!/usr/bin/env python | |
2 #tyr_of_O2_O9.fasta should be in the same directory, in it, O9 should be first then O2 | |
3 | |
4 import os | |
5 from Bio import SeqIO | |
6 import sys | |
7 from Initial_functions import Uniq | |
8 from Bio.Blast import NCBIXML | |
9 | |
10 BwaPath="/nfs/sw/apps/bwa/bwa-0.7.15/bwa" | |
11 SamTlsPth="/nfs/sw/apps/samtools/samtools-1.3.1/bin/samtools" | |
12 Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb" | |
13 Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn" | |
14 | |
15 def BWA_O_analysis(sra_name,additional_file,database,mapping_mode,file_mode): | |
16 if file_mode=="1":#interleaved | |
17 if sra_name[-3:]=="sra": | |
18 os.system("fastq-dump --split-files "+sra_name) | |
19 del_fastq=1 | |
20 for_fq=sra_name.replace(".sra","_1.fastq") | |
21 rev_fq=sra_name.replace(".sra","_2.fastq") | |
22 for_sai=sra_name.replace(".sra","_1.sai") | |
23 rev_sai=sra_name.replace(".sra","_2.sai") | |
24 sam=sra_name.replace(".sra",".sam") | |
25 bam=sra_name.replace(".sra",".bam") | |
26 else: | |
27 del_fastq=0 | |
28 core_id=sra_name.split(".fastq")[0] | |
29 try: | |
30 os.system("gunzip "+sra_name) | |
31 except: | |
32 pass | |
33 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) | |
34 os.system("perl "+dirpath+"/split_interleaved_fastq.pl --input "+core_id+".fastq --output "+core_id.replace(".","_")+".fastq")#######03152016 | |
35 ori_size=os.path.getsize(core_id+".fastq")#######03152016 | |
36 os.system("mv "+core_id.replace(".","_")+"-read1.fastq"+" "+core_id+"-read1.fastq")#######03152016 | |
37 os.system("mv "+core_id.replace(".","_")+"-read2.fastq"+" "+core_id+"-read2.fastq")#######03152016 | |
38 for_fq=core_id+"-read1.fastq"#######03152016 | |
39 rev_fq=core_id+"-read2.fastq"#######03152016 | |
40 if float(os.path.getsize(for_fq))/ori_size<=0.1 or float(os.path.getsize(rev_fq))/ori_size<=0.1:#09092015#12292015#######03152016 | |
41 os.system("echo haha")#09092015 | |
42 os.system("perl "+dirpath+"/splitPairedEndReads.pl "+core_id+".fastq")#09092015 | |
43 os.system("mv "+core_id+".fastq_1 "+for_fq)##09092015 | |
44 os.system("mv "+core_id+".fastq_2 "+rev_fq)##09092015 | |
45 else:#09092015 | |
46 os.system("echo hehe")#09092015 | |
47 for_sai=core_id+"_1.sai" | |
48 rev_sai=core_id+"_2.sai" | |
49 sam=core_id+".sam" | |
50 bam=core_id+".bam" | |
51 elif file_mode=="2":#seperated | |
52 forword_seq=sra_name | |
53 reverse_seq=additional_file | |
54 try: | |
55 os.system("gunzip "+forword_seq) | |
56 except: | |
57 pass | |
58 try: | |
59 os.system("gunzip "+reverse_seq) | |
60 except: | |
61 pass | |
62 for_core_id=forword_seq.split(".fastq")[0] | |
63 re_core_id=reverse_seq.split(".fastq")[0] | |
64 for_fq=for_core_id+".fastq" | |
65 rev_fq=re_core_id+".fastq" | |
66 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))#######03152016 | |
67 print "check fastq id and make them in accordance with each other...please wait..." | |
68 os.system("python "+dirpath+"/compare_and_change_two_fastq_id.py "+for_fq+" "+rev_fq)#######03152016 | |
69 for_sai=for_core_id+".sai" | |
70 rev_sai=re_core_id+".sai" | |
71 sam=for_core_id+".sam" | |
72 bam=sam.replace(".sam",".bam") | |
73 elif file_mode=="3":#single-end | |
74 if sra_name[-3:]=="sra": | |
75 os.system("fastq-dump --split-files "+sra_name)###01/28/2015 | |
76 del_fastq=1 | |
77 for_fq=sra_name.replace(".sra","_1.fastq") | |
78 for_sai=sra_name.replace(".sra","_1.sai") | |
79 sam=sra_name.replace(".sra",".sam") | |
80 bam=sra_name.replace(".sra",".bam") | |
81 else: | |
82 del_fastq=0 | |
83 core_id=sra_name.split(".fastq")[0] | |
84 try: | |
85 os.system("gunzip "+sra_name) | |
86 except: | |
87 pass | |
88 for_fq=core_id+".fastq" | |
89 for_sai=core_id+"_1.sai" | |
90 sam=core_id+".sam" | |
91 bam=core_id+".bam" | |
92 | |
93 os.system(BwaPath+" index "+database) | |
94 if file_mode!="3": | |
95 if mapping_mode=="sam": | |
96 os.system(BwaPath+" aln "+database+" "+for_fq+" > "+for_sai) | |
97 os.system(BwaPath+" aln "+database+" "+rev_fq+" > "+rev_sai) | |
98 os.system(BwaPath+" sampe "+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam) | |
99 elif mapping_mode=="mem": | |
100 os.system(BwaPath+" mem "+database+" "+for_fq+" "+rev_fq+" > "+sam) #2014/12/23 | |
101 else: | |
102 if mapping_mode=="mem": | |
103 os.system(BwaPath+" mem "+database+" "+for_fq+" > "+sam) #2014/12/23 | |
104 elif mapping_mode=="sam": | |
105 os.system(BwaPath+" aln "+database+" "+for_fq+" > "+for_sai) | |
106 os.system(BwaPath+" samse "+database+" "+for_sai+" "+for_fq+" > "+sam) | |
107 os.system(SamTlsPth+" view -F 4 -Sbh "+sam+" > "+bam) | |
108 os.system(SamTlsPth+" view -h -o "+sam+" "+bam) | |
109 | |
110 file=open(sam,"r") | |
111 handle=file.readlines() | |
112 name_list=[] | |
113 for line in handle: | |
114 if len(line)>300: | |
115 name_list.append(line.split("\t")[2]) | |
116 a,b=Uniq(name_list) | |
117 c=dict(zip(a,b)) | |
118 final_O=sorted(c.iteritems(), key=lambda d:d[1], reverse = True) #order from frequency high to low, but tuple while not list | |
119 Sero_list_O=[] | |
120 print "Final_Otype_list:" | |
121 print final_O | |
122 num_1=0#new inserted | |
123 O9_wbav=0 | |
124 O310_wzx=0 | |
125 O946_wzy=0 | |
126 if len(final_O)>0: #new inserted | |
127 for x in final_O:#new inserted | |
128 num_1=num_1+x[1]#new inserted | |
129 if "O-9,46_wbaV" in x[0]: | |
130 O9_wbaV=x[1] | |
131 if "O-3,10_wzx" in x[0]: | |
132 O310_wzx=x[1] | |
133 if "O-9,46_wzy" in x[0]: | |
134 O946_wzy=x[1] | |
135 if "O-3,10_not_in_1,3,19" in x[0]: | |
136 O310_no_1319=x[1] | |
137 if "O-9,46,27_partial_wzy" in x[0]: | |
138 O94627=x[1] | |
139 O_list=[] | |
140 O_choice="" | |
141 | |
142 | |
143 print "$$$Genome:",sra_name | |
144 if len(final_O)==0: | |
145 print "$$$No Otype, due to no hit" | |
146 else: | |
147 if final_O[0][1]<8: | |
148 print "$$$No Otype, due to the hit reads number is small." | |
149 else: | |
150 for x in final_O: | |
151 if x[1]>5: | |
152 O_list.append(x[0]) | |
153 qq=1# | |
154 for x in final_O:# | |
155 if "sdf" in x[0] and x[1]>3:# | |
156 qq=0# | |
157 print "$$$",x[0],"got a hit, reads:",x[1]# | |
158 if qq!=0:# | |
159 print "$$$No sdf exists"# | |
160 | |
161 if "O-9,46_wbaV" in O_list and float(O9_wbaV)/float(num_1) > 0.1: | |
162 if "O-9,46_wzy" in O_list and float(O946_wzy)/float(num_1) > 0.1: | |
163 O_choice="O-9,46" | |
164 print "$$$Most possilble Otype: O-9,46" | |
165 elif "O-9,46,27_partial_wzy" in O_list and float(O94627)/float(num_1) > 0.1: | |
166 O_choice="O-9,46,27" | |
167 print "$$$Most possilble Otype: O-9,46,27" | |
168 else: | |
169 O_choice="O-9" | |
170 if file_mode=="3": | |
171 rev_fq="" | |
172 rev_sai="" | |
173 assembly(sra_name,O_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode) | |
174 else: | |
175 assembly(sra_name,O_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode) | |
176 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1: | |
177 if "O-3,10_not_in_1,3,19" in O_list and float(O310_no_1319)/float(num_1) > 0.1: | |
178 O_choice="O-3,10" | |
179 print "$$$Most possilble Otype: O-3,10" | |
180 else: | |
181 O_choice="O-1,3,19" | |
182 print "$$$Most possilble Otype: O-1,3,19" | |
183 else: | |
184 try: | |
185 O_choice=final_O[0][0].split("_")[0] | |
186 if O_choice=="O-1,3,19": | |
187 O_choice=final_O[1][0].split("_")[0] | |
188 print "$$$Most possilble Otype: ",O_choice | |
189 except: | |
190 print "$$$No suitable Otype, or failure of mapping (please check the quality of raw reads)" | |
191 | |
192 | |
193 def assembly(sra_name,potential_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode): | |
194 database="ParaA_rfb.fasta" | |
195 os.system(BwaPath+" index database/"+database)###01/28/2015 | |
196 if rev_fq=="":#2015/09/09 | |
197 if mapping_mode=="mem": | |
198 os.system(BwaPath+" mem database/"+database+" "+for_fq+" > "+sam) #2014/12/23 | |
199 elif mapping_mode=="sam": | |
200 os.system(BwaPath+" aln database/"+database+" "+for_fq+" > "+for_sai) | |
201 os.system(BwaPath+" samse database/"+database+" "+for_sai+" "+for_fq+" > "+sam) | |
202 else: | |
203 if mapping_mode=="mem": | |
204 os.system(BwaPath+" mem database/"+database+" "+for_fq+" "+rev_fq+" > "+sam) #2014/12/23 | |
205 elif mapping_mode=="sam": | |
206 os.system(BwaPath+" aln database/"+database+" "+for_fq+" > "+for_sai) | |
207 os.system(BwaPath+" aln database/"+database+" "+rev_fq+" > "+rev_sai) | |
208 os.system(BwaPath+" sampe database/"+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam) | |
209 os.system(SamTlsPth+" view -F 4 -Sbh "+sam+" > "+bam) | |
210 os.system(SamTlsPth+" view -h -o "+sam+" "+bam) | |
211 os.system("cat "+sam+"|awk '{if ($5>0) {print $10}}'>"+sam+"_seq.txt") | |
212 os.system("cat "+sam+"|awk '{if ($5>0) {print $1}}'>"+sam+"_title.txt") | |
213 file1=open(sam+"_title.txt","r") | |
214 file2=open(sam+"_seq.txt","r") | |
215 file1=file1.readlines() | |
216 file2=file2.readlines() | |
217 file=open(sam+".fasta","w") | |
218 for i in range(len(file1)): | |
219 title=">"+file1[i] | |
220 seq=file2[i] | |
221 if len(seq)>=50 and len(title)>6:#generally,can be adjusted | |
222 file.write(title) | |
223 file.write(seq) | |
224 file.close() | |
225 database2="tyr_of_O2_O9.fasta" | |
226 os.system(Makebltdb+' -in database/'+database2+' -out '+database2+'_db '+'-dbtype nucl') | |
227 os.system(Blastnpth+" -query "+sam+".fasta"+" -db "+database2+"_db -out "+sam+"_vs_O29.xml -outfmt 5") | |
228 handle=open(sam+"_vs_O29.xml") | |
229 handle=NCBIXML.parse(handle) | |
230 handle=list(handle) | |
231 O9_bigger=0 | |
232 O2_bigger=0 | |
233 for x in handle: | |
234 O9_score=0 | |
235 O2_score=0 | |
236 try: | |
237 if 'O-9' in x.alignments[0].hit_def: | |
238 O9_score=x.alignments[0].hsps[0].bits | |
239 O2_score=x.alignments[1].hsps[0].bits | |
240 elif 'O-2' in x.alignments[0].hit_def: | |
241 O9_score=x.alignments[1].hsps[0].bits | |
242 O2_score=x.alignments[0].hsps[0].bits | |
243 if O9_score>O2_score: | |
244 O9_bigger+=1 | |
245 if O9_score<O2_score: | |
246 O2_bigger+=1 | |
247 except: | |
248 continue | |
249 print "$$$Genome:",sra_name | |
250 if O9_bigger>O2_bigger: | |
251 print "$$$Most possible Otype is O-9" | |
252 elif O9_bigger<O2_bigger: | |
253 print "$$$Most possible Otype is O-2" | |
254 else: | |
255 print "$$$No suitable one, because can't distinct it's O-9 or O-2, but ",potential_choice," has a more possibility." | |
256 print "O-9 number is:",O9_bigger | |
257 print "O-2 number is:",O2_bigger | |
258 | |
259 os.system("rm "+sam+"_title.txt")###01/28/2015 | |
260 os.system("rm "+sam+"_seq.txt")###01/28/2015 | |
261 os.system("rm "+sam+".fasta")###01/28/2015 | |
262 os.system("rm "+database2+"_db.*")###01/28/2015 | |
263 os.system("rm "+sam+"_vs_O29.xml")###01/28/2015 | |
264 | |
265 | |
266 target=sys.argv[1] #should be sra format | |
267 data_base=sys.argv[2] | |
268 mapping_mode=sys.argv[3] | |
269 if sys.argv[4] not in ("1","2","3"): | |
270 additional_file=sys.argv[4] | |
271 file_mode=sys.argv[5] | |
272 else: | |
273 additional_file="" | |
274 file_mode=sys.argv[4] | |
275 | |
276 BWA_O_analysis(target,additional_file,data_base,mapping_mode,file_mode) |