annotate libs/mapping_and_assembly_hybrid.py @ 3:62f8b9e226c2 draft default tip

planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
author charles_s_test
date Thu, 21 Dec 2017 11:06:32 -0500
parents 343e38c6798f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
1 import os,sys,glob,time,itertools,subprocess
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
2 from Initial_Conditions import phase1
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
3 from Initial_Conditions import phase2
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
4 from Initial_Conditions import phaseO
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
5 from Initial_Conditions import sero
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
6 from distutils.version import LooseVersion
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
7
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
8
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
9
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
10
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
def xml_parse_score_comparision_seqsero(xmlfile):
    """Parse a BLAST XML report and rank every query/hit pair by bit score.

    For each alignment of each BLAST record, the bit scores of its HSPs are
    summed, and the HSP identities (each divided by the record's query
    length) are summed alongside.  Queries whose name contains "last" or
    "first" count every HSP; for all other queries only HSPs with an
    alignment length of at least 30 are counted.

    Args:
        xmlfile: path of the BLAST XML output file.

    Returns:
        A list of ``(name, score, identity_fraction)`` tuples sorted by
        ``score`` in descending order, where ``name`` is
        ``<stripped query>___<hit_def>``.
    """
    # Imported here (as in the original) so Biopython is only needed when
    # this function is actually called.
    from Bio.Blast import NCBIXML

    # Read all records while the file is open; the context manager then
    # guarantees the handle is closed (the original never closed it).
    with open(xmlfile) as xml_handle:
        records = list(NCBIXML.parse(xml_handle))

    names = []
    scores = []
    identity_fractions = []
    for record in records:
        # Head/tail probe queries are short, so none of their HSPs are
        # filtered by length.
        is_head_or_tail = "last" in record.query or "first" in record.query
        for alignment in record.alignments:
            score = 0
            ids = 0
            for hsp in alignment.hsps:
                # For the long alleles, filter out short (noisy) HSPs.
                if is_head_or_tail or hsp.align_length >= 30:
                    score += hsp.bits
                    ids += float(hsp.identities) / record.query_length
            names.append(record.query.strip() + "___" + alignment.hit_def)
            scores.append(score)
            identity_fractions.append(ids)

    return sorted(
        zip(names, scores, identity_fractions),
        key=lambda d: d[1],
        reverse=True,
    )
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
40
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
41
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
def Uniq(L, sort_on_fre="none"):
    """Return the unique values of ``L`` and how often each one occurs.

    Args:
        L: a list of mutually comparable items.  It is no longer sorted in
            place; a sorted copy is used instead.
        sort_on_fre: when left at ``"none"`` the unique values are returned
            in ascending value order.  Any other value orders the result by
            ascending count (ties broken by value).

    Returns:
        ``(values, counts)``.  Both are lists in the default mode and tuples
        when sorting by frequency, matching what the original produced on
        Python 2.  Empty input yields two empty lists in either mode.
    """
    values = []
    count = []
    # After sorting, equal items are adjacent, so one pass both removes
    # duplicates and counts them (the original was quadratic).
    for item in sorted(L):
        if values and values[-1] == item:
            count[-1] += 1
        else:
            values.append(item)
            count.append(1)

    if sort_on_fre != "none" and values:
        # Materialise the pairs explicitly: indexing a bare zip() only works
        # on Python 2, and unpacking an empty zip raises IndexError.
        ranked = sorted(zip(count, values))
        count = tuple(pair[0] for pair in ranked)
        values = tuple(pair[1] for pair in ranked)
    return (values, count)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
58
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
59
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
def judge_fliC_or_fljB_from_head_tail_for_one_contig(nodes_vs_score_list):
    """Guess whether one contig is fliC or fljB from its head/tail hits.

    Each entry is indexed as ``entry[0]`` (hit name) and ``entry[1]``
    (score).  Scores of entries whose name contains "fliC" are added to one
    total; otherwise, scores of entries whose name contains "fljB" are added
    to the other.  Entries matching neither are ignored.

    Returns:
        ``(role, difference)`` where ``role`` is "fliC" when its total is at
        least the fljB total (ties and empty input give "fliC"), else
        "fljB", and ``difference`` is the absolute gap between the two
        totals.  A small gap means the call is unreliable.
    """
    fliC_total = 0
    fljB_total = 0
    for entry in nodes_vs_score_list:
        hit_name = entry[0]
        if "fliC" in hit_name:
            fliC_total += entry[1]
        elif "fljB" in hit_name:
            fljB_total += entry[1]

    role = "fliC" if fliC_total >= fljB_total else "fljB"
    return (role, abs(fliC_total - fljB_total))
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
76
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
def judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node_name, Final_list_passed):
    """Take a contig's role from its first hit in ``Final_list_passed``.

    Used when the head/tail score difference is too small to trust, or when
    no head/tail hit exists for the contig.

    Args:
        node_name: contig name searched for as a substring of each hit name.
        Final_list_passed: sequence of hits whose first element is the hit
            name; the first entry containing ``node_name`` wins.

    Returns:
        The part of the winning hit name before its first "_", or "" when no
        hit name contains ``node_name``.
    """
    first_hit = next(
        (hit[0] for hit in Final_list_passed if node_name in hit[0]),
        None,
    )
    if first_hit is None:
        return ""
    return first_hit.split("_")[0]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
86
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
87
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
def fliC_or_fljB_judge_from_head_tail_sequence(nodes_list, tail_head_list, Final_list_passed):
    """Assign a fliC/fljB role to every contig in ``nodes_list``.

    Args:
        nodes_list: unique contig names.
        tail_head_list: head/tail hits; an entry belongs to a contig when
            the contig name is a substring of the entry's first element.
        Final_list_passed: filtered whole-contig hits, used as the fallback
            ranking.

    Returns:
        A list of ``(role, contig_name)`` tuples in ``nodes_list`` order.

    The original handled 4, 3, 2 and 1 matching head/tail hits in four
    separate branches that all ran the same two steps, so they are merged
    here.  Any other number of hits (none, or more than four) goes straight
    to the whole-contig ranking, as before.
    """
    role_list = []
    for x in nodes_list:
        a = [y for y in tail_head_list if x in y[0]]
        if 1 <= len(a) <= 4:
            role, diff = judge_fliC_or_fljB_from_head_tail_for_one_contig(a)
            if diff < 20:
                # Head/tail evidence is too close to call; trust the
                # whole-contig BLAST ranking instead.
                role = judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(x, Final_list_passed)
        else:
            # No usable head/tail evidence: use the best whole-contig match.
            role = judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(x, Final_list_passed)
        role_list.append((role, x))

    # Most common error: with exactly two contigs, both were assigned to the
    # same phase.  Redo both calls from the whole-contig ranking only.
    if len(role_list) == 2 and role_list[0][0] == role_list[1][0]:
        role_list = [
            (judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(x, Final_list_passed), x)
            for x in nodes_list
        ]
    return role_list
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
178
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
def decide_contig_roles_for_H_antigen(Final_list):
    """Decide which contig is fliC and which one is fljB.

    Args:
        Final_list: hits indexed as ``hit[0]`` (name) and ``hit[1]``
            (score).  The name is parsed by splitting on "_cov_", "__" and
            "___", so it must contain those separators with numeric fields
            after them.

    Returns:
        Whatever ``fliC_or_fljB_judge_from_head_tail_sequence`` returns for
        the unique contig names, the head/tail hits and the filtered hits.
    """
    def _is_reliable(hit):
        # Keep a hit when the number after "_cov_" is at least 3.5 and its
        # score reaches either of the two integers embedded in the name.
        label, hit_score = hit[0], hit[1]
        if not float(label.split("_cov_")[1]) >= 3.5:
            return False
        if hit_score >= int(label.split("__")[1]):
            return True
        return hit_score >= int(label.split("___")[1].split("_")[3])

    Final_list_passed = [hit for hit in Final_list if _is_reliable(hit)]

    # Contig names of the whole-gene "fl*" hits (head/tail probes excluded).
    nodes = [
        hit[0].split("___")[1].strip()
        for hit in Final_list_passed
        if hit[0].startswith("fl") and "last" not in hit[0] and "first" not in hit[0]
    ]
    node_names, _node_counts = Uniq(nodes)

    # Head/tail hits are taken from the unfiltered list.
    tail_head_list = [
        hit for hit in Final_list if "last" in hit[0] or "first" in hit[0]
    ]
    return fliC_or_fljB_judge_from_head_tail_sequence(node_names, tail_head_list, Final_list_passed)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
192
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
def Combine(b, c):
    """List ``c`` on its own plus ``c`` merged with every non-empty subset of ``b``.

    Args:
        b: optional items; every combination of 1..len(b) of them is used,
            in ``itertools.combinations`` order, smallest size first.
        c: items present in every result.

    Returns:
        A list of comma-joined strings.  The first is ``c`` joined in its
        given order.  Each later one is ``c`` plus one subset of ``b``,
        re-split on "," and sorted before being joined again.
    """
    fixed = list(c)
    results = [",".join(fixed)]
    for size in range(1, len(b) + 1):
        for subset in itertools.combinations(b, size):
            merged = ",".join(fixed + [",".join(subset)])
            # Split again so items that themselves contain commas are
            # sorted piece by piece, exactly as before.
            results.append(",".join(sorted(merged.split(","))))
    return results
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
211
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
def decide_O_type_and_get_special_genes(Final_list):
    """Pick the O antigen call from scored hits and collect the special genes.

    Final_list -- sequence of hit records, indexed as follows:
        record[0]  name string. The code parses it as
                   "<allele>__<allele length>___<contig name>", where the
                   contig name carries "length_<n>" and "_cov_<value>"
                   fields and its 4th "_"-separated token is an integer
                   (presumably SPAdes-style "NODE_<i>_length_<n>_cov_<x>";
                   confirm against the caller).
        record[1]  numeric score of the hit
        record[2]  numeric factor, used here only to scale the allele
                   length in the merged-contig test

    Returns a 4-tuple (O_choice, O_nodes_list, special_genes, final_O):
        O_choice       "-" when no O hit survives filtering, otherwise the
                       chosen O antigen label ("?" is the initial value and
                       remains only if no branch assigns a label)
        O_nodes_list   text after "___" of every entry of final_O whose name
                       does not contain "O-1,3,19_not_in_3,10__130"
        special_genes  filtered hits whose name starts with neither "O-"
                       nor "fl"
        final_O        the O hits the decision was based on

    Side effect: writes diagnostic lines to stdout when a contig looks long
    enough to carry a second gene.
    """
    O_choice="?"
    O_nodes_list=[]
    special_genes=[]
    # Keep a hit only if the "_cov_" value in its name is at least 3.5 and
    # its score reaches either the allele length or the contig length.
    Final_list_passed=[
        x for x in Final_list
        if float(x[0].split("_cov_")[1])>=3.5
        and (x[1]>=int(x[0].split("__")[1])
             or x[1]>=int(x[0].split("___")[1].split("_")[3]))
    ]
    nodes=[]
    for x in Final_list_passed:
        if x[0].startswith("O-"):
            nodes.append(x[0].split("___")[1].strip())
        elif not x[0].startswith("fl"):
            special_genes.append(x)
    # Uniq is defined elsewhere in this file; only its first return value
    # (used below as the list of contig names) is needed here.
    contigs,_unused=Uniq(nodes)
    final_O=[]
    for contig in contigs:
        # Take the first passing O hit that names this contig.
        for y in Final_list_passed:
            if contig in y[0] and y[0].startswith("O-"):
                final_O.append(y)
                break
    ### O contig has the problem of two genes on same contig, so do additional test
    potential_new_gene=""
    for x in final_O:
        # Do not consider O-1,3,19_not_in_3,10: too short compared with others.
        if "O-1,3,19_not_in_3,10" in x[0]:
            continue
        allele_length=int(x[0].split("__")[1].split("___")[0])
        contig_length=int(x[0].split("length_")[1].split("_")[0])
        # Gene length << contig length; the original author allows 300*2 as
        # flank region and uses 400*2 for safety.
        if allele_length+800>contig_length:
            continue
        contig_name=x[0].split("___")[1].strip()
        # Single-argument print(...) emits the same text under the Python 2
        # print statement this file uses and also parses on Python 3.
        print(contig_name)
        # The contig may hold a second gene: search the unfiltered hits.
        for y in Final_list:
            if contig_name in y[0] and y not in final_O:
                other_length=int(y[0].split("__")[1].split("___")[0])
                # Relatively strict filter; a hit that passes is treated as
                # a merge event and is added to final_O below.
                if y[1]>=other_length*1.5 or (y[1]>=other_length*y[2] and y[1]>=400):
                    potential_new_gene=y
                    print(potential_new_gene)
                    break
    if potential_new_gene!="":
        print("two differnt genes in same contig, fix it for O antigen")
        final_O.append(potential_new_gene)
    ### end of the two genes on same contig test
    if not final_O:
        # No O type, due to no hit.
        return "-",O_nodes_list,special_genes,final_O
    O_list=[]
    for x in final_O:
        O_list.append(x[0].split("__")[0])
        # O-1,3,19_not_in_3,10 is too small, which may affect further analysis.
        if "O-1,3,19_not_in_3,10__130" not in x[0]:
            O_nodes_list.append(x[0].split("___")[1])
    ### special test for O9,46 and O3,10 family
    if "O-9,46_wbaV" in O_list:
        if "O-9,46_wzy" in O_list:
            O_choice="O-9,46"
        elif "O-9,46,27_partial_wzy" in O_list:
            O_choice="O-9,46,27"
        else:
            # Decide between O-9 and O-2 from the tyr marker scores; O-9 is
            # kept when the scores are equal or O-9 scores higher.
            O_choice="O-9"
            O2=0
            O9=0
            for z in special_genes:
                if "tyr-O-9" in z[0]:
                    O9=z[1]
                elif "tyr-O-2" in z[0]:
                    O2=z[1]
            if O2>O9:
                O_choice="O-2"
    elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list):
        if "O-3,10_not_in_1,3,19" in O_list:
            O_choice="O-3,10"
        else:
            O_choice="O-1,3,19"
    ### end of special test for O9,46 and O3,10 family
    else:
        # Default: label of the highest-scoring hit (the last one on ties).
        max_score=0
        for x in final_O:
            if x[1]>=max_score:
                max_score=x[1]
                O_choice=x[0].split("_")[0]
        # When the winner is O-1,3,19 the second entry of final_O is used
        # instead. With a single entry there is nothing to fall back to and
        # the label is kept; the original reached the same result by letting
        # a bare "except: pass" swallow the IndexError.
        if O_choice=="O-1,3,19" and len(final_O)>1:
            O_choice=final_O[1][0].split("_")[0]
    return O_choice,O_nodes_list,special_genes,final_O
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
309
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
310 def seqsero_from_formula_to_serotypes(Otype,fliC,fljB,special_gene_list):
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
311 #like test_output_06012017.txt
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
312 #can add more varialbles like sdf-type, sub-species-type in future (we can conclude it into a special-gene-list)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
313 from Initial_Conditions import phase1
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
314 from Initial_Conditions import phase2
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
315 from Initial_Conditions import phaseO
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
316 from Initial_Conditions import sero
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
317 seronames=[]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
318 for i in range(len(phase1)):
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
319 fliC_combine=[]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
320 fljB_combine=[]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
321 if phaseO[i]==Otype:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
322 ### for fliC, detect every possible combinations to avoid the effect of "["
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
323 if phase1[i].count("[")==0:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
324 fliC_combine.append(phase1[i])
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
325 elif phase1[i].count("[")>=1:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
326 c=[]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
327 b=[]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
328 if phase1[i][0]=="[" and phase1[i][-1]=="]" and phase1[i].count("[")==1:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
329 content=phase1[i].replace("[","").replace("]","")
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
330 fliC_combine.append(content)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
331 fliC_combine.append("-")
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
332 else:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
333 for x in phase1[i].split(","):
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
334 if "[" in x:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
335 b.append(x.replace("[","").replace("]",""))
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
336 else:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
337 c.append(x)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
338 fliC_combine=Combine(b,c) #Combine will offer every possible combinations of the formula, like f,[g],t: f,t f,g,t
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
339 ### end of fliC "[" detect
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
340 ### for fljB, detect every possible combinations to avoid the effect of "["
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
341 if phase2[i].count("[")==0:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
342 fljB_combine.append(phase2[i])
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
343 elif phase2[i].count("[")>=1:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
344 d=[]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
345 e=[]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
346 if phase2[i][0]=="[" and phase2[i][-1]=="]" and phase2[i].count("[")==1:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
347 content=phase2[i].replace("[","").replace("]","")
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
348 fljB_combine.append(content)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
349 fljB_combine.append("-")
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
350 else:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
351 for x in phase2[i].split(","):
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
352 if "[" in x:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
353 d.append(x.replace("[","").replace("]",""))
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
354 else:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
355 e.append(x)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
356 fljB_combine=Combine(d,e)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
357 ### end of fljB "[" detect
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
358 new_fliC=fliC.split(",") #because some antigen like r,[i] not follow alphabetical order, so use this one to judge and can avoid missings
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
359 new_fliC.sort()
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
360 new_fliC=",".join(new_fliC)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
361 new_fljB=fljB.split(",")
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
362 new_fljB.sort()
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
363 new_fljB=",".join(new_fljB)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
364 if (new_fliC in fliC_combine or fliC in fliC_combine) and (new_fljB in fljB_combine or fljB in fljB_combine):
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
365 seronames.append(sero[i])
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
366 #analyze seronames
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
367 if len(seronames)==0:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
368 seronames=["N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)"]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
369 star=""
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
370 star_line=""
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
371 if len(seronames)>1:#there are two possible predictions for serotypes
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
372 star="*"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
373 star_line="The predicted serotypes share the same general formula:\t"+Otype+":"+fliC+":"+fljB+"\n"##
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
374 print "\n"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
375 predict_form=Otype+":"+fliC+":"+fljB#
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
376 predict_sero=(" or ").join(seronames)
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
377 ###special test for Enteritidis
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
378 if predict_form=="9:g,m:-":
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
379 sdf="-"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
380 for x in special_gene_list:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
381 if x[0].startswith("sdf"):
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
382 sdf="+"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
383 predict_form=predict_form+"\nSdf prediction:"+sdf
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
384 if sdf=="-":
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
385 star="*"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
386 star_line="Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n"#+##
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
387 predict_sero="See comments below"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
388 ###end of special test for Enteritidis
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
389 elif predict_form=="4:i:-":
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
390 predict_sero="potential monophasic variant of Typhimurium"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
391 elif predict_form=="4:r:-":
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
392 predict_sero="potential monophasic variant of Heidelberg"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
393 elif predict_form=="4:b:-":
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
394 predict_sero="potential monophasic variant of Paratyphi B"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
395 elif predict_form=="8:e,h:1,2":
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
396 predict_sero="Newport"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
397 star="*"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
398 star_line="Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare."
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
399 claim="The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes."##
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
400 if "N/A" in predict_sero:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
401 claim=""
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
402 if "Typhimurium" in predict_sero or predict_form=="4:i:-":
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
403 normal=0
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
404 mutation=0
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
405 for x in special_gene_list:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
406 if "oafA-O-4_full" in x[0]:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
407 normal=x[1]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
408 elif "oafA-O-4_5-" in x[0]:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
409 mutation=x[1]
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
410 if normal>mutation:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
411 #print "$$$Typhimurium"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
412 pass
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
413 elif normal<mutation:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
414 predict_sero=predict_sero.strip()+"(O5-)"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
415 star="*"#
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
416 star_line="Detected the deletion of O5-."
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
417 #print "$$$Typhimurium_O5-"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
418 else:
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
419 #print "$$$Typhimurium, even no 7 bases difference"
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
420 pass
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
421 return predict_form,predict_sero,star,star_line,claim
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
422
343e38c6798f planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff changeset
def main():
    """Run the hybrid mapping + assembly serotype prediction for one read pair.

    Positional command-line arguments (read from ``sys.argv``):
        1. database     -- FASTA file that is indexed with ``bwa index`` and
                           later used as the ``blastn`` query
        2. mapping_mode -- ``"mem"`` (bwa mem) or ``"sam"`` (bwa aln + sampe)
        3. threads      -- thread count handed to bwa/samtools; SPAdes is
                           capped at 4
        4. for_fq       -- forward reads
        5. rev_fq       -- reverse reads

    Side effects: appends tool output to ``data_log.txt``, appends contig
    details to ``SeqSero_hybrid_assembly_log.txt``, writes the prediction to
    ``Seqsero_result.txt`` and echoes it to stdout.  Intermediate files are
    created next to the input reads.

    Exits with an error message when ``mapping_mode`` is not recognised;
    previously such a value skipped mapping silently and the later steps ran
    on whatever SAM file happened to exist.
    """
    # NOTE(review): the file names below are interpolated into shell command
    # strings unquoted, so paths containing spaces or shell metacharacters
    # will break the commands.
    database = sys.argv[1]  # used to extract reads
    mapping_mode = sys.argv[2]  # "mem" or "sam"
    threads = sys.argv[3]
    for_fq = sys.argv[4]
    rev_fq = sys.argv[5]
    if mapping_mode not in ("mem", "sam"):
        sys.exit("unknown mapping mode '" + mapping_mode + "': expected 'mem' or 'sam'")

    current_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
    sam = for_fq + ".sam"
    bam = for_fq + ".bam"
    sorted_bam = for_fq + "_sorted.bam"
    mapped_fq1 = for_fq + "_mapped.fq"
    mapped_fq2 = rev_fq + "_mapped.fq"
    combined_fq = for_fq + "_combined.fq"
    for_sai = for_fq + ".sai"
    rev_sai = rev_fq + ".sai"

    # Single-argument print(...) emits the same text under the Python 2 print
    # statement this file uses, and also parses under Python 3.
    print("building database...")
    os.system("bwa index "+database+ " 2>> data_log.txt ")
    print("mapping...")
    if mapping_mode == "mem":
        os.system("bwa mem -t "+threads+" "+database+" "+for_fq+" "+rev_fq+" > "+sam+ " 2>> data_log.txt")
    else:  # "sam": align each mate separately, then pair them with sampe
        os.system("bwa aln -t "+threads+" "+database+" "+for_fq+" > "+for_sai+ " 2>> data_log.txt")
        os.system("bwa aln -t "+threads+" "+database+" "+rev_fq+" > "+rev_sai+ " 2>> data_log.txt")
        os.system("bwa sampe "+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam+ " 2>> data_log.txt")
    # -F 4 drops unmapped reads; the second call overwrites the SAM file with
    # the filtered alignments.
    os.system("samtools view -@ "+threads+" -F 4 -Sbh "+sam+" > "+bam)
    os.system("samtools view -@ "+threads+" -h -o "+sam+" "+bam)

    # "samtools sort" is invoked differently depending on the installed
    # version: up to 1.2 an output prefix is passed as the last argument,
    # newer versions have their stdout redirected into the sorted BAM.
    samtools_version = subprocess.Popen(["samtools"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = samtools_version.communicate()
    version = err.split("ersion:")[1].strip().split(" ")[0].strip()
    print("check samtools version: " + version)
    if LooseVersion(version) <= LooseVersion("1.2"):
        os.system("samtools sort -@ "+threads+" -n "+bam+" "+for_fq+"_sorted")
    else:
        os.system("samtools sort -@ "+threads+" -n "+bam+" >"+sorted_bam)

    # One FASTQ with every mapped read, plus the same reads split into mates.
    os.system("bamToFastq -i "+sorted_bam+" -fq "+combined_fq)
    os.system("bamToFastq -i "+sorted_bam+" -fq "+mapped_fq1+" -fq2 "+mapped_fq2 + " 2>> data_log.txt")#2> /dev/null if want no output

    outdir = current_time + "_temp"
    print("assembling...")
    t = "4" if int(threads) > 4 else threads  # never give SPAdes more than 4 threads
    os.system("spades.py --careful --pe1-s "+combined_fq+" --pe1-1 "+mapped_fq1+" --pe1-2 "+mapped_fq2+" -t "+t+" -o "+outdir+ " >> data_log.txt 2>&1")
    new_fasta = for_fq + "_" + database + "_" + mapping_mode + ".fasta"
    os.system("mv "+outdir+"/contigs.fasta "+new_fasta+ " 2> /dev/null")
    os.system("rm -rf "+outdir+ " 2> /dev/null")

    ### begin blast
    print("blasting...")
    print("\n")
    xmlfile = for_fq + "-extracted_vs_" + database + "_" + mapping_mode + ".xml"
    # Tool chatter goes to data_log.txt so it does not interleave with this
    # program's own output.
    os.system('makeblastdb -in '+new_fasta+' -out '+new_fasta+'_db '+'-dbtype nucl >> data_log.txt 2>&1')
    os.system("blastn -word_size 10 -query "+database+" -db "+new_fasta+"_db -out "+xmlfile+" -outfmt 5 >> data_log.txt 2>&1")
    Final_list = xml_parse_score_comparision_seqsero(xmlfile)
    # Keep hits whose "_cov_" value is at least 3.5 and whose score reaches
    # one of the two integers embedded in the hit name (presumably allele
    # length and contig length -- TODO confirm against the XML parser).
    Final_list_passed = [
        x for x in Final_list
        if float(x[0].split("_cov_")[1]) >= 3.5
        and (x[1] >= int(x[0].split("__")[1]) or x[1] >= int(x[0].split("___")[1].split("_")[3]))
    ]

    fliC_choice = "-"
    fljB_choice = "-"
    fliC_contig = "NA"
    fljB_contig = "NA"
    fliC_length = 0  # can be changed to coverage in future
    fljB_length = 0  # can be changed to coverage in future
    # decide the O antigen type and also return special-gene-list for further identification
    O_choice, O_nodes, special_gene_list, O_nodes_roles = decide_O_type_and_get_special_genes(Final_list)
    O_choice = O_choice.split("-")[-1].strip()
    H_contig_roles = decide_contig_roles_for_H_antigen(Final_list)  # decide the H antigen contig is fliC or fljB

    # The log handle is closed as soon as reporting is finished.
    with open("SeqSero_hybrid_assembly_log.txt", "a") as log_file:
        print("O_contigs:")
        log_file.write("O_contigs:\n")
        for hit in O_nodes_roles:
            if "O-1,3,19_not_in_3,10" not in hit[0]:  # O-1,3,19_not_in_3,10 is just a small size marker
                identity = str(round(hit[2]*100, 2)) + "%"
                print(" ".join([hit[0].split("___")[-1], hit[0].split("__")[0], "blast score:", str(hit[1]), "identity%:", identity]))
                log_file.write(hit[0].split("___")[-1]+" "+hit[0].split("__")[0]+" "+"blast score: "+str(hit[1])+"identity%:"+identity+"\n")

        print("H_contigs:")
        log_file.write("H_contigs:\n")
        # NOTE(review): the nesting of this loop was reconstructed from a
        # listing that had lost its indentation; confirm the else/break
        # placement against the repository copy.
        for i, role in enumerate(H_contig_roles):
            for hit in Final_list_passed:
                if role[1] in hit[0] and hit[0].startswith(role[0]):
                    if "first" in hit[0] or "last" in hit[0]:
                        # A "first"/"last" allele cannot be the prediction, so
                        # rescan for any hit on this contig; it may be a third
                        # phase allele, hence no fliC/fljB requirement here.
                        for alt in Final_list_passed:
                            if role[1] in alt[0]:
                                identity = str(round(alt[2]*100, 2)) + "%"
                                print(" ".join([role[1], "can't_decide_fliC_or_fljB", alt[0].split("_")[1], "blast_score:", str(alt[1]), "identity%:", identity]))
                                log_file.write(role[1]+" "+role[0]+" "+alt[0].split("_")[1]+" "+"blast_score: "+str(alt[1])+" identity%:"+identity+"\n")
                                H_contig_roles[i] = "can't decide fliC or fljB, may be third phase"
                                break
                    else:
                        identity = str(round(hit[2]*100, 2)) + "%"
                        print(" ".join([role[1], role[0], hit[0].split("_")[1], "blast_score:", str(hit[1]), "identity%:", identity]))
                        log_file.write(role[1]+" "+role[0]+" "+hit[0].split("_")[1]+" "+"blast_score: "+str(hit[1])+" identity%:"+identity+"\n")
                    break  # only the first matching hit is reported per contig

    for role in H_contig_roles:
        # if multiple choices, temporarily select the one with longest length for now
        # contigs already assigned to the O antigen must not be reused here
        if "fliC" == role[0] and int(role[1].split("_")[3]) >= fliC_length and role[1] not in O_nodes:
            fliC_contig = role[1]
            fliC_length = int(role[1].split("_")[3])
        elif "fljB" == role[0] and int(role[1].split("_")[3]) >= fljB_length and role[1] not in O_nodes:
            fljB_contig = role[1]
            fljB_length = int(role[1].split("_")[3])

    # Take the first passing hit on each chosen contig as the allele call.
    for hit in Final_list_passed:
        if fliC_choice == "-" and "fliC_" in hit[0] and fliC_contig in hit[0]:
            fliC_choice = hit[0].split("_")[1]
        elif fljB_choice == "-" and "fljB_" in hit[0] and fljB_contig in hit[0]:
            fljB_choice = hit[0].split("_")[1]
        elif fliC_choice != "-" and fljB_choice != "-":
            break

    print("\n")
    print(" ".join(["SeqSero Input files:", for_fq, rev_fq]))
    print("Most possible O antigen: " + O_choice)
    print("Most possible H1 antigen: " + fliC_choice)
    print("Most possible H2 antigen: " + fljB_choice)

    ###output
    predict_form, predict_sero, star, star_line, claim = seqsero_from_formula_to_serotypes(O_choice, fliC_choice, fljB_choice, special_gene_list)
    with open("Seqsero_result.txt", "w") as new_file:
        new_file.write("Input files:\t"+for_fq+" "+rev_fq+"\n"+"O antigen prediction:\t"+"O-"+O_choice+"\n"+"H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+"H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+"Predicted antigenic profile:\t"+predict_form+"\n"+"Predicted serotype(s):\t"+predict_sero+star+"\n"+star+star_line+claim+"\n")
    os.system("cat Seqsero_result.txt")


# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()