Mercurial > repos > charles_s_test > seqsero_v2
annotate libs/mapping_and_assembly_hybrid.py @ 3:62f8b9e226c2 draft default tip
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
author | charles_s_test |
---|---|
date | Thu, 21 Dec 2017 11:06:32 -0500 |
parents | 343e38c6798f |
children |
rev | line source |
---|---|
0
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
1 import os,sys,glob,time,itertools,subprocess |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
2 from Initial_Conditions import phase1 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
3 from Initial_Conditions import phase2 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
4 from Initial_Conditions import phaseO |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
5 from Initial_Conditions import sero |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
6 from distutils.version import LooseVersion |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
7 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
8 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
9 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
10 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
def xml_parse_score_comparision_seqsero(xmlfile):
    """Summarise a BLAST XML report as a score-ranked list of hits.

    used to do seqsero xml analysis

    One tuple ``(name, score, ids)`` is produced for every alignment of
    every record in the report:

    * ``name``  -- ``record.query.strip() + "___" + alignment.hit_def``
    * ``score`` -- sum of ``hsp.bits`` over the HSPs that are counted
    * ``ids``   -- sum of ``hsp.identities / record.query_length`` over the
      same HSPs

    If the query name contains "last" or "first", every HSP is counted.
    For all other queries (the long alleles) only HSPs whose
    ``align_length`` is at least 30 are counted, to filter noise parts.

    Args:
        xmlfile: path of the BLAST XML output file to read.

    Returns:
        A list of ``(name, score, ids)`` tuples sorted by ``score``, highest
        first; entries with equal scores keep their order from the report.
    """
    # Imported inside the function, as before, so Biopython is only needed
    # when this function is actually called.
    from Bio.Blast import NCBIXML

    # Parse everything while the file is open and let the context manager
    # close it; previously the file object was never closed.
    with open(xmlfile) as xml_handle:
        records = list(NCBIXML.parse(xml_handle))

    results = []
    for record in records:
        count_every_hsp = "last" in record.query or "first" in record.query
        for alignment in record.alignments:
            score = 0
            ids = 0
            for hsp in alignment.hsps:
                if count_every_hsp or hsp.align_length >= 30:
                    score += hsp.bits
                    ids += float(hsp.identities) / record.query_length
            name = record.query.strip() + "___" + alignment.hit_def
            results.append((name, score, ids))

    # Stable sort on the accumulated bit score, best hit first.
    results.sort(key=lambda entry: entry[1], reverse=True)
    return results
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
40 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
41 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
def Uniq(L,sort_on_fre="none"):
    """Return the unique items of ``L`` and the count of each one.

    Args:
        L: list of mutually comparable items.  NOTE: it is sorted in place,
            exactly as the original implementation did, so the caller's
            list is left sorted after the call.
        sort_on_fre: any value other than the string "none" makes the
            result ordered by ascending count (ties broken by item value).

    Returns:
        ``(items, counts)``.  With the default ``sort_on_fre`` both are lists
        and ``items`` is in ascending item order.  With ``sort_on_fre`` set
        and a non-empty input both are tuples ordered by ``(count, item)``.
        An empty input always gives ``([], [])``.
    """
    L.sort()

    # After sorting, equal items are adjacent, so one groupby pass yields
    # both the distinct items and their frequencies (the old code rescanned
    # the list for every element).
    unique = []
    count = []
    for item, run in itertools.groupby(L):
        unique.append(item)
        count.append(sum(1 for _ in run))

    # The "and unique" guard avoids the IndexError the old code hit on an
    # empty list; building the tuples explicitly avoids subscripting zip(),
    # which is not possible on Python 3.
    if sort_on_fre != "none" and unique:
        ranked = sorted(zip(count, unique))
        count = tuple(pair[0] for pair in ranked)
        unique = tuple(pair[1] for pair in ranked)
    return (unique, count)
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
58 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
59 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_head_tail_for_one_contig(nodes_vs_score_list):
    """Guess whether one contig is fliC or fljB from its head/tail hits.

    Each entry is indexed as ``entry[0]`` (hit name) and ``entry[1]``
    (score).  Scores of entries whose name contains "fliC" are summed into
    one total; otherwise, scores of entries whose name contains "fljB" are
    summed into the other.  Entries matching neither are ignored.

    Returns:
        ``(role, difference)`` where ``role`` is "fliC" when the fliC total
        is greater than or equal to the fljB total and "fljB" otherwise, and
        ``difference`` is the absolute gap between the two totals.  A very
        small gap means the call is not reliable and the whole-contig blast
        score should be used instead.
    """
    totals = {"fliC": 0, "fljB": 0}
    for entry in nodes_vs_score_list:
        label = entry[0]
        # "fliC" is tested first, so a name containing both counts as fliC.
        for gene in ("fliC", "fljB"):
            if gene in label:
                totals[gene] += entry[1]
                break
    gap = abs(totals["fliC"] - totals["fljB"])
    winner = "fliC" if totals["fliC"] >= totals["fljB"] else "fljB"
    return (winner, gap)
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
76 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node_name,Final_list_passed):
    """Pick the role of a contig from the first hit that mentions it.

    Used when the head/tail score difference is too small to trust, and
    also when no head or tail got a blast score for the contig.

    Args:
        node_name: contig name searched for as a substring of each hit name.
        Final_list_passed: hits whose first element is the hit name;
            presumably already ranked best-first by the caller, since only
            the first match is used -- TODO confirm.

    Returns:
        The text before the first "_" of the first matching hit name, or
        "" when no hit name contains ``node_name``.
    """
    matching_names = (hit[0] for hit in Final_list_passed if node_name in hit[0])
    first_match = next(matching_names, None)
    if first_match is None:
        return ""
    return first_match.split("_")[0]
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
86 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
87 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
def fliC_or_fljB_judge_from_head_tail_sequence(nodes_list,tail_head_list,Final_list_passed):
    """Assign a role (fliC or fljB) to every contig in ``nodes_list``.

    For each contig, the head/tail hits whose name contains the contig name
    are collected from ``tail_head_list``:

    * 1 to 4 such hits: the role comes from
      ``judge_fliC_or_fljB_from_head_tail_for_one_contig``; if the score
      difference it reports is below 20, the call is treated as unreliable
      and the role is taken from the whole-contig ranking instead.
    * any other number of hits (none, or more than 4): the role is taken
      from the whole-contig ranking directly.

    Finally, if exactly two contigs were judged and both received the same
    role (described in the original as the most common error: two antigens
    assigned to the same phase), every contig is re-judged from the
    whole-contig ranking alone.

    Args:
        nodes_list: contig names; the ``c`` produced by ``c,d=Uniq(nodes)``
            in ``decide_contig_roles_for_H_antigen``.
        tail_head_list: hits for the "first"/"last" probe sequences, indexed
            as ``hit[0]`` (name) and ``hit[1]`` (score).
        Final_list_passed: hits used for the whole-contig ranking.

    Returns:
        A list of ``(role, contig_name)`` tuples; ``role`` is "" when the
        whole-contig ranking finds no hit for the contig.
    """
    def role_from_whole_contig(node):
        # First hit in Final_list_passed mentioning the contig decides.
        return judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list_passed)

    role_list = []
    for node in nodes_list:
        probe_hits = [hit for hit in tail_head_list if node in hit[0]]
        if 1 <= len(probe_hits) <= 4:
            # The former 4/3/2/1-hit branches all ran exactly this logic.
            role, diff = judge_fliC_or_fljB_from_head_tail_for_one_contig(probe_hits)
            if diff < 20:
                role = role_from_whole_contig(node)
        else:
            # No head/tail evidence (or an unexpected count above 4).
            role = role_from_whole_contig(node)
        role_list.append((role, node))

    if len(role_list) == 2 and role_list[0][0] == role_list[1][0]:
        # just use score to do a final test
        role_list = [(role_from_whole_contig(node), node) for node in nodes_list]
    return role_list
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
178 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
def decide_contig_roles_for_H_antigen(Final_list):
    """Decide which contig is fliC and which one is fljB.

    Args:
        Final_list: hits indexed as ``hit[0]`` (name) and ``hit[1]``
            (score).  The name must contain "_cov_", "__" and "___"
            separated fields as parsed below; presumably it has the form
            ``<allele>__<length>___NODE_<n>_length_<len>_cov_<coverage>``
            -- TODO confirm against the producer of the names.

    Returns:
        The ``(role, contig_name)`` list produced by
        ``fliC_or_fljB_judge_from_head_tail_sequence``.
    """
    def passes_filter(hit):
        # Keep a hit when the number after "_cov_" is at least 3.5 AND the
        # score reaches either the integer after the first "__" or the
        # fourth "_"-separated field of the part after "___".
        name = hit[0]
        if not float(name.split("_cov_")[1]) >= 3.5:
            return False
        score = hit[1]
        if score >= int(name.split("__")[1]):
            return True
        return score >= int(name.split("___")[1].split("_")[3])

    Final_list_passed = [hit for hit in Final_list if passes_filter(hit)]

    # Contig names of the passed whole-allele hits: name starts with "fl"
    # and is not one of the "first"/"last" probe hits.
    nodes = [
        hit[0].split("___")[1].strip()
        for hit in Final_list_passed
        if hit[0].startswith("fl") and not ("last" in hit[0] or "first" in hit[0])
    ]
    node_names, _ = Uniq(nodes)

    # Probe hits are taken from the unfiltered list.
    tail_head_list = [hit for hit in Final_list if "last" in hit[0] or "first" in hit[0]]
    return fliC_or_fljB_judge_from_head_tail_sequence(node_names, tail_head_list, Final_list_passed)
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
192 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
def Combine(b,c):
    """Build comma-joined combinations of ``c`` plus every subset of ``b``.

    The first element of the result is ``",".join(c)`` unchanged.  Then, for
    every non-empty combination of the items of ``b`` (by increasing size,
    in ``itertools.combinations`` order), the items of ``c`` and of that
    combination are joined with ",", split on "," again, sorted, and
    re-joined.

    Args:
        b: sequence of strings from which subsets are drawn.
        c: strings included in every combination.

    Returns:
        A list of comma-separated strings, ``2 ** len(b)`` entries long.
    """
    merged = [",".join(c)]
    for size in range(1, len(b) + 1):
        for subset in itertools.combinations(b, size):
            joined = ",".join(itertools.chain(c, [",".join(subset)]))
            # Split again so that every individual token is sorted.
            tokens = joined.split(",")
            tokens.sort()
            merged.append(",".join(tokens))
    return merged
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
211 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
def decide_O_type_and_get_special_genes(Final_list):
    """Decide the O antigen from the scored hits and collect the special genes.

    Final_list holds hit records. As used here, x[0] is a name shaped like
    "<gene>__<gene_length>___<contig>", where the contig part carries
    "length_<n>" and "_cov_<coverage>" fields, and x[1] is a numeric score.
    x[2] is used as a multiplier on the gene length (presumably an identity
    ratio -- confirm against the code that builds Final_list).

    Returns a tuple (O_choice, O_nodes_list, special_genes, final_O):
      O_choice      -- "-" when no O hit survived, otherwise the chosen O label
                       ("?" is kept only if the fallback loop selects nothing)
      O_nodes_list  -- contig names of the hits in final_O, leaving out hits
                       whose name contains "O-1,3,19_not_in_3,10__130"
      special_genes -- passing hits that start with neither "O-" nor "fl"
      final_O       -- the O hits the decision was based on
    """
    O_choice="?"
    special_genes=[]
    #keep hits with coverage >= 3.5 whose score reaches the gene length or the contig length
    Final_list_passed=[
        x for x in Final_list
        if float(x[0].split("_cov_")[1])>=3.5
        and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]))
    ]
    nodes=[]
    for x in Final_list_passed:
        if x[0].startswith("O-"):
            nodes.append(x[0].split("___")[1].strip())
        elif not x[0].startswith("fl"):
            special_genes.append(x)
    c,_=Uniq(nodes)#c is the list for contigs; the second result is not needed here
    final_O=[]
    O_nodes_list=[]
    for x in c:
        #take the first passing O hit that mentions this contig
        for y in Final_list_passed:
            if x in y[0] and y[0].startswith("O-"):
                final_O.append(y)
                break
    ### O contig has the problem of two genes on same contig, so do additional test
    potenial_new_gene=""
    for x in final_O:
        pointer=0 #stays 0 unless the contig is long enough to hold a second gene
        #not consider O-1,3,19_not_in_3,10, too short compared with others
        #gene length << contig length; 800 = 400*2 is used as the flank region
        if "O-1,3,19_not_in_3,10" not in x[0] and int(x[0].split("__")[1].split("___")[0])+800 <= int(x[0].split("length_")[1].split("_")[0]):
            pointer=x[0].split("___")[1].strip()#store the contig name
            print(pointer)
        if pointer!=0:#it has potential merge event
            for y in Final_list:
                #a relatively strict filter; if passed, it has merge event and one more hit is added to final_O
                if pointer in y[0] and y not in final_O and (y[1]>=int(y[0].split("__")[1].split("___")[0])*1.5 or (y[1]>=int(y[0].split("__")[1].split("___")[0])*y[2] and y[1]>=400)):
                    potenial_new_gene=y
                    print(potenial_new_gene)
                    break
    if potenial_new_gene!="":
        print("two differnt genes in same contig, fix it for O antigen")
        final_O.append(potenial_new_gene)
    ### end of the two genes on same contig test
    if len(final_O)==0:
        #no O hit at all
        O_choice="-"
        return O_choice,O_nodes_list,special_genes,final_O
    O_list=[]
    for x in final_O:
        O_list.append(x[0].split("__")[0])
        if not "O-1,3,19_not_in_3,10__130" in x[0]:#O-1,3,19_not_in_3,10 is too small, which may affect further analysis
            O_nodes_list.append(x[0].split("___")[1])
    ### special test for O9,46 and O3,10 family
    if "O-9,46_wbaV" in O_list:
        if "O-9,46_wzy" in O_list:
            O_choice="O-9,46"
        elif "O-9,46,27_partial_wzy" in O_list:
            O_choice="O-9,46,27"
        else:
            O_choice="O-9"#next, detect O9 vs O2
            O2=0
            O9=0
            for z in special_genes:
                if "tyr-O-9" in z[0]:
                    O9=z[1]
                elif "tyr-O-2" in z[0]:
                    O2=z[1]
            #only a strictly higher tyr-O-2 score switches the call; a tie keeps O-9
            if O2>O9:
                O_choice="O-2"
    elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list):
        if "O-3,10_not_in_1,3,19" in O_list:
            O_choice="O-3,10"
        else:
            O_choice="O-1,3,19"
    ### end of special test for O9,46 and O3,10 family
    else:
        #fallback: label of the highest scoring hit (the last one wins on ties)
        max_score=0
        for x in final_O:
            if x[1]>=max_score:
                max_score=x[1]
                O_choice=x[0].split("_")[0]
        #when the best label is O-1,3,19 the second entry of final_O is used instead;
        #with a single entry there is nothing to switch to, so the label is kept
        #(this case used to be hidden behind a bare "except: pass")
        if O_choice=="O-1,3,19" and len(final_O)>1:
            O_choice=final_O[1][0].split("_")[0]
    return O_choice,O_nodes_list,special_genes,final_O
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
309 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
310 def seqsero_from_formula_to_serotypes(Otype,fliC,fljB,special_gene_list): |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
311 #like test_output_06012017.txt |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
312 #can add more varialbles like sdf-type, sub-species-type in future (we can conclude it into a special-gene-list) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
313 from Initial_Conditions import phase1 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
314 from Initial_Conditions import phase2 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
315 from Initial_Conditions import phaseO |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
316 from Initial_Conditions import sero |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
317 seronames=[] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
318 for i in range(len(phase1)): |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
319 fliC_combine=[] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
320 fljB_combine=[] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
321 if phaseO[i]==Otype: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
322 ### for fliC, detect every possible combinations to avoid the effect of "[" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
323 if phase1[i].count("[")==0: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
324 fliC_combine.append(phase1[i]) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
325 elif phase1[i].count("[")>=1: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
326 c=[] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
327 b=[] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
328 if phase1[i][0]=="[" and phase1[i][-1]=="]" and phase1[i].count("[")==1: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
329 content=phase1[i].replace("[","").replace("]","") |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
330 fliC_combine.append(content) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
331 fliC_combine.append("-") |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
332 else: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
333 for x in phase1[i].split(","): |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
334 if "[" in x: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
335 b.append(x.replace("[","").replace("]","")) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
336 else: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
337 c.append(x) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
338 fliC_combine=Combine(b,c) #Combine will offer every possible combinations of the formula, like f,[g],t: f,t f,g,t |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
339 ### end of fliC "[" detect |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
340 ### for fljB, detect every possible combinations to avoid the effect of "[" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
341 if phase2[i].count("[")==0: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
342 fljB_combine.append(phase2[i]) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
343 elif phase2[i].count("[")>=1: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
344 d=[] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
345 e=[] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
346 if phase2[i][0]=="[" and phase2[i][-1]=="]" and phase2[i].count("[")==1: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
347 content=phase2[i].replace("[","").replace("]","") |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
348 fljB_combine.append(content) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
349 fljB_combine.append("-") |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
350 else: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
351 for x in phase2[i].split(","): |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
352 if "[" in x: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
353 d.append(x.replace("[","").replace("]","")) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
354 else: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
355 e.append(x) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
356 fljB_combine=Combine(d,e) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
357 ### end of fljB "[" detect |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
358 new_fliC=fliC.split(",") #because some antigen like r,[i] not follow alphabetical order, so use this one to judge and can avoid missings |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
359 new_fliC.sort() |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
360 new_fliC=",".join(new_fliC) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
361 new_fljB=fljB.split(",") |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
362 new_fljB.sort() |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
363 new_fljB=",".join(new_fljB) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
364 if (new_fliC in fliC_combine or fliC in fliC_combine) and (new_fljB in fljB_combine or fljB in fljB_combine): |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
365 seronames.append(sero[i]) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
366 #analyze seronames |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
367 if len(seronames)==0: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
368 seronames=["N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)"] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
369 star="" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
370 star_line="" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
371 if len(seronames)>1:#there are two possible predictions for serotypes |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
372 star="*" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
373 star_line="The predicted serotypes share the same general formula:\t"+Otype+":"+fliC+":"+fljB+"\n"## |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
374 print "\n" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
375 predict_form=Otype+":"+fliC+":"+fljB# |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
376 predict_sero=(" or ").join(seronames) |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
377 ###special test for Enteritidis |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
378 if predict_form=="9:g,m:-": |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
379 sdf="-" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
380 for x in special_gene_list: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
381 if x[0].startswith("sdf"): |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
382 sdf="+" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
383 predict_form=predict_form+"\nSdf prediction:"+sdf |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
384 if sdf=="-": |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
385 star="*" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
386 star_line="Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n"#+## |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
387 predict_sero="See comments below" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
388 ###end of special test for Enteritidis |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
389 elif predict_form=="4:i:-": |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
390 predict_sero="potential monophasic variant of Typhimurium" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
391 elif predict_form=="4:r:-": |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
392 predict_sero="potential monophasic variant of Heidelberg" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
393 elif predict_form=="4:b:-": |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
394 predict_sero="potential monophasic variant of Paratyphi B" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
395 elif predict_form=="8:e,h:1,2": |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
396 predict_sero="Newport" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
397 star="*" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
398 star_line="Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare." |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
399 claim="The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes."## |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
400 if "N/A" in predict_sero: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
401 claim="" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
402 if "Typhimurium" in predict_sero or predict_form=="4:i:-": |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
403 normal=0 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
404 mutation=0 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
405 for x in special_gene_list: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
406 if "oafA-O-4_full" in x[0]: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
407 normal=x[1] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
408 elif "oafA-O-4_5-" in x[0]: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
409 mutation=x[1] |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
410 if normal>mutation: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
411 #print "$$$Typhimurium" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
412 pass |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
413 elif normal<mutation: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
414 predict_sero=predict_sero.strip()+"(O5-)" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
415 star="*"# |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
416 star_line="Detected the deletion of O5-." |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
417 #print "$$$Typhimurium_O5-" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
418 else: |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
419 #print "$$$Typhimurium, even no 7 bases difference" |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
420 pass |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
421 return predict_form,predict_sero,star,star_line,claim |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
422 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
def _shell_quote(arg):
    """Return ``arg`` escaped so the shell reads it as one literal word."""
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2
    return quote(arg)


def _echo(*parts):
    """Print ``parts`` separated by single spaces (same layout as ``print a, b``)."""
    print(" ".join(str(part) for part in parts))


def _samtools_version():
    """Return the version token that follows ``ersion:`` in samtools' stderr.

    ``samtools`` is started without arguments and its stderr is searched for
    the text ``ersion:``; the first space-delimited token after it is returned.

    Raises:
        RuntimeError: if stderr does not contain ``ersion:``.
        OSError: if the ``samtools`` executable cannot be started.
    """
    proc = subprocess.Popen(["samtools"], stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, universal_newlines=True)
    _, err = proc.communicate()
    pieces = err.split("ersion:")
    if len(pieces) < 2:
        raise RuntimeError("could not find a version line in the output of 'samtools'")
    return pieces[1].strip().split(" ")[0].strip()


def main():
    """Map reads, assemble the mapped reads, BLAST the alleles and report a serotype.

    Command line (``sys.argv[1:6]``):
        database      reference FASTA used to extract reads
        mapping_mode  "mem" (bwa mem) or "sam"/"sampe" (bwa aln + bwa sampe)
        threads       integer thread count handed to bwa/samtools (SPAdes gets at most 4)
        for_fq        forward-read FASTQ
        rev_fq        reverse-read FASTQ

    External programs invoked through the shell: bwa, samtools, bamToFastq,
    spades.py, makeblastdb and blastn.  Their diagnostics are appended to
    ``data_log.txt``.  Contig details are appended to
    ``SeqSero_hybrid_assembly_log.txt`` and the final prediction is written to
    ``Seqsero_result.txt`` and echoed on stdout.  Intermediate files are
    created next to ``for_fq``/``rev_fq``.
    """
    if len(sys.argv) < 6:
        sys.exit("usage: " + os.path.basename(sys.argv[0]) +
                 " <database> <mem|sam> <threads> <forward_fastq> <reverse_fastq>")
    database, mapping_mode, threads, for_fq, rev_fq = sys.argv[1:6]
    # "sam" is the historical spelling; "sampe" is what the original comment advertised.
    if mapping_mode not in ("mem", "sam", "sampe"):
        sys.exit("unknown mapping mode '" + mapping_mode + "' (expected 'mem' or 'sam')")
    try:
        thread_count = int(threads)  # fail before any long-running step starts
    except ValueError:
        sys.exit("threads must be an integer, got '" + threads + "'")

    current_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
    sam = for_fq + ".sam"
    bam = for_fq + ".bam"
    sorted_bam = for_fq + "_sorted.bam"
    mapped_fq1 = for_fq + "_mapped.fq"
    mapped_fq2 = rev_fq + "_mapped.fq"
    combined_fq = for_fq + "_combined.fq"
    for_sai = for_fq + ".sai"
    rev_sai = rev_fq + ".sai"

    # Shell-safe spellings; strings made only of safe characters are left unchanged.
    q = _shell_quote
    db_q, t_q = q(database), q(threads)
    fwd_q, rev_q = q(for_fq), q(rev_fq)
    sam_q, bam_q, sorted_q = q(sam), q(bam), q(sorted_bam)

    _echo("building database...")
    os.system("bwa index " + db_q + " 2>> data_log.txt")

    _echo("mapping...")
    if mapping_mode == "mem":
        os.system("bwa mem -t " + t_q + " " + db_q + " " + fwd_q + " " + rev_q +
                  " > " + sam_q + " 2>> data_log.txt")
    else:
        os.system("bwa aln -t " + t_q + " " + db_q + " " + fwd_q +
                  " > " + q(for_sai) + " 2>> data_log.txt")
        os.system("bwa aln -t " + t_q + " " + db_q + " " + rev_q +
                  " > " + q(rev_sai) + " 2>> data_log.txt")
        os.system("bwa sampe " + db_q + " " + q(for_sai) + " " + q(rev_sai) + " " +
                  fwd_q + " " + rev_q + " > " + sam_q + " 2>> data_log.txt")
    # -F 4 drops records flagged as unmapped; the SAM is then rewritten from the filtered BAM.
    os.system("samtools view -@ " + t_q + " -F 4 -Sbh " + sam_q + " > " + bam_q)
    os.system("samtools view -@ " + t_q + " -h -o " + sam_q + " " + bam_q)

    # samtools <= 1.2 takes an output prefix; newer releases write the BAM to stdout.
    version = _samtools_version()
    _echo("check samtools version:", version)
    if LooseVersion(version) <= LooseVersion("1.2"):
        os.system("samtools sort -@ " + t_q + " -n " + bam_q + " " + q(for_fq + "_sorted"))
    else:
        os.system("samtools sort -@ " + t_q + " -n " + bam_q + " >" + sorted_q)

    os.system("bamToFastq -i " + sorted_q + " -fq " + q(combined_fq))
    os.system("bamToFastq -i " + sorted_q + " -fq " + q(mapped_fq1) +
              " -fq2 " + q(mapped_fq2) + " 2>> data_log.txt")

    outdir = current_time + "_temp"
    _echo("assembling...")
    spades_threads = "4" if thread_count > 4 else threads  # SPAdes is capped at 4 threads
    os.system("spades.py --careful --pe1-s " + q(combined_fq) + " --pe1-1 " + q(mapped_fq1) +
              " --pe1-2 " + q(mapped_fq2) + " -t " + q(spades_threads) + " -o " + q(outdir) +
              " >> data_log.txt 2>&1")
    new_fasta = for_fq + "_" + database + "_" + mapping_mode + ".fasta"
    os.system("mv " + q(outdir + "/contigs.fasta") + " " + q(new_fasta) + " 2> /dev/null")
    os.system("rm -rf " + q(outdir) + " 2> /dev/null")

    _echo("blasting...")
    _echo("\n")
    xmlfile = for_fq + "-extracted_vs_" + database + "_" + mapping_mode + ".xml"
    # Tool chatter goes to data_log.txt so it does not interleave with our report.
    os.system("makeblastdb -in " + q(new_fasta) + " -out " + q(new_fasta + "_db") +
              " -dbtype nucl >> data_log.txt 2>&1")
    os.system("blastn -word_size 10 -query " + db_q + " -db " + q(new_fasta + "_db") +
              " -out " + q(xmlfile) + " -outfmt 5 >> data_log.txt 2>&1")

    Final_list = xml_parse_score_comparision_seqsero(xmlfile)
    # Keep hits whose name carries a "_cov_" value >= 3.5 and whose score reaches
    # either the integer after "__" or the 4th "_" field of the contig name
    # (presumably the contig length -- TODO confirm against the name format).
    Final_list_passed = [
        hit for hit in Final_list
        if float(hit[0].split("_cov_")[1]) >= 3.5
        and (hit[1] >= int(hit[0].split("__")[1])
             or hit[1] >= int(hit[0].split("___")[1].split("_")[3]))
    ]

    fliC_choice = "-"
    fljB_choice = "-"
    fliC_contig = "NA"
    fljB_contig = "NA"
    fliC_length = 0  # can be changed to coverage in future
    fljB_length = 0  # can be changed to coverage in future

    # Decide the O antigen type; also yields the special-gene list used later.
    O_choice, O_nodes, special_gene_list, O_nodes_roles = decide_O_type_and_get_special_genes(Final_list)
    O_choice = O_choice.split("-")[-1].strip()
    # Decide whether each H antigen contig is fliC or fljB.
    H_contig_roles = decide_contig_roles_for_H_antigen(Final_list)

    with open("SeqSero_hybrid_assembly_log.txt", "a") as log_file:
        _echo("O_contigs:")
        log_file.write("O_contigs:\n")
        for node in O_nodes_roles:
            if "O-1,3,19_not_in_3,10" in node[0]:
                continue  # O-1,3,19_not_in_3,10 is just a small size marker
            contig = node[0].split("___")[-1]
            allele = node[0].split("__")[0]
            identity = str(round(node[2] * 100, 2)) + "%"
            _echo(contig, allele, "blast score:", node[1], "identity%:", identity)
            log_file.write(contig + " " + allele + " " + "blast score: " + str(node[1]) +
                           " identity%:" + identity + "\n")

        _echo("H_contigs:")
        log_file.write("H_contigs:\n")
        for i, role in enumerate(H_contig_roles):
            for hit in Final_list_passed:
                if not (role[1] in hit[0] and hit[0].startswith(role[0])):
                    continue
                if "first" in hit[0] or "last" in hit[0]:
                    # A "first"/"last" allele cannot be the prediction itself, so
                    # rescan for any passing hit on this contig; it may be a third
                    # phase allele, hence no fliC/fljB requirement here.
                    for candidate in Final_list_passed:
                        if role[1] in candidate[0]:
                            identity = str(round(candidate[2] * 100, 2)) + "%"
                            _echo(role[1], "can't_decide_fliC_or_fljB", candidate[0].split("_")[1],
                                  "blast_score:", candidate[1], "identity%:", identity)
                            log_file.write(role[1] + " " + role[0] + " " + candidate[0].split("_")[1] +
                                           " " + "blast_score: " + str(candidate[1]) +
                                           " identity%:" + identity + "\n")
                            H_contig_roles[i] = "can't decide fliC or fljB, may be third phase"
                            break
                else:
                    identity = str(round(hit[2] * 100, 2)) + "%"
                    _echo(role[1], role[0], hit[0].split("_")[1],
                          "blast_score:", hit[1], "identity%:", identity)
                    log_file.write(role[1] + " " + role[0] + " " + hit[0].split("_")[1] + " " +
                                   "blast_score: " + str(hit[1]) + " identity%:" + identity + "\n")
                break  # only the first matching hit of each contig is reported

    # With several candidates, keep the one whose name has the largest 4th "_" field,
    # and never reuse a contig already assigned to the O antigen.
    for role in H_contig_roles:
        if "fliC" == role[0] and int(role[1].split("_")[3]) >= fliC_length and role[1] not in O_nodes:
            fliC_contig = role[1]
            fliC_length = int(role[1].split("_")[3])
        elif "fljB" == role[0] and int(role[1].split("_")[3]) >= fljB_length and role[1] not in O_nodes:
            fljB_contig = role[1]
            fljB_length = int(role[1].split("_")[3])

    for hit in Final_list_passed:
        if fliC_choice == "-" and "fliC_" in hit[0] and fliC_contig in hit[0]:
            fliC_choice = hit[0].split("_")[1]
        elif fljB_choice == "-" and "fljB_" in hit[0] and fljB_contig in hit[0]:
            fljB_choice = hit[0].split("_")[1]
        elif fliC_choice != "-" and fljB_choice != "-":
            break

    _echo("\n")
    _echo("SeqSero Input files:", for_fq, rev_fq)
    _echo("Most possible O antigen:", O_choice)
    _echo("Most possible H1 antigen:", fliC_choice)
    _echo("Most possible H2 antigen:", fljB_choice)

    predict_form, predict_sero, star, star_line, claim = seqsero_from_formula_to_serotypes(
        O_choice, fliC_choice, fljB_choice, special_gene_list)
    result_text = ("Input files:\t" + for_fq + " " + rev_fq + "\n" +
                   "O antigen prediction:\t" + "O-" + O_choice + "\n" +
                   "H1 antigen prediction(fliC):\t" + fliC_choice + "\n" +
                   "H2 antigen prediction(fljB):\t" + fljB_choice + "\n" +
                   "Predicted antigenic profile:\t" + predict_form + "\n" +
                   "Predicted serotype(s):\t" + predict_sero + star + "\n" +
                   star + star_line + claim + "\n")
    with open("Seqsero_result.txt", "w") as new_file:
        new_file.write(result_text)
    # Echo the report directly instead of spawning "cat".
    sys.stdout.write(result_text)
    sys.stdout.flush()
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
544 |
343e38c6798f
planemo upload commit 3f94d8852b51fc041ae5ad5a1929cdce302145a0-dirty
charles_s_test
parents:
diff
changeset
|
# Run the pipeline only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()