Mercurial > repos > charles_s_test > seqsero2
comparison run_seqsero2.py @ 9:acc5cd06a36a draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Tue, 28 Nov 2017 21:16:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
8:b04931a9fe71 | 9:acc5cd06a36a |
---|---|
1 #!/usr/bin/python | |
2 | |
3 import os, re, sys, time, datetime | |
4 import subprocess | |
5 from subprocess import Popen, PIPE | |
6 | |
7 | |
# Declare Variables
# paths to files and scripts

# NOTE(review): '__file__' is a quoted literal, so this resolves against the
# current working directory rather than this script's location. Kept as-is
# because the value is only printed; confirm the intent.
fileDir = os.path.dirname(os.path.realpath('__file__'))
print(fileDir)

# Expected invocation: <script> <comma-separated fastq paths> <tool directory>.
# Fail with a readable message instead of an IndexError when either is missing.
if len(sys.argv) < 3:
    sys.exit("usage: " + sys.argv[0] + " <fastq1[,fastq2,...]> <tool_directory>")

fq_list1 = sys.argv[1]
Tool_directory = sys.argv[2]

# os.path.join yields a usable path whether or not the tool directory argument
# ends with a path separator.
database = os.path.join(Tool_directory, 'universe.sqlite')
seqsero = os.path.join(Tool_directory, 'SeqSero.py')
out_path = os.path.join(Tool_directory, 'files', '000')
path2sample = os.path.join(Tool_directory, 'fastq_tmp')

# Debug logs written into the tool directory.
test_out = open(os.path.join(Tool_directory, "test.txt"), 'w')
test_out2 = open(os.path.join(Tool_directory, "test2.txt"), 'w')

test_out.write("monkey ")
test_out.write("\t".join(sys.argv)+'\n')

print(Tool_directory + ' path printed ')

test_out.write(str(len(sys.argv))+"\n")
test_out.write("\t".join(sys.argv)+'\n')
test_out.write(str(len(sys.argv))+"\n")

# The first argument is a comma-separated list of input file paths.
fastq_files = fq_list1.split(",")
44 | |
def print_time():
    """Write the current local time, in time.asctime() format, to the debug log."""
    # With no argument, asctime() formats the current local time.
    test_out.write(time.asctime())
47 | |
# Working directory (relative to the current directory) that receives the
# renamed copies of the input files.
tmp_path = "fastq_tmp"
if not os.path.exists(tmp_path):
    # os.mkdir raises on failure instead of silently returning a shell status.
    os.mkdir(tmp_path)

test_out.write(str(len(fastq_files))+"\n")
53 | |
def list_runs(fastq_files, tmp_dir=None):
    '''
    Group the input FASTQ files by run and copy them into the working directory.

    The run identifier of a file is the first whitespace-delimited token of
    its first line, with every '@' and every '.1' removed. Each file is
    copied under its base name with a trailing '.dat' replaced by
    '_<run>.fastq'; an existing copy is not overwritten.

    fastq_files -- iterable of paths to the input files.
    tmp_dir     -- directory receiving the copies; defaults to the
                   module-level tmp_path.

    Returns a dict mapping each run identifier to the list of copied file
    paths, in input order and without duplicates. Files that cannot be read
    or copied are reported on stdout and skipped.
    '''
    import shutil  # local import: not among this file's top-level imports

    if tmp_dir is None:
        tmp_dir = tmp_path
    run2fastqs = {}
    for fastq_path in fastq_files:
        try:
            # Only the first line is needed to identify the run.
            with open(fastq_path, 'r') as fastq:
                first_line = fastq.readline().rstrip("\n")
            run = re.split(r"\s", first_line)[0]
            run = run.replace('@', '').replace('.1', '')
            base_name = fastq_path.split('/')[-1]
            new_name = re.sub(r'\.dat$', '_'+run+'.fastq', base_name)
            new_path_file = tmp_dir+'/'+new_name
            if not os.path.exists(new_path_file):
                # Copy without a shell so paths containing spaces or shell
                # metacharacters are handled correctly.
                shutil.copy(fastq_path, new_path_file)
        except IOError:
            print("Data not found. It is possible for a deleted file to still be listed "\
                  "in a Galaxy library. Please confirm that the data still exists on this "\
                  "server. You may need to upload it again.")
            continue
        copies = run2fastqs.setdefault(run, [])
        # Compare against the copied path (what the list actually stores) so
        # the same input listed twice is not counted as a pair.
        if new_path_file not in copies:
            copies.append(new_path_file)
    return run2fastqs
90 | |
def run_seqsero(run2fastqs):
    '''
    Run SeqSero once per run and collect what each process wrote.

    run2fastqs -- dict mapping a run identifier to its list of FASTQ paths.
                  One path is passed with '-m 1', two paths with '-m 2'.
                  Runs with any other number of files cannot be turned into
                  a SeqSero command; they are reported on stdout and skipped.

    Returns a list with one (stdout, stderr) tuple per executed run, as
    returned by Popen.communicate(). stderr is not piped, so it is None.
    '''
    outputs = []
    for run, fastqs in run2fastqs.items():
        if len(fastqs) not in (1, 2):
            print("Skipping run " + str(run) + ": expected 1 or 2 fastq files, found "
                  + str(len(fastqs)))
            continue
        seqsero_cmd = ['python', seqsero, '-m', str(len(fastqs)), '-i'] + list(fastqs)
        # universal_newlines=True makes stdout text rather than bytes, so the
        # line-based parsing downstream works on Python 2 and Python 3 alike.
        p = Popen(seqsero_cmd, stdout=PIPE, universal_newlines=True)
        outputs.append(p.communicate())
    return outputs
106 | |
def get_serotypes(outputs, log=None):
    '''
    Parse SeqSero output into per-sample predictions and comments.

    outputs -- list of (stdout, stderr) tuples, one per SeqSero run. Entries
               that are None are ignored; bytes are decoded to text.
    log     -- writable file-like object that receives every parsed line;
               defaults to the module-level test_out debug log.

    Returns (fastq2serotype, fastq2comment), both keyed by the value of the
    sample's 'Input files:' line:
      fastq2serotype[key] -- [O antigen, H1 antigen, H2 antigen,
                              antigenic profile, serotype(s)], with '' for
                              any value that was not reported.
      fastq2comment[key]  -- every other line longer than 7 characters that
                              follows the sample's 'Input files:' line.
    '''
    if log is None:
        log = test_out
    # Output label -> position of its value in the per-sample result list.
    slot_by_label = {
        'O antigen prediction:': 0,
        'H1 antigen prediction(fliC):': 1,
        'H2 antigen prediction(fljB):': 2,
        'Predicted antigenic profile:': 3,
        'Predicted serotype(s):': 4,
    }
    fastq2comment = {}
    fastq2serotype = {}
    for sample in outputs:
        fastqs = ''
        lines_used = []
        for stream in sample:  # each stream is one whole captured output
            if stream is None:
                # stderr is not captured; there is nothing to parse.
                continue
            if isinstance(stream, bytes) and not isinstance(stream, str):
                # Python 3 pipes yield bytes unless opened in text mode.
                stream = stream.decode('utf-8', 'replace')
            for element in str(stream).split("\n"):
                log.write(element+"\n")
                fields = element.split("\t")
                label = fields[0]
                # A label without a tab-separated value is recorded as ''.
                value = fields[1] if len(fields) > 1 else ''
                if label == 'Input files:':
                    fastqs = value
                    lines_used.append(element)
                    if fastqs not in fastq2serotype:
                        fastq2serotype[fastqs] = ['']*5
                        fastq2comment[fastqs] = []
                elif label in slot_by_label:
                    lines_used.append(element)
                    # A prediction seen before any 'Input files:' line cannot
                    # be attributed to a sample, so it is dropped.
                    if fastqs in fastq2serotype:
                        fastq2serotype[fastqs][slot_by_label[label]] = value
                elif (element not in lines_used and re.search(r"\w", fastqs)
                        and len(element) > 7):
                    fastq2comment[fastqs].append(element)
    return fastq2serotype, fastq2comment
149 | |
def print_html(fastq2serotype, fastq2comment):
    '''
    Write the SeqSero results as a tab-separated file and an HTML page.

    fastq2serotype -- dict mapping an input-file label to its list of
                      prediction strings (one table row per entry).
    fastq2comment  -- dict mapping an input-file label to a list of comment
                      lines, written after the table.

    Creates 'Seqsero_result.txt' and 'Seqsero_result.html' in the current
    working directory and echoes the tabular rows and comments to stdout.
    Both files are closed on return, including when an error occurs.
    '''
    header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)', 'Predicted antigenic profile', 'Predicted serotype(s)']
    # The text header uses the same column titles with spaces replaced.
    header = "\t".join(header_l).replace(' ', '_')

    # NOTE(review): the emitted markup is kept byte-for-byte (including the
    # repeated <body> tag and the comment rows written after </table>) so
    # existing consumers of the page see no change.
    with open('Seqsero_result.txt', 'w') as tab_out, \
            open('Seqsero_result.html', 'w') as html_out:
        html_out.write('<!DOCTYPE html>\n')
        html_out.write('<html>\n')
        html_out.write('<head>\n')
        html_out.write('<title>SeqSero Results</title>\n')
        html_out.write('</head>\n')
        html_out.write('<body>\n')
        html_out.write('<body style="font-family:Helvetica;">\n')
        html_out.write('<p style="font-size:10px">\n')
        html_out.write('<table border=1>\n')

        # Header row.
        html_out.write('<tr>\n')
        for title in header_l:
            html_out.write('<td>'+title+'</td>\n')
        html_out.write('</tr>\n')
        tab_out.write(header+"\n")
        print("\n\n", header)

        # One row per sample.
        for fastq in fastq2serotype:
            line_to_print = fastq+'\t'+"\t".join(fastq2serotype[fastq])
            tab_out.write(line_to_print+"\n")
            html_out.write('<tr>\n')
            html_out.write('<td>'+fastq+'</td>\n')
            for antigen in fastq2serotype[fastq]:
                html_out.write('<td>'+antigen+'</td>\n')
            html_out.write('</tr>\n')
            print(line_to_print)
        html_out.write('</table>\n')
        print("\n")

        # Comment lines per sample.
        for fastq in fastq2comment:
            tab_out.write("\n"+fastq+"\n")
            html_out.write('<tr>\n')
            html_out.write('<p>\n')
            html_out.write('<td>'+fastq+"<br></td>\n")
            for line in fastq2comment[fastq]:
                html_out.write('<td>'+line+'</td>\n')
                tab_out.write(line+"\n")
                print(line)
            print("\n")
            html_out.write('</p>\n')
            html_out.write('</tr>\n')
        html_out.write('</body>\n')
        html_out.write('</html>\n')
202 | |
203 | |
def readFile(filename):
    '''Print the entire contents of the file at filename to stdout.'''
    # The with-block closes the handle even if reading or printing raises.
    with open(filename) as filehandle:
        print(filehandle.read())
208 | |
209 | |
# Pipeline: group inputs by run, run SeqSero on each run, parse its output,
# then write the text and HTML reports.
try:
    run2fastqs = list_runs(fastq_files)
    outputs = run_seqsero(run2fastqs)
    fastq2serotype, fastq2comment = get_serotypes(outputs)
    print_html(fastq2serotype, fastq2comment)

    print_time()
finally:
    # Close the debug logs explicitly so everything written to them,
    # including the final timestamp, reaches disk.
    test_out.close()
    test_out2.close()
216 | |
217 |