Mercurial > repos > charles_s_test > seqsero2
comparison run_seqsero_batch_galaxy_no_comment.py @ 0:6895de35a263 draft
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
author | charles_s_test |
---|---|
date | Thu, 19 Oct 2017 18:16:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6895de35a263 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 import os, re, sys, time, sqlite3, datetime | |
4 import subprocess | |
5 from subprocess import Popen, PIPE | |
6 | |
# --- Fixed installation paths for the Galaxy server this wrapper runs on ---
database = '/nfs/sw/apps/galaxy/database/universe.sqlite'
seqsero = '/nfs/sw/apps/galaxy/tools/seqsero/SeqSero.py'
out_path = '/nfs/sw/apps/galaxy/database/files/000'
path2sample = '/nfs/sw/apps/galaxy/tools/seqsero/fastq_tmp'

# --- Output handles (all opened in truncate mode, same order as before) ---
# test_out / test_out2 are debug logs next to the tool; tab_out is the
# tab-separated result table written to the current working directory.
test_out = open("/nfs/sw/apps/galaxy/tools/seqsero/test.txt", 'w')
test_out2 = open("/nfs/sw/apps/galaxy/tools/seqsero/test2.txt", 'w')
tab_out = open('Seqsero_result.txt', 'w')

# Marker text proving the script started, followed by the raw command line.
test_out2.write(" Test test test")
test_out.write(" test test " + "\t".join(sys.argv) + '\n')
28 | |
# Comma-separated list of FASTQ paths, taken from the first command-line
# argument.  It defaults to an empty *string* (it used to be a list, which
# made the split below raise TypeError when the script got no arguments).
fq_list1 = ''
fq_list2 = []

test_out.write(str(len(sys.argv))+"\n")
if len(sys.argv) >= 2:
    test_out.write("\t".join(sys.argv)+'\n')
    fq_list1 = sys.argv[1]
#   fq_list2 = sys.argv[2]

# NOTE(review): the indentation of this write was lost in the source dump;
# it is treated as a top-level statement (it follows a blank line) -- confirm.
test_out.write(str(len(sys.argv))+"\n")

# No argument (or an empty one) means "no files" rather than a crash.
fastq_files = fq_list1.split(",") if fq_list1 else []
41 | |
def print_time():
    '''
    Write the current local time, formatted by time.asctime, to the
    test_out debug log.  No trailing newline is written.
    '''
    now = time.localtime(time.time())
    stamp = time.asctime(now)
    test_out.write(stamp)
44 | |
# Staging directory that list_runs copies the input FASTQ files into.
tmp_path = "/nfs/sw/apps/galaxy/tools/seqsero/fastq_tmp"
try:
    # Create the directory without spawning a shell.  Attempting the creation
    # and inspecting the failure avoids the check-then-create race.
    os.makedirs(tmp_path)
except OSError:
    # An already existing directory is fine; anything else (permissions,
    # a regular file in the way, ...) is a real error and must surface.
    if not os.path.isdir(tmp_path):
        raise

# Record how many input files were requested.
test_out.write(str(len(fastq_files))+"\n")
50 | |
def list_runs(fastq_files, tmp_dir=None):
    '''
    Creates dict with runs as keys and list with staged filenames as values.

    For every input path the first line of the file is read and the run
    name is derived from its first whitespace-delimited token, with '@'
    and '.1' removed.  The file is then copied into the staging directory
    under its base name, where a trailing '.dat' is replaced by
    '_<run>.fastq', unless a file of that name is already there.

    fastq_files -- iterable of paths to FASTQ files.
    tmp_dir     -- staging directory; defaults to the module-level tmp_path.

    Returns a dict mapping run name -> list of staged file paths, each path
    listed once.  Files that cannot be read or copied are reported on
    stdout and skipped.
    '''
    # Function-scope import so this block does not depend on the file header.
    import shutil

    if tmp_dir is None:
        tmp_dir = tmp_path

    run2fastqs = {}
    for fastq_path in fastq_files:
        try:
            # Only the first line is needed; 'with' closes the handle.
            with open(fastq_path, 'r') as fastq:
                header = fastq.readline().rstrip("\n")
            run = re.split(r"\s", header)[0]
            run = re.sub('@', '', run)
            # NOTE(review): this removes every '.1' in the token, not only a
            # trailing one -- kept as in the original, confirm it is intended.
            run = re.sub(r"\.1", '', run)

            base_name = os.path.basename(fastq_path)
            # The dot is escaped so only a real '.dat' extension is rewritten.
            staged_name = re.sub(r'\.dat$', '_' + run + '.fastq', base_name)
            staged_path = os.path.join(tmp_dir, staged_name)
            if not os.path.exists(staged_path):
                # shutil.copy copes with spaces and shell metacharacters,
                # and raises on failure instead of failing silently.
                shutil.copy(fastq_path, staged_path)
        except IOError:
            print("Data not found. It is possible for a deleted file to still be listed "
                  "in a Galaxy library. Please confirm that the data still exists on this "
                  "server. You may need to upload it again.")
            continue

        staged = run2fastqs.setdefault(run, [])
        # Compare the staged path (what the list holds), not the source path,
        # so a file given twice is not handed to SeqSero twice.
        if staged_path not in staged:
            staged.append(staged_path)
    return run2fastqs
87 | |
def run_seqsero(run2fastqs):
    '''
    Takes files from run2fastqs and runs SeqSero once per run.

    run2fastqs -- dict mapping run name -> list of FASTQ paths.

    A run with two files is started with '-m 2', a run with one file with
    '-m 1'.  Runs with any other number of files are reported on stderr
    and skipped; previously they crashed on an empty command line.

    Returns a list with one Popen.communicate() result per executed run,
    i.e. (stdout, None) tuples, since only stdout is piped.
    '''
    outputs = []
    for run in run2fastqs:
        fastqs = run2fastqs[run]
        if len(fastqs) == 2:
            mode = '2'
        elif len(fastqs) == 1:
            mode = '1'
        else:
            sys.stderr.write("Skipping run %s: expected 1 or 2 fastq files, found %d\n"
                             % (run, len(fastqs)))
            continue
        # Argument list (no shell), so paths need no quoting.
        seqsero_cmd = ['python', seqsero, '-m', mode, '-i'] + list(fastqs)
        p = Popen(seqsero_cmd, stdout=PIPE)
        outputs.append(p.communicate())
    return outputs
103 | |
104 | |
def parse_output(runs_info, out_path, log=None):
    '''
    Copy the content of each run's file into a log, line by line.

    runs_info -- iterable of file names located directly under out_path.
    out_path  -- directory containing those files.
    log       -- writable file-like object; defaults to the module-level
                 test_out debug log.

    Nothing is returned.  Every copied line ends with exactly one newline.
    '''
    if log is None:
        log = test_out
    for run in runs_info:
        running_path = out_path+'/'+run
        # 'with' guarantees the handle is closed even if a write fails.
        with open(running_path, 'r') as running_info:
            for line in running_info:
                # write() takes a single string; the original passed two
                # arguments, which raised TypeError on the first line.
                log.write(line.rstrip("\n") + "\n")
115 | |
def get_serotypes(outputs):
    '''
    Extract the serotype predictions from captured SeqSero output.

    outputs -- list of samples as returned by run_seqsero; each sample is
               an iterable of text chunks (None entries are ignored).

    Every chunk is split into lines and every line into tab-separated
    cells.  An 'Input files:' line selects the current sample; the lines
    labelled below fill that sample's five-slot list in this order:
    O antigen, H1 antigen (fliC), H2 antigen (fljB), antigenic profile,
    serotype(s).

    Returns a dict mapping the 'Input files:' value -> list of 5 strings
    ('' where SeqSero reported nothing).  Lines without a tab, and
    prediction lines seen before any 'Input files:' line of the same
    sample, are ignored instead of raising.
    '''
    slot_for_label = {
        'O antigen prediction:': 0,
        'H1 antigen prediction(fliC):': 1,
        'H2 antigen prediction(fljB):': 2,
        'Predicted antigenic profile:': 3,
        'Predicted serotype(s):': 4,
    }
    fastq2serotype = {}
    for sample in outputs:
        # Reset per sample so values are never attributed to the previous one.
        fastqs = None
        for chunk in sample:
            if chunk is None:
                # communicate() yields None for streams that were not piped.
                continue
            if not isinstance(chunk, str):
                # Python 3 pipes deliver bytes; str(bytes) would keep the
                # newlines escaped and nothing below would match.
                if isinstance(chunk, bytes):
                    chunk = chunk.decode('utf-8', 'replace')
                else:
                    chunk = str(chunk)
            for row in chunk.split("\n"):
                cells = row.split("\t")
                if len(cells) < 2:
                    continue
                label, value = cells[0], cells[1]
                if label == 'Input files:':
                    fastqs = value
                    if fastqs not in fastq2serotype:
                        fastq2serotype[fastqs] = ['']*5
                elif label in slot_for_label and fastqs is not None:
                    fastq2serotype[fastqs][slot_for_label[label]] = value
        # NOTE(review): the nesting level of this print was lost in the source
        # dump; it is emitted once per sample here -- confirm.
        print("\n")
    return fastq2serotype
152 | |
def print_html(fastq2serotype, html_path='Seqsero_result.html', tab_file=None):
    '''
    Takes dict and writes it as an HTML table, a tab-separated table and
    plain lines on stdout.

    fastq2serotype -- dict mapping input-file label -> list of strings
                      (as produced by get_serotypes).
    html_path      -- where the HTML report is written; defaults to
                      'Seqsero_result.html' in the current directory.
    tab_file       -- writable file-like object for the tab-separated
                      table; defaults to the module-level tab_out.

    The tab-separated header uses underscores instead of spaces; the HTML
    header keeps the spaces.  tab_file is flushed but left open, because
    this function does not own it.
    '''
    if tab_file is None:
        tab_file = tab_out

    header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)', 'Predicted antigenic profile', 'Predicted serotype(s)']
    # Derived from header_l so the two headers cannot drift apart.
    header = "\t".join(title.replace(' ', '_') for title in header_l)

    # 'with' closes the report even if a write fails half-way.
    with open(html_path, 'w') as html_out:
        html_out.write('<!DOCTYPE html>\n')
        html_out.write('<html>\n')
        html_out.write('<head>\n')
        html_out.write('<title>SeqSero Results</title>\n')
        html_out.write('</head>\n')
        # Only one <body> tag: the original also wrote a bare '<body>'
        # immediately before this styled one.
        html_out.write('<body style="font-family:Helvetica;">\n')
        html_out.write('<p style="font-size:10px">\n')
        html_out.write('<table border=1>\n')
        html_out.write('<tr>\n')
        for element in header_l:
            html_out.write('<td>'+element+'</td>\n')
        html_out.write('</tr>\n')
        tab_file.write(header+"\n")
        print(header)
        for fastq in fastq2serotype:
            line_to_print = fastq+'\t'+"\t".join(fastq2serotype[fastq])
            tab_file.write(line_to_print+"\n")
            html_out.write('<tr>\n')
            html_out.write('<td>'+fastq+'</td>\n')
            for antigen in fastq2serotype[fastq]:
                html_out.write('<td>'+antigen+'</td>\n')
            html_out.write('</tr>\n')
            print(line_to_print)
        html_out.write('</table>\n')
        html_out.write('</body>\n')
        html_out.write('</html>\n')
    # Make sure the table reaches disk even if the handle is never closed.
    tab_file.flush()
190 | |
# Pipeline: stage the inputs, run SeqSero per run, parse its output and
# write the reports; finally stamp the debug log with the finish time.
try:
    run2fastqs = list_runs(fastq_files)
    outputs = run_seqsero(run2fastqs)
    fastq2serotype = get_serotypes(outputs)
    print_html(fastq2serotype)

    print_time()
finally:
    # The module-level handles were never closed; close them here so the
    # result table and debug logs are flushed even when a step fails.
    for handle in (tab_out, test_out, test_out2):
        handle.close()
197 | |
198 |