comparison run_seqsero_batch_galaxy_no_comment.py @ 0:6895de35a263 draft

planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
author charles_s_test
date Thu, 19 Oct 2017 18:16:51 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6895de35a263
1 #!/usr/bin/python
2
3 import os, re, sys, time, sqlite3, datetime
4 import subprocess
5 from subprocess import Popen, PIPE
6
# Hard-coded paths for the Galaxy installation this wrapper is deployed in.
# NOTE(review): database, out_path and path2sample are not read by any code
# visible in this file; they are kept in case other code relies on them.
database = '/nfs/sw/apps/galaxy/database/universe.sqlite'
seqsero = '/nfs/sw/apps/galaxy/tools/seqsero/SeqSero.py'
# Debug logs; opening with 'w' truncates them on every run.
test_out = open("/nfs/sw/apps/galaxy/tools/seqsero/test.txt", 'w')
out_path = '/nfs/sw/apps/galaxy/database/files/000'
test_out2 = open("/nfs/sw/apps/galaxy/tools/seqsero/test2.txt", 'w')

# Tab-separated result table, created in the current working directory.
tab_out = open('Seqsero_result.txt', 'w')

path2sample = '/nfs/sw/apps/galaxy/tools/seqsero/fastq_tmp'

test_out2.write(" Test test test")

test_out.write(" test test ")
test_out.write("\t".join(sys.argv)+'\n')

# sys.argv[1] carries the comma-separated list of input FASTQ paths.
# The default is an empty *string* (not a list) because it is split below.
fq_list1 = ''
fq_list2 = []

test_out.write(str(len(sys.argv))+"\n")
if len(sys.argv) >= 2:
    test_out.write("\t".join(sys.argv)+'\n')
    fq_list1 = sys.argv[1]

test_out.write(str(len(sys.argv))+"\n")

# With no argument there is nothing to process; use an empty list instead of
# handing a non-string to re.split(), which raises TypeError.
fastq_files = re.split(",", fq_list1) if fq_list1 else []
41
def print_time():
    """Write the current local time, as formatted by time.asctime, to the test_out debug log."""
    now = time.localtime(time.time())
    stamp = time.asctime(now)
    test_out.write(stamp)
44
# Directory that receives the renamed copies of the input FASTQ files.
tmp_path = "/nfs/sw/apps/galaxy/tools/seqsero/fastq_tmp"
if not os.path.exists(tmp_path):
    # os.makedirs does not go through a shell and also creates any missing
    # parent directories, which a plain `mkdir` would silently fail on.
    os.makedirs(tmp_path)

test_out.write(str(len(fastq_files))+"\n")
50
def list_runs(fastq_files, dest_dir=None):
    '''
    Creates dict with runs as keys and list with filenames as values.

    For every input path the first line of the file is read, and the run
    name is taken from its first whitespace-delimited token with every "@"
    and every ".1" removed.  The file is then copied into dest_dir under its
    base name with a trailing ".dat" replaced by "_<run>.fastq" (unless a
    file of that name is already there), and the path of the copy is
    recorded under the run name.

    fastq_files -- iterable of paths to the input files.
    dest_dir    -- directory receiving the copies; defaults to the
                   module-level tmp_path.

    Returns a dict mapping run name -> list of copied file paths, each path
    listed at most once.  Inputs that cannot be read or copied are reported
    on stdout and skipped.
    '''
    # Local import: the file's import header does not provide shutil.
    import shutil

    if dest_dir is None:
        dest_dir = tmp_path

    run2fastqs = {}
    for fastq_path in fastq_files:
        try:
            # Only the first line is needed; the context manager guarantees
            # the handle is closed again.
            with open(fastq_path, 'r') as fastq:
                first_line = fastq.readline().rstrip("\n")
            run = re.split(r"\s", first_line)[0]
            run = run.replace('@', '')
            run = re.sub(r"\.1", '', run)

            base_name = os.path.basename(fastq_path)
            # The dot is escaped so that only a literal ".dat" suffix matches.
            new_name = re.sub(r'\.dat$', '_'+run+'.fastq', base_name)
            new_path_file = os.path.join(dest_dir, new_name)
            if not os.path.exists(new_path_file):
                # shutil.copy instead of a shell `cp`: safe for paths with
                # spaces or shell metacharacters.
                shutil.copy(fastq_path, new_path_file)

            # Compare against the copied path (what the list actually holds)
            # so the same input given twice is not recorded twice.
            copies = run2fastqs.setdefault(run, [])
            if new_path_file not in copies:
                copies.append(new_path_file)
        except IOError:
            print("Data not found. It is possible for a deleted file to still be listed "
                  "in a Galaxy library. Please confirm that the data still exists on this "
                  "server. You may need to upload it again.")
    return run2fastqs
87
def run_seqsero(run2fastqs):
    '''
    Takes files from run2fastqs and runs SeqSero.

    run2fastqs -- dict mapping run name -> list of FASTQ paths.

    A run with two files is launched with "-m 2" and both paths, a run with
    one file with "-m 1".  Runs with any other number of files are skipped
    with a warning on stderr, because no command line is defined for them.

    Returns a list with one subprocess communicate() result, i.e. a
    (stdout, stderr) tuple, per run that was launched.  stderr is not
    captured, so the second item is None.
    '''
    outputs = []
    for run in run2fastqs:
        fastqs = run2fastqs[run]
        if len(fastqs) == 2:
            seqsero_cmd = ['python', seqsero, '-m', '2', '-i', fastqs[0], fastqs[1]]
        elif len(fastqs) == 1:
            seqsero_cmd = ['python', seqsero, '-m', '1', '-i', fastqs[0]]
        else:
            # Starting Popen with an empty command raises and would abort
            # the whole batch; skip just this run instead.
            sys.stderr.write("Skipping run %r: expected 1 or 2 FASTQ files, got %d\n"
                             % (run, len(fastqs)))
            continue
        p = Popen(seqsero_cmd, stdout=PIPE)
        outputs.append(p.communicate())
    return outputs
103
104
def parse_output(runs_info, out_path):
    '''
    Copy the contents of each run's output file into the test_out debug log.

    runs_info -- iterable of file names, one per run.
    out_path  -- directory the names are relative to.

    Nothing is returned; every line of out_path/<run> is written to the
    module-level test_out handle, terminated by exactly one newline.
    '''
    for run in runs_info:
        running_path = out_path+'/'+run
        with open(running_path, 'r') as running_info:
            for line in running_info:
                # file.write() accepts a single string; passing the newline
                # as a second argument raises TypeError.
                test_out.write(line.rstrip("\n") + "\n")
115
def get_serotypes(outputs):
    '''
    Extract the prediction fields from captured SeqSero output.

    outputs -- list of (stdout, stderr) tuples as returned by run_seqsero.

    Each stream is split into lines and each line into tab-separated fields.
    An "Input files:" line starts a record keyed by its value; the labelled
    lines that follow fill that record's five slots, in this order:
    O antigen prediction, H1 antigen prediction(fliC), H2 antigen
    prediction(fljB), Predicted antigenic profile, Predicted serotype(s).

    Returns a dict mapping the "Input files:" value -> list of five strings
    (empty string where a field was not reported).
    '''
    field_slots = {
        'O antigen prediction:': 0,
        'H1 antigen prediction(fliC):': 1,
        'H2 antigen prediction(fljB):': 2,
        'Predicted antigenic profile:': 3,
        'Predicted serotype(s):': 4,
    }
    fastq2serotype = {}
    for sample in outputs:
        # Reset per sample so a prediction line can never be attributed to
        # the input files of a previous sample.
        fastqs = None
        for stream in sample:
            if stream is None:
                # stderr is not captured by run_seqsero.
                continue
            if not isinstance(stream, str):
                # Under Python 3 the pipe yields bytes; str() on bytes would
                # produce a "b'...'" repr with escaped newlines.
                if isinstance(stream, bytes):
                    stream = stream.decode('utf-8', 'replace')
                else:
                    stream = str(stream)
            for element in stream.split("\n"):
                elementl = element.split("\t")
                if len(elementl) < 2:
                    # A label without a value carries no information.
                    continue
                label, value = elementl[0], elementl[1]
                if label == 'Input files:':
                    fastqs = value
                    fastq2serotype.setdefault(fastqs, ['']*5)
                elif label in field_slots and fastqs is not None:
                    fastq2serotype[fastqs][field_slots[label]] = value
    # NOTE(review): the original indentation of this print was lost; it is
    # emitted once here, before returning - confirm against the upstream file.
    print("\n")
    return fastq2serotype
152
def print_html(fastq2serotype, html_path='Seqsero_result.html', tab_stream=None):
    '''
    Takes dict and prints to html file.

    fastq2serotype -- dict mapping input-file label -> list of five strings
                      as produced by get_serotypes.
    html_path      -- file the HTML table is written to (default:
                      Seqsero_result.html in the current directory).
    tab_stream     -- writable text stream receiving the tab-separated
                      table; defaults to the module-level tab_out.

    The same rows are written three times: as an HTML table to html_path, as
    tab-separated text to tab_stream, and to stdout.  Rows are emitted in
    sorted key order so the output is reproducible.  Values are HTML-escaped
    in the HTML file only.
    '''
    def escape(text):
        # Minimal escaping so values cannot break the table markup.
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    if tab_stream is None:
        tab_stream = tab_out

    header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)',
                'H2 antigen prediction(fljB)', 'Predicted antigenic profile',
                'Predicted serotype(s)']
    # The tab-separated header uses underscores instead of spaces.
    header = "\t".join(header_l).replace(' ', '_')

    with open(html_path, 'w') as html_out:
        html_out.write('<!DOCTYPE html>\n')
        html_out.write('<html>\n')
        html_out.write('<head>\n')
        html_out.write('<title>SeqSero Results</title>\n')
        html_out.write('</head>\n')
        # A single body tag; the font size sits on the table because a
        # table is not valid content inside a <p> element.
        html_out.write('<body style="font-family:Helvetica;">\n')
        html_out.write('<table border=1 style="font-size:10px">\n')
        html_out.write('<tr>\n')
        for element in header_l:
            html_out.write('<td>'+escape(element)+'</td>\n')
        html_out.write('</tr>\n')
        tab_stream.write(header+"\n")
        print(header)
        for fastq in sorted(fastq2serotype):
            antigens = fastq2serotype[fastq]
            line_to_print = fastq+'\t'+"\t".join(antigens)
            tab_stream.write(line_to_print+"\n")
            html_out.write('<tr>\n')
            html_out.write('<td>'+escape(fastq)+'</td>\n')
            for antigen in antigens:
                html_out.write('<td>'+escape(antigen)+'</td>\n')
            html_out.write('</tr>\n')
            print(line_to_print)
        html_out.write('</table>\n')
        html_out.write('</body>\n')
        html_out.write('</html>\n')
    # Push the table to disk now rather than relying on interpreter exit.
    tab_stream.flush()
190
# Pipeline: group inputs by run, run SeqSero on each run, parse its output
# and write the HTML / tab-separated reports.
try:
    run2fastqs = list_runs(fastq_files)
    outputs = run_seqsero(run2fastqs)
    fastq2serotype = get_serotypes(outputs)
    print_html(fastq2serotype)

    print_time()
finally:
    # Close the result table and debug logs explicitly so their contents are
    # flushed even when a step above fails.
    for handle in (tab_out, test_out, test_out2):
        handle.close()
197
198