comparison run_seqsero.py @ 0:6895de35a263 draft

planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
author charles_s_test
date Thu, 19 Oct 2017 18:16:51 -0400
parents
children 38ad1130d077
comparison
equal deleted inserted replaced
-1:000000000000 0:6895de35a263
1 #!/usr/bin/python
2
3 import os, re, sys, time, datetime
4 import subprocess
5 from subprocess import Popen, PIPE
6
# Hard-coded locations inside the Galaxy installation this wrapper is
# deployed to.
database = '/nfs/sw/apps/galaxy/database/universe.sqlite'
seqsero = '/nfs/sw/apps/galaxy/tools/seqsero/SeqSero.py'
# Debug log files; opened in 'w' mode, so they are truncated on every run.
test_out = open("/nfs/sw/apps/galaxy/tools/seqsero/test.txt", 'w')
out_path = '/nfs/sw/apps/galaxy/database/files/000'
test_out2 = open("/nfs/sw/apps/galaxy/tools/seqsero/test2.txt", 'w')

path2sample = '/nfs/sw/apps/galaxy/tools/seqsero/fastq_tmp'

# Record the raw command line in the debug log.
test_out.write("monkey ")
test_out.write("\t".join(sys.argv)+'\n')

fq_list1 = []
fq_list2 = []

# Default to "no input files" so that a missing argument no longer reaches
# re.split() with a list (which raised TypeError).
fastq_files = []

test_out.write(str(len(sys.argv))+"\n")
if len(sys.argv) >= 2:
    test_out.write("\t".join(sys.argv)+'\n')
    # First CLI argument: comma-separated list of input file paths.
    fq_list1 = sys.argv[1]
    fastq_files = re.split(",", fq_list1)

test_out.write(str(len(sys.argv))+"\n")
32
def print_time():
    '''
    Writes the current local time (asctime format, no trailing newline)
    to the debug log test_out.
    '''
    now = time.localtime(time.time())
    stamp = time.asctime(now)
    test_out.write(stamp)
35
# Scratch directory that input files are copied into under a .fastq name.
tmp_path = "/nfs/sw/apps/galaxy/tools/seqsero/fastq_tmp"
if not os.path.exists(tmp_path):
    # Create the directory directly instead of shelling out to `mkdir`;
    # a failure now raises OSError instead of being silently ignored.
    os.makedirs(tmp_path)

# Log how many input paths were parsed from the command line.
test_out.write(str(len(fastq_files))+"\n")
41
def list_runs(fastq_files, tmp_dir=None):
    '''
    Creates dict with runs as keys and list with filenames as values.

    For every input path the first line of the file is read and its first
    whitespace-delimited token, with every "@" and every ".1" removed, is
    used as the run name.  The file is then copied into the scratch
    directory; a trailing ".dat" in the file name is replaced by
    "_<run>.fastq" (other names are kept unchanged).

    fastq_files -- iterable of paths to the input files.
    tmp_dir     -- directory the copies are written to; defaults to the
                   module-level tmp_path when omitted.

    Returns a dict mapping run name -> list of copied file paths.  Inputs
    that cannot be opened are reported on stdout and skipped.  Errors while
    copying are not caught and propagate to the caller.
    '''
    import shutil

    if tmp_dir is None:
        tmp_dir = tmp_path

    run2fastqs = {}
    for source in fastq_files:
        try:
            # Only the first line is needed; the context manager makes sure
            # the handle is closed again.
            with open(source, 'r') as fastq:
                first_line = fastq.readline().rstrip("\n")
        except IOError:
            print("Data not found. It is possible for a deleted file to still be listed "
                  "in a Galaxy library. Please confirm that the data still exists on this "
                  "server. You may need to upload it again.")
            continue

        # An empty file yields an empty run name, as before.
        run = re.split(r"\s", first_line)[0]
        run = run.replace('@', '').replace('.1', '')

        base = os.path.basename(source)
        if base.endswith('.dat'):
            # Keep path separators from the read header out of the file name.
            safe_run = run.replace('/', '_')
            base = base[:-len('.dat')] + '_' + safe_run + '.fastq'
        dest = os.path.join(tmp_dir, base)

        # Copy only once; an existing copy from an earlier call is reused.
        if not os.path.exists(dest):
            shutil.copy(source, dest)

        copies = run2fastqs.setdefault(run, [])
        # Compare destination paths, so the same input listed twice is not
        # counted as two files for the run.
        if dest not in copies:
            copies.append(dest)
    return run2fastqs
78
def run_seqsero(run2fastqs):
    '''
    Takes files from run2fastqs and runs SeqSero.

    run2fastqs -- dict mapping run name -> list of FASTQ paths.

    SeqSero is started once per run with "-m 2" when the run has two files
    and "-m 1" when it has one.  Runs with any other number of files are
    reported on stdout and skipped (an empty command line used to be handed
    to Popen, which crashed).

    Returns a list with one Popen.communicate() result per executed run,
    i.e. (stdout, None) tuples because only stdout is piped.
    '''
    outputs = []
    for run in run2fastqs:
        fastqs = run2fastqs[run]
        if len(fastqs) == 2:
            mode = '2'
        elif len(fastqs) == 1:
            mode = '1'
        else:
            print("Skipping run %s: expected 1 or 2 FASTQ files, found %d."
                  % (run, len(fastqs)))
            continue
        seqsero_cmd = ['python', seqsero, '-m', mode, '-i'] + list(fastqs)
        p = Popen(seqsero_cmd, stdout=PIPE)
        outputs.append(p.communicate())
    return outputs
94
def get_serotypes(outputs, log=None):
    '''
    Parses SeqSero output into per-sample serotype fields and comments.

    outputs -- list of Popen.communicate() results, one tuple per sample.
    log     -- writable file-like object every parsed line is echoed to;
               defaults to the module-level test_out debug log.

    Returns (fastq2serotype, fastq2comment), both keyed by the value of the
    "Input files:" line:
      fastq2serotype -- list of five strings: O antigen, H1 antigen (fliC),
                        H2 antigen (fljB), antigenic profile, serotype(s).
                        Fields that never appear stay ''.
      fastq2comment  -- every other line longer than 7 characters that
                        follows the sample's "Input files:" line.

    Malformed output no longer raises: a label line without a tab-separated
    value is stored as '', and antigen lines seen before any "Input files:"
    line are ignored.
    '''
    if log is None:
        log = test_out

    # Label at the start of a SeqSero line -> slot in the serotype list.
    label2slot = {
        'O antigen prediction:': 0,
        'H1 antigen prediction(fliC):': 1,
        'H2 antigen prediction(fljB):': 2,
        'Predicted antigenic profile:': 3,
        'Predicted serotype(s):': 4,
    }

    fastq2comment = {}
    fastq2serotype = {}
    for sample in outputs:
        fastqs = ''
        lines_used = set()
        for chunk in sample:  # chunk is an entire stream of seqsero output.
            if chunk is None:
                # stderr slot of communicate() when stderr was not piped.
                continue
            if isinstance(chunk, bytes) and not isinstance(chunk, str):
                # Python 3 pipes deliver bytes; str() would give "b'...'".
                chunk = chunk.decode('utf-8', 'replace')
            else:
                chunk = str(chunk)
            for element in chunk.split("\n"):  # one line of seqsero output.
                log.write(element + "\n")
                fields = element.split("\t")
                label = fields[0]
                value = fields[1] if len(fields) > 1 else ''
                if label == 'Input files:':
                    if value:
                        fastqs = value
                        lines_used.add(element)
                        if fastqs not in fastq2serotype:
                            fastq2serotype[fastqs] = [''] * 5
                            fastq2comment[fastqs] = []
                elif label in label2slot and fastqs in fastq2serotype:
                    lines_used.add(element)
                    fastq2serotype[fastqs][label2slot[label]] = value
                # Everything else that looks like real text is kept as a
                # comment for the current sample.
                if (element not in lines_used
                        and re.search(r"\w", fastqs)
                        and len(element) > 7):
                    fastq2comment[fastqs].append(element)
    return fastq2serotype, fastq2comment
137
def _html_escape(text):
    '''
    Returns text with the HTML special characters &, <, > and " escaped.
    '''
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;'))


def print_html(fastq2serotype, fastq2comment):
    '''
    Takes dict and prints to html file.

    fastq2serotype -- dict: input-file string -> list of result strings.
    fastq2comment  -- dict: input-file string -> list of comment lines.

    Writes Seqsero_result.txt (tab-separated) and Seqsero_result.html into
    the current working directory and echoes the tabular rows and comments
    to stdout.  The text and stdout output are unchanged; the HTML is now
    well formed (single body tag, comments placed in paragraphs after the
    table rather than as table rows outside it) and all values are
    HTML-escaped.  Both files are closed on return.
    '''
    header_l = ['Input Files', 'O antigen prediction',
                'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)',
                'Predicted antigenic profile', 'Predicted serotype(s)']
    # The tab file header uses underscores instead of spaces.
    header = "\t".join(column.replace(' ', '_') for column in header_l)

    html = [
        '<!DOCTYPE html>',
        '<html>',
        '<head>',
        '<title>SeqSero Results</title>',
        '</head>',
        '<body style="font-family:Helvetica;">',
        '<div style="font-size:10px">',
        '<table border=1>',
        '<tr>',
    ]
    html.extend('<td>' + _html_escape(column) + '</td>' for column in header_l)
    html.append('</tr>')

    with open('Seqsero_result.txt', 'w') as tab_out:
        tab_out.write(header + "\n")
        print("\n\n" + header)

        # One table row per sample.
        for fastq in fastq2serotype:
            line_to_print = fastq + '\t' + "\t".join(fastq2serotype[fastq])
            tab_out.write(line_to_print + "\n")
            html.append('<tr>')
            html.append('<td>' + _html_escape(fastq) + '</td>')
            for antigen in fastq2serotype[fastq]:
                html.append('<td>' + _html_escape(antigen) + '</td>')
            html.append('</tr>')
            print(line_to_print)
        html.append('</table>')
        print("\n")

        # Free-text comments, one paragraph per sample, below the table.
        for fastq in fastq2comment:
            tab_out.write("\n" + fastq + "\n")
            html.append('<p>')
            html.append(_html_escape(fastq) + '<br>')
            for line in fastq2comment[fastq]:
                html.append(_html_escape(line) + '<br>')
                tab_out.write(line + "\n")
                print(line)
            print("\n")
            html.append('</p>')

    html.extend(['</div>', '</body>', '</html>'])
    with open('Seqsero_result.html', 'w') as html_out:
        html_out.write("\n".join(html) + "\n")
190
# Pipeline: group inputs by run -> run SeqSero per run -> parse its output
# -> write the text/HTML reports -> timestamp the debug log.
try:
    run2fastqs = list_runs(fastq_files)
    outputs = run_seqsero(run2fastqs)
    fastq2serotype, fastq2comment = get_serotypes(outputs)
    print_html(fastq2serotype, fastq2comment)

    print_time()
finally:
    # Close the debug logs explicitly so their contents are flushed even
    # when one of the steps above raises.
    test_out.close()
    test_out2.close()
197
198