comparison run_seqsero2.py @ 9:acc5cd06a36a draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Tue, 28 Nov 2017 21:16:36 -0500
parents
children
comparison
equal deleted inserted replaced
8:b04931a9fe71 9:acc5cd06a36a
1 #!/usr/bin/python
2
3 import os, re, sys, time, datetime
4 import subprocess
5 from subprocess import Popen, PIPE
6
7
# Declare Variables
# paths to files and scripts

# Directory containing this script; it is only printed, for debugging.
# (Uses the real __file__, not the string literal '__file__', which would
# resolve relative to the current working directory.)
fileDir = os.path.dirname(os.path.realpath(__file__))
print(fileDir)

# Both positional arguments are required:
#   argv[1]  comma-separated list of FASTQ paths
#   argv[2]  tool directory (holds SeqSero.py and receives the debug logs)
if len(sys.argv) < 3:
    sys.exit("usage: " + sys.argv[0] + " <fastq1[,fastq2,...]> <tool_directory>")

fq_list1 = sys.argv[1]
Tool_directory = sys.argv[2]

# os.path.join supplies the separator whether or not Tool_directory ends
# with a slash; plain concatenation produced e.g. "<dir>SeqSero.py".
database = os.path.join(Tool_directory, 'universe.sqlite')
seqsero = os.path.join(Tool_directory, 'SeqSero.py')
out_path = os.path.join(Tool_directory, 'files', '000')
path2sample = os.path.join(Tool_directory, 'fastq_tmp')

# Debug logs. They stay open for the lifetime of the script because the
# functions below write to test_out.
test_out = open(os.path.join(Tool_directory, "test.txt"), 'w')
test_out2 = open(os.path.join(Tool_directory, "test2.txt"), 'w')

test_out.write("monkey ")
test_out.write("\t".join(sys.argv)+'\n')

print(Tool_directory + ' path printed ')

test_out.write(str(len(sys.argv))+"\n")
test_out.write("\t".join(sys.argv)+'\n')
test_out.write(str(len(sys.argv))+"\n")

# One entry per input FASTQ path.
fastq_files = fq_list1.split(",")
44
def print_time():
    '''
    Write the current local time, formatted by time.asctime, to the
    module-level debug log (no trailing newline is added).
    '''
    now = time.localtime(time.time())
    stamp = time.asctime(now)
    test_out.write(stamp)
47
# Working directory (relative to the current directory) that receives the
# renamed copies of the input FASTQ files.
tmp_path = "fastq_tmp"
if not os.path.exists(tmp_path):
    # Create it in-process: unlike os.system('mkdir ...'), a failure here
    # raises immediately instead of being silently ignored.
    os.mkdir(tmp_path)

test_out.write(str(len(fastq_files))+"\n")
53
def list_runs(fastq_files, tmp_dir=None):
    '''
    Creates dict with runs as keys and list with filenames as values.

    For every input path the first line of the file is read and the run
    name is derived from it: the first whitespace-delimited token with
    every "@" and every ".1" removed. The file is then copied into the
    working directory; a name ending in ".dat" has that suffix replaced by
    "_<run>.fastq", any other name is kept as is. An existing copy is not
    overwritten.

    fastq_files -- iterable of paths to the input files.
    tmp_dir     -- directory receiving the copies; defaults to the
                   module-level tmp_path.

    Returns a dict mapping run name -> list of copied file paths, each
    path listed once. Files that cannot be read or copied are reported on
    standard output and left out of the result.
    '''
    import shutil  # local import: only this function needs it

    if tmp_dir is None:
        tmp_dir = tmp_path

    run2fastqs = {}
    for fastq_path in fastq_files:
        try:
            # Only the header line is needed; the context manager closes
            # the handle even if reading fails.
            with open(fastq_path, 'r') as fastq:
                header = fastq.readline().rstrip("\n")
            run = re.split(r"\s", header)[0]
            run = run.replace('@', '').replace('.1', '')

            base_name = fastq_path.split('/')[-1]
            if base_name.endswith('.dat'):
                new_name = base_name[:-len('.dat')] + '_' + run + '.fastq'
            else:
                new_name = base_name
            new_path_file = tmp_dir + '/' + new_name

            if not os.path.exists(new_path_file):
                # Copy without a shell so paths containing spaces or shell
                # metacharacters are handled correctly.
                shutil.copy(fastq_path, new_path_file)
        except (IOError, OSError):
            print("Data not found. It is possible for a deleted file to still be listed "
                  "in a Galaxy library. Please confirm that the data still exists on this "
                  "server. You may need to upload it again.")
            continue

        # Compare the *copied* path against the stored copies so the same
        # input listed twice is not counted as a pair.
        copies = run2fastqs.setdefault(run, [])
        if new_path_file not in copies:
            copies.append(new_path_file)
    return run2fastqs
90
def run_seqsero(run2fastqs):
    '''
    Takes files from run2fastqs and runs SeqSero.

    run2fastqs -- dict mapping run name -> list of FASTQ paths.

    A run with two files is passed to SeqSero with "-m 2", a run with one
    file with "-m 1". Runs with any other number of files are reported on
    standard output and skipped, because no command line can be built for
    them.

    Returns a list with one (stdout, stderr) tuple per executed run, as
    returned by Popen.communicate(); stderr is not captured and is None.
    '''
    outputs = []
    for run, fastqs in run2fastqs.items():
        n_files = len(fastqs)
        if n_files == 2:
            mode = '2'
        elif n_files == 1:
            mode = '1'
        else:
            print("Skipping run '%s': expected 1 or 2 FASTQ files, found %d."
                  % (run, n_files))
            continue
        seqsero_cmd = ['python', seqsero, '-m', mode, '-i'] + list(fastqs)
        # universal_newlines=True makes stdout text rather than bytes on
        # Python 3, so the report can be split on "\n" downstream.
        p = Popen(seqsero_cmd, stdout=PIPE, universal_newlines=True)
        outputs.append(p.communicate())
    return outputs
106
def get_serotypes(outputs, log=None):
    '''
    Parses SeqSero reports into per-sample prediction and comment dicts.

    outputs -- list of (stdout, stderr) tuples, one per SeqSero run. Each
               non-None member is treated as a whole report made of
               "label<TAB>value" lines.
    log     -- object with a write() method that receives every report
               line; defaults to the module-level test_out.

    Returns (fastq2serotype, fastq2comment), both keyed by the value of
    the report's "Input files:" line:
      fastq2serotype[key] -- five strings: O antigen, H1 antigen (fliC),
                             H2 antigen (fljB), antigenic profile,
                             serotype(s); '' where the report had none.
      fastq2comment[key]  -- the remaining report lines longer than seven
                             characters that follow "Input files:".
    '''
    if log is None:
        log = test_out

    # Report label -> position in the five-element prediction list.
    label2slot = {
        'O antigen prediction:': 0,
        'H1 antigen prediction(fliC):': 1,
        'H2 antigen prediction(fljB):': 2,
        'Predicted antigenic profile:': 3,
        'Predicted serotype(s):': 4,
    }

    fastq2comment = {}
    fastq2serotype = {}
    for sample in outputs:
        fastqs = ''
        for report in sample:  # report is the entire output of one stream.
            if report is None:
                # Uncaptured stream; there is nothing to parse.
                continue
            if isinstance(report, bytes) and not isinstance(report, str):
                # Python 3 bytes: decode, because str() would give "b'...'".
                report = report.decode('utf-8', 'replace')
            for element in str(report).split("\n"):  # one report line
                log.write(element + "\n")
                fields = element.split("\t")
                label = fields[0]
                # A label without a tab-separated value yields ''.
                value = fields[1] if len(fields) > 1 else ''
                if label == 'Input files:':
                    fastqs = value
                    if fastqs not in fastq2serotype:
                        fastq2serotype[fastqs] = [''] * 5
                        fastq2comment[fastqs] = []
                elif label in label2slot:
                    # Ignore predictions that appear before any
                    # "Input files:" line: there is no sample to attach to.
                    if fastqs in fastq2serotype:
                        fastq2serotype[fastqs][label2slot[label]] = value
                elif re.search(r"\w", fastqs) and len(element) > 7:
                    fastq2comment[fastqs].append(element)
    return fastq2serotype, fastq2comment
149
def print_html(fastq2serotype, fastq2comment,
               tab_path='Seqsero_result.txt', html_path='Seqsero_result.html'):
    '''
    Takes dict and prints to html file.

    Writes the predictions to a tab-separated text file and to an HTML
    page, and echoes them on standard output.

    fastq2serotype -- dict: sample -> list of prediction strings, in the
                      order of the header columns after "Input Files".
    fastq2comment  -- dict: sample -> list of free-text report lines.
    tab_path       -- path of the tab-separated output file.
    html_path      -- path of the HTML output file.

    The HTML page contains one table with the predictions and, when there
    are comments, a second table with one row per sample. Values are
    HTML-escaped in the page only; the text file receives them verbatim.
    Both files are closed on return, including when a write fails.
    '''
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2

    header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)',
                'H2 antigen prediction(fljB)', 'Predicted antigenic profile',
                'Predicted serotype(s)']
    # Column titles for the text file use underscores instead of spaces.
    header = "\t".join(header_l).replace(' ', '_')

    with open(tab_path, 'w') as tab_out, open(html_path, 'w') as html_out:
        html_out.write('<!DOCTYPE html>\n')
        html_out.write('<html>\n')
        html_out.write('<head>\n')
        html_out.write('<title>SeqSero Results</title>\n')
        html_out.write('</head>\n')
        # Exactly one <body> element; no paragraph is wrapped around the
        # table because a table cannot be nested inside <p>.
        html_out.write('<body style="font-family:Helvetica;">\n')

        # Prediction table.
        html_out.write('<table border=1>\n')
        html_out.write('<tr>\n')
        for title in header_l:
            html_out.write('<td>' + title + '</td>\n')
        html_out.write('</tr>\n')
        tab_out.write(header + "\n")
        print("\n\n " + header)
        for fastq in fastq2serotype:
            antigens = fastq2serotype[fastq]
            line_to_print = fastq + '\t' + "\t".join(antigens)
            tab_out.write(line_to_print + "\n")
            html_out.write('<tr>\n')
            html_out.write('<td>' + escape(fastq) + '</td>\n')
            for antigen in antigens:
                html_out.write('<td>' + escape(antigen) + '</td>\n')
            html_out.write('</tr>\n')
            print(line_to_print)
        html_out.write('</table>\n')
        print("\n")

        # Comment rows live in their own table so that every <tr>/<td> has
        # a <table> ancestor.
        if fastq2comment:
            html_out.write('<table border=1>\n')
        for fastq in fastq2comment:
            tab_out.write("\n" + fastq + "\n")
            html_out.write('<tr>\n')
            html_out.write('<td>' + escape(fastq) + '</td>\n')
            for line in fastq2comment[fastq]:
                html_out.write('<td>' + escape(line) + '</td>\n')
                tab_out.write(line + "\n")
                print(line)
            print("\n")
            html_out.write('</tr>\n')
        if fastq2comment:
            html_out.write('</table>\n')

        html_out.write('</body>\n')
        html_out.write('</html>\n')
202
203
def readFile(filename):
    '''
    Prints the entire contents of the file at filename to standard output.

    The file is opened in the default (text, read) mode. The context
    manager closes it even when reading or printing raises.
    '''
    with open(filename) as filehandle:
        print(filehandle.read())
208
209
# Pipeline: stage the inputs, run SeqSero on each run, parse its reports,
# then write the text/HTML results.
run2fastqs = list_runs(fastq_files)
outputs = run_seqsero(run2fastqs)
fastq2serotype, fastq2comment = get_serotypes(outputs)
print_html(fastq2serotype, fastq2comment)

print_time()

# Flush and release the debug logs opened at start-up.
test_out.close()
test_out2.close()
216
217