seqsero2: run_seqsero2.py comparison

comparison run_seqsero2.py @ 9:acc5cd06a36a draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty

author	charles_s_test
date	Tue, 28 Nov 2017 21:16:36 -0500
parents
children

comparison

equal deleted inserted replaced

-:b04931a9fe71
+:acc5cd06a36a
+#!/usr/bin/python
import datetime
import os
import re
import shutil
import subprocess
import sys
import time
from subprocess import Popen, PIPE
# Declare Variables
# paths to files and scripts

# Directory holding this script. The bare name __file__ is used here; the
# quoted literal '__file__' only ever resolved relative to the working
# directory.
fileDir = os.path.dirname(os.path.realpath(__file__))
print(fileDir)

# Expected invocation: <script> <fastq1[,fastq2,...]> <tool_directory>
# Both positional arguments are read below, so fail early with a usage
# message instead of an IndexError.
if len(sys.argv) < 3:
    sys.exit("usage: " + sys.argv[0] + " <fastq1[,fastq2,...]> <tool_directory>")

fq_list1 = sys.argv[1]
Tool_directory = sys.argv[2]

# os.path.join inserts the separator whether or not Tool_directory already
# ends with one.
database = os.path.join(Tool_directory, 'universe.sqlite')
seqsero = os.path.join(Tool_directory, 'SeqSero.py')
out_path = os.path.join(Tool_directory, 'files', '000')
path2sample = os.path.join(Tool_directory, 'fastq_tmp')

# Debug logs written next to the tool.
test_out = open(os.path.join(Tool_directory, "test.txt"), 'w')
test_out2 = open(os.path.join(Tool_directory, "test2.txt"), 'w')

# Record the raw command line in the debug log.
test_out.write("monkey ")
test_out.write("\t".join(sys.argv) + '\n')
print(Tool_directory + '  path printed ')
test_out.write(str(len(sys.argv)) + "\n")
test_out.write("\t".join(sys.argv) + '\n')
test_out.write(str(len(sys.argv)) + "\n")

# The first argument is a comma-separated list of FASTQ paths.
fastq_files = fq_list1.split(",")
def print_time(out=None):
    '''
    Writes the current local time to the debug log.

    out: optional writable text stream. When omitted, the module-level
         debug log ``test_out`` is used.

    The timestamp is terminated with a newline and flushed immediately so
    it reaches disk even if the log is never explicitly closed.
    '''
    if out is None:
        out = test_out
    # time.asctime() with no argument formats the current local time.
    out.write(time.asctime() + "\n")
    out.flush()
# Working directory (relative to the current directory) where input FASTQ
# files are staged under run-based names.
tmp_path = "fastq_tmp"
if not os.path.isdir(tmp_path):
    # Create the directory directly instead of shelling out to `mkdir`,
    # so a failure raises instead of being silently ignored.
    os.makedirs(tmp_path)
test_out.write(str(len(fastq_files)) + "\n")
def list_runs(fastq_files, tmp_dir=None):
    '''
    Creates dict with runs as keys and list with filenames as values.

    For each input file the run name is taken from the first
    whitespace-delimited token of its first line, with '@' and '.1'
    removed. The file is copied into the staging directory with a trailing
    '.dat' replaced by '_<run>.fastq', and the staged path is recorded
    under the run name.

    fastq_files: iterable of paths to FASTQ files.
    tmp_dir:     staging directory; defaults to the module-level
                 ``tmp_path``.

    Returns a dict mapping run name -> list of staged file paths (each
    path listed once). Files that cannot be read are reported on stdout
    and skipped.
    '''
    if tmp_dir is None:
        tmp_dir = tmp_path
    run2fastqs = {}
    for fastq_path in fastq_files:
        try:
            # Only the header line is needed; the handle is closed at once.
            with open(fastq_path, 'r') as fastq:
                first_line = fastq.readline().rstrip("\n")
            run = re.split(r"\s", first_line)[0]
            run = run.replace('@', '')
            run = re.sub(r"\.1", '', run)

            staged_name = fastq_path.split('/')[-1]
            # Only a literal '.dat' suffix is rewritten.
            if staged_name.endswith('.dat'):
                staged_name = staged_name[:-len('.dat')] + '_' + run + '.fastq'
            staged_path = os.path.join(tmp_dir, staged_name)

            if not os.path.exists(staged_path):
                # Copy without a shell so paths with spaces work.
                shutil.copy(fastq_path, staged_path)

            # De-duplicate on the staged path, which is what the list holds.
            staged = run2fastqs.setdefault(run, [])
            if staged_path not in staged:
                staged.append(staged_path)
        except IOError:
            print("Data not found. It is possible for a deleted file to still be listed "
                  "in a Galaxy library. Please confirm that the data still exists on this "
                  "server. You may need to upload it again.")
    return run2fastqs
def run_seqsero(run2fastqs):
    '''
    Takes files from run2fastqs and runs SeqSero.

    run2fastqs: dict mapping run name -> list of FASTQ paths. A run with
                two files is launched with '-m 2', a run with one file
                with '-m 1'.

    Returns a list with one ``Popen.communicate()`` result per launched
    run, i.e. a (stdout, stderr) tuple; stderr is None because only stdout
    is piped. Runs with any other number of files are reported on stdout
    and skipped, since there is no command line to build for them.
    '''
    outputs = []
    for run in run2fastqs:
        fastqs = run2fastqs[run]
        if len(fastqs) == 2:
            mode = '2'
        elif len(fastqs) == 1:
            mode = '1'
        else:
            print("Skipping run '%s': expected 1 or 2 fastq files, found %d."
                  % (run, len(fastqs)))
            continue
        seqsero_cmd = ['python', seqsero, '-m', mode, '-i'] + list(fastqs)
        # universal_newlines=True makes stdout text rather than bytes, so
        # the report can be split into lines downstream.
        p = Popen(seqsero_cmd, stdout=PIPE, universal_newlines=True)
        outputs.append(p.communicate())
    return outputs
def get_serotypes(outputs, log=None):
    '''
    Parses SeqSero reports into serotype fields and free-text comments.

    outputs: list of samples; each sample is an iterable of report texts
             (as returned by ``Popen.communicate()``, so entries may be
             str, bytes or None).
    log:     optional writable text stream that receives every report
             line; defaults to the module-level debug log ``test_out``.

    Returns (fastq2serotype, fastq2comment):
      fastq2serotype maps the 'Input files:' value to a 5-item list
        [O antigen, H1 antigen, H2 antigen, antigenic profile, serotype];
      fastq2comment maps the same key to the remaining report lines that
        are longer than 7 characters.
    '''
    if log is None:
        log = test_out
    # Position of each labelled report field in the per-sample result list.
    field_index = {
        'O antigen prediction:': 0,
        'H1 antigen prediction(fliC):': 1,
        'H2 antigen prediction(fljB):': 2,
        'Predicted antigenic profile:': 3,
        'Predicted serotype(s):': 4,
    }
    fastq2comment = {}
    fastq2serotype = {}
    for sample in outputs:
        fastqs = ''
        for report in sample:  # each entry is an entire stream of output
            if report is None:
                # Unpiped stream (e.g. stderr): nothing to parse.
                continue
            if isinstance(report, bytes) and not isinstance(report, str):
                # Bytes must be decoded; str() would yield the b'...' repr.
                report = report.decode('utf-8', 'replace')
            for element in str(report).split("\n"):
                log.write(element + "\n")
                fields = element.split("\t")
                label = fields[0]
                if len(fields) > 1:
                    if label == 'Input files:':
                        fastqs = fields[1]
                        if fastqs not in fastq2serotype:
                            fastq2serotype[fastqs] = [''] * 5
                            fastq2comment[fastqs] = []
                        continue
                    if label in field_index:
                        # Ignore a field seen before any 'Input files:'
                        # line; there is no record to attach it to.
                        if fastqs in fastq2serotype:
                            fastq2serotype[fastqs][field_index[label]] = fields[1]
                        continue
                # Any other sufficiently long line is kept as a comment for
                # the current sample.
                if re.search(r"\w", fastqs) and len(element) > 7:
                    fastq2comment[fastqs].append(element)
    return fastq2serotype, fastq2comment
def print_html(fastq2serotype, fastq2comment):
    '''
    Takes dict and prints to html file.

    fastq2serotype: dict mapping input-file label -> list of antigen /
                    serotype strings (one table cell each).
    fastq2comment:  dict mapping input-file label -> list of comment lines.

    Writes 'Seqsero_result.txt' (tab-separated) and 'Seqsero_result.html'
    in the current directory and echoes the rows and comments to stdout.
    Both files are closed on exit, including when an error occurs.
    '''
    header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)', 'Predicted antigenic profile', 'Predicted serotype(s)']
    # Tab-separated header with spaces replaced by underscores.
    header = "\t".join(header_l).replace(' ', '_')

    with open('Seqsero_result.txt', 'w') as tab_out, \
            open('Seqsero_result.html', 'w') as html_out:
        html_out.write('<!DOCTYPE html>\n')
        html_out.write('<html>\n')
        html_out.write('<head>\n')
        html_out.write('<title>SeqSero Results</title>\n')
        html_out.write('</head>\n')
        # Exactly one opening <body> tag, matching the single </body> below.
        html_out.write('<body style="font-family:Helvetica;">\n')
        html_out.write('<p style="font-size:10px">\n')
        html_out.write('<table border=1>\n')

        # Header row.
        html_out.write('<tr>\n')
        for element in header_l:
            html_out.write('<td>' + element + '</td>\n')
        html_out.write('</tr>\n')
        tab_out.write(header + "\n")
        print("\n\n", header)

        # One row per sample.
        for fastq in fastq2serotype:
            line_to_print = fastq + '\t' + "\t".join(fastq2serotype[fastq])
            tab_out.write(line_to_print + "\n")
            html_out.write('<tr>\n')
            html_out.write('<td>' + fastq + '</td>\n')
            for antigen in fastq2serotype[fastq]:
                html_out.write('<td>' + antigen + '</td>\n')
            html_out.write('</tr>\n')
            print(line_to_print)
        html_out.write('</table>\n')
        print("\n")

        # Comment lines for each sample, written after the table.
        for fastq in fastq2comment:
            tab_out.write("\n" + fastq + "\n")
            html_out.write('<tr>\n')
            html_out.write('<p>\n')
            html_out.write('<td>' + fastq + "<br></td>\n")
            for line in fastq2comment[fastq]:
                html_out.write('<td>' + line + '</td>\n')
                tab_out.write(line + "\n")
                print(line)
            print("\n")
            html_out.write('</p>\n')
            html_out.write('</tr>\n')
        html_out.write('</body>\n')
        html_out.write('</html>\n')
def readFile(filename):
    '''
    Prints the entire contents of a file to stdout.

    filename: path of the file to read.

    The file is closed even if reading or printing raises.
    '''
    with open(filename) as filehandle:
        print(filehandle.read())
# Pipeline: stage the inputs by run, run SeqSero on each run, parse the
# reports, then write the text/HTML summaries and a timestamp.
run2fastqs = list_runs(fastq_files)
outputs = run_seqsero(run2fastqs)
fastq2serotype, fastq2comment = get_serotypes(outputs)
print_html(fastq2serotype, fastq2comment)
print_time()

# Close the debug logs so their contents are flushed to disk.
test_out.close()
test_out2.close()

Mercurial > repos > charles_s_test > seqsero2

comparison run_seqsero2.py @ 9:acc5cd06a36a draft