Mercurial > repos > charles_s_test > seqsero2
view run_seqsero.py @ 7:3d6680af0bec draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 16:30:27 -0500 |
parents | 38ad1130d077 |
children | acc5cd06a36a |
line wrap: on
line source
#!/usr/bin/python
"""Wrapper script that runs SeqSero on FASTQ datasets and tabulates the results.

Usage:
    run_seqsero.py <fastq1[,fastq2,...]> <tool_directory>

The first argument is a comma-separated list of FASTQ dataset paths; the
second is the directory that contains ``SeqSero.py``.  Datasets are staged
into ``fastq_tmp/`` (relative to the working directory), grouped by the run
id found on their first line, and passed to SeqSero.  Results are written to
``Seqsero_result.txt`` and ``Seqsero_result.html`` in the working directory,
and a debug trace is written to ``test.txt`` inside the tool directory.
"""
import datetime
import os
import re
import shutil
import subprocess
import sys
import time
from subprocess import Popen, PIPE

# Module-level state.  ``seqsero`` and ``test_out`` are filled in by main();
# they stay module globals so the helper functions keep their original
# signatures.
tmp_path = "fastq_tmp"     # staging directory, relative to the working dir
seqsero = 'SeqSero.py'     # path to the SeqSero script
test_out = None            # open debug-log handle, or None when not logging

# SeqSero output keys mapped to their slot in the 5-element result list.
_FIELD_INDEX = {
    'O antigen prediction:': 0,
    'H1 antigen prediction(fliC):': 1,
    'H2 antigen prediction(fljB):': 2,
    'Predicted antigenic profile:': 3,
    'Predicted serotype(s):': 4,
}


def _log(text):
    """Write *text* to the debug log; do nothing if no log is open."""
    if test_out is not None:
        test_out.write(text)


def _escape_html(text):
    """Return *text* with the HTML-significant characters &, < and > escaped."""
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def print_time():
    """Write the current local time to the debug log."""
    _log(time.asctime(time.localtime(time.time())))


def list_runs(fastq_files):
    """Stage FASTQ files into ``tmp_path`` and group them by run id.

    The run id is the first whitespace-delimited token of the file's first
    line with every '@' and every '.1' removed.  A file named ``X.dat`` is
    staged as ``X_<run>.fastq``; other names are staged unchanged.

    Args:
        fastq_files: iterable of paths to FASTQ datasets.

    Returns:
        dict mapping run id -> list of staged file paths (no duplicates).
        Files that cannot be read or copied are reported on stdout and
        skipped.
    """
    run2fastqs = {}
    if not os.path.isdir(tmp_path):
        os.makedirs(tmp_path)

    for file_path in fastq_files:
        try:
            with open(file_path, 'r') as fastq:
                first_line = fastq.readline().rstrip("\n")

            run = re.split(r"\s", first_line)[0]
            run = run.replace('@', '')
            # NOTE: removes every ".1" occurrence, not only a trailing one;
            # kept as-is so run ids do not change.
            run = re.sub(r"\.1", '', run)

            base_name = os.path.basename(file_path)
            if base_name.endswith('.dat'):
                staged_name = base_name[:-len('.dat')] + '_' + run + '.fastq'
            else:
                staged_name = base_name
            new_path_file = tmp_path + '/' + staged_name

            if not os.path.exists(new_path_file):
                shutil.copy(file_path, new_path_file)
        except IOError:
            print("Data not found. It is possible for a deleted file to still be listed "
                  "in a Galaxy library. Please confirm that the data still exists on this "
                  "server. You may need to upload it again.")
            continue

        # De-duplicate on the staged path (the values actually stored), so
        # listing the same dataset twice does not fake a paired-end run.
        staged = run2fastqs.setdefault(run, [])
        if new_path_file not in staged:
            staged.append(new_path_file)
    return run2fastqs


def run_seqsero(run2fastqs):
    """Run SeqSero once per run and collect its output.

    Runs with two files are executed with ``-m 2``, runs with one file
    with ``-m 1``.  Runs with any other number of files cannot be expressed
    as a SeqSero command; they are reported on stderr and skipped.

    Args:
        run2fastqs: dict mapping run id -> list of staged FASTQ paths.

    Returns:
        list of ``(stdout, stderr)`` tuples as returned by
        ``Popen.communicate()``; stderr is not captured and is ``None``.
    """
    outputs = []
    for run, fastqs in run2fastqs.items():
        if len(fastqs) == 2:
            mode = '2'
        elif len(fastqs) == 1:
            mode = '1'
        else:
            sys.stderr.write(
                "Skipping run %r: expected 1 or 2 FASTQ files, found %d\n"
                % (run, len(fastqs)))
            continue
        seqsero_cmd = ['python', seqsero, '-m', mode, '-i'] + list(fastqs)
        process = Popen(seqsero_cmd, stdout=PIPE)
        outputs.append(process.communicate())
    return outputs


def get_serotypes(outputs):
    """Parse SeqSero output into per-sample predictions and comments.

    Args:
        outputs: list of ``(stdout, stderr)`` tuples from run_seqsero().
            Streams may be bytes, text, or ``None``.

    Returns:
        ``(fastq2serotype, fastq2comment)``.  ``fastq2serotype`` maps the
        value of the 'Input files:' line to a 5-element list
        ``[O antigen, H1 antigen, H2 antigen, antigenic profile, serotype]``.
        ``fastq2comment`` maps the same key to the remaining output lines
        longer than 7 characters.
    """
    fastq2comment = {}
    fastq2serotype = {}
    for sample in outputs:
        fastqs = ''
        lines_used = []
        for stream in sample:
            if stream is None:
                # stderr slot of communicate() when it was not piped.
                continue
            if not isinstance(stream, str):
                # Python 3 pipes yield bytes; str() on them would produce
                # "b'...'" with literal "\n" sequences.
                stream = stream.decode('utf-8', 'replace')

            for element in stream.split("\n"):
                element = element.rstrip("\r\n")
                _log(element + "\n")
                fields = element.split("\t")
                key = fields[0]
                value = fields[1] if len(fields) > 1 else None

                recognised = False
                if value is not None:
                    if key == 'Input files:':
                        fastqs = value
                        fastq2serotype.setdefault(fastqs, [''] * 5)
                        fastq2comment.setdefault(fastqs, [])
                        recognised = True
                    elif key in _FIELD_INDEX and fastqs in fastq2serotype:
                        fastq2serotype[fastqs][_FIELD_INDEX[key]] = value
                        recognised = True

                if recognised:
                    lines_used.append(element)
                elif (element not in lines_used
                        and re.search(r"\w", fastqs)
                        and len(element) > 7):
                    fastq2comment[fastqs].append(element)
    return fastq2serotype, fastq2comment


def print_html(fastq2serotype, fastq2comment):
    """Write the results to Seqsero_result.txt / .html and echo them to stdout.

    Args:
        fastq2serotype: dict mapping input-file label -> list of prediction
            strings (one table row per entry).
        fastq2comment: dict mapping input-file label -> list of comment
            lines, written after the table.
    """
    header_l = ['Input Files', 'O antigen prediction',
                'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)',
                'Predicted antigenic profile', 'Predicted serotype(s)']
    header = "\t".join(title.replace(' ', '_') for title in header_l)

    with open('Seqsero_result.txt', 'w') as tab_out, \
            open('Seqsero_result.html', 'w') as html_out:
        html_out.write('<!DOCTYPE html>\n')
        html_out.write('<html>\n')
        html_out.write('<head>\n')
        html_out.write('<title>SeqSero Results</title>\n')
        html_out.write('</head>\n')
        html_out.write('<body style="font-family:Helvetica;font-size:10px">\n')
        html_out.write('<table border=1>\n')

        html_out.write('<tr>\n')
        for title in header_l:
            html_out.write('<td>' + _escape_html(title) + '</td>\n')
        html_out.write('</tr>\n')
        tab_out.write(header + "\n")
        print("\n\n", header)

        for fastq, antigens in fastq2serotype.items():
            line_to_print = fastq + '\t' + "\t".join(antigens)
            tab_out.write(line_to_print + "\n")
            html_out.write('<tr>\n')
            html_out.write('<td>' + _escape_html(fastq) + '</td>\n')
            for antigen in antigens:
                html_out.write('<td>' + _escape_html(antigen) + '</td>\n')
            html_out.write('</tr>\n')
            print(line_to_print)
        html_out.write('</table>\n')
        print("\n")

        # Comments are free text, so they go in paragraphs after the table
        # rather than in table rows outside any <table>.
        for fastq, comments in fastq2comment.items():
            tab_out.write("\n" + fastq + "\n")
            html_out.write('<p>\n')
            html_out.write(_escape_html(fastq) + '<br>\n')
            for line in comments:
                html_out.write(_escape_html(line) + '<br>\n')
                tab_out.write(line + "\n")
                print(line)
            print("\n")
            html_out.write('</p>\n')

        html_out.write('</body>\n')
        html_out.write('</html>\n')


def readFile(filename):
    """Print the entire contents of *filename* to stdout."""
    with open(filename) as filehandle:
        print(filehandle.read())


def main(argv=None):
    """Run the whole pipeline; return a process exit status.

    Args:
        argv: argument vector; defaults to ``sys.argv``.  ``argv[1]`` is the
            comma-separated FASTQ list, ``argv[2]`` the tool directory.

    Returns:
        0 on success, 2 when required arguments are missing.
    """
    global seqsero, test_out

    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.stderr.write(
            "usage: run_seqsero.py <fastq1[,fastq2,...]> <tool_directory>\n")
        return 2

    fq_list1 = argv[1]
    tool_directory = argv[2]

    print(os.path.dirname(os.path.realpath(__file__)))

    # os.path.join is correct whether or not tool_directory ends with '/'.
    seqsero = os.path.join(tool_directory, 'SeqSero.py')

    # test2.txt was created (empty) by earlier versions of this script;
    # keep creating it, but do not leave the handle open.
    open(os.path.join(tool_directory, 'test2.txt'), 'w').close()

    test_out = open(os.path.join(tool_directory, 'test.txt'), 'w')
    try:
        _log("monkey ")
        _log("\t".join(argv) + '\n')
        print(tool_directory + ' path printed ')
        _log(str(len(argv)) + "\n")
        _log("\t".join(argv) + '\n')
        _log(str(len(argv)) + "\n")

        fastq_files = fq_list1.split(",")
        if not os.path.isdir(tmp_path):
            os.makedirs(tmp_path)
        _log(str(len(fastq_files)) + "\n")

        run2fastqs = list_runs(fastq_files)
        outputs = run_seqsero(run2fastqs)
        fastq2serotype, fastq2comment = get_serotypes(outputs)
        print_html(fastq2serotype, fastq2comment)
        print_time()
    finally:
        test_out.close()
        test_out = None
    return 0


if __name__ == "__main__":
    sys.exit(main())