Mercurial > repos > charles_s_test > seqsero2
changeset 9:acc5cd06a36a draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Tue, 28 Nov 2017 21:16:36 -0500 |
parents | b04931a9fe71 |
children | 53efef402c51 |
files | Seqsero_result.html Seqsero_result.txt run_seqsero.py run_seqsero2.py seqsero.xml test.txt |
diffstat | 6 files changed, 243 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
--- a/Seqsero_result.html Tue Nov 28 18:55:00 2017 -0500 +++ b/Seqsero_result.html Tue Nov 28 21:16:36 2017 -0500 @@ -15,20 +15,6 @@ <td>Predicted antigenic profile</td> <td>Predicted serotype(s)</td> </tr> -<tr> -<td>dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq</td> -<td>O--</td> -<td>-</td> -<td>-</td> -<td>-:-:-</td> -<td>N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)</td> -</tr> </table> -<tr> -<p> -<td>dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq<br></td> -<td>check fastq id and make them in accordance with each other...please wait...</td> -</p> -</tr> </body> </html>
--- a/Seqsero_result.txt Tue Nov 28 18:55:00 2017 -0500 +++ b/Seqsero_result.txt Tue Nov 28 21:16:36 2017 -0500 @@ -1,5 +1,1 @@ Input_Files O_antigen_prediction H1_antigen_prediction(fliC) H2_antigen_prediction(fljB) Predicted_antigenic_profile Predicted_serotype(s) -dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq O-- - - -:-:- N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme) - -dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq -check fastq id and make them in accordance with each other...please wait...
--- a/run_seqsero.py Tue Nov 28 18:55:00 2017 -0500 +++ b/run_seqsero.py Tue Nov 28 21:16:36 2017 -0500 @@ -107,43 +107,44 @@ def get_serotypes(outputs): ''' ''' - fastq2comment = {} + fastq2comment = {} fastq2serotype = {} + for sample in outputs: - fastqs = '' - lines_used = [] + fastqs = '' + lines_used = [] for line in sample: # line is actually the entire seqsero output. line = str(line) linel = re.split("\n", line) # print(linel) #lines_used = [] for element in linel: # element is a line of seqsero output. - element = element.rstrip("\n") - test_out.write(element+"\n") + element = element.rstrip("\n") + test_out.write(element+"\n") elementl = re.split("\t", element) if elementl[0] == 'Input files:': fastqs = elementl[1] - lines_used.append(element) + lines_used.append(element) if elementl[1] not in fastq2serotype.keys(): fastq2serotype[fastqs] = ['']*5 - fastq2comment[fastqs] = [] + fastq2comment[fastqs] = [] elif elementl[0] == 'O antigen prediction:': - lines_used.append(element) + lines_used.append(element) fastq2serotype[fastqs][0] = elementl[1] # add predicted profile elif elementl[0] == 'H1 antigen prediction(fliC):': - lines_used.append(element) + lines_used.append(element) fastq2serotype[fastqs][1] = elementl[1] # add predicted profile elif elementl[0] == 'H2 antigen prediction(fljB):': - lines_used.append(element) + lines_used.append(element) fastq2serotype[fastqs][2] = elementl[1] # add predicted profile elif elementl[0] == 'Predicted antigenic profile:': - lines_used.append(element) + lines_used.append(element) fastq2serotype[fastqs][3] = elementl[1] # add predicted profile elif elementl[0] == 'Predicted serotype(s):': - lines_used.append(element) + lines_used.append(element) fastq2serotype[fastqs][4] = elementl[1] # add predicted serotype - if element not in lines_used and re.search("\w", fastqs) and len(element) > 7: - fastq2comment[fastqs].append(element) + if element not in lines_used and re.search("\w", fastqs) and len(element) > 7: + fastq2comment[fastqs].append(element) #print("\n" return fastq2serotype, fastq2comment @@ -151,7 +152,7 @@ ''' Takes dict and prints to html file. ''' - tab_out = open('Seqsero_result.txt', 'w'); + tab_out = open('Seqsero_result.txt', 'w'); html_out = open('Seqsero_result.html', 'w') html_out.write('<!DOCTYPE html>\n') html_out.write('<html>\n') @@ -182,19 +183,19 @@ html_out.write('</tr>\n') print(line_to_print) html_out.write('</table>\n') - print("\n") + print("\n") for fastq in fastq2comment: tab_out.write("\n"+fastq+"\n") html_out.write('<tr>\n') - html_out.write('<p>\n') + html_out.write('<p>\n') html_out.write('<td>'+fastq+"<br></td>\n") for line in fastq2comment[fastq]: #if len(line) > 7: html_out.write('<td>'+line+'</td>\n') tab_out.write(line+"\n") - print(line) - print("\n") - html_out.write('</p>\n') + print(line) + print("\n") + html_out.write('</p>\n') html_out.write('</tr>\n') html_out.write('</body>\n') html_out.write('</html>\n')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/run_seqsero2.py Tue Nov 28 21:16:36 2017 -0500 @@ -0,0 +1,217 @@ +#!/usr/bin/python + +import os, re, sys, time, datetime +import subprocess +from subprocess import Popen, PIPE + + +# Declare Variables +# paths to files and scripts + +fileDir = os.path.dirname(os.path.realpath('__file__')) +print(fileDir) + +fq_list1 = [] +Tool_directory = sys.argv[2] +database = Tool_directory + '/universe.sqlite' +seqsero = Tool_directory + 'SeqSero.py' +test_out = open(Tool_directory + "/test.txt", 'w') +out_path = Tool_directory + '/files/000' +test_out2 = open(Tool_directory + "/test2.txt", 'w') + +path2sample = Tool_directory + 'fastq_tmp' + + +#test_out2.write(" Test test test"); + +test_out.write("monkey "); +#test_out.write(""); +test_out.write("\t".join(sys.argv)+'\n') + + + + +print(Tool_directory + ' path printed ') + +test_out.write(str(len(sys.argv))+"\n") +if len(sys.argv) >= 2: + test_out.write("\t".join(sys.argv)+'\n') + fq_list1 = sys.argv[1] + +test_out.write(str(len(sys.argv))+"\n") + +fastq_files = re.split(",", fq_list1) + +def print_time(): + test_out.write(time.asctime( time.localtime(time.time()))) + +tmp_path = "fastq_tmp" +if not os.path.exists(tmp_path): + os.system('mkdir '+tmp_path) + +test_out.write(str(len(fastq_files))+"\n") + +def list_runs(fastq_files): + ''' + Creates dict with runs as keys and list with filenames as values. + ''' +# print("monkey") + run2fastqs = {} + for file in fastq_files: + run = '' + try: + fastq = open(file, 'r') + i = 0 + for line in fastq: + line = line.rstrip("\n") + if i == 0: + run = re.split("\s", line)[0] + run = re.sub('@', '', run) + run = re.sub("\.1", '', run) + else: + break + i += 1 + file1 = re.split('/', file)[-1] + file2 = re.sub('.dat$', '_'+run+'.fastq', file1) + new_path_file = tmp_path+'/'+file2 +# print run, new_path_file + if file2 not in os.listdir(tmp_path): + os.system('cp '+file+' '+new_path_file) + if run in run2fastqs.keys(): + if file not in run2fastqs[run]: + run2fastqs[run].append(new_path_file) + else: + run2fastqs[run] = [new_path_file] + except IOError: + print("Data not found. It is possible for a deleted file to still be listed "\ + "in a Galaxy library. Please confirm that the data still exists on this "\ + "server. You may need to upload it again.") + return run2fastqs + +def run_seqsero(run2fastqs): + ''' + Takes files from run2fastqs and runs SeqSero. + ''' + outputs = [] + for run in run2fastqs: + seqsero_cmd = [] + if len(run2fastqs[run]) == 2: + seqsero_cmd = ['python', seqsero, '-m', '2', '-i', run2fastqs[run][0], run2fastqs[run][1]] + elif len(run2fastqs[run]) == 1: + seqsero_cmd = ['python', seqsero, '-m', '1', '-i', run2fastqs[run][0]] + p = Popen(seqsero_cmd, stdout=PIPE) + output = p.communicate() + outputs.append(output) + return outputs + +def get_serotypes(outputs): + ''' + ''' + fastq2comment = {} + fastq2serotype = {} + for sample in outputs: + fastqs = '' + lines_used = [] + for line in sample: # line is actually the entire seqsero output. + line = str(line) + linel = re.split("\n", line) +# print(linel) + #lines_used = [] + for element in linel: # element is a line of seqsero output. + element = element.rstrip("\n") + test_out.write(element+"\n") + elementl = re.split("\t", element) + if elementl[0] == 'Input files:': + fastqs = elementl[1] + lines_used.append(element) + if elementl[1] not in fastq2serotype.keys(): + fastq2serotype[fastqs] = ['']*5 + fastq2comment[fastqs] = [] + elif elementl[0] == 'O antigen prediction:': + lines_used.append(element) + fastq2serotype[fastqs][0] = elementl[1] # add predicted profile + elif elementl[0] == 'H1 antigen prediction(fliC):': + lines_used.append(element) + fastq2serotype[fastqs][1] = elementl[1] # add predicted profile + elif elementl[0] == 'H2 antigen prediction(fljB):': + lines_used.append(element) + fastq2serotype[fastqs][2] = elementl[1] # add predicted profile + elif elementl[0] == 'Predicted antigenic profile:': + lines_used.append(element) + fastq2serotype[fastqs][3] = elementl[1] # add predicted profile + elif elementl[0] == 'Predicted serotype(s):': + lines_used.append(element) + fastq2serotype[fastqs][4] = elementl[1] # add predicted serotype + if element not in lines_used and re.search("\w", fastqs) and len(element) > 7: + fastq2comment[fastqs].append(element) + #print("\n" + return fastq2serotype, fastq2comment + +def print_html(fastq2serotype, fastq2comment): + ''' + Takes dict and prints to html file. + ''' + tab_out = open('Seqsero_result.txt', 'w'); + html_out = open('Seqsero_result.html', 'w') + html_out.write('<!DOCTYPE html>\n') + html_out.write('<html>\n') + html_out.write('<head>\n') + html_out.write('<title>SeqSero Results</title>\n') + html_out.write('</head>\n') + html_out.write('<body>\n') + html_out.write('<body style="font-family:Helvetica;">\n') + html_out.write('<p style="font-size:10px">\n') + html_out.write('<table border=1>\n') + header = 'Input Files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted antigenic profile\tPredicted serotype(s)' + header = re.sub(' ', '_', header) + header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)', 'Predicted antigenic profile', 'Predicted serotype(s)'] + html_out.write('<tr>\n') + for element in header_l: + html_out.write('<td>'+element+'</td>\n') + html_out.write('</tr>\n') + tab_out.write(header+"\n") + print("\n\n", header) + for fastq in fastq2serotype: +# print(fastq, fastq2serotype[fastq]) + line_to_print = fastq+'\t'+"\t".join(fastq2serotype[fastq]) + tab_out.write(line_to_print+"\n") + html_out.write('<tr>\n') + html_out.write('<td>'+fastq+'</td>\n') + for antigen in fastq2serotype[fastq]: + html_out.write('<td>'+antigen+'</td>\n') + html_out.write('</tr>\n') + print(line_to_print) + html_out.write('</table>\n') + print("\n") + for fastq in fastq2comment: + tab_out.write("\n"+fastq+"\n") + html_out.write('<tr>\n') + html_out.write('<p>\n') + html_out.write('<td>'+fastq+"<br></td>\n") + for line in fastq2comment[fastq]: + #if len(line) > 7: + html_out.write('<td>'+line+'</td>\n') + tab_out.write(line+"\n") + print(line) + print("\n") + html_out.write('</p>\n') + html_out.write('</tr>\n') + html_out.write('</body>\n') + html_out.write('</html>\n') + html_out.close() + + +def readFile(filename): + filehandle = open(filename) + print(filehandle.read()) + filehandle.close() + + +run2fastqs = list_runs(fastq_files) +outputs = run_seqsero(run2fastqs) +fastq2serotype, fastq2comment = get_serotypes(outputs) +print_html(fastq2serotype, fastq2comment) + +print_time() + +
--- a/seqsero.xml Tue Nov 28 18:55:00 2017 -0500 +++ b/seqsero.xml Tue Nov 28 21:16:36 2017 -0500 @@ -12,12 +12,7 @@ <exit_code range="0:" level="warning"/> <regex match="Error:"/> </stdio> - <!-- <command interpreter="python2.7"> - run_seqsero.py $input1 - run_seqsero_batch_galaxy.py $input1 $input2--> - <!--test_bioblend.py $input1 $input2 - </command> --> - <command > + <command> python $__tool_directory__/run_seqsero.py $input1 $__tool_directory__/
--- a/test.txt Tue Nov 28 18:55:00 2017 -0500 +++ b/test.txt Tue Nov 28 21:16:36 2017 -0500 @@ -1,22 +1,8 @@ -monkey /galaxydir/galaxy/tools/GalaxySeqsero/run_seqsero.py /galaxydir/galaxy/database/files/000/dataset_1.dat,/galaxydir/galaxy/database/files/000/dataset_2.dat /galaxydir/galaxy/tools/GalaxySeqsero/ +monkey /galaxydir/galaxy/tools/GalaxySeqsero/run_seqsero.py /galaxydir/galaxy/database/files/000/dataset_170.dat,/galaxydir/galaxy/database/files/000/dataset_171.dat /galaxydir/galaxy/tools/GalaxySeqsero/ 3 -/galaxydir/galaxy/tools/GalaxySeqsero/run_seqsero.py /galaxydir/galaxy/database/files/000/dataset_1.dat,/galaxydir/galaxy/database/files/000/dataset_2.dat /galaxydir/galaxy/tools/GalaxySeqsero/ +/galaxydir/galaxy/tools/GalaxySeqsero/run_seqsero.py /galaxydir/galaxy/database/files/000/dataset_170.dat,/galaxydir/galaxy/database/files/000/dataset_171.dat /galaxydir/galaxy/tools/GalaxySeqsero/ 3 2 -('\n', '\n') -Input files: dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq -O antigen prediction: O-? -H1 antigen prediction(fliC): k -H2 antigen prediction(fljB): 1,5 -Predicted antigenic profile: ?:k:1,5 -Predicted serotype(s): N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme) - -check fastq id and make them in accordance with each other...please wait... - - - -Result: - - +b"('\\n', '\\n')\nInput files:\tdataset_170_SRR6325381.fastq dataset_171_SRR6325381.fastq\nO antigen prediction:\tO-?\nH1 antigen prediction(fliC):\t-\nH2 antigen prediction(fljB):\t1,6\nPredicted antigenic profile:\t?:-:1,6\nPredicted serotype(s):\tN/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)\n\ncheck fastq id and make them in accordance with each other...please wait...\n\n\n\nResult:\n\n" None -Mon Nov 27 16:46:44 2017 \ No newline at end of file +Tue Nov 28 19:30:26 2017 \ No newline at end of file