SeqSero Results

# HG changeset patch # User charles_s_test # Date 1510471652 18000 # Node ID 0d65b71ff8df152640f67b48e4d929791e091002 # Parent 8cd7fc65c3a75f1baeb15d266167ef3e4ab656a5 planemo upload commit 464b391afaa5819bc681452e85bea9d882730eb6 diff -r 8cd7fc65c3a7 -r 0d65b71ff8df README.md --- a/README.md Fri Oct 27 17:47:00 2017 -0400 +++ b/README.md Sun Nov 12 02:27:32 2017 -0500 @@ -1,16 +1,5 @@ - -# GalaxySeqSero -#This Project has been modified by the CFSAN FDA office to work in Galaxy it also has a batch functionality as well add by the Galaxy rapper code. - -All below dependencies are the same for Seqsero. - Add this project to the galaxy tools directory and then added to tool_conf.xml - -Adding custom tools to Galaxy - -https://galaxyproject.org/admin/tools/add-tool-tutorial/ - - -# SeqSero 1.1 +<<<<<<< HEAD +# SeqSero 1.0 Salmonella serotyping from genome sequencing data diff -r 8cd7fc65c3a7 -r 0d65b71ff8df Seqsero_result.html --- a/Seqsero_result.html Fri Oct 27 17:47:00 2017 -0400 +++ b/Seqsero_result.html Sun Nov 12 02:27:32 2017 -0500 @@ -16,19 +16,19 @@ Predicted serotype(s) -dataset_15_sra_data9.fastq -O-16 -d -1,7 -16:d:1,7 -Gaminara +dataset_445_SRR6158764_1.fastq dataset_446_SRR6158764_2.fastq +O-7 +y +1,5 +7:y:1,5 +Bareilly

-dataset_15_sra_data9.fastq
+dataset_445_SRR6158764_1.fastq dataset_446_SRR6158764_2.fastq
The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes. -/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp/dataset_15_sra_data9.fastq +check fastq id and make them in accordance with each other...please wait...

diff -r 8cd7fc65c3a7 -r 0d65b71ff8df Seqsero_result.txt --- a/Seqsero_result.txt Fri Oct 27 17:47:00 2017 -0400 +++ b/Seqsero_result.txt Sun Nov 12 02:27:32 2017 -0500 @@ -1,6 +1,6 @@ Input_Files O_antigen_prediction H1_antigen_prediction(fliC) H2_antigen_prediction(fljB) Predicted_antigenic_profile Predicted_serotype(s) -dataset_15_sra_data9.fastq O-16 d 1,7 16:d:1,7 Gaminara +dataset_445_SRR6158764_1.fastq dataset_446_SRR6158764_2.fastq O-7 y 1,5 7:y:1,5 Bareilly -dataset_15_sra_data9.fastq +dataset_445_SRR6158764_1.fastq dataset_446_SRR6158764_2.fastq The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes. -/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp/dataset_15_sra_data9.fastq +check fastq id and make them in accordance with each other...please wait... diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/BWA_analysis_H_update_new_family_dependent.py --- a/libs/BWA_analysis_H_update_new_family_dependent.py Fri Oct 27 17:47:00 2017 -0400 +++ b/libs/BWA_analysis_H_update_new_family_dependent.py Sun Nov 12 02:27:32 2017 -0500 @@ -6,8 +6,8 @@ from Initial_functions import Uniq from Bio.Blast import NCBIXML -BwaPath="/nfs/sw/apps/bwa/bwa-0.7.15/bwa" -SamTlsPth="/nfs/sw/apps/samtools/samtools-1.3.1/bin/samtools" +BwaPath="bwa" +SamTlsPth="samtools" Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb" Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn" diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/BWA_analysis_O_new_dependent.py --- a/libs/BWA_analysis_O_new_dependent.py Fri Oct 27 17:47:00 2017 -0400 +++ b/libs/BWA_analysis_O_new_dependent.py Sun Nov 12 02:27:32 2017 -0500 @@ -7,8 +7,8 @@ from Initial_functions import Uniq from Bio.Blast import NCBIXML -BwaPath="/nfs/sw/apps/bwa/bwa-0.7.15/bwa" -SamTlsPth="/nfs/sw/apps/samtools/samtools-1.3.1/bin/samtools" +BwaPath="bwa" +SamTlsPth="samtools" Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb" Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn" diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/Initial_Conditions.pyc Binary file libs/Initial_Conditions.pyc has changed diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/Initial_functions.pyc Binary file libs/Initial_functions.pyc has changed diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/deletion_compare.py --- a/libs/deletion_compare.py Fri Oct 27 17:47:00 2017 -0400 +++ b/libs/deletion_compare.py Sun Nov 12 02:27:32 2017 -0500 @@ -5,8 +5,8 @@ from Initial_functions import Uniq from Bio.Blast import NCBIXML -BwaPath="/nfs/sw/apps/bwa/bwa-0.7.15/bwa" -SamTlsPth="/nfs/sw/apps/samtools/samtools-1.3.1/bin/samtools" +BwaPath="bwa" +SamTlsPth="samtools" Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb" Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn" diff -r 8cd7fc65c3a7 -r 0d65b71ff8df logupdate_tool.log diff -r 8cd7fc65c3a7 -r 0d65b71ff8df psql_test.py --- a/psql_test.py Fri Oct 27 17:47:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -import psycopg2, os - -os.system('source /nfs/sw/apps/galaxy-dev/galaxy/.venv/bin/activate') - -connection = psycopg2.connect(database="galaxy", user="galaxy", host="galaxydev.cvkyaz9id4ml.us-east-1.rds.amazonaws.com", password="cF$cl0udh9c", port="5432") -print "monkey made a connection!" - -worker = connection.cursor() -worker.execute('SELECT name FROM library_dataset_dataset_association WHERE dataset_id = 4;') -result = worker.fetchall() -print result, '4' - -worker = connection.cursor() -worker.execute('SELECT name FROM library_dataset_dataset_association WHERE dataset_id = 10;') -result = worker.fetchall() -print result, '10' - -worker = connection.cursor() -worker.execute('SELECT name FROM library_dataset_dataset_association WHERE dataset_id = 11;') -result = worker.fetchall() -print result, '11' - -worker = connection.cursor() -worker.execute('SELECT name FROM library_dataset_dataset_association WHERE dataset_id = 18;') -result = worker.fetchall() -print result, '18' diff -r 8cd7fc65c3a7 -r 0d65b71ff8df run_seqsero_batch_galaxy.py --- a/run_seqsero_batch_galaxy.py Fri Oct 27 17:47:00 2017 -0400 +++ b/run_seqsero_batch_galaxy.py Sun Nov 12 02:27:32 2017 -0500 @@ -13,7 +13,7 @@ from datetime import datetime print 'monkey found some files', sys.argv -engine = create_engine('postgresql+psycopg2://galaxy:cF$cl0udh9c@galaxyprod.cvkyaz9id4ml.us-east-1.rds.amazonaws.com:5432/galaxy') +engine = create_engine('postgresql+psycopg2://galaxy:cF$cl0udh9c@galaxydev.cvkyaz9id4ml.us-east-1.rds.amazonaws.com:5432/galaxy') print 'monkey says "vroom vroom"' connection = engine.connect() print "monkey made a connection!" diff -r 8cd7fc65c3a7 -r 0d65b71ff8df run_seqsero_batch_galaxy_09-15-17.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/run_seqsero_batch_galaxy_09-15-17.py Sun Nov 12 02:27:32 2017 -0500 @@ -0,0 +1,201 @@ +#!/usr/bin/python + +print "monkey has started" + +import os, re, sys, time, datetime +import subprocess +from subprocess import Popen, PIPE + +database = '/nfs/sw/apps/galaxy-prd/galaxy/database/universe.sqlite' +seqsero = '/nfs/sw/apps/galaxy-prd/galaxy/tools/seqsero/SeqSero.py' +test_out = open("/nfs/sw/apps/galaxy-prd/galaxy/tools/seqsero/test.txt", 'w') +out_path = '/nfs/sw/apps/galaxy-prd/galaxy/database/files/000' +test_out2 = open("/nfs/sw/apps/galaxy-prd/galaxy/tools/seqsero/test2.txt", 'w') + +path2sample = '/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp' + +#test_out2.write(" Test test test"); + +test_out.write("monkey "); +#test_out.write(""); +test_out.write("\t".join(sys.argv)+'\n') + +fq_list1 = [] +fq_list2 = [] + +test_out.write(str(len(sys.argv))+"\n") +if len(sys.argv) >= 2: + test_out.write("\t".join(sys.argv)+'\n') + fq_list1 = sys.argv[1] + +test_out.write(str(len(sys.argv))+"\n") + +fastq_files = re.split(",", fq_list1) + +def print_time(): + test_out.write(time.asctime( time.localtime(time.time()))) + +tmp_path = "/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp" +if not os.path.exists(tmp_path): + os.system('mkdir '+tmp_path) + +test_out.write(str(len(fastq_files))+"\n") + +def list_runs(fastq_files): + ''' + Creates dict with runs as keys and list with filenames as values. + ''' +# print "monkey" + run2fastqs = {} + for file in fastq_files: + run = '' + try: + fastq = open(file, 'r') + i = 0 + for line in fastq: + line = line.rstrip("\n") + if i == 0: + run = re.split("\s", line)[0] + run = re.sub('@', '', run) + run = re.split("\.", run)[0] + #run = re.sub("\.1", '', run)] + else: + break + i += 1 + file1 = re.split('/', file)[-1] + file2 = re.sub('.dat$', '_'+run+'.fastq', file1) + new_path_file = tmp_path+'/'+file2 +# print run, new_path_file + if file2 not in os.listdir(tmp_path): + os.system('cp '+file+' '+new_path_file) + if run in run2fastqs.keys(): + if file not in run2fastqs[run]: + run2fastqs[run].append(new_path_file) + else: + run2fastqs[run] = [new_path_file] + except IOError: + print "Data not found. It is possible for a deleted file to still be listed "\ + "in a Galaxy library. Please confirm that the data still exists on this "\ + "server. You may need to upload it again." + return run2fastqs + +def run_seqsero(run2fastqs): + ''' + Takes files from run2fastqs and runs SeqSero. + ''' + outputs = [] + for run in run2fastqs: + seqsero_cmd = [] + if len(run2fastqs[run]) == 2: + seqsero_cmd = ['python', seqsero, '-m', '2', '-i', run2fastqs[run][0], run2fastqs[run][1]] + elif len(run2fastqs[run]) == 1: + seqsero_cmd = ['python', seqsero, '-m', '1', '-i', run2fastqs[run][0]] + p = Popen(seqsero_cmd, stdout=PIPE) + output = p.communicate() + outputs.append(output) + return outputs + +def get_serotypes(outputs): + ''' + ''' + fastq2comment = {} + fastq2serotype = {} + for sample in outputs: + fastqs = '' + lines_used = [] + for line in sample: # line is actually the entire seqsero output. + line = str(line) + linel = re.split("\n", line) +# print linel + #lines_used = [] + for element in linel: # element is a line of seqsero output. + element = element.rstrip("\n") + test_out.write(element+"\n") + elementl = re.split("\t", element) + if elementl[0] == 'Input files:': + fastqs = elementl[1] + lines_used.append(element) + if elementl[1] not in fastq2serotype.keys(): + fastq2serotype[fastqs] = ['']*5 + fastq2comment[fastqs] = [] + elif elementl[0] == 'O antigen prediction:': + lines_used.append(element) + fastq2serotype[fastqs][0] = elementl[1] # add predicted profile + elif elementl[0] == 'H1 antigen prediction(fliC):': + lines_used.append(element) + fastq2serotype[fastqs][1] = elementl[1] # add predicted profile + elif elementl[0] == 'H2 antigen prediction(fljB):': + lines_used.append(element) + fastq2serotype[fastqs][2] = elementl[1] # add predicted profile + elif elementl[0] == 'Predicted antigenic profile:': + lines_used.append(element) + fastq2serotype[fastqs][3] = elementl[1] # add predicted profile + elif elementl[0] == 'Predicted serotype(s):': + lines_used.append(element) + fastq2serotype[fastqs][4] = elementl[1] # add predicted serotype + if element not in lines_used and re.search("\w", fastqs) and len(element) > 7: + fastq2comment[fastqs].append(element) + #print "\n" + return fastq2serotype, fastq2comment + +def print_html(fastq2serotype, fastq2comment): + ''' + Takes dict and prints to html file. + ''' + tab_out = open('Seqsero_result.txt', 'w'); + html_out = open('Seqsero_result.html', 'w') + html_out.write('\n') + html_out.write('\n') + html_out.write('\n') + html_out.write('SeqSero Results\n') + html_out.write('\n') + html_out.write('\n') + html_out.write('\n') + html_out.write('

\n') + html_out.write('\n') + header = 'Input Files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted antigenic profile\tPredicted serotype(s)' + header = re.sub(' ', '_', header) + header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)', 'Predicted antigenic profile', 'Predicted serotype(s)'] + html_out.write('\n') + for element in header_l: + html_out.write('\n') + html_out.write('\n') + tab_out.write(header+"\n") + print "\n\n", header + for fastq in fastq2serotype: +# print fastq, fastq2serotype[fastq] + line_to_print = fastq+'\t'+"\t".join(fastq2serotype[fastq]) + tab_out.write(line_to_print+"\n") + html_out.write('\n') + html_out.write('\n') + for antigen in fastq2serotype[fastq]: + html_out.write('\n') + html_out.write('\n') + print line_to_print + html_out.write('
'+element+'
'+fastq+' '+antigen+'
\n') + print "\n" + for fastq in fastq2comment: + tab_out.write("\n"+fastq+"\n") + html_out.write('\n') + html_out.write('

\n') + html_out.write(''+fastq+"
\n") + for line in fastq2comment[fastq]: + #if len(line) > 7: + html_out.write(''+line+'\n') + tab_out.write(line+"\n") + print line + print "\n" + html_out.write('

\n') + html_out.write('\n') + html_out.write('\n') + html_out.write('\n') + html_out.close() + +run2fastqs = list_runs(fastq_files) +outputs = run_seqsero(run2fastqs) +fastq2serotype, fastq2comment = get_serotypes(outputs) +print_html(fastq2serotype, fastq2comment) + +print_time() + + diff -r 8cd7fc65c3a7 -r 0d65b71ff8df seqsero.xml --- a/seqsero.xml Fri Oct 27 17:47:00 2017 -0400 +++ b/seqsero.xml Sun Nov 12 02:27:32 2017 -0500 @@ -1,18 +1,34 @@ - Salmonella Serotype Prediction from Paired-End Reads - + Salmonella Serotype Prediction from Paired-End Reads + + biopython + bwa + samtools + blast + sra_toolkit + + + + + + run_seqsero_batch_galaxy.py $input1 - + - + + + + + + This tool predicts Salmonella serotypes from shotgun WGS data. Below is the link to the seqsero help @@ -34,8 +50,4 @@ } - - - - diff -r 8cd7fc65c3a7 -r 0d65b71ff8df test.txt --- a/test.txt Fri Oct 27 17:47:00 2017 -0400 +++ b/test.txt Sun Nov 12 02:27:32 2017 -0500 @@ -1,19 +1,19 @@ -monkey /nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/run_seqsero_batch_galaxy.py /nfs/sw/apps/galaxy-dev/galaxy/database/files/000/dataset_304.dat +monkey /nfs/sw/apps/galaxy-prd/galaxy/tools/seqsero/run_seqsero_batch_galaxy.py /nfs/sw/apps/galaxy-prd/galaxy/database/files/001/dataset_1643.dat,/nfs/sw/apps/galaxy-prd/galaxy/database/files/001/dataset_1644.dat 2 -/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/run_seqsero_batch_galaxy.py /nfs/sw/apps/galaxy-dev/galaxy/database/files/000/dataset_304.dat +/nfs/sw/apps/galaxy-prd/galaxy/tools/seqsero/run_seqsero_batch_galaxy.py /nfs/sw/apps/galaxy-prd/galaxy/database/files/001/dataset_1643.dat,/nfs/sw/apps/galaxy-prd/galaxy/database/files/001/dataset_1644.dat 2 -1 +2 -Input files: dataset_304_F1605001-005-2C1.fsa -O antigen prediction: O-? +Input files: dataset_1643_SRR6232093_1.fastq dataset_1644_SRR6232093_2.fastq +O antigen prediction: O-- H1 antigen prediction(fliC): - H2 antigen prediction(fljB): - -Predicted antigenic profile: ?:-:- +Predicted antigenic profile: -:-:- Predicted serotype(s): N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme) -/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp/dataset_304_F1605001-005-2C1.fsa +check fastq id and make them in accordance with each other...please wait... @@ -21,4 +21,4 @@ None -Fri Oct 27 15:35:33 2017 \ No newline at end of file +Wed Nov 8 06:26:06 2017 \ No newline at end of file diff -r 8cd7fc65c3a7 -r 0d65b71ff8df test_bioblend.py --- a/test_bioblend.py Fri Oct 27 17:47:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -#/usr/bin/python - -import sqlite3, re, os -from bioblend import galaxy - -#out = open("/opt/galaxy/tools/seqserobatch/test.txt", 'w') - -def get_bioblend_id(): -# gi = galaxy.GalaxyInstance(url='http://galaxy.fda.gov:8080/', key='ebec9b999774b69d3f3880f0f664f56e') - gi = galaxy.GalaxyInstance(url='https://dev.galaxytrakr.org/') - job = galaxy.jobs.JobsClient(gi) - j = job.get_jobs() - while j[0]['exit_code'] == 'None': - print j[0]['id'], j[0]['exit_code'] - print j[0]['id'], j[0]['exit_code'] - -# dataset_id = galaxy.datasets.DatasetClient(gi) -# output = show_stdout(dataset_id) -# print output -# data = download_dataset(dataset_id, wait_for_completion=True) -# print dataset_id.show_stdout() - #print gi, job - -# print '"'+'tiger'+'"' - return j[0]['id'] - -def get_job_id(bioblend_id): -# print 'supermonkey', bioblend_id - database = '/opt/galaxy/database/universe.sqlite' - conn = sqlite3.connect(database) - select = 'SELECT job.id, job.stdout FROM job WHERE job.stdout LIKE '+"'"+"%"+bioblend_id+"%"+"'"+';' -# print select -# entries = conn.execute('SELECT job.id, job.stdout FROM job WHERE job.stdout LIKE '+"'"+"%"+bioblend_id+"%"+"'"+';') - entries = conn.execute('SELECT job.id, job.stdout FROM job WHERE job.stdout LIKE '+"'"+"%ecbc86ac41da8f7b%"+"'"+';') - print type(entries) - print entries - print select - for entry in entries: - print entry - for line in entry: - line = str(line) - print line -# print entries - -#out.write(j_join) - -bioblend_id = get_bioblend_id() -#print bioblend_id -#get_job_id(bioblend_id) diff -r 8cd7fc65c3a7 -r 0d65b71ff8df tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Sun Nov 12 02:27:32 2017 -0500 @@ -0,0 +1,9 @@ + + + + + + + + +