# HG changeset patch # User charles_s_test # Date 1510471652 18000 # Node ID 0d65b71ff8df152640f67b48e4d929791e091002 # Parent 8cd7fc65c3a75f1baeb15d266167ef3e4ab656a5 planemo upload commit 464b391afaa5819bc681452e85bea9d882730eb6 diff -r 8cd7fc65c3a7 -r 0d65b71ff8df README.md --- a/README.md Fri Oct 27 17:47:00 2017 -0400 +++ b/README.md Sun Nov 12 02:27:32 2017 -0500 @@ -1,16 +1,5 @@ - -# GalaxySeqSero -#This Project has been modified by the CFSAN FDA office to work in Galaxy it also has a batch functionality as well add by the Galaxy rapper code. - -All below dependencies are the same for Seqsero. - Add this project to the galaxy tools directory and then added to tool_conf.xml - -Adding custom tools to Galaxy - -https://galaxyproject.org/admin/tools/add-tool-tutorial/ - - -# SeqSero 1.1 +<<<<<<< HEAD +# SeqSero 1.0 Salmonella serotyping from genome sequencing data diff -r 8cd7fc65c3a7 -r 0d65b71ff8df Seqsero_result.html --- a/Seqsero_result.html Fri Oct 27 17:47:00 2017 -0400 +++ b/Seqsero_result.html Sun Nov 12 02:27:32 2017 -0500 @@ -16,19 +16,19 @@
-
\n') + html_out.write('
'+element+' | \n') + html_out.write('|
'+fastq+' | \n') + for antigen in fastq2serotype[fastq]: + html_out.write(''+antigen+' | \n') + html_out.write('
\n') + html_out.write('
diff -r 8cd7fc65c3a7 -r 0d65b71ff8df Seqsero_result.txt --- a/Seqsero_result.txt Fri Oct 27 17:47:00 2017 -0400 +++ b/Seqsero_result.txt Sun Nov 12 02:27:32 2017 -0500 @@ -1,6 +1,6 @@ Input_Files O_antigen_prediction H1_antigen_prediction(fliC) H2_antigen_prediction(fljB) Predicted_antigenic_profile Predicted_serotype(s) -dataset_15_sra_data9.fastq O-16 d 1,7 16:d:1,7 Gaminara +dataset_445_SRR6158764_1.fastq dataset_446_SRR6158764_2.fastq O-7 y 1,5 7:y:1,5 Bareilly -dataset_15_sra_data9.fastq +dataset_445_SRR6158764_1.fastq dataset_446_SRR6158764_2.fastq The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes. -/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp/dataset_15_sra_data9.fastq +check fastq id and make them in accordance with each other...please wait... diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/BWA_analysis_H_update_new_family_dependent.py --- a/libs/BWA_analysis_H_update_new_family_dependent.py Fri Oct 27 17:47:00 2017 -0400 +++ b/libs/BWA_analysis_H_update_new_family_dependent.py Sun Nov 12 02:27:32 2017 -0500 @@ -6,8 +6,8 @@ from Initial_functions import Uniq from Bio.Blast import NCBIXML -BwaPath="/nfs/sw/apps/bwa/bwa-0.7.15/bwa" -SamTlsPth="/nfs/sw/apps/samtools/samtools-1.3.1/bin/samtools" +BwaPath="bwa" +SamTlsPth="samtools" Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb" Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn" diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/BWA_analysis_O_new_dependent.py --- a/libs/BWA_analysis_O_new_dependent.py Fri Oct 27 17:47:00 2017 -0400 +++ b/libs/BWA_analysis_O_new_dependent.py Sun Nov 12 02:27:32 2017 -0500 @@ -7,8 +7,8 @@ from Initial_functions import Uniq from Bio.Blast import NCBIXML -BwaPath="/nfs/sw/apps/bwa/bwa-0.7.15/bwa" -SamTlsPth="/nfs/sw/apps/samtools/samtools-1.3.1/bin/samtools" +BwaPath="bwa" +SamTlsPth="samtools" Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb" Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn" diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/Initial_Conditions.pyc Binary file libs/Initial_Conditions.pyc has changed diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/Initial_functions.pyc Binary file libs/Initial_functions.pyc has changed diff -r 8cd7fc65c3a7 -r 0d65b71ff8df libs/deletion_compare.py --- a/libs/deletion_compare.py Fri Oct 27 17:47:00 2017 -0400 +++ b/libs/deletion_compare.py Sun Nov 12 02:27:32 2017 -0500 @@ -5,8 +5,8 @@ from Initial_functions import Uniq from Bio.Blast import NCBIXML -BwaPath="/nfs/sw/apps/bwa/bwa-0.7.15/bwa" -SamTlsPth="/nfs/sw/apps/samtools/samtools-1.3.1/bin/samtools" +BwaPath="bwa" +SamTlsPth="samtools" Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb" Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn" diff -r 8cd7fc65c3a7 -r 0d65b71ff8df logupdate_tool.log diff -r 8cd7fc65c3a7 -r 0d65b71ff8df psql_test.py --- a/psql_test.py Fri Oct 27 17:47:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -import psycopg2, os - -os.system('source /nfs/sw/apps/galaxy-dev/galaxy/.venv/bin/activate') - -connection = psycopg2.connect(database="galaxy", user="galaxy", host="galaxydev.cvkyaz9id4ml.us-east-1.rds.amazonaws.com", password="cF$cl0udh9c", port="5432") -print "monkey made a connection!" - -worker = connection.cursor() -worker.execute('SELECT name FROM library_dataset_dataset_association WHERE dataset_id = 4;') -result = worker.fetchall() -print result, '4' - -worker = connection.cursor() -worker.execute('SELECT name FROM library_dataset_dataset_association WHERE dataset_id = 10;') -result = worker.fetchall() -print result, '10' - -worker = connection.cursor() -worker.execute('SELECT name FROM library_dataset_dataset_association WHERE dataset_id = 11;') -result = worker.fetchall() -print result, '11' - -worker = connection.cursor() -worker.execute('SELECT name FROM library_dataset_dataset_association WHERE dataset_id = 18;') -result = worker.fetchall() -print result, '18' diff -r 8cd7fc65c3a7 -r 0d65b71ff8df run_seqsero_batch_galaxy.py --- a/run_seqsero_batch_galaxy.py Fri Oct 27 17:47:00 2017 -0400 +++ b/run_seqsero_batch_galaxy.py Sun Nov 12 02:27:32 2017 -0500 @@ -13,7 +13,7 @@ from datetime import datetime print 'monkey found some files', sys.argv -engine = create_engine('postgresql+psycopg2://galaxy:cF$cl0udh9c@galaxyprod.cvkyaz9id4ml.us-east-1.rds.amazonaws.com:5432/galaxy') +engine = create_engine('postgresql+psycopg2://galaxy:cF$cl0udh9c@galaxydev.cvkyaz9id4ml.us-east-1.rds.amazonaws.com:5432/galaxy') print 'monkey says "vroom vroom"' connection = engine.connect() print "monkey made a connection!" diff -r 8cd7fc65c3a7 -r 0d65b71ff8df run_seqsero_batch_galaxy_09-15-17.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/run_seqsero_batch_galaxy_09-15-17.py Sun Nov 12 02:27:32 2017 -0500 @@ -0,0 +1,201 @@ +#!/usr/bin/python + +print "monkey has started" + +import os, re, sys, time, datetime +import subprocess +from subprocess import Popen, PIPE + +database = '/nfs/sw/apps/galaxy-prd/galaxy/database/universe.sqlite' +seqsero = '/nfs/sw/apps/galaxy-prd/galaxy/tools/seqsero/SeqSero.py' +test_out = open("/nfs/sw/apps/galaxy-prd/galaxy/tools/seqsero/test.txt", 'w') +out_path = '/nfs/sw/apps/galaxy-prd/galaxy/database/files/000' +test_out2 = open("/nfs/sw/apps/galaxy-prd/galaxy/tools/seqsero/test2.txt", 'w') + +path2sample = '/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp' + +#test_out2.write(" Test test test"); + +test_out.write("monkey "); +#test_out.write(""); +test_out.write("\t".join(sys.argv)+'\n') + +fq_list1 = [] +fq_list2 = [] + +test_out.write(str(len(sys.argv))+"\n") +if len(sys.argv) >= 2: + test_out.write("\t".join(sys.argv)+'\n') + fq_list1 = sys.argv[1] + +test_out.write(str(len(sys.argv))+"\n") + +fastq_files = re.split(",", fq_list1) + +def print_time(): + test_out.write(time.asctime( time.localtime(time.time()))) + +tmp_path = "/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp" +if not os.path.exists(tmp_path): + os.system('mkdir '+tmp_path) + +test_out.write(str(len(fastq_files))+"\n") + +def list_runs(fastq_files): + ''' + Creates dict with runs as keys and list with filenames as values. + ''' +# print "monkey" + run2fastqs = {} + for file in fastq_files: + run = '' + try: + fastq = open(file, 'r') + i = 0 + for line in fastq: + line = line.rstrip("\n") + if i == 0: + run = re.split("\s", line)[0] + run = re.sub('@', '', run) + run = re.split("\.", run)[0] + #run = re.sub("\.1", '', run)] + else: + break + i += 1 + file1 = re.split('/', file)[-1] + file2 = re.sub('.dat$', '_'+run+'.fastq', file1) + new_path_file = tmp_path+'/'+file2 +# print run, new_path_file + if file2 not in os.listdir(tmp_path): + os.system('cp '+file+' '+new_path_file) + if run in run2fastqs.keys(): + if file not in run2fastqs[run]: + run2fastqs[run].append(new_path_file) + else: + run2fastqs[run] = [new_path_file] + except IOError: + print "Data not found. It is possible for a deleted file to still be listed "\ + "in a Galaxy library. Please confirm that the data still exists on this "\ + "server. You may need to upload it again." + return run2fastqs + +def run_seqsero(run2fastqs): + ''' + Takes files from run2fastqs and runs SeqSero. + ''' + outputs = [] + for run in run2fastqs: + seqsero_cmd = [] + if len(run2fastqs[run]) == 2: + seqsero_cmd = ['python', seqsero, '-m', '2', '-i', run2fastqs[run][0], run2fastqs[run][1]] + elif len(run2fastqs[run]) == 1: + seqsero_cmd = ['python', seqsero, '-m', '1', '-i', run2fastqs[run][0]] + p = Popen(seqsero_cmd, stdout=PIPE) + output = p.communicate() + outputs.append(output) + return outputs + +def get_serotypes(outputs): + ''' + ''' + fastq2comment = {} + fastq2serotype = {} + for sample in outputs: + fastqs = '' + lines_used = [] + for line in sample: # line is actually the entire seqsero output. + line = str(line) + linel = re.split("\n", line) +# print linel + #lines_used = [] + for element in linel: # element is a line of seqsero output. + element = element.rstrip("\n") + test_out.write(element+"\n") + elementl = re.split("\t", element) + if elementl[0] == 'Input files:': + fastqs = elementl[1] + lines_used.append(element) + if elementl[1] not in fastq2serotype.keys(): + fastq2serotype[fastqs] = ['']*5 + fastq2comment[fastqs] = [] + elif elementl[0] == 'O antigen prediction:': + lines_used.append(element) + fastq2serotype[fastqs][0] = elementl[1] # add predicted profile + elif elementl[0] == 'H1 antigen prediction(fliC):': + lines_used.append(element) + fastq2serotype[fastqs][1] = elementl[1] # add predicted profile + elif elementl[0] == 'H2 antigen prediction(fljB):': + lines_used.append(element) + fastq2serotype[fastqs][2] = elementl[1] # add predicted profile + elif elementl[0] == 'Predicted antigenic profile:': + lines_used.append(element) + fastq2serotype[fastqs][3] = elementl[1] # add predicted profile + elif elementl[0] == 'Predicted serotype(s):': + lines_used.append(element) + fastq2serotype[fastqs][4] = elementl[1] # add predicted serotype + if element not in lines_used and re.search("\w", fastqs) and len(element) > 7: + fastq2comment[fastqs].append(element) + #print "\n" + return fastq2serotype, fastq2comment + +def print_html(fastq2serotype, fastq2comment): + ''' + Takes dict and prints to html file. + ''' + tab_out = open('Seqsero_result.txt', 'w'); + html_out = open('Seqsero_result.html', 'w') + html_out.write('\n') + html_out.write('\n') + html_out.write('
\n') + html_out.write('
\n') + html_out.write('\n') + html_out.write('