changeset 9:acc5cd06a36a draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Tue, 28 Nov 2017 21:16:36 -0500
parents b04931a9fe71
children 53efef402c51
files Seqsero_result.html Seqsero_result.txt run_seqsero.py run_seqsero2.py seqsero.xml test.txt
diffstat 6 files changed, 243 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/Seqsero_result.html	Tue Nov 28 18:55:00 2017 -0500
+++ b/Seqsero_result.html	Tue Nov 28 21:16:36 2017 -0500
@@ -15,20 +15,6 @@
 <td>Predicted antigenic profile</td>
 <td>Predicted serotype(s)</td>
 </tr>
-<tr>
-<td>dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq</td>
-<td>O--</td>
-<td>-</td>
-<td>-</td>
-<td>-:-:-</td>
-<td>N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)</td>
-</tr>
 </table>
-<tr>
-<p>
-<td>dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq<br></td>
-<td>check fastq id and make them in accordance with each other...please wait...</td>
-</p>
-</tr>
 </body>
 </html>
--- a/Seqsero_result.txt	Tue Nov 28 18:55:00 2017 -0500
+++ b/Seqsero_result.txt	Tue Nov 28 21:16:36 2017 -0500
@@ -1,5 +1,1 @@
 Input_Files	O_antigen_prediction	H1_antigen_prediction(fliC)	H2_antigen_prediction(fljB)	Predicted_antigenic_profile	Predicted_serotype(s)
-dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq	O--	-	-	-:-:-	N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)
-
-dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq
-check fastq id and make them in accordance with each other...please wait...
--- a/run_seqsero.py	Tue Nov 28 18:55:00 2017 -0500
+++ b/run_seqsero.py	Tue Nov 28 21:16:36 2017 -0500
@@ -107,43 +107,44 @@
 def get_serotypes(outputs):
         '''
         '''
-	fastq2comment = {}
+        fastq2comment = {}
         fastq2serotype = {}
+
         for sample in outputs:
-		fastqs = ''
-		lines_used = []
+                fastqs = ''
+                lines_used = []
                 for line in sample:	# line is actually the entire seqsero output.
                         line = str(line)
                         linel = re.split("\n", line)
 #                       print(linel)
 			#lines_used = []
                         for element in linel:	# element is a line of seqsero output.
-				element = element.rstrip("\n")
-				test_out.write(element+"\n")
+                                element = element.rstrip("\n")
+                                test_out.write(element+"\n")
                                 elementl = re.split("\t", element)
                                 if elementl[0] == 'Input files:':
                                         fastqs = elementl[1]
-					lines_used.append(element)
+                                        lines_used.append(element)
                                         if elementl[1] not in fastq2serotype.keys():
                                                 fastq2serotype[fastqs] = ['']*5
-						fastq2comment[fastqs] = []
+                                                fastq2comment[fastqs] = []
                                 elif elementl[0] == 'O antigen prediction:':
-					lines_used.append(element)
+                                        lines_used.append(element)
                                         fastq2serotype[fastqs][0] = elementl[1] # add predicted profile
                                 elif elementl[0] == 'H1 antigen prediction(fliC):':
-					lines_used.append(element)
+                                        lines_used.append(element)
                                         fastq2serotype[fastqs][1] = elementl[1] # add predicted profile
                                 elif elementl[0] == 'H2 antigen prediction(fljB):':
-					lines_used.append(element)
+                                        lines_used.append(element)
                                         fastq2serotype[fastqs][2] = elementl[1] # add predicted profile                    
                                 elif elementl[0] == 'Predicted antigenic profile:':
-					lines_used.append(element)
+                                        lines_used.append(element)
                                         fastq2serotype[fastqs][3] = elementl[1] # add predicted profile
                                 elif elementl[0] == 'Predicted serotype(s):':
-					lines_used.append(element)
+                                        lines_used.append(element)
                                         fastq2serotype[fastqs][4] = elementl[1] # add predicted serotype
-				if element not in lines_used and re.search("\w", fastqs) and len(element) > 7:
-					fastq2comment[fastqs].append(element)
+                                if element not in lines_used and re.search("\w", fastqs) and len(element) > 7:
+                                        fastq2comment[fastqs].append(element)
                 #print("\n"
         return fastq2serotype, fastq2comment
 
@@ -151,7 +152,7 @@
         '''
         Takes dict and prints to html file.
         '''
-	tab_out = open('Seqsero_result.txt', 'w');
+        tab_out = open('Seqsero_result.txt', 'w');
         html_out = open('Seqsero_result.html', 'w')
         html_out.write('<!DOCTYPE html>\n')
         html_out.write('<html>\n')
@@ -182,19 +183,19 @@
                 html_out.write('</tr>\n')
                 print(line_to_print)
         html_out.write('</table>\n')
-	print("\n")
+        print("\n")
         for fastq in fastq2comment:
                 tab_out.write("\n"+fastq+"\n")
                 html_out.write('<tr>\n')
-		html_out.write('<p>\n')
+                html_out.write('<p>\n')
                 html_out.write('<td>'+fastq+"<br></td>\n")
                 for line in fastq2comment[fastq]:
 			#if len(line) > 7:
                         html_out.write('<td>'+line+'</td>\n')
                         tab_out.write(line+"\n")
-			print(line)
-		print("\n")
-		html_out.write('</p>\n')
+                        print(line)
+                print("\n")
+                html_out.write('</p>\n')
                 html_out.write('</tr>\n')
         html_out.write('</body>\n')
         html_out.write('</html>\n')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/run_seqsero2.py	Tue Nov 28 21:16:36 2017 -0500
@@ -0,0 +1,217 @@
+#!/usr/bin/python
+
+import os, re, sys, time, datetime
+import subprocess
+from subprocess import Popen, PIPE
+
+
+# Declare Variables 
+# paths to files and scripts
+
+fileDir = os.path.dirname(os.path.realpath('__file__'))
+print(fileDir)
+
+fq_list1 = []
+Tool_directory  = sys.argv[2]
+database = Tool_directory + '/universe.sqlite'
+seqsero = Tool_directory + 'SeqSero.py'
+test_out = open(Tool_directory + "/test.txt", 'w')
+out_path = Tool_directory + '/files/000'
+test_out2 = open(Tool_directory + "/test2.txt", 'w')
+
+path2sample = Tool_directory + 'fastq_tmp'
+
+
+#test_out2.write(" Test test test");
+
+test_out.write("monkey ");
+#test_out.write("");
+test_out.write("\t".join(sys.argv)+'\n')
+
+
+
+
+print(Tool_directory + '  path printed ')
+
+test_out.write(str(len(sys.argv))+"\n")
+if len(sys.argv) >= 2:
+        test_out.write("\t".join(sys.argv)+'\n')
+        fq_list1 = sys.argv[1]
+
+test_out.write(str(len(sys.argv))+"\n")
+
+fastq_files = re.split(",", fq_list1)
+
+def print_time():
+        test_out.write(time.asctime( time.localtime(time.time())))
+
+tmp_path = "fastq_tmp"
+if not os.path.exists(tmp_path):
+        os.system('mkdir '+tmp_path)
+
+test_out.write(str(len(fastq_files))+"\n")
+
+def list_runs(fastq_files):
+        '''
+        Creates dict with runs as keys and list with filenames as values.
+        '''
+#       print("monkey")
+        run2fastqs = {}
+        for file in fastq_files:
+                run = ''
+                try:
+                        fastq = open(file, 'r')
+                        i = 0
+                        for line in fastq:
+                                line = line.rstrip("\n")
+                                if i == 0:
+                                        run = re.split("\s", line)[0]
+                                        run = re.sub('@', '', run)
+                                        run = re.sub("\.1", '', run)
+                                else:
+                                        break
+                                i += 1
+                        file1 = re.split('/', file)[-1]
+                        file2 = re.sub('.dat$', '_'+run+'.fastq', file1)
+                        new_path_file = tmp_path+'/'+file2
+#                       print run, new_path_file
+                        if file2 not in os.listdir(tmp_path):
+                                os.system('cp '+file+' '+new_path_file)
+                        if run in run2fastqs.keys():
+                                if file not in run2fastqs[run]:
+                                        run2fastqs[run].append(new_path_file)
+                        else:
+                                run2fastqs[run] = [new_path_file]
+                except IOError:
+                        print("Data not found. It is possible for a deleted file to still be listed "\
+                                "in a Galaxy library. Please confirm that the data still exists on this "\
+                                "server. You may need to upload it again.")
+        return run2fastqs
+
+def run_seqsero(run2fastqs):
+        '''
+        Takes files from run2fastqs and runs SeqSero.
+        '''
+        outputs = []
+        for run in run2fastqs:
+                seqsero_cmd = []
+                if len(run2fastqs[run]) == 2:
+                        seqsero_cmd = ['python', seqsero, '-m', '2', '-i', run2fastqs[run][0], run2fastqs[run][1]]
+                elif len(run2fastqs[run]) == 1:
+                        seqsero_cmd = ['python', seqsero, '-m', '1', '-i', run2fastqs[run][0]]
+                p = Popen(seqsero_cmd, stdout=PIPE)
+                output = p.communicate()
+                outputs.append(output)
+        return outputs
+
+def get_serotypes(outputs):
+        '''
+        '''
+	fastq2comment = {}
+        fastq2serotype = {}
+        for sample in outputs:
+		fastqs = ''
+		lines_used = []
+                for line in sample:	# line is actually the entire seqsero output.
+                        line = str(line)
+                        linel = re.split("\n", line)
+#                       print(linel)
+			#lines_used = []
+                        for element in linel:	# element is a line of seqsero output.
+				element = element.rstrip("\n")
+				test_out.write(element+"\n")
+                                elementl = re.split("\t", element)
+                                if elementl[0] == 'Input files:':
+                                        fastqs = elementl[1]
+					lines_used.append(element)
+                                        if elementl[1] not in fastq2serotype.keys():
+                                                fastq2serotype[fastqs] = ['']*5
+						fastq2comment[fastqs] = []
+                                elif elementl[0] == 'O antigen prediction:':
+					lines_used.append(element)
+                                        fastq2serotype[fastqs][0] = elementl[1] # add predicted profile
+                                elif elementl[0] == 'H1 antigen prediction(fliC):':
+					lines_used.append(element)
+                                        fastq2serotype[fastqs][1] = elementl[1] # add predicted profile
+                                elif elementl[0] == 'H2 antigen prediction(fljB):':
+					lines_used.append(element)
+                                        fastq2serotype[fastqs][2] = elementl[1] # add predicted profile                    
+                                elif elementl[0] == 'Predicted antigenic profile:':
+					lines_used.append(element)
+                                        fastq2serotype[fastqs][3] = elementl[1] # add predicted profile
+                                elif elementl[0] == 'Predicted serotype(s):':
+					lines_used.append(element)
+                                        fastq2serotype[fastqs][4] = elementl[1] # add predicted serotype
+				if element not in lines_used and re.search("\w", fastqs) and len(element) > 7:
+					fastq2comment[fastqs].append(element)
+                #print("\n"
+        return fastq2serotype, fastq2comment
+
+def print_html(fastq2serotype, fastq2comment):
+        '''
+        Takes dict and prints to html file.
+        '''
+	tab_out = open('Seqsero_result.txt', 'w');
+        html_out = open('Seqsero_result.html', 'w')
+        html_out.write('<!DOCTYPE html>\n')
+        html_out.write('<html>\n')
+        html_out.write('<head>\n')
+        html_out.write('<title>SeqSero Results</title>\n')
+        html_out.write('</head>\n')
+        html_out.write('<body>\n')
+        html_out.write('<body style="font-family:Helvetica;">\n')
+        html_out.write('<p style="font-size:10px">\n')
+        html_out.write('<table border=1>\n')
+        header = 'Input Files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted antigenic profile\tPredicted serotype(s)'
+        header = re.sub(' ', '_', header)
+        header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)', 'Predicted antigenic profile', 'Predicted serotype(s)']
+        html_out.write('<tr>\n')
+        for element in header_l:
+                html_out.write('<td>'+element+'</td>\n')
+        html_out.write('</tr>\n')
+        tab_out.write(header+"\n")
+        print("\n\n", header)
+        for fastq in fastq2serotype:
+#               print(fastq, fastq2serotype[fastq])
+                line_to_print = fastq+'\t'+"\t".join(fastq2serotype[fastq])
+                tab_out.write(line_to_print+"\n")
+                html_out.write('<tr>\n')
+                html_out.write('<td>'+fastq+'</td>\n')
+                for antigen in fastq2serotype[fastq]:
+                        html_out.write('<td>'+antigen+'</td>\n')
+                html_out.write('</tr>\n')
+                print(line_to_print)
+        html_out.write('</table>\n')
+	print("\n")
+        for fastq in fastq2comment:
+                tab_out.write("\n"+fastq+"\n")
+                html_out.write('<tr>\n')
+		html_out.write('<p>\n')
+                html_out.write('<td>'+fastq+"<br></td>\n")
+                for line in fastq2comment[fastq]:
+			#if len(line) > 7:
+                        html_out.write('<td>'+line+'</td>\n')
+                        tab_out.write(line+"\n")
+			print(line)
+		print("\n")
+		html_out.write('</p>\n')
+                html_out.write('</tr>\n')
+        html_out.write('</body>\n')
+        html_out.write('</html>\n')
+        html_out.close()
+
+
+def readFile(filename):
+    filehandle = open(filename)
+    print(filehandle.read())
+    filehandle.close()
+
+
+run2fastqs = list_runs(fastq_files)
+outputs = run_seqsero(run2fastqs)
+fastq2serotype, fastq2comment = get_serotypes(outputs)
+print_html(fastq2serotype, fastq2comment)
+
+print_time()
+                                                                                                                                                                                                                          
+
--- a/seqsero.xml	Tue Nov 28 18:55:00 2017 -0500
+++ b/seqsero.xml	Tue Nov 28 21:16:36 2017 -0500
@@ -12,12 +12,7 @@
                 <exit_code range="0:" level="warning"/>
                 <regex match="Error:"/>
         </stdio>
-         <!-- <command interpreter="python2.7">
-                run_seqsero.py $input1 
-              run_seqsero_batch_galaxy.py $input1 $input2-->
-                <!--test_bioblend.py $input1 $input2
-        </command> -->
-            <command >
+        <command>
      
                 python $__tool_directory__/run_seqsero.py $input1 $__tool_directory__/
                 
--- a/test.txt	Tue Nov 28 18:55:00 2017 -0500
+++ b/test.txt	Tue Nov 28 21:16:36 2017 -0500
@@ -1,22 +1,8 @@
-monkey /galaxydir/galaxy/tools/GalaxySeqsero/run_seqsero.py	/galaxydir/galaxy/database/files/000/dataset_1.dat,/galaxydir/galaxy/database/files/000/dataset_2.dat	/galaxydir/galaxy/tools/GalaxySeqsero/
+monkey /galaxydir/galaxy/tools/GalaxySeqsero/run_seqsero.py	/galaxydir/galaxy/database/files/000/dataset_170.dat,/galaxydir/galaxy/database/files/000/dataset_171.dat	/galaxydir/galaxy/tools/GalaxySeqsero/
 3
-/galaxydir/galaxy/tools/GalaxySeqsero/run_seqsero.py	/galaxydir/galaxy/database/files/000/dataset_1.dat,/galaxydir/galaxy/database/files/000/dataset_2.dat	/galaxydir/galaxy/tools/GalaxySeqsero/
+/galaxydir/galaxy/tools/GalaxySeqsero/run_seqsero.py	/galaxydir/galaxy/database/files/000/dataset_170.dat,/galaxydir/galaxy/database/files/000/dataset_171.dat	/galaxydir/galaxy/tools/GalaxySeqsero/
 3
 2
-('\n', '\n')
-Input files:	dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq
-O antigen prediction:	O-?
-H1 antigen prediction(fliC):	k
-H2 antigen prediction(fljB):	1,5
-Predicted antigenic profile:	?:k:1,5
-Predicted serotype(s):	N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)
-
-check fastq id and make them in accordance with each other...please wait...
-
-
-
-Result:
-
-
+b"('\\n', '\\n')\nInput files:\tdataset_170_SRR6325381.fastq dataset_171_SRR6325381.fastq\nO antigen prediction:\tO-?\nH1 antigen prediction(fliC):\t-\nH2 antigen prediction(fljB):\t1,6\nPredicted antigenic profile:\t?:-:1,6\nPredicted serotype(s):\tN/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)\n\ncheck fastq id and make them in accordance with each other...please wait...\n\n\n\nResult:\n\n"
 None
-Mon Nov 27 16:46:44 2017
\ No newline at end of file
+Tue Nov 28 19:30:26 2017
\ No newline at end of file