annotate run_seqsero_batch_galaxy.1.py @ 5:d6ba84c5c4db draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 12:45:37 -0500
parents 38ad1130d077
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
#!/usr/bin/python
"""Run SeqSero on a batch of Galaxy FASTQ datasets.

The Galaxy database URL is read from the GALAXY_DATABASE_URL environment
variable so that no credentials are stored in this file.
"""

print('monkey wonders where he is')

import os

# os.system('source /nfs/sw/apps/galaxy-dev/galaxy/.venv/bin/activate')

import re
import sys
import time
import datetime
import subprocess
import psycopg2
import sqlalchemy
from subprocess import Popen, PIPE
from sqlalchemy import *
from datetime import datetime

print('monkey found some files %s' % (sys.argv,))

# SECURITY: the connection URL (user, password, host) used to be hard-coded
# here.  It must be supplied by the environment instead, e.g.
#   GALAXY_DATABASE_URL=postgresql+psycopg2://user:password@host:5432/galaxy
# The credential that was previously committed should be rotated.
database_url = os.environ.get('GALAXY_DATABASE_URL')
if not database_url:
    sys.exit('GALAXY_DATABASE_URL is not set; cannot connect to the '
             'Galaxy database.')
engine = create_engine(database_url)
print('monkey says "vroom vroom"')
# Module-level connection used by list_runs() for dataset name lookups.
connection = engine.connect()
print("monkey made a connection!")
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
20
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
# database = '/nfs/sw/apps/galaxy-dev/galaxy/database/universe.sqlite'
# Location of the SeqSero script that run_seqsero() executes.
seqsero = '/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/SeqSero.py'
# Debug log; written to throughout the script.
test_out = open("/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/test.txt", 'w')
out_path = '/nfs/sw/apps/galaxy-dev/galaxy/database/files/000'
# Second debug file; opened (and therefore truncated) but not written here.
test_out2 = open("/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/test2.txt", 'w')

# Directory into which the input files are copied before SeqSero runs.
path2sample = '/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp'

# Record the command line in the debug log.
test_out.write("monkey ")
test_out.write("\t".join(sys.argv) + '\n')

fq_list1 = []

test_out.write(str(len(sys.argv)) + "\n")
if len(sys.argv) >= 2:
    test_out.write("\t".join(sys.argv) + '\n')
    fq_list1 = sys.argv[1:]

test_out.write(str(len(sys.argv)) + "\n")

# Without an argument the original code died with a bare IndexError on
# fq_list1[0]; fail with an explicit usage message instead.
if not fq_list1:
    test_out.close()
    test_out2.close()
    sys.exit('Usage: %s FASTQ1[,FASTQ2,...]' % sys.argv[0])

# fq_list1[0] is a string with commas between the path/filenames.
fastq_files = fq_list1[0].split(",")
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
42
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
def print_time():
    '''
    Write the current local time to the debug log.

    The timestamp is in time.asctime() format and is written to the
    module-level test_out file without a trailing newline.
    '''
    # asctime() with no argument formats the current local time.
    timestamp = time.asctime()
    test_out.write(timestamp)
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
45
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
# Staging directory for the copied input files (used by list_runs()).
tmp_path = path2sample
if not os.path.isdir(tmp_path):
    # Create the directory directly instead of shelling out to `mkdir`,
    # which broke on paths containing spaces or shell metacharacters and
    # failed silently.
    os.makedirs(tmp_path)

# Record how many input files were passed in.
test_out.write(str(len(fastq_files)) + "\n")
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
51
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
def list_runs(fastq_files):
    '''
    Stage the input files and group them by run.

    For each path in fastq_files:
      * the first line is read; the run identifier is the text before the
        first whitespace or '.', with '@' removed;
      * the dataset id is taken from the file name ("..._<id>.dat") and the
        dataset's name is looked up in history_dataset_association through
        the module-level connection;
      * the file is copied to tmp_path as "<basename without .dat>_<name>".

    Files that cannot be opened or copied are reported on stdout and
    skipped.  A file name whose id part is not an integer raises ValueError.

    Returns a dict mapping run identifier -> list of staged file paths.
    '''
    import shutil

    run2fastqs = {}
    # Bound parameter instead of string concatenation, so the value taken
    # from the file name can never be interpreted as SQL.
    name_query = sqlalchemy.text(
        'SELECT name FROM history_dataset_association '
        'WHERE dataset_id = :dataset_id')

    for fastq_path in fastq_files:
        print('monkey found %s' % fastq_path)

        # Only the first line is needed; the handle is closed right away.
        try:
            with open(fastq_path, 'r') as fastq:
                first_line = fastq.readline().rstrip("\n")
        except IOError:
            print("Data not found. It is possible for a deleted file to still be listed "
                  "in a Galaxy library. Please confirm that the data still exists on this "
                  "server. You may need to upload it again.")
            continue

        run = re.split(r"[\s.]", first_line, 1)[0].replace('@', '')
        # Decided per file: previously one non-FASTQ file flagged every
        # file that came after it.
        is_fastq = first_line.startswith('@')

        file1 = os.path.basename(fastq_path)
        has_dat_suffix = file1.endswith('.dat')
        stem = file1[:-len('.dat')] if has_dat_suffix else file1
        dataset_id = stem.split('_')[-1]
        print('%s %s' % (dataset_id, type(dataset_id)))

        original_filename = ''
        result = connection.execute(name_query,
                                    {'dataset_id': int(dataset_id)})
        for row in result:
            print('monkey says the original_filename is something like %s'
                  % row[0])
            original_filename = row[0]

        if original_filename.endswith('fasta'):
            if is_fastq:
                # Fix only the extension, not every 'fasta' in the name.
                original_filename = (
                    original_filename[:-len('fasta')] + 'fastq')
            else:
                print('The input file is not a fastq file.')

        # The name comes from the database; keep it from escaping tmp_path.
        safe_name = original_filename.replace('/', '_')
        if has_dat_suffix:
            file2 = stem + '_' + safe_name
        else:
            file2 = file1
        print('monkey renamed the file %s' % file2)
        new_path_file = tmp_path + '/' + file2

        if not os.path.exists(new_path_file):
            print('%s %s' % (fastq_path, new_path_file))
            try:
                # No shell involved, so spaces and metacharacters in either
                # path are harmless.
                shutil.copy(fastq_path, new_path_file)
            except (IOError, OSError) as exc:
                print('Could not copy %s to %s: %s'
                      % (fastq_path, new_path_file, exc))
                continue

        staged = run2fastqs.setdefault(run, [])
        # Compare staged paths with staged paths; the old check used the
        # source path and therefore never detected a duplicate.
        if new_path_file not in staged:
            staged.append(new_path_file)
    return run2fastqs
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
105
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
def run_seqsero(run2fastqs):
    '''
    Run SeqSero once per run and collect what it prints.

    run2fastqs maps a run identifier to its list of staged file paths.
    A run with two files is executed with '-m 2', a run with one file with
    '-m 1'.  Runs with any other number of files are reported on stdout and
    skipped (previously they reached Popen with an empty command and
    raised).

    Returns a list with one Popen.communicate() result per executed run,
    i.e. (stdout, stderr) tuples; stderr is not piped, so it is None.
    '''
    outputs = []
    for run in run2fastqs:
        fastqs = run2fastqs[run]
        print('%s %s' % (run, fastqs))
        if len(fastqs) == 2:
            mode = '2'
        elif len(fastqs) == 1:
            mode = '1'
        else:
            print('Skipping run %s: expected 1 or 2 files, found %d.'
                  % (run, len(fastqs)))
            continue
        seqsero_cmd = ['python', seqsero, '-m', mode, '-i'] + list(fastqs)
        print(seqsero_cmd)
        p = Popen(seqsero_cmd, stdout=PIPE)
        outputs.append(p.communicate())
    return outputs
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
123
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
def get_serotypes(outputs):
    '''
    Parse captured SeqSero output into prediction and comment tables.

    outputs is a list of tuples of captured streams, one tuple per SeqSero
    invocation (as returned by run_seqsero).  Every line is echoed to the
    module-level test_out debug log and split on tabs:

      * "Input files:" starts a new sample keyed by the value that follows;
      * the five prediction labels fill slots 0-4 of that sample's list;
      * any other line longer than 7 characters is kept as a comment for
        the current sample.

    Returns (fastq2serotype, fastq2comment): the first maps the sample key
    to a 5-item list [O antigen, H1, H2, antigenic profile, serotype], the
    second maps it to the list of comment lines.
    '''
    label_to_slot = {
        'O antigen prediction:': 0,
        'H1 antigen prediction(fliC):': 1,
        'H2 antigen prediction(fljB):': 2,
        'Predicted antigenic profile:': 3,
        'Predicted serotype(s):': 4,
    }
    fastq2comment = {}
    fastq2serotype = {}
    for sample in outputs:
        fastqs = ''
        for stream in sample:
            # stderr is not captured and arrives as None; skip it rather
            # than logging the literal string "None".
            if stream is None:
                continue
            if isinstance(stream, bytes) and not isinstance(stream, str):
                # Python 3 pipes return bytes; str(bytes) would yield "b'..'".
                text = stream.decode('utf-8', 'replace')
            else:
                text = str(stream)
            for element in text.split("\n"):
                test_out.write(element + "\n")
                fields = element.split("\t")
                label = fields[0]
                # A label without a tab-separated value yields '' instead
                # of raising IndexError.
                value = fields[1] if len(fields) > 1 else ''
                if label == 'Input files:':
                    fastqs = value
                    if fastqs not in fastq2serotype:
                        fastq2serotype[fastqs] = [''] * 5
                        fastq2comment[fastqs] = []
                elif label in label_to_slot:
                    # Ignore predictions that appear before any
                    # "Input files:" line (previously a KeyError).
                    if fastqs in fastq2serotype:
                        fastq2serotype[fastqs][label_to_slot[label]] = value
                elif re.search(r"\w", fastqs) and len(element) > 7:
                    fastq2comment[fastqs].append(element)
    return fastq2serotype, fastq2comment
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
166
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
def print_html(fastq2serotype, fastq2comment):
    '''
    Write the SeqSero results as a tab-delimited file and an HTML page.

    fastq2serotype maps a sample key to its list of predicted values and
    fastq2comment maps it to a list of comment lines (both as returned by
    get_serotypes).  Output goes to 'Seqsero_result.txt' and
    'Seqsero_result.html' in the current directory; the table rows and the
    comments are also printed to stdout.

    Values are HTML-escaped before being written to the HTML page.  Both
    files are closed even if writing fails.
    '''
    from xml.sax.saxutils import escape

    header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)', 'Predicted antigenic profile', 'Predicted serotype(s)']
    # Column titles of the tab file use underscores instead of spaces.
    header = "\t".join(header_l).replace(' ', '_')

    with open('Seqsero_result.txt', 'w') as tab_out, \
            open('Seqsero_result.html', 'w') as html_out:
        html_out.write('<!DOCTYPE html>\n')
        html_out.write('<html>\n')
        html_out.write('<head>\n')
        html_out.write('<title>SeqSero Results</title>\n')
        html_out.write('</head>\n')
        # A single <body>; the original emitted the tag twice.
        html_out.write('<body style="font-family:Helvetica;">\n')

        # Prediction table: one header row, then one row per sample.
        html_out.write('<table border=1>\n')
        html_out.write('<tr>\n')
        for title in header_l:
            html_out.write('<td>' + escape(title) + '</td>\n')
        html_out.write('</tr>\n')
        tab_out.write(header + "\n")
        print("\n\n" + header)
        for fastq in fastq2serotype:
            antigens = fastq2serotype[fastq]
            line_to_print = fastq + '\t' + "\t".join(antigens)
            tab_out.write(line_to_print + "\n")
            html_out.write('<tr>\n')
            html_out.write('<td>' + escape(fastq) + '</td>\n')
            for antigen in antigens:
                html_out.write('<td>' + escape(antigen) + '</td>\n')
            html_out.write('</tr>\n')
            print(line_to_print)
        html_out.write('</table>\n')
        print("\n")

        # Comments come after the table, so they are written as paragraphs
        # rather than as table rows outside of any <table>.
        for fastq in fastq2comment:
            tab_out.write("\n" + fastq + "\n")
            html_out.write('<p>\n')
            html_out.write(escape(fastq) + '<br>\n')
            for line in fastq2comment[fastq]:
                html_out.write(escape(line) + '<br>\n')
                tab_out.write(line + "\n")
                print(line)
            print("\n")
            html_out.write('</p>\n')
        html_out.write('</body>\n')
        html_out.write('</html>\n')
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
219
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
# Pipeline: stage inputs -> run SeqSero -> parse its output -> write reports.
try:
    run2fastqs = list_runs(fastq_files)
    #print run2fastqs
    outputs = run_seqsero(run2fastqs)
    fastq2serotype, fastq2comment = get_serotypes(outputs)
    print_html(fastq2serotype, fastq2comment)
    print_time()
finally:
    # Release the database connection and flush the debug logs even when a
    # step above fails; previously none of them were ever closed.
    connection.close()
    test_out.close()
    test_out2.close()
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
226
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
227