Mercurial > repos > charles_s_test > seqsero2
annotate run_seqsero_batch_galaxy.py @ 2:0d65b71ff8df draft
planemo upload commit 464b391afaa5819bc681452e85bea9d882730eb6
author | charles_s_test |
---|---|
date | Sun, 12 Nov 2017 02:27:32 -0500 |
parents | 6895de35a263 |
children | 38ad1130d077 |
rev | line source |
---|---|
0
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
1 #!/usr/bin/python |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
2 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
3 print 'monkey wonders where he is' |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
4 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
5 import os |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
6 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
7 # os.system('source /nfs/sw/apps/galaxy-dev/galaxy/.venv/bin/activate') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
8 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
9 import re, sys, time, datetime |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
10 import subprocess, psycopg2, sqlalchemy |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
11 from subprocess import Popen, PIPE |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
12 from sqlalchemy import * |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
13 from datetime import datetime |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
14 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
print('monkey found some files %s' % (sys.argv,))
# NOTE(security): the fallback URL below embeds database credentials in
# source control. Set GALAXY_DATABASE_URL in the environment instead; the
# fallback is kept only so existing deployments keep working, and the
# password should be rotated and removed from this file.
engine = create_engine(os.environ.get(
    'GALAXY_DATABASE_URL',
    'postgresql+psycopg2://galaxy:cF$cl0udh9c@galaxydev.cvkyaz9id4ml.us-east-1.rds.amazonaws.com:5432/galaxy'))
print('monkey says "vroom vroom"')
connection = engine.connect()
print("monkey made a connection!")

# database = '/nfs/sw/apps/galaxy-dev/galaxy/database/universe.sqlite'
seqsero = '/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/SeqSero.py'
# Debug log; the functions below write to it as well.
test_out = open("/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/test.txt", 'w')
out_path = '/nfs/sw/apps/galaxy-dev/galaxy/database/files/000'
test_out2 = open("/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/test2.txt", 'w')

path2sample = '/nfs/sw/apps/galaxy-dev/galaxy/tools/seqsero/fastq_tmp'

test_out.write("monkey ")
test_out.write("\t".join(sys.argv)+'\n')

fq_list1 = []

test_out.write(str(len(sys.argv))+"\n")
if len(sys.argv) >= 2:
    test_out.write("\t".join(sys.argv)+'\n')
    fq_list1 = sys.argv[1:]

test_out.write(str(len(sys.argv))+"\n")

if not fq_list1:
    # Without an argument fq_list1[0] would raise IndexError; fail with a
    # readable message (still a non-zero exit status) instead.
    sys.exit('usage: run_seqsero_batch_galaxy.py FASTQ[,FASTQ...]')
fastq_files = fq_list1[0].split(",")  # fq_list1[0] is a string with commas between the path/filenames.
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
42 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
def print_time():
    '''
    Writes the current local time, in time.asctime() format, to test_out.
    No newline is appended.
    '''
    now = time.time()
    stamp = time.asctime(time.localtime(now))
    test_out.write(stamp)
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
45 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
tmp_path = path2sample
if not os.path.exists(tmp_path):
    # Create the staging directory directly instead of through a shell, so
    # an unusual path cannot be misparsed and a failure is not ignored.
    os.mkdir(tmp_path)

test_out.write(str(len(fastq_files))+"\n")
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
51 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
def list_runs(fastq_files):
    '''
    Creates dict with runs as keys and list with filenames as values.

    For every path in fastq_files this:
      * takes the run name from the first line of the file: the first
        whitespace-delimited token, without '@', up to the first '.';
      * takes the dataset id from the file name (text after the last '_',
        without the '.dat' suffix) and looks up the dataset name in
        history_dataset_association;
      * copies the file into tmp_path as "<basename without .dat>_<name>"
        unless that copy already exists;
      * records the copy under its run.

    fastq_files -- iterable of dataset file paths.
    Returns a dict mapping run name -> list of copied file paths (no
    duplicates). Files that cannot be read, identified or copied are
    reported on stdout and skipped.
    '''
    import shutil  # local import: only this function needs it

    run2fastqs = {}
    for path in fastq_files:
        print('monkey found %s' % path)
        try:
            # Only the first line (the FASTQ header) is needed; the context
            # manager makes sure the handle is closed.
            with open(path, 'r') as fastq:
                first_line = fastq.readline().rstrip("\n")
        except IOError:
            print("Data not found. It is possible for a deleted file to still be listed "
                  "in a Galaxy library. Please confirm that the data still exists on this "
                  "server. You may need to upload it again.")
            continue

        # Decided per file, so one non-FASTQ input does not taint the rest.
        is_fastq = first_line.startswith('@')
        run = re.split(r"\s", first_line)[0]
        run = run.replace('@', '')
        run = run.split('.')[0]

        file1 = path.split('/')[-1]
        dataset_id = file1.split('_')[-1]
        if dataset_id.endswith('.dat'):
            dataset_id = dataset_id[:-len('.dat')]
        print('%s %s' % (dataset_id, type(dataset_id)))
        if not dataset_id.isdigit():
            print('Could not determine a numeric dataset id for %s; skipping it.' % path)
            continue

        # Bound parameter instead of string concatenation (SQL injection).
        result = connection.execute(
            text('SELECT name FROM history_dataset_association '
                 'WHERE dataset_id = :dataset_id'),
            {'dataset_id': int(dataset_id)})
        original_filename = ''
        for row in result:
            print('monkey says the original_filename is something like %s' % row[0])
            original_filename = row[0]

        if original_filename.endswith('fasta'):
            if is_fastq:
                # Rewrite only the suffix, not every 'fasta' in the name.
                original_filename = original_filename[:-len('fasta')] + 'fastq'
            else:
                print('The input file is not a fastq file.')
        # The name comes from the database; keep it from pointing outside
        # the staging directory.
        original_filename = original_filename.replace('/', '_')

        if file1.endswith('.dat'):
            file2 = file1[:-len('.dat')] + '_' + original_filename
        else:
            file2 = file1
        print('monkey renamed the file %s' % file2)
        new_path_file = os.path.join(tmp_path, file2)
        if not os.path.exists(new_path_file):
            print('%s %s' % (path, new_path_file))
            try:
                shutil.copy(path, new_path_file)
            except (IOError, OSError) as err:
                print('Could not copy %s to %s: %s' % (path, new_path_file, err))
                continue

        # Compare against the stored (copied) paths so that a file listed
        # twice is not recorded twice.
        copies = run2fastqs.setdefault(run, [])
        if new_path_file not in copies:
            copies.append(new_path_file)
    return run2fastqs
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
105 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
def run_seqsero(run2fastqs):
    '''
    Takes files from run2fastqs and runs SeqSero.

    run2fastqs -- dict mapping run name -> list of fastq paths. A run with
                  one path is started with "-m 1", a run with two paths
                  with "-m 2".

    Returns a list holding one Popen.communicate() result, i.e. a
    (stdout, None) tuple, for every run that was executed. Runs with any
    other number of files are reported on stdout and skipped.
    '''
    outputs = []
    for run, fastqs in run2fastqs.items():
        print('%s %s' % (run, fastqs))
        if len(fastqs) not in (1, 2):
            # No command can be built for this case, and Popen([]) raises.
            print('Skipping run %s: expected 1 or 2 fastq files, found %d'
                  % (run, len(fastqs)))
            continue
        seqsero_cmd = ['python', seqsero, '-m', str(len(fastqs)), '-i'] + list(fastqs)
        print(seqsero_cmd)
        p = Popen(seqsero_cmd, stdout=PIPE)
        outputs.append(p.communicate())
    return outputs
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
123 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
def get_serotypes(outputs):
    '''
    Parses SeqSero reports into prediction and comment tables.

    outputs -- list of (stdout, stderr) tuples as produced by
               Popen.communicate(); members that are None are ignored.

    Returns (fastq2serotype, fastq2comment), both keyed by the value of a
    report's "Input files:" line:
      fastq2serotype[key] -- 5-item list: O antigen, H1 antigen (fliC),
                             H2 antigen (fljB), predicted antigenic
                             profile, predicted serotype(s); '' if absent.
      fastq2comment[key]  -- every other report line longer than 7
                             characters that follows the "Input files:"
                             line.
    Every report line is also written to the test_out debug log.
    '''
    # Report label -> position in the 5-item prediction list.
    label2slot = {
        'O antigen prediction:': 0,
        'H1 antigen prediction(fliC):': 1,
        'H2 antigen prediction(fljB):': 2,
        'Predicted antigenic profile:': 3,
        'Predicted serotype(s):': 4,
    }
    fastq2comment = {}
    fastq2serotype = {}
    for sample in outputs:
        fastqs = ''
        for stream in sample:  # each member holds a whole stream of output.
            if stream is None:
                # stderr is None when it was not piped; str(None) would be
                # parsed as the text 'None'.
                continue
            if isinstance(stream, bytes) and not isinstance(stream, str):
                # Python 3 pipes return bytes; str() would give the repr.
                stream = stream.decode('utf-8', 'replace')
            else:
                stream = str(stream)
            for element in stream.split("\n"):  # a line of seqsero output.
                test_out.write(element+"\n")
                fields = element.split("\t")
                label = fields[0]
                # A label without a tab-separated value yields ''.
                value = fields[1] if len(fields) > 1 else ''
                recognized = True
                if label == 'Input files:':
                    fastqs = value
                    if fastqs not in fastq2serotype:
                        fastq2serotype[fastqs] = ['']*5
                        fastq2comment[fastqs] = []
                elif label in label2slot:
                    # Ignore predictions seen before any "Input files:" line
                    # instead of failing with KeyError.
                    if fastqs in fastq2serotype:
                        fastq2serotype[fastqs][label2slot[label]] = value
                else:
                    recognized = False
                if not recognized and re.search(r"\w", fastqs) and len(element) > 7:
                    fastq2comment[fastqs].append(element)
    return fastq2serotype, fastq2comment
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
166 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
167 def print_html(fastq2serotype, fastq2comment): |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
168 ''' |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
169 Takes dict and prints to html file. |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
170 ''' |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
171 tab_out = open('Seqsero_result.txt', 'w'); |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
172 html_out = open('Seqsero_result.html', 'w') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
173 html_out.write('<!DOCTYPE html>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
174 html_out.write('<html>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
175 html_out.write('<head>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
176 html_out.write('<title>SeqSero Results</title>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
177 html_out.write('</head>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
178 html_out.write('<body>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
179 html_out.write('<body style="font-family:Helvetica;">\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
180 html_out.write('<p style="font-size:10px">\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
181 html_out.write('<table border=1>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
182 header = 'Input Files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted antigenic profile\tPredicted serotype(s)' |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
183 header = re.sub(' ', '_', header) |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
184 header_l = ['Input Files', 'O antigen prediction', 'H1 antigen prediction(fliC)', 'H2 antigen prediction(fljB)', 'Predicted antigenic profile', 'Predicted serotype(s)'] |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
185 html_out.write('<tr>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
186 for element in header_l: |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
187 html_out.write('<td>'+element+'</td>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
188 html_out.write('</tr>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
189 tab_out.write(header+"\n") |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
190 print "\n\n", header |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
191 for fastq in fastq2serotype: |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
192 # print fastq, fastq2serotype[fastq] |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
193 line_to_print = fastq+'\t'+"\t".join(fastq2serotype[fastq]) |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
194 tab_out.write(line_to_print+"\n") |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
195 html_out.write('<tr>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
196 html_out.write('<td>'+fastq+'</td>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
197 for antigen in fastq2serotype[fastq]: |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
198 html_out.write('<td>'+antigen+'</td>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
199 html_out.write('</tr>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
200 print line_to_print |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
201 html_out.write('</table>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
202 print "\n" |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
203 for fastq in fastq2comment: |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
204 tab_out.write("\n"+fastq+"\n") |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
205 html_out.write('<tr>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
206 html_out.write('<p>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
207 html_out.write('<td>'+fastq+"<br></td>\n") |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
208 for line in fastq2comment[fastq]: |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
209 #if len(line) > 7: |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
210 html_out.write('<td>'+line+'</td>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
211 tab_out.write(line+"\n") |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
212 print line |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
213 print "\n" |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
214 html_out.write('</p>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
215 html_out.write('</tr>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
216 html_out.write('</body>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
217 html_out.write('</html>\n') |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
218 html_out.close() |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
219 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
# Script driver: these top-level statements run the whole batch, in order.
# Each step consumes the value produced by the step before it, so the
# statement order must be preserved.
# NOTE(review): list_runs, run_seqsero, get_serotypes and print_time are
# defined outside this excerpt; remarks about what they do are inferred from
# their names and call signatures only -- confirm against their definitions.
# Step 1: derive run2fastqs from the module-level fastq_files
# (presumably groups FASTQ files by sequencing run -- TODO confirm).
220 run2fastqs = list_runs(fastq_files) |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
221 #print run2fastqs |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
# Step 2: hand the runs to run_seqsero and keep its return value
# (presumably the SeqSero results for each run -- TODO confirm).
222 outputs = run_seqsero(run2fastqs) |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
# Step 3: get_serotypes must return exactly two values; they are unpacked
# into the two objects that print_html is called with below.
223 fastq2serotype, fastq2comment = get_serotypes(outputs) |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
# Step 4: write the report. The visible part of print_html writes table rows
# to html_out, tab-separated lines to tab_out, echoes them to stdout and
# closes html_out.
224 print_html(fastq2serotype, fastq2comment) |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
# Step 5: called with no arguments after the report is written
# (presumably prints timing information -- TODO confirm).
225 print_time() |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
226 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
227 |