comparison blast_report.py @ 9:2b4f30c6b50a draft default tip

planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/blast_report commit 174f746f44dfdeb18301429116ccc0213c1e091e-dirty
author dfornika
date Mon, 02 Mar 2020 23:41:54 +0000
parents 18b097eb1a51
children
comparison
equal deleted inserted replaced
8:71dd0b1d5511 9:2b4f30c6b50a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 from __future__ import print_function
3
4 '''Report on BLAST results. 2 '''Report on BLAST results.
5 3
6 python bccdc_blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]] 4 python blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]]
7 ''' 5 '''
8 6 import argparse
9 import optparse
10 import re 7 import re
11 import sys 8 import sys
9
10 from Cheetah.Template import Template
11
12 12
13 def stop_err( msg ): 13 def stop_err( msg ):
14 sys.stderr.write("%s\n" % msg) 14 sys.stderr.write("%s\n" % msg)
15 sys.exit(1) 15 sys.exit(1)
16
16 17
17 class BLASTBin: 18 class BLASTBin:
18 def __init__(self, label, file): 19 def __init__(self, label, file):
19 self.label = label 20 self.label = label
20 self.dict = {} 21 self.dict = {}
24 self.dict[line.rstrip().split('.')[0]] = '' 25 self.dict[line.rstrip().split('.')[0]] = ''
25 file_in.close() 26 file_in.close()
26 27
27 def __str__(self): 28 def __str__(self):
28 return "label: %s dict: %s" % (self.label, str(self.dict)) 29 return "label: %s dict: %s" % (self.label, str(self.dict))
30
29 31
30 class BLASTQuery: 32 class BLASTQuery:
31 def __init__(self, query_id): 33 def __init__(self, query_id):
32 self.query_id = query_id 34 self.query_id = query_id
33 self.matches = [] 35 self.matches = []
44 str([bin.label for bin in bins]), 46 str([bin.label for bin in bins]),
45 str(self.pident_filtered), 47 str(self.pident_filtered),
46 str(self.kw_filtered), 48 str(self.kw_filtered),
47 str(self.kw_filtered_breakdown)) 49 str(self.kw_filtered_breakdown))
48 50
51
49 class BLASTMatch: 52 class BLASTMatch:
50 def __init__(self, subject_acc, subject_descr, score, p_cov, p_ident, subject_bins): 53 def __init__(self, subject_acc, subject_descr, score, p_cov, p_ident, subject_bins):
51 self.subject_acc = subject_acc 54 self.subject_acc = subject_acc
52 self.subject_descr = subject_descr 55 self.subject_descr = subject_descr
53 self.score = score 56 self.score = score
61 self.subject_descr, 64 self.subject_descr,
62 str(self.score), 65 str(self.score),
63 str(round(self.p_cov,2)), 66 str(round(self.p_cov,2)),
64 str(round(self.p_ident, 2))) 67 str(round(self.p_ident, 2)))
65 68
69
70
66 #PARSE OPTIONS AND ARGUMENTS 71 #PARSE OPTIONS AND ARGUMENTS
67 parser = optparse.OptionParser(description='Report on BLAST results.', 72 parser = argparse.ArgumentParser()
68 usage='python bccdc_blast_report_generator.py input_tabut cheetah_tmpl output_html [output_id output_dir] [options]') 73
69 74 parser.add_argument('-f', '--filter',
70 parser.add_option('-f', '--filter',
71 type='string', 75 type='string',
72 dest='filter', 76 dest='filter',
73 ) 77 )
74 parser.add_option('-b', '--bins', 78 parser.add_argument('-b', '--bins',
75 type='string', 79 type='string',
76 dest='bins' 80 dest='bins'
77 ) 81 )
78 parser.add_option('-r', '--redundant', 82 parser.add_argument('-r', '--redundant',
79 dest='hsp', 83 dest='redundant',
80 default=False, 84 default=False,
81 action='store_true' 85 action='store_true'
82 ) 86 )
83 options, args = parser.parse_args() 87 args = parser.parse_args()
84 88
85 try: 89 try:
86 input_tab, cheetah_tmpl, output_html, output_tab = args 90 input_tab, cheetah_tmpl, output_html, output_tab = args
87 except: 91 except:
88 stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.') 92 stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.')
89 #print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab)) 93 # print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab))
94
90 95
91 #BINS 96 #BINS
92 bins=[] 97 bins=[]
93 if options.bins != None: 98 if args.bins != None:
94 bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in options.bins.split(',')]) 99 bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in args.bins.split(',')])
95 print('database bins: %s' % str([bin.label for bin in bins])) 100 print('database bins: %s' % str([bin.label for bin in bins]))
96 101
97 #FILTERS 102 #FILTERS
98 filter_pident = 0 103 filter_pident = 0
99 filter_kws = [] 104 filter_kws = []
100 if options.filter != None: 105 if args.filter != None:
101 pident_kws = options.filter.split(':') 106 pident_kws = args.filter.split(':')
102 filter_pident = float(pident_kws[0]) 107 filter_pident = float(pident_kws[0])
103 filter_kws = pident_kws[-1].split(',') 108 filter_kws = pident_kws[-1].split(',')
104 print('filter_pident: %s filter_kws: %s' % (str(filter_pident), str(filter_kws))) 109 print('filter_pident: %s filter_kws: %s' % (str(filter_pident), str(filter_kws)))
105 110
106 if options.hsp: 111 if args.redundant:
107 print('Throwing out redundant hits...') 112 print('Throwing out redundant hits...')
108 113
109 #RESULTS! 114 #RESULTS!
110 PIDENT_COL = 2 115 PIDENT_COL = 2
111 DESCR_COL = 25 116 DESCR_COL = 25
113 SCORE_COL = 11 118 SCORE_COL = 11
114 PCOV_COL = 24 119 PCOV_COL = 24
115 queries = [] 120 queries = []
116 current_query = '' 121 current_query = ''
117 output_tab = open(output_tab, 'w') 122 output_tab = open(output_tab, 'w')
123
118 with open(input_tab) as input_tab: 124 with open(input_tab) as input_tab:
119 for line in input_tab: 125 for line in input_tab:
120 cols = line.split('\t') 126 cols = line.split('\t')
121 if cols[0] != current_query: 127 if cols[0] != current_query:
122 current_query = cols[0] 128 current_query = cols[0]
123 queries.append(BLASTQuery(current_query)) 129 queries.append(BLASTQuery(current_query))
124 130
125 try: 131 try:
126 accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2] 132 accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2]
127 except IndexError as e: 133 except IndexError as e:
128 stop_err("Problem with splitting:" + cols[SUBJ_ID_COL]) 134 stop_err("Problem with splitting:" + cols[SUBJ_ID_COL])
129 135
130 #hsp option: keep best (first) hit only for each query and accession id. 136 #hsp option: keep best (first) hit only for each query and accession id.
131 if options.hsp: 137 if args.redundant:
132 if accs[0] in queries[-1].match_accessions: 138 if accs[0] in queries[-1].match_accessions:
133 continue #don't save the result and skip to the next 139 continue #don't save the result and skip to the next
134 else: 140 else:
135 queries[-1].match_accessions[accs[0]] = '' 141 queries[-1].match_accessions[accs[0]] = ''
136 142
154 queries[-1].kw_filtered_breakdown[kw] = 1 160 queries[-1].kw_filtered_breakdown[kw] = 1
155 if filter_by_kw: #if we are not filtering, for loop will not be entered and this will never be True 161 if filter_by_kw: #if we are not filtering, for loop will not be entered and this will never be True
156 queries[-1].kw_filtered += 1 162 queries[-1].kw_filtered += 1
157 continue 163 continue
158 descr = descrs.split(';')[0] 164 descr = descrs.split(';')[0]
159 165
160 #ATTEMPT BIN 166 #ATTEMPT BIN
161 subj_bins = [] 167 subj_bins = []
162 for bin in bins: #if we are not binning, bins = [] so for loop not entered 168 for bin in bins: #if we are not binning, bins = [] so for loop not entered
163 for acc in accs: 169 for acc in accs:
164 if acc.split('.')[0] in bin.dict: 170 if acc.split('.')[0] in bin.dict:
172 178
173 score = int(float(cols[SCORE_COL])) 179 score = int(float(cols[SCORE_COL]))
174 p_cov = float(cols[PCOV_COL]) 180 p_cov = float(cols[PCOV_COL])
175 181
176 #SAVE RESULT 182 #SAVE RESULT
177 queries[-1].matches.append(BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins)) 183 queries[-1].matches.append(
184 BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins)
185 )
178 output_tab.write(line) 186 output_tab.write(line)
179 input_tab.close() 187 input_tab.close()
180 output_tab.close() 188 output_tab.close()
181 189
182 ''' 190 '''
188 print(' bin: %s' % bin) 196 print(' bin: %s' % bin)
189 for x in query.bins[bin]: 197 for x in query.bins[bin]:
190 print(' %s' % str(query.matches[x])) 198 print(' %s' % str(query.matches[x]))
191 ''' 199 '''
192 200
193 from Cheetah.Template import Template
194 namespace = {'queries': queries} 201 namespace = {'queries': queries}
195 html = Template(file=cheetah_tmpl, searchList=[namespace]) 202 html = Template(file=cheetah_tmpl, searchList=[namespace])
196 out_html = open(output_html, 'w') 203 out_html = open(output_html, 'w')
197 out_html.write(str(html)) 204 out_html.write(str(html))
198 out_html.close() 205 out_html.close()
206
207
208 if __name__ == '__main__':
209 main()