comparison blast_report.py @ 7:445a1923bb97 draft

Uploaded
author dfornika
date Tue, 03 Mar 2020 05:34:11 +0000
parents c7ce2cd96546
children c4e67d856c19
comparison
Legend: equal | deleted | inserted | replaced
Left column: 6:b8a3578b6445 | Right column: 7:445a1923bb97
69 str(self.score), 69 str(self.score),
70 str(round(self.p_cov,2)), 70 str(round(self.p_cov,2)),
71 str(round(self.p_ident, 2))) 71 str(round(self.p_ident, 2)))
72 72
73 73
74
75 #PARSE OPTIONS AND ARGUMENTS 74 #PARSE OPTIONS AND ARGUMENTS
76 parser = argparse.ArgumentParser() 75 parser = argparse.ArgumentParser()
77 76
78 parser.add_argument('-f', '--filter', 77 parser.add_argument('-f', '--filter-keywords',
79 dest='filter', 78 dest='filter_keywords',
79 )
80 parser.add_argument('-i', '--min-identity',
81 dest='min_identity',
80 ) 82 )
81 parser.add_argument('-b', '--bins', 83 parser.add_argument('-b', '--bins',
82 dest='bins' 84 dest='bins'
83 ) 85 )
84 parser.add_argument('-r', '--discard-redundant', 86 parser.add_argument('-r', '--discard-redundant',
85 dest='discard_redundant', 87 dest='discard_redundant',
86 default=False, 88 default=False,
87 action='store_true' 89 action='store_true'
88 ) 90 )
91 parser.add_argument('input_tab')
92 parser.add_argument('cheetah_tmpl')
93 parser.add_argument('output_html')
94 parser.add_argument('output_tab')
89 args = parser.parse_args() 95 args = parser.parse_args()
90 96
91 try: 97 try:
92 input_tab, cheetah_tmpl, output_html, output_tab = args 98 input_tab, cheetah_tmpl, output_html, output_tab = args
93 except: 99 except:
94 stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.') 100 stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.')
95 # print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab)) 101 print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (args.input_tab, args.cheetah_tmpl, args.output_html, args.output_tab))
96 102
97 103
98 #BINS 104 #BINS
99 bins=[] 105 bins=[]
100 if args.bins != None: 106 if args.bins != None:
101 bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in args.bins.split(',')]) 107 bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in args.bins.split(',')])
102 print('database bins: %s' % str([bin.label for bin in bins])) 108 print('database bins: %s' % str([bin.label for bin in bins]))
103 109
104 #FILTERS 110 #FILTERS
105 filter_pident = 0 111 filter_pident = 0
106 filter_kws = [] 112 filter_kws = []
107 if args.filter != None: 113 if args.keyword_filter:
108 pident_kws = args.filter.split(':') 114 filter_kws = args.keyword_filter.split(',')
109 filter_pident = float(pident_kws[0]) 115 print('minimum percent identity: %s filter_kws: %s' % (str(args.min_identity), str(filter_kws)))
110 filter_kws = pident_kws[-1].split(',')
111 print('filter_pident: %s filter_kws: %s' % (str(filter_pident), str(filter_kws)))
112 116
113 if args.discard_redundant: 117 if args.discard_redundant:
114 print('Throwing out redundant hits...') 118 print('Throwing out redundant hits...')
115 119
116 #RESULTS! 120
117 PIDENT_COL = 2 121 PIDENT_COL = 2
118 DESCR_COL = 25 122 DESCR_COL = 25
119 SUBJ_ID_COL = 12 123 SUBJ_ID_COL = 12
120 SCORE_COL = 11 124 SCORE_COL = 11
121 PCOV_COL = 24 125 PCOV_COL = 24
122 queries = [] 126 queries = []
123 current_query = '' 127 current_query = ''
124 output_tab = open(output_tab, 'w') 128 output_tab = open(args.output_tab, 'w')
125 129
126 with open(input_tab) as input_tab: 130 with open(args.input_tab) as input_tab:
127 for line in input_tab: 131 for line in input_tab:
128 cols = line.split('\t') 132 cols = line.split('\t')
129 if cols[0] != current_query: 133 if cols[0] != current_query:
130 current_query = cols[0] 134 current_query = cols[0]
131 queries.append(BLASTQuery(current_query)) 135 queries.append(BLASTQuery(current_query))
151 155
152 descrs = cols[DESCR_COL] 156 descrs = cols[DESCR_COL]
153 #FILTER BY KEY WORDS 157 #FILTER BY KEY WORDS
154 filter_by_kw = False 158 filter_by_kw = False
155 for kw in filter_kws: 159 for kw in filter_kws:
156 kw = kw.strip() #Fix by Damion D Nov 2013 160 kw = kw.strip()
157 if kw != '' and re.search(kw, descrs, re.IGNORECASE): 161 if kw != '' and re.search(kw, descrs, re.IGNORECASE):
158 filter_by_kw = True 162 filter_by_kw = True
159 try: 163 try:
160 queries[-1].kw_filtered_breakdown[kw] += 1 164 queries[-1].kw_filtered_breakdown[kw] += 1
161 except: 165 except:
199 for x in query.bins[bin]: 203 for x in query.bins[bin]:
200 print(' %s' % str(query.matches[x])) 204 print(' %s' % str(query.matches[x]))
201 ''' 205 '''
202 206
203 namespace = {'queries': queries} 207 namespace = {'queries': queries}
204 html = Template(file=cheetah_tmpl, searchList=[namespace]) 208 html = Template(file=args.cheetah_tmpl, searchList=[namespace])
205 out_html = open(output_html, 'w') 209 out_html = open(args.output_html, 'w')
206 out_html.write(str(html)) 210 out_html.write(str(html))
207 out_html.close() 211 out_html.close()
208 212
209 213
210 if __name__ == '__main__': 214 if __name__ == '__main__':