Mercurial > repos > dfornika > blast_report
diff blast_report.py @ 9:2b4f30c6b50a draft default tip
planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/blast_report commit 174f746f44dfdeb18301429116ccc0213c1e091e-dirty
author | dfornika |
---|---|
date | Mon, 02 Mar 2020 23:41:54 +0000 |
parents | 18b097eb1a51 |
children |
line wrap: on
line diff
--- a/blast_report.py Thu Sep 12 00:56:20 2019 -0400 +++ b/blast_report.py Mon Mar 02 23:41:54 2020 +0000 @@ -1,19 +1,20 @@ #!/usr/bin/env python -from __future__ import print_function - '''Report on BLAST results. -python bccdc_blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]] +python blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]] ''' - -import optparse +import argparse import re import sys +from Cheetah.Template import Template + + def stop_err( msg ): sys.stderr.write("%s\n" % msg) sys.exit(1) + class BLASTBin: def __init__(self, label, file): self.label = label @@ -27,6 +28,7 @@ def __str__(self): return "label: %s dict: %s" % (self.label, str(self.dict)) + class BLASTQuery: def __init__(self, query_id): self.query_id = query_id @@ -46,6 +48,7 @@ str(self.kw_filtered), str(self.kw_filtered_breakdown)) + class BLASTMatch: def __init__(self, subject_acc, subject_descr, score, p_cov, p_ident, subject_bins): self.subject_acc = subject_acc @@ -63,47 +66,49 @@ str(round(self.p_cov,2)), str(round(self.p_ident, 2))) + + #PARSE OPTIONS AND ARGUMENTS -parser = optparse.OptionParser(description='Report on BLAST results.', - usage='python bccdc_blast_report_generator.py input_tabut cheetah_tmpl output_html [output_id output_dir] [options]') +parser = argparse.ArgumentParser() -parser.add_option('-f', '--filter', +parser.add_argument('-f', '--filter', type='string', dest='filter', ) -parser.add_option('-b', '--bins', +parser.add_argument('-b', '--bins', type='string', dest='bins' ) -parser.add_option('-r', '--redundant', - dest='hsp', +parser.add_argument('-r', '--redundant', + dest='redundant', default=False, action='store_true' ) -options, args = parser.parse_args() +args = parser.parse_args() try: input_tab, cheetah_tmpl, output_html, output_tab = args except: stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.') -#print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab)) +# print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab)) + #BINS bins=[] -if options.bins != None: - bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in options.bins.split(',')]) +if args.bins != None: + bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in args.bins.split(',')]) print('database bins: %s' % str([bin.label for bin in bins])) -#FILTERS + #FILTERS filter_pident = 0 filter_kws = [] -if options.filter != None: - pident_kws = options.filter.split(':') +if args.filter != None: + pident_kws = args.filter.split(':') filter_pident = float(pident_kws[0]) filter_kws = pident_kws[-1].split(',') print('filter_pident: %s filter_kws: %s' % (str(filter_pident), str(filter_kws))) -if options.hsp: +if args.redundant: print('Throwing out redundant hits...') #RESULTS! @@ -115,6 +120,7 @@ queries = [] current_query = '' output_tab = open(output_tab, 'w') + with open(input_tab) as input_tab: for line in input_tab: cols = line.split('\t') @@ -122,13 +128,13 @@ current_query = cols[0] queries.append(BLASTQuery(current_query)) - try: - accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2] + try: + accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2] except IndexError as e: - stop_err("Problem with splitting:" + cols[SUBJ_ID_COL]) + stop_err("Problem with splitting:" + cols[SUBJ_ID_COL]) #hsp option: keep best (first) hit only for each query and accession id. - if options.hsp: + if args.redundant: if accs[0] in queries[-1].match_accessions: continue #don't save the result and skip to the next else: @@ -156,7 +162,7 @@ queries[-1].kw_filtered += 1 continue descr = descrs.split(';')[0] - + #ATTEMPT BIN subj_bins = [] for bin in bins: #if we are not binning, bins = [] so for loop not entered @@ -174,7 +180,9 @@ p_cov = float(cols[PCOV_COL]) #SAVE RESULT - queries[-1].matches.append(BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins)) + queries[-1].matches.append( + BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins) + ) output_tab.write(line) input_tab.close() output_tab.close() @@ -190,9 +198,12 @@ print(' %s' % str(query.matches[x])) ''' -from Cheetah.Template import Template namespace = {'queries': queries} html = Template(file=cheetah_tmpl, searchList=[namespace]) out_html = open(output_html, 'w') out_html.write(str(html)) out_html.close() + + +if __name__ == '__main__': + main()