Mercurial > repos > dfornika > blast_report

diff blast_report.py @ 9:2b4f30c6b50a draft default tip
planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/blast_report commit 174f746f44dfdeb18301429116ccc0213c1e091e-dirty
author: dfornika
date: Mon, 02 Mar 2020 23:41:54 +0000
parents: 18b097eb1a51
--- a/blast_report.py	Thu Sep 12 00:56:20 2019 -0400
+++ b/blast_report.py	Mon Mar 02 23:41:54 2020 +0000
@@ -1,19 +1,20 @@
 #!/usr/bin/env python
-from __future__ import print_function
-
 '''Report on BLAST results.
 
-python bccdc_blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]]
+python blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]]
 '''
-
-import optparse
+import argparse
 import re
 import sys
 
+from Cheetah.Template import Template
+
+
 def stop_err( msg ):
     sys.stderr.write("%s\n" % msg)
     sys.exit(1)
 
+
 class BLASTBin:
     def __init__(self, label, file):
         self.label = label
@@ -27,6 +28,7 @@
     def __str__(self):
         return "label: %s    dict: %s" % (self.label, str(self.dict))
 
+
 class BLASTQuery:
     def __init__(self, query_id):
         self.query_id = query_id
@@ -46,6 +48,7 @@
                str(self.kw_filtered),
                str(self.kw_filtered_breakdown))
 
+
 class BLASTMatch:
     def __init__(self, subject_acc, subject_descr, score, p_cov, p_ident, subject_bins):
         self.subject_acc = subject_acc
@@ -63,47 +66,49 @@
                str(round(self.p_cov,2)),
                str(round(self.p_ident, 2)))
 
+
+
 #PARSE OPTIONS AND ARGUMENTS
-parser = optparse.OptionParser(description='Report on BLAST results.',
-                               usage='python bccdc_blast_report_generator.py input_tabut cheetah_tmpl output_html [output_id output_dir] [options]')
+parser = argparse.ArgumentParser(description='Report on BLAST results.')
+parser.add_argument('input_tab')
+parser.add_argument('cheetah_tmpl')
+parser.add_argument('output_html')
+parser.add_argument('output_tab')
 
-parser.add_option('-f', '--filter',
-                    type='string',
+# Option values are plain strings: argparse's default (it has no type='string').
+parser.add_argument('-f', '--filter',
                     dest='filter',
                     )
-parser.add_option('-b', '--bins',
-                    type='string',
+parser.add_argument('-b', '--bins',
                     dest='bins'
                     )
-parser.add_option('-r', '--redundant',
-                    dest='hsp',
+parser.add_argument('-r', '--redundant',
+                    dest='redundant',
                     default=False,
                     action='store_true'
                     )
-options, args = parser.parse_args()
+args = parser.parse_args()
 
-try:
-    input_tab, cheetah_tmpl, output_html, output_tab = args
-except:
-    stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.')
-#print('input_tab: %s    cheetah_tmpl: %s    output_html: %s    output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab))
+input_tab, cheetah_tmpl = args.input_tab, args.cheetah_tmpl
+output_html, output_tab = args.output_html, args.output_tab
+
 
 #BINS
 bins=[]
-if options.bins != None:
-    bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in options.bins.split(',')])
+if args.bins is not None:
+    bins = [BLASTBin(label_file.split('=')[0], label_file.split('=')[-1]) for label_file in args.bins.split(',')]
 print('database bins: %s' % str([bin.label for bin in bins]))
 
 #FILTERS
 filter_pident = 0
 filter_kws = []
-if options.filter != None:
-    pident_kws = options.filter.split(':')
-    filter_pident = float(pident_kws[0])
+if args.filter is not None:
+    pident_kws = args.filter.split(':')
+    filter_pident = float(pident_kws[0] or 0)  # pident part is optional, e.g. ":kw1,kw2"
     filter_kws = pident_kws[-1].split(',')
 print('filter_pident: %s    filter_kws: %s' % (str(filter_pident), str(filter_kws)))
 
-if options.hsp:
+if args.redundant:
     print('Throwing out redundant hits...')
 
 #RESULTS!
@@ -115,6 +120,7 @@
 queries = []
 current_query = ''
 output_tab = open(output_tab, 'w')
+    
 with open(input_tab) as input_tab:
     for line in input_tab:
         cols = line.split('\t')
@@ -122,13 +128,13 @@
             current_query = cols[0]
             queries.append(BLASTQuery(current_query))
 
-        try:        
-                accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2]
+        try:
+            accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2]
         except IndexError as e:
-                stop_err("Problem with splitting:" + cols[SUBJ_ID_COL])
+            stop_err("Problem with splitting:" + cols[SUBJ_ID_COL])
 
         #hsp option: keep best (first) hit only for each query and accession id.
-        if options.hsp:
+        if args.redundant:
             if accs[0] in queries[-1].match_accessions:
                 continue #don't save the result and skip to the next
             else:
@@ -156,7 +162,7 @@
             queries[-1].kw_filtered += 1
             continue
         descr = descrs.split(';')[0]
-
+        
         #ATTEMPT BIN
         subj_bins = []
         for bin in bins: #if we are not binning, bins = [] so for loop not entered
@@ -174,7 +180,9 @@
         p_cov = float(cols[PCOV_COL])
         
         #SAVE RESULT
-        queries[-1].matches.append(BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins))
+        queries[-1].matches.append(
+            BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins)
+        )
         output_tab.write(line)            
 input_tab.close()
 output_tab.close()
@@ -190,9 +198,7 @@
             print('        %s' % str(query.matches[x]))
 '''
 
-from Cheetah.Template import Template
 namespace = {'queries': queries}
 html = Template(file=cheetah_tmpl, searchList=[namespace])
-out_html = open(output_html, 'w')
-out_html.write(str(html))
-out_html.close()
+with open(output_html, 'w') as out_html:  # closed even if rendering the template raises
+    out_html.write(str(html))
author	dfornika
date	Mon, 02 Mar 2020 23:41:54 +0000
parents	18b097eb1a51
children