Mercurial > repos > dfornika > blast_report
changeset 9:2b4f30c6b50a draft default tip
planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/blast_report commit 174f746f44dfdeb18301429116ccc0213c1e091e-dirty
author | dfornika |
---|---|
date | Mon, 02 Mar 2020 23:41:54 +0000 |
parents | 71dd0b1d5511 |
children | |
files | blast_report.py blast_report.xml blast_report_bins.loc.sample blast_report_templates.loc.sample templates/template1.tmpl templates/template2.tmpl tool-data/blast_report_templates.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 9 files changed, 76 insertions(+), 313 deletions(-) [+] |
line wrap: on
line diff
--- a/blast_report.py Thu Sep 12 00:56:20 2019 -0400 +++ b/blast_report.py Mon Mar 02 23:41:54 2020 +0000 @@ -1,19 +1,20 @@ #!/usr/bin/env python -from __future__ import print_function - '''Report on BLAST results. -python bccdc_blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]] +python blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]] ''' - -import optparse +import argparse import re import sys +from Cheetah.Template import Template + + def stop_err( msg ): sys.stderr.write("%s\n" % msg) sys.exit(1) + class BLASTBin: def __init__(self, label, file): self.label = label @@ -27,6 +28,7 @@ def __str__(self): return "label: %s dict: %s" % (self.label, str(self.dict)) + class BLASTQuery: def __init__(self, query_id): self.query_id = query_id @@ -46,6 +48,7 @@ str(self.kw_filtered), str(self.kw_filtered_breakdown)) + class BLASTMatch: def __init__(self, subject_acc, subject_descr, score, p_cov, p_ident, subject_bins): self.subject_acc = subject_acc @@ -63,47 +66,49 @@ str(round(self.p_cov,2)), str(round(self.p_ident, 2))) + + #PARSE OPTIONS AND ARGUMENTS -parser = optparse.OptionParser(description='Report on BLAST results.', - usage='python bccdc_blast_report_generator.py input_tabut cheetah_tmpl output_html [output_id output_dir] [options]') +parser = argparse.ArgumentParser() -parser.add_option('-f', '--filter', +parser.add_argument('-f', '--filter', type='string', dest='filter', ) -parser.add_option('-b', '--bins', +parser.add_argument('-b', '--bins', type='string', dest='bins' ) -parser.add_option('-r', '--redundant', - dest='hsp', +parser.add_argument('-r', '--redundant', + dest='redundant', default=False, action='store_true' ) -options, args = parser.parse_args() +args = parser.parse_args() try: input_tab, cheetah_tmpl, output_html, output_tab = args except: stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.') -#print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab)) +# print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab)) + #BINS bins=[] -if options.bins != None: - bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in options.bins.split(',')]) +if args.bins != None: + bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in args.bins.split(',')]) print('database bins: %s' % str([bin.label for bin in bins])) -#FILTERS + #FILTERS filter_pident = 0 filter_kws = [] -if options.filter != None: - pident_kws = options.filter.split(':') +if args.filter != None: + pident_kws = args.filter.split(':') filter_pident = float(pident_kws[0]) filter_kws = pident_kws[-1].split(',') print('filter_pident: %s filter_kws: %s' % (str(filter_pident), str(filter_kws))) -if options.hsp: +if args.redundant: print('Throwing out redundant hits...') #RESULTS! @@ -115,6 +120,7 @@ queries = [] current_query = '' output_tab = open(output_tab, 'w') + with open(input_tab) as input_tab: for line in input_tab: cols = line.split('\t') @@ -122,13 +128,13 @@ current_query = cols[0] queries.append(BLASTQuery(current_query)) - try: - accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2] + try: + accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2] except IndexError as e: - stop_err("Problem with splitting:" + cols[SUBJ_ID_COL]) + stop_err("Problem with splitting:" + cols[SUBJ_ID_COL]) #hsp option: keep best (first) hit only for each query and accession id. - if options.hsp: + if args.redundant: if accs[0] in queries[-1].match_accessions: continue #don't save the result and skip to the next else: @@ -156,7 +162,7 @@ queries[-1].kw_filtered += 1 continue descr = descrs.split(';')[0] - + #ATTEMPT BIN subj_bins = [] for bin in bins: #if we are not binning, bins = [] so for loop not entered @@ -174,7 +180,9 @@ p_cov = float(cols[PCOV_COL]) #SAVE RESULT - queries[-1].matches.append(BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins)) + queries[-1].matches.append( + BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins) + ) output_tab.write(line) input_tab.close() output_tab.close() @@ -190,9 +198,12 @@ print(' %s' % str(query.matches[x])) ''' -from Cheetah.Template import Template namespace = {'queries': queries} html = Template(file=cheetah_tmpl, searchList=[namespace]) out_html = open(output_html, 'w') out_html.write(str(html)) out_html.close() + + +if __name__ == '__main__': + main()
--- a/blast_report.xml Thu Sep 12 00:56:20 2019 -0400 +++ b/blast_report.xml Mon Mar 02 23:41:54 2020 +0000 @@ -1,54 +1,44 @@ -<tool id="blast_report" name="BLAST report" force_history_refresh="true" version="1.1.0"> - <description>Report on BLAST results</description> - <command > - <![CDATA[ - '$__tool_directory__/blast_report.py' - '${in_tab}' - '${tmpl.fields.path}' - '${out_html}' - '${out_tab}' - -f '${filter_pident}:$filter_kws' - #if str($bins) == "None" - #pass - #else - -b "${bins.fields.path}" - #end if - #if $hsp_bool - -r - #end if +<tool id="blast_report" name="BLAST report" version="0.1.0+galaxy0" > + <description>Produce an HTML table report of BLAST results</description> + <command detect_errors="error_code"> + <![CDATA[ + '${__tool_directory__}/blast_report.py' + '${tabular_blast_report_input}' + '${report_template}' + '${out_html}' + '${out_tab}' + -f ${filter_pident}:$filter_kws + #if str($bins) != "None" + -b "${bins}" + #end if + $discard_redundant ]]> </command> <inputs> - <param name="in_tab" type="data" format="tabular" label="Tabular BLAST results (extended 26 columns)"/> - <param name="tmpl" type="select" optional="false" label="Report template"> + <param name="tabular_blast_report_input" type="data" format="tabular" label="Tabular BLAST results (extended 26 columns)"/> + <param name="report_template" type="select" optional="false" label="Report template"> <options from_data_table="blast_report_templates"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> + <validator type="no_options" message="No BLAST report templates are available" /> </options> </param> <param name="filter_pident" type="integer" min="90" max="100" value="97" label="Minimum percentage identity"/> <param name="filter_kws" type="text" size="50" label="Comma-separated list of description keyword filters" value="bovine,clone,environmental,swine,uncultivated,uncultured,unidentified"/> <param name="bins" type="select" label="Database bins" multiple="true" display="checkboxes"> - <options from_data_table="blast_report_bins"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> + <options from_data_table="blast_reference_bins"> + <validator type="no_options" message="No BLAST reference bins available" /> </options> </param> - <!--<repeat name="hist_bins" title="History database bins"> - <param name="filter" type="data" format="csv" label="History database bin"/> - </repeat>--> - <param name="hsp_bool" type="boolean" label="Throw out redundant hits?"/> - <param name="tab_bool" type="boolean" label="Output tabular file?"/> + <param name="discard_redundant" type="boolean" truevalue="-r" falsevalue="" label="Throw out redundant hits?"/> + <param name="output_tabular" type="boolean" label="Output tabular file?"/> </inputs> <outputs> <data name="out_html" format="html" label="$tool.name on data $in_tab.hid: report"/> <data name="out_tab" format="tabular" label="$tool.name on data $in_tab.hid: tabular results"> - <filter> tab_bool </filter> + <filter> output_tabular </filter> </data> </outputs> <help> + .. class:: infomark **What it does**
--- a/blast_report_bins.loc.sample Thu Sep 12 00:56:20 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -# Expect three columns, tab separated, as follows: -# - value (Galaxy records this in the Galaxy DB) -# - name (Galaxy shows this in the UI) -# - path (folder name containing the Kraken DB) -# -# e.g. -# rdp<tab>RDP<tab>/path/to/bins/rdp.csv
--- a/blast_report_templates.loc.sample Thu Sep 12 00:56:20 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -# Expect three columns, tab separated, as follows: -# - value (Galaxy records this in the Galaxy DB) -# - name (Galaxy shows this in the UI) -# - path (folder name containing the Kraken DB) -# -# e.g. -# template1<tab>Template 1<tab>/path/to/templates/template1.templ
--- a/templates/template1.tmpl Thu Sep 12 00:56:20 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,108 +0,0 @@ -#silent import time -#set $display_m = 20 -#set $header = '<tr class="header"><th>Accession</th><th>Description</th><th>Score</th><th>% Coverage</th><th>% Identity</th></tr>' -<html> - <head> - <style> - body { - font-size:0.75em; - } - table, tr { - width: 100%; - } - table { - border-collapse: collapse; - border: 1px solid black; - } - tr.header { - background-color: lightgrey; - } - th { - border: 1px solid black; - } - td { - border-left: 1px solid black; - border-right: 1px solid black; - border-bottom: 1px dashed grey; - } - td.descr { - font-size: 80%; - } - h3 { - page-break-before: always; - color: blue; - } - h3.first { - page-break-before: avoid; - } - span.super { - color: navy; - font-size: 75%; - vertical-align: top; - } - </style> - <script> - function toggle(id){ - var element = document.getElementById(id) - console.log(id) - if (element.style.display == 'none') { - //console.log(element.tagName); - if (element.tagName == 'TBODY') element.style.display = 'table-row-group'; - else if (element.tagName == 'TD') element.style.display = 'table-cell'; - else element.style.display = 'block'; - } else { - element.style.display = 'none'; - } - } - </script> - </head> - <body> - #set $q = 0 - #for $query in $queries - #set $bin_symbols = dict([($bin,$i) for $i, $bin in enumerate($query.bins, 1)]) - #set $m = 0 - <h3 id="${query.query_id}" #if $q == 0 then'class="first"' else '' #>$query.query_id</h3> - <br/> - <table id="${query.query_id}_matches"> - #if len($query.matches) == 0: - <tr class="header"><th colspan="5">No matches to report</th></tr> - </table> - #else: - $header - #for $match in $query.matches: - #if $m == $display_m - <tbody id="${query.query_id}_extra" style="display:none"> - #end if - <tr> - <td>$match.subject_acc <span class="super">#echo ', '.join(sorted([str($bin_symbols[$bin]) for $bin in $match.bins]))#</span></td> - <td class="descr">$match.subject_descr</td> - <td>$match.score</td> - <td>$match.p_cov</td> - <td>$match.p_ident</td> - </tr> - #set $m += 1 - #end for - #if $m >= $display_m - </tbody> - <td id="${query.query_id}_show" align="center" colspan="6" >Displaying ${display_m}/$m matches. <a href="#${query.query_id}_extra" onclick="toggle('${query.query_id}_extra'); toggle('${query.query_id}_show'); toggle('${query.query_id}_hide');">Show the remaining results.</a></td> - <td id="${query.query_id}_hide" align="center" colspan="6" style="display:none"><a href="#${query.query_id}" onclick="toggle('${query.query_id}_extra'); toggle('${query.query_id}_show'); toggle('${query.query_id}_hide');">Hide the last #echo $m - $display_m # results.</a></td> - <tr> - </tr> - #end if - </table> - #if len($bin_symbols) > 0: - <p>#echo ', '.join(['<span class="super">%s</span> %s'%($bin_symbols[$bin],$bin) for $bin in $query.bins])#</p> - #end if - #end if - #if $query.pident_filtered > 0: - <p>$query.pident_filtered results filtered by % Identity.</p> - #end if - #if $query.kw_filtered > 0: - <p>$query.kw_filtered results filtered by description keywords: #echo ', '.join(list(["%s matches to '%s'" % (str($query.kw_filtered_breakdown[$kw]),$kw) for $kw in $query.kw_filtered_breakdown])) #.</p> - #end if - <p>Report produced on #echo time.strftime("%d/%m/%Y") #.</p> - <hr noshade size="1" color="blue"> - #set $q += 1 - #end for - </body> -</html>
--- a/templates/template2.tmpl Thu Sep 12 00:56:20 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,127 +0,0 @@ -#silent import time -#set $display_m = 20 -#set $header = '<tr class="header"><th>Accession</th><th>Description</th><th>Score</th><th>% Coverage</th><th>% Identity</th></tr>' -<html> - <head> - <style> - body { - font-size:0.75em; - } - table, tr { - width: 100%; - } - table { - border-collapse: collapse; - border: 1px solid black; - } - tr.header { - background-color: lightgrey; - } - th { - border: 1px solid black; - } - td { - border-left: 1px solid black; - border-right: 1px solid black; - border-bottom: 1px dashed grey; - } - td.descr { - font-size: 80%; - } - h3 { - page-break-before: always; - color: blue; - } - h3.first { - page-break-before: avoid; - } - span.super { - color: navy; - font-size: 75%; - vertical-align: top; - } - </style> - <script> - function toggle(id){ - var element = document.getElementById(id) - console.log(id) - if (element.style.display == 'none') { - //console.log(element.tagName); - if (element.tagName == 'TBODY') element.style.display = 'table-row-group'; - else if (element.tagName == 'TD') element.style.display = 'table-cell'; - else element.style.display = 'block'; - } else { - element.style.display = 'none'; - } - } - </script> - </head> - <body> - #set $q = 0 - #for $query in $queries - #set $bin_symbols = dict([($bin,$i) for $i, $bin in enumerate($query.bins, 1)]) - #set $m = 0 - <h3 id="${query.query_id}" #if $q == 0 then'class="first"' else '' #>$query.query_id</h3> - <br/> - <table id="${query.query_id}_matches"> - #set $num_of_euzby = -1 - #if len($query.matches) == 0: - <tr class="header"><th colspan="5">No matches to report</th></tr> - </table> - #else: - $header - #try - #set $priority = $query.bins['Euzby'] - #set $front = [] - #for $i in reversed($priority) - #silent $front.append($query.matches.pop($i)) - #end for - #set $num_of_euzby = len($front) - #silent $front.reverse() - #silent $front.extend($query.matches) - #set $query.matches = $front - #except - #pass - #end try - #for $match in $query.matches: - #if $m == $display_m - <tbody id="${query.query_id}_extra" style="display:none"> - #end if - ##if $m>0 and set($match.bins)!=set($query.matches[m-1].bins) - ##put an empty line to separate Euzby records from other records - #if $m==$num_of_euzby and $m>0 - <tr><td align="center" colspan="6"> </td></tr> - #end if - <tr> - <td>$match.subject_acc <span class="super">#echo ', '.join(sorted([str($bin_symbols[$bin]) for $bin in $match.bins]))#</span></td> - <td class="descr">$match.subject_descr</td> - <td>$match.score</td> - <td>$match.p_cov</td> - <td>$match.p_ident</td> - </tr> - #set $m += 1 - #end for - #if $m >= $display_m - </tbody> - <td id="${query.query_id}_show" align="center" colspan="6" >Displaying ${display_m}/$m matches. <a href="#${query.query_id}_extra" onclick="toggle('${query.query_id}_extra'); toggle('${query.query_id}_show'); toggle('${query.query_id}_hide');">Show the remaining results.</a></td> - <td id="${query.query_id}_hide" align="center" colspan="6" style="display:none"><a href="#${query.query_id}" onclick="toggle('${query.query_id}_extra'); toggle('${query.query_id}_show'); toggle('${query.query_id}_hide');">Hide the last #echo $m - $display_m # results.</a></td> - <tr> - </tr> - #end if - </table> - #if len($bin_symbols) > 0: - <p>#echo ', '.join(['<span class="super">%s</span> %s'%($bin_symbols[$bin],$bin) for $bin in $query.bins])#</p> - #end if - #end if - #if $query.pident_filtered > 0: - <p>$query.pident_filtered results filtered by % Identity.</p> - #end if - #if $query.kw_filtered > 0: - <p>$query.kw_filtered results filtered by description keywords: #echo ', '.join(list(["%s matches to '%s'" % (str($query.kw_filtered_breakdown[$kw]),$kw) for $kw in $query.kw_filtered_breakdown])) #.</p> - #end if - <p>Report produced on #echo time.strftime("%d/%m/%Y") #.</p> - <hr noshade size="1" color="blue"> - #set $q += 1 - #end for - </body> -</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/blast_report_templates.loc.sample Mon Mar 02 23:41:54 2020 +0000 @@ -0,0 +1,7 @@ +# Expect three columns, tab separated, as follows: +# - value (Galaxy records this in the Galaxy DB, consider using a UUID but any unique value will work) +# - name (Galaxy shows this in the UI) +# - path (Path to the blast report template (cheetah format)) +# +# e.g. +# f45ee89a-d456-469a-8aeb-54cdfea821ec<tab>Default BLAST Report Template<tab>/path/to/template.tmpl
--- a/tool_data_table_conf.xml.sample Thu Sep 12 00:56:20 2019 -0400 +++ b/tool_data_table_conf.xml.sample Mon Mar 02 23:41:54 2020 +0000 @@ -1,10 +1,6 @@ <?xml version="1.0"?> <tables> - <!-- Locations of BLAST Report bins in the required format --> - <table name="blast_report_bins" comment_char="#"> - <columns>value, name, path</columns> - <file path="tool-data/blast_report_bins.loc" /> - </table> + <!-- Locations of BLAST report templates in the required format --> <table name="blast_report_templates" comment_char="#"> <columns>value, name, path</columns> <file path="tool-data/blast_report_templates.loc" />
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Mon Mar 02 23:41:54 2020 +0000 @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of BLAST report templates in the required format --> + <table name="blast_report_templates" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/blast_report_templates.loc" /> + </table> +</tables>