changeset 9:2b4f30c6b50a draft default tip

planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/blast_report commit 174f746f44dfdeb18301429116ccc0213c1e091e-dirty
author dfornika
date Mon, 02 Mar 2020 23:41:54 +0000
parents 71dd0b1d5511
children
files blast_report.py blast_report.xml blast_report_bins.loc.sample blast_report_templates.loc.sample templates/template1.tmpl templates/template2.tmpl tool-data/blast_report_templates.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 9 files changed, 76 insertions(+), 313 deletions(-) [+]
line wrap: on
line diff
--- a/blast_report.py	Thu Sep 12 00:56:20 2019 -0400
+++ b/blast_report.py	Mon Mar 02 23:41:54 2020 +0000
@@ -1,19 +1,20 @@
 #!/usr/bin/env python
-from __future__ import print_function
-
 '''Report on BLAST results.
 
-python bccdc_blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]]
+python blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]]
 '''
-
-import optparse
+import argparse
 import re
 import sys
 
+from Cheetah.Template import Template
+
+
 def stop_err( msg ):
     sys.stderr.write("%s\n" % msg)
     sys.exit(1)
 
+
 class BLASTBin:
     def __init__(self, label, file):
         self.label = label
@@ -27,6 +28,7 @@
     def __str__(self):
         return "label: %s    dict: %s" % (self.label, str(self.dict))
 
+
 class BLASTQuery:
     def __init__(self, query_id):
         self.query_id = query_id
@@ -46,6 +48,7 @@
                str(self.kw_filtered),
                str(self.kw_filtered_breakdown))
 
+
 class BLASTMatch:
     def __init__(self, subject_acc, subject_descr, score, p_cov, p_ident, subject_bins):
         self.subject_acc = subject_acc
@@ -63,47 +66,49 @@
                str(round(self.p_cov,2)),
                str(round(self.p_ident, 2)))
 
+
+
 #PARSE OPTIONS AND ARGUMENTS
-parser = optparse.OptionParser(description='Report on BLAST results.',
-                               usage='python bccdc_blast_report_generator.py input_tabut cheetah_tmpl output_html [output_id output_dir] [options]')
+parser = argparse.ArgumentParser()
 
-parser.add_option('-f', '--filter',
+parser.add_argument('-f', '--filter',
                     type='string',
                     dest='filter',
                     )
-parser.add_option('-b', '--bins',
+parser.add_argument('-b', '--bins',
                     type='string',
                     dest='bins'
                     )
-parser.add_option('-r', '--redundant',
-                    dest='hsp',
+parser.add_argument('-r', '--redundant',
+                    dest='redundant',
                     default=False,
                     action='store_true'
                     )
-options, args = parser.parse_args()
+args = parser.parse_args()
 
 try:
     input_tab, cheetah_tmpl, output_html, output_tab = args
 except:
     stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.')
-#print('input_tab: %s    cheetah_tmpl: %s    output_html: %s    output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab))
+# print('input_tab: %s    cheetah_tmpl: %s    output_html: %s    output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab))
+
 
 #BINS
 bins=[]
-if options.bins != None:
-    bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in options.bins.split(',')])
+if args.bins != None:
+    bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in args.bins.split(',')])
 print('database bins: %s' % str([bin.label for bin in bins]))
 
-#FILTERS
+    #FILTERS
 filter_pident = 0
 filter_kws = []
-if options.filter != None:
-    pident_kws = options.filter.split(':')
+if args.filter != None:
+    pident_kws = args.filter.split(':')
     filter_pident = float(pident_kws[0])
     filter_kws = pident_kws[-1].split(',')
 print('filter_pident: %s    filter_kws: %s' % (str(filter_pident), str(filter_kws)))
 
-if options.hsp:
+if args.redundant:
     print('Throwing out redundant hits...')
 
 #RESULTS!
@@ -115,6 +120,7 @@
 queries = []
 current_query = ''
 output_tab = open(output_tab, 'w')
+    
 with open(input_tab) as input_tab:
     for line in input_tab:
         cols = line.split('\t')
@@ -122,13 +128,13 @@
             current_query = cols[0]
             queries.append(BLASTQuery(current_query))
 
-        try:        
-                accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2]
+        try:
+            accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2]
         except IndexError as e:
-                stop_err("Problem with splitting:" + cols[SUBJ_ID_COL])
+            stop_err("Problem with splitting:" + cols[SUBJ_ID_COL])
 
         #hsp option: keep best (first) hit only for each query and accession id.
-        if options.hsp:
+        if args.redundant:
             if accs[0] in queries[-1].match_accessions:
                 continue #don't save the result and skip to the next
             else:
@@ -156,7 +162,7 @@
             queries[-1].kw_filtered += 1
             continue
         descr = descrs.split(';')[0]
-
+        
         #ATTEMPT BIN
         subj_bins = []
         for bin in bins: #if we are not binning, bins = [] so for loop not entered
@@ -174,7 +180,9 @@
         p_cov = float(cols[PCOV_COL])
         
         #SAVE RESULT
-        queries[-1].matches.append(BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins))
+        queries[-1].matches.append(
+            BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins)
+        )
         output_tab.write(line)            
 input_tab.close()
 output_tab.close()
@@ -190,9 +198,12 @@
             print('        %s' % str(query.matches[x]))
 '''
 
-from Cheetah.Template import Template
 namespace = {'queries': queries}
 html = Template(file=cheetah_tmpl, searchList=[namespace])
 out_html = open(output_html, 'w')
 out_html.write(str(html))
 out_html.close()
+
+
+if __name__ == '__main__':
+    main()
--- a/blast_report.xml	Thu Sep 12 00:56:20 2019 -0400
+++ b/blast_report.xml	Mon Mar 02 23:41:54 2020 +0000
@@ -1,54 +1,44 @@
-<tool id="blast_report" name="BLAST report" force_history_refresh="true" version="1.1.0">
-    <description>Report on BLAST results</description>
-    <command >
-        <![CDATA[  
-          '$__tool_directory__/blast_report.py'
-          '${in_tab}'
-          '${tmpl.fields.path}'
-          '${out_html}'
-          '${out_tab}'
-          -f '${filter_pident}:$filter_kws'
-          #if str($bins) == "None"
-            #pass
-          #else
-          -b "${bins.fields.path}"
-          #end if
-          #if $hsp_bool
-            -r
-          #end if
+<tool id="blast_report" name="BLAST report" version="0.1.0+galaxy0" >
+    <description>Produce an HTML table report of BLAST results</description>
+    <command detect_errors="error_code">
+        <![CDATA[
+          '${__tool_directory__}/blast_report.py' 
+            '${tabular_blast_report_input}'
+            '${report_template}'
+            '${out_html}'
+            '${out_tab}'
+            -f ${filter_pident}:$filter_kws
+            #if str($bins) != "None"
+              -b "${bins}"
+            #end if
+            $discard_redundant
         ]]>
     </command>
     <inputs>
-        <param name="in_tab" type="data" format="tabular" label="Tabular BLAST results (extended 26 columns)"/>
-        <param name="tmpl" type="select" optional="false" label="Report template">
+        <param name="tabular_blast_report_input" type="data" format="tabular" label="Tabular BLAST results (extended 26 columns)"/>
+        <param name="report_template" type="select" optional="false" label="Report template">
             <options from_data_table="blast_report_templates">
-                <column name="value" index="0"/>
-                <column name="name" index="1"/>
-		<column name="path" index="2"/>
+                <validator type="no_options" message="No BLAST report templates are available" />
             </options>
         </param>
         <param name="filter_pident" type="integer" min="90" max="100" value="97" label="Minimum percentage identity"/>
         <param name="filter_kws" type="text" size="50" label="Comma-separated list of description keyword filters" value="bovine,clone,environmental,swine,uncultivated,uncultured,unidentified"/>
         <param name="bins" type="select" label="Database bins" multiple="true" display="checkboxes">
-            <options from_data_table="blast_report_bins">
-                <column name="value" index="0"/>
-                <column name="name" index="1"/>
-		<column name="path" index="2"/>
+            <options from_data_table="blast_reference_bins">
+                <validator type="no_options" message="No BLAST reference bins available" />
             </options>
         </param>
-        <!--<repeat name="hist_bins" title="History database bins">
-            <param name="filter" type="data" format="csv" label="History database bin"/>
-        </repeat>-->
-        <param name="hsp_bool" type="boolean" label="Throw out redundant hits?"/> 
-        <param name="tab_bool" type="boolean" label="Output tabular file?"/>
+       	<param name="discard_redundant" type="boolean" truevalue="-r" falsevalue="" label="Throw out redundant hits?"/> 
+        <param name="output_tabular" type="boolean" label="Output tabular file?"/>
     </inputs>
     <outputs>
         <data name="out_html" format="html" label="$tool.name on data $in_tab.hid: report"/>
         <data name="out_tab" format="tabular" label="$tool.name on data $in_tab.hid: tabular results">
-            <filter> tab_bool </filter>
+            <filter> output_tabular </filter>
         </data>
     </outputs>
     <help>
+
 .. class:: infomark
 
 **What it does**
--- a/blast_report_bins.loc.sample	Thu Sep 12 00:56:20 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-# Expect three columns, tab separated, as follows:
-# - value (Galaxy records this in the Galaxy DB)
-# - name (Galaxy shows this in the UI)
-# - path (folder name containing the Kraken DB)
-#
-# e.g.
-# rdp<tab>RDP<tab>/path/to/bins/rdp.csv
--- a/blast_report_templates.loc.sample	Thu Sep 12 00:56:20 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-# Expect three columns, tab separated, as follows:
-# - value (Galaxy records this in the Galaxy DB)
-# - name (Galaxy shows this in the UI)
-# - path (folder name containing the Kraken DB)
-#
-# e.g.
-# template1<tab>Template 1<tab>/path/to/templates/template1.templ
--- a/templates/template1.tmpl	Thu Sep 12 00:56:20 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-#silent import time
-#set $display_m = 20
-#set $header = '<tr class="header"><th>Accession</th><th>Description</th><th>Score</th><th>% Coverage</th><th>% Identity</th></tr>'
-<html>
-	<head>
-		<style>
-			body {
-				font-size:0.75em;
-			}
-			table, tr {
-				width: 100%;
-			}
-			table {
-				border-collapse: collapse;
-				border: 1px solid black;
-			}
-			tr.header {
-				background-color: lightgrey;
-			}
-			th {
-				border: 1px solid black;
-			}
-			td {
-				border-left: 1px solid black;
-				border-right: 1px solid black;
-				border-bottom: 1px dashed grey;
-			}
-			td.descr {
-				font-size: 80%;
-			}
-			h3 {
-				page-break-before: always;
-				color: blue;
-			}
-			h3.first {
-				page-break-before: avoid;
-			}
-			span.super {
-				color: navy;
-				font-size: 75%;
-				vertical-align: top;
-			}
-		</style>
-		<script>
-			function toggle(id){
-				var element = document.getElementById(id)
-				console.log(id)
-				if (element.style.display == 'none') {
-					//console.log(element.tagName);
-					if (element.tagName == 'TBODY') element.style.display = 'table-row-group';
-					else if (element.tagName == 'TD') element.style.display = 'table-cell';
-					else element.style.display = 'block';
-				} else {
-					element.style.display = 'none';
-				}
-			}
-		</script>
-	</head>
-	<body>
-		#set $q = 0
-		#for $query in $queries
-			#set $bin_symbols = dict([($bin,$i) for $i, $bin in enumerate($query.bins, 1)])
-			#set $m = 0
-			<h3 id="${query.query_id}" #if $q == 0 then'class="first"' else '' #>$query.query_id</h3>
-			<br/>
-			<table id="${query.query_id}_matches">
-			#if len($query.matches) == 0:
-				<tr class="header"><th colspan="5">No matches to report</th></tr>
-			</table>
-			#else:
-			$header
-			#for $match in $query.matches:
-				#if $m == $display_m
-				<tbody id="${query.query_id}_extra" style="display:none">
-				#end if
-				<tr>
-					<td>$match.subject_acc <span class="super">#echo ', '.join(sorted([str($bin_symbols[$bin]) for $bin in $match.bins]))#</span></td>
-					<td class="descr">$match.subject_descr</td>
-					<td>$match.score</td>
-					<td>$match.p_cov</td>
-					<td>$match.p_ident</td>
-				</tr>
-			#set $m += 1
-			#end for
-			#if $m >= $display_m
-			</tbody>
-				<td id="${query.query_id}_show" align="center" colspan="6" >Displaying ${display_m}/$m matches. <a href="#${query.query_id}_extra" onclick="toggle('${query.query_id}_extra'); toggle('${query.query_id}_show'); toggle('${query.query_id}_hide');">Show the remaining results.</a></td>
-				<td id="${query.query_id}_hide" align="center" colspan="6" style="display:none"><a href="#${query.query_id}" onclick="toggle('${query.query_id}_extra'); toggle('${query.query_id}_show'); toggle('${query.query_id}_hide');">Hide the last #echo $m - $display_m # results.</a></td>
-			<tr>
-			</tr>
-			#end if
-			</table>
-			#if len($bin_symbols) > 0:
-			<p>#echo ', '.join(['<span class="super">%s</span> %s'%($bin_symbols[$bin],$bin) for $bin in $query.bins])#</p>
-			#end if
-			#end if
-			#if $query.pident_filtered > 0:
-			<p>$query.pident_filtered results filtered by % Identity.</p>
-			#end if
-			#if $query.kw_filtered > 0:
-			<p>$query.kw_filtered results filtered by description keywords: #echo ', '.join(list(["%s matches to '%s'" % (str($query.kw_filtered_breakdown[$kw]),$kw) for $kw in $query.kw_filtered_breakdown])) #.</p>
-			#end if
-			<p>Report produced on #echo time.strftime("%d/%m/%Y") #.</p>
-			<hr noshade size="1" color="blue">
-		#set $q += 1
-		#end for
-	</body>
-</html>
--- a/templates/template2.tmpl	Thu Sep 12 00:56:20 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-#silent import time
-#set $display_m = 20
-#set $header = '<tr class="header"><th>Accession</th><th>Description</th><th>Score</th><th>% Coverage</th><th>% Identity</th></tr>'
-<html>
-	<head>
-		<style>
-			body {
-				font-size:0.75em;
-			}
-			table, tr {
-				width: 100%;
-			}
-			table {
-				border-collapse: collapse;
-				border: 1px solid black;
-			}
-			tr.header {
-				background-color: lightgrey;
-			}
-			th {
-				border: 1px solid black;
-			}
-			td {
-				border-left: 1px solid black;
-				border-right: 1px solid black;
-				border-bottom: 1px dashed grey;
-			}
-			td.descr {
-				font-size: 80%;
-			}
-			h3 {
-				page-break-before: always;
-				color: blue;
-			}
-			h3.first {
-				page-break-before: avoid;
-			}
-			span.super {
-				color: navy;
-				font-size: 75%;
-				vertical-align: top;
-			}
-		</style>
-		<script>
-			function toggle(id){
-				var element = document.getElementById(id)
-				console.log(id)
-				if (element.style.display == 'none') {
-					//console.log(element.tagName);
-					if (element.tagName == 'TBODY') element.style.display = 'table-row-group';
-					else if (element.tagName == 'TD') element.style.display = 'table-cell';
-					else element.style.display = 'block';
-				} else {
-					element.style.display = 'none';
-				}
-			}
-		</script>
-	</head>
-	<body>
-		#set $q = 0
-		#for $query in $queries
-			#set $bin_symbols = dict([($bin,$i) for $i, $bin in enumerate($query.bins, 1)])
-			#set $m = 0
-			<h3 id="${query.query_id}" #if $q == 0 then'class="first"' else '' #>$query.query_id</h3>
-			<br/>
-			<table id="${query.query_id}_matches">
-			#set $num_of_euzby = -1
-			#if len($query.matches) == 0:
-				<tr class="header"><th colspan="5">No matches to report</th></tr>
-			</table>
-			#else:
-			$header
-			#try
-			#set $priority = $query.bins['Euzby']
-			#set $front = []
-			#for $i in reversed($priority)
-			#silent $front.append($query.matches.pop($i))
-			#end for
-			#set $num_of_euzby = len($front)
-			#silent $front.reverse()
-			#silent $front.extend($query.matches)
-			#set $query.matches = $front
-			#except
-			#pass
-			#end try
-			#for $match in $query.matches:
-				#if $m == $display_m
-				<tbody id="${query.query_id}_extra" style="display:none">
-				#end if
-				##if $m>0 and set($match.bins)!=set($query.matches[m-1].bins)
-				##put an empty line to separate Euzby records from other records
-				#if $m==$num_of_euzby and $m>0
-				<tr><td align="center" colspan="6">&nbsp;</td></tr>
-				#end if
-				<tr>
-					<td>$match.subject_acc <span class="super">#echo ', '.join(sorted([str($bin_symbols[$bin]) for $bin in $match.bins]))#</span></td>
-					<td class="descr">$match.subject_descr</td>
-					<td>$match.score</td>
-					<td>$match.p_cov</td>
-					<td>$match.p_ident</td>
-				</tr>
-			#set $m += 1
-			#end for
-			#if $m >= $display_m
-			</tbody>
-				<td id="${query.query_id}_show" align="center" colspan="6" >Displaying ${display_m}/$m matches. <a href="#${query.query_id}_extra" onclick="toggle('${query.query_id}_extra'); toggle('${query.query_id}_show'); toggle('${query.query_id}_hide');">Show the remaining results.</a></td>
-				<td id="${query.query_id}_hide" align="center" colspan="6" style="display:none"><a href="#${query.query_id}" onclick="toggle('${query.query_id}_extra'); toggle('${query.query_id}_show'); toggle('${query.query_id}_hide');">Hide the last #echo $m - $display_m # results.</a></td>
-			<tr>
-			</tr>
-			#end if
-			</table>
-			#if len($bin_symbols) > 0:
-			<p>#echo ', '.join(['<span class="super">%s</span> %s'%($bin_symbols[$bin],$bin) for $bin in $query.bins])#</p>
-			#end if
-			#end if
-			#if $query.pident_filtered > 0:
-			<p>$query.pident_filtered results filtered by % Identity.</p>
-			#end if
-			#if $query.kw_filtered > 0:
-			<p>$query.kw_filtered results filtered by description keywords: #echo ', '.join(list(["%s matches to '%s'" % (str($query.kw_filtered_breakdown[$kw]),$kw) for $kw in $query.kw_filtered_breakdown])) #.</p>
-			#end if
-			<p>Report produced on #echo time.strftime("%d/%m/%Y") #.</p>
-			<hr noshade size="1" color="blue">
-		#set $q += 1
-		#end for
-	</body>
-</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/blast_report_templates.loc.sample	Mon Mar 02 23:41:54 2020 +0000
@@ -0,0 +1,7 @@
+# Expect three columns, tab separated, as follows:
+# - value (Galaxy records this in the Galaxy DB; consider using a UUID, but any unique value will work)
+# - name (Galaxy shows this in the UI)
+# - path (Path to the BLAST report template, in Cheetah format)
+#
+# e.g.
+# f45ee89a-d456-469a-8aeb-54cdfea821ec<tab>Default BLAST Report Template<tab>/path/to/template.tmpl
--- a/tool_data_table_conf.xml.sample	Thu Sep 12 00:56:20 2019 -0400
+++ b/tool_data_table_conf.xml.sample	Mon Mar 02 23:41:54 2020 +0000
@@ -1,10 +1,6 @@
 <?xml version="1.0"?>
 <tables>
-    <!-- Locations of BLAST Report bins in the required format -->
-    <table name="blast_report_bins" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/blast_report_bins.loc" />
-    </table>
+    <!-- Locations of BLAST report templates in the required format -->
     <table name="blast_report_templates" comment_char="#">
         <columns>value, name, path</columns>
         <file path="tool-data/blast_report_templates.loc" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Mon Mar 02 23:41:54 2020 +0000
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of BLAST report templates in the required format -->
+    <table name="blast_report_templates" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/blast_report_templates.loc" />
+    </table>
+</tables>