changeset 1:9272a08cb8fe

update v2
author Jan Kanis <jan.code@jankanis.nl>
date Thu, 23 Jul 2015 09:46:31 +0200
parents d448fe1df4a1
children b39e07d89fda
files blast2html.py blast2html.xml
diffstat 2 files changed, 89 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/blast2html.py	Wed Jul 22 17:13:06 2015 +0200
+++ b/blast2html.py	Thu Jul 23 09:46:31 2015 +0200
@@ -7,8 +7,92 @@
 # License: GPL version 3 or (at your option) any higher version
 
 from __future__ import unicode_literals, division, print_function
+import sys
+import argparse
+import six, codecs, io
+from os import path
 
-print("Hello World")
-print("The tool is working!")
+# print("Hello World")
+# print("The tool is working!")
 
 
+def main():
+    """Parse the command line and write a placeholder HTML page to the output.
+
+    Invalid or missing arguments end the process via argparse's error exit.
+    """
+    default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')
+
+    parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",
+                                     usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE] [--dbname DBNAME]".format(sys.argv[0]))
+    input_group = parser.add_mutually_exclusive_group(required=True)
+    input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
+                             help='The input Blast XML file, same as -i/--input')
+    input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'),
+                             help='The input Blast XML file')
+    parser.add_argument('-o', '--output', type=argparse.FileType(mode='w'), default=sys.stdout,
+                        help='The output html file')
+    # Only the template's file name is needed, since jinja opens the file
+    # itself. A FileType is used anyway so argparse reports unreadable
+    # files; the small race before jinja re-opens it is accepted.
+    parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
+                        help='The template file to use. Defaults to blast2html.html.jinja')
+
+    parser.add_argument('--dbname', type=str, default='Gene Bank',
+                        help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'")
+    parser.add_argument('--genelink-template', metavar='URL_TEMPLATE',
+                        default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
+                        help="""A link template to link hits to a gene bank webpage. The template string is a 
+                        Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, 
+                        {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be 
+                        replaced by the Nth element of the id or defline, where '|' is the field separator. 
+                        
+                        The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
+                        which is a link to the NCBI nucleotide database.""")
+
+    parser.add_argument('--db-config-dir',
+                        help="""The directory where databases are configured in blastdb*.loc files. These files
+                        are consulted for creating a gene bank link. The files should conform to the format that
+                        Galaxy's BLAST expect, i.e. tab-separated tables (with lines starting with '#' ignored),
+                        with two extra fields, for a total of five fields per line instead of three.. The third 
+                        field of each line should be a database path as used by BLAST. The fourth field is the 
+                        human readable database name, and the fifth a template link to the gene bank conforming
+                        to the syntax for the --genelink-template option. Entries in these config files override 
+                        links specified using --genelink-template and --dbname.""")
+
+    args = parser.parse_args()
+    if args.input is None:
+        args.input = args.positional_arg
+    if args.input is None:
+        parser.error('no input specified')
+
+    if six.PY2:
+        # The argparse.FileType wrapper doesn't support an encoding
+        # argument, so for python 2 we need to wrap or reopen the
+        # output. The input files are already read as utf-8 by the
+        # respective libraries.
+        #
+        # One option is using codecs, but the codecs' writelines()
+        # method doesn't support streaming but collects all output and
+        # writes at once (see Python issues #5445 and #21910). On the
+        # other hand the io module is slower (though not
+        # significantly).
+        if args.output is sys.stdout:
+            # Wrap the existing descriptor: closing stdout and reopening
+            # by name would create a file literally called '<stdout>'.
+            args.output = io.open(sys.stdout.fileno(), 'w', encoding='utf-8', closefd=False)
+        else:
+            args.output.close()
+            args.output = io.open(args.output.name, 'w', encoding='utf-8')
+
+    templatedir, templatename = path.split(args.template.name)
+    args.template.close()
+    if not templatedir:
+        templatedir = '.'
+
+    args.output.write("<html><title>Hello World!</title><body><h1>Hello World!</h1><p>It works!</p></body></html>\n")
+
+    args.output.close()
+
+if __name__ == "__main__":  # executed as a script, not imported
+    main()
--- a/blast2html.xml	Wed Jul 22 17:13:06 2015 +0200
+++ b/blast2html.xml	Thu Jul 23 09:46:31 2015 +0200
@@ -1,4 +1,4 @@
-<tool id="blast2html_test" name="blast2html_test" version="0.0.17">
+<tool id="blast2html_test" name="blast2html_test" version="0.0.18">
     
     <description>Convert BLAST XML to HTML</description>
     
@@ -26,6 +26,8 @@
 Blast2html: Blast XML to HTML conversion tool
 =============================================
 
+This is the help section in blast2html.xml
+
 This tool accepts Blast XML as input, and creates an HTML page with a human readable version of the result. The output includes graphical displays of where a sequence matches a target and metrics on the quality of the match.
 
 Example output: