view blast2html.py @ 2:b39e07d89fda

Update help
author Jan Kanis <jan.code@jankanis.nl>
date Thu, 23 Jul 2015 11:24:47 +0200
parents 9272a08cb8fe
children
line wrap: on
line source

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# This program works with both Python 2 and Python 3 (tested against Python 2.6).

# Copyright The Hyve B.V. 2014-2015
# License: GPL version 3 or (at your option) any higher version

from __future__ import unicode_literals, division, print_function
import sys
import argparse
import six, codecs, io
from os import path

# print("Hello World")
# print("The tool is working!")


def main():
    """Parse the command line and write the (placeholder) HTML page.

    The input file may be given either positionally or with -i/--input;
    the two forms are mutually exclusive and exactly one is required.
    The output goes to the file given with -o/--output, or to standard
    output when that option is absent.

    At present the page written is a fixed "Hello World" document; the
    input and the template are opened (so argparse can report missing
    files) but their contents are not read.

    Exits through ``parser.error`` (SystemExit) on invalid arguments.
    """
    default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')

    parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",
                                     usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE] [--dbname DBNAME]".format(sys.argv[0]))
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
                             help='The input Blast XML file, same as -i/--input')
    input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'),
                             help='The input Blast XML file')
    parser.add_argument('-o', '--output', type=argparse.FileType(mode='w'), default=sys.stdout,
                        help='The output html file')
    # We just want the file name here, so jinja can open the file
    # itself. But it is easier to just use a FileType so argparse can
    # handle the errors. This introduces a small race condition when
    # jinja later tries to re-open the template file, but we don't
    # care too much.
    parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
                        help='The template file to use. Defaults to blast2html.html.jinja')

    parser.add_argument('--dbname', type=str, default='Gene Bank',
                        help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'")
    parser.add_argument('--genelink-template', metavar='URL_TEMPLATE',
                        default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
                        help="""A link template to link hits to a gene bank webpage. The template string is a 
                        Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, 
                        {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be 
                        replaced by the Nth element of the id or defline, where '|' is the field separator. 
                        
                        The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
                        which is a link to the NCBI nucleotide database.""")

    parser.add_argument('--db-config-dir',
                        help="""The directory where databases are configured in blastdb*.loc files. These files
                        are consulted for creating a gene bank link. The files should conform to the format that
                        Galaxy's BLAST expect, i.e. tab-separated tables (with lines starting with '#' ignored),
                        with two extra fields, for a total of five fields per line instead of three.. The third 
                        field of each line should be a database path as used by BLAST. The fourth field is the 
                        human readable database name, and the fifth a template link to the gene bank conforming
                        to the syntax for the --genelink-template option. Entries in these config files override 
                        links specified using --genelink-template and --dbname.""")

    args = parser.parse_args()
    if args.input is None:
        args.input = args.positional_arg
    if args.input is None:
        parser.error('no input specified')

    # Only the template's location is kept; the handle argparse opened
    # was just a way to get its error reporting. The directory and file
    # name are not used yet by the placeholder output below.
    templatedir, templatename = path.split(args.template.name)
    args.template.close()
    if not templatedir:
        templatedir = '.'

    # The placeholder output never reads the input, so release the
    # handle argparse opened instead of leaking it. Standard input
    # (selected with '-') is left open for the caller.
    if args.input is not sys.stdin:
        args.input.close()

    # Same check as six.PY2, done through sys so that this function
    # needs nothing outside the standard library.
    if sys.version_info[0] == 2:
        # The argparse.FileType wrapper doesn't support an encoding
        # argument, so for python 2 we need to wrap or reopen the
        # output. The input files are already read as utf-8 by the
        # respective libraries.
        #
        # One option is using codecs, but the codecs' writelines()
        # method doesn't support streaming but collects all output and
        # writes at once (see Python issues #5445 and #21910). On the
        # other hand the io module is slower (though not
        # significantly). The io module is what is used here.
        if args.output is sys.stdout:
            # Re-opening by name would create a file called '<stdout>',
            # so wrap the existing descriptor instead; closefd=False
            # keeps the real stdout open when the wrapper is closed.
            sys.stdout.flush()
            args.output = io.open(sys.stdout.fileno(), 'w', encoding='utf-8', closefd=False)
        else:
            output_name = args.output.name
            args.output.close()
            args.output = io.open(output_name, 'w', encoding='utf-8')

    try:
        args.output.write("<html><title>Hello World!</title><body><h1>Hello World!</h1><p>It works!</p></body></html>\n")
    finally:
        if args.output is sys.stdout:
            # Never close the process-wide stdout; callers may still
            # want to print after main() returns.
            args.output.flush()
        else:
            args.output.close()

# Script entry point: run the converter only when this file is executed
# directly, not when it is imported as a module.
if "__main__" == __name__:
    main()