0
|
1 #!/usr/bin/env python3
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 # Actually this program works with both python 2 and 3, tested against python 2.6
|
|
5
|
|
6 # Copyright The Hyve B.V. 2014-2015
|
|
7 # License: GPL version 3 or (at your option) any higher version
|
|
8
|
|
9 from __future__ import unicode_literals, division, print_function
|
1
|
10 import sys
|
|
11 import argparse
|
|
12 import six, codecs, io
|
|
13 from os import path
|
0
|
14
|
1
|
15 # print("Hello World")
|
|
16 # print("The tool is working!")
|
0
|
17
|
|
18
|
1
|
def _build_parser():
    """Create the argparse parser describing the command-line interface.

    The input may be given either positionally or via -i/--input; the two
    forms are mutually exclusive and exactly one of them is required.
    """
    default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')

    parser = argparse.ArgumentParser(
        description="Convert a BLAST XML result into a nicely readable html page",
        usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE] [--dbname DBNAME]".format(sys.argv[0]))
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
                             help='The input Blast XML file, same as -i/--input')
    input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'),
                             help='The input Blast XML file')
    parser.add_argument('-o', '--output', type=argparse.FileType(mode='w'), default=sys.stdout,
                        help='The output html file')
    # We just want the file name here, so jinja can open the file
    # itself. But it is easier to just use a FileType so argparse can
    # handle the errors. This introduces a small race condition when
    # jinja later tries to re-open the template file, but we don't
    # care too much.
    parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
                        help='The template file to use. Defaults to blast2html.html.jinja')

    parser.add_argument('--dbname', type=str, default='Gene Bank',
                        help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'")
    parser.add_argument('--genelink-template', metavar='URL_TEMPLATE',
                        default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
                        help="""A link template to link hits to a gene bank webpage. The template string is a
                        Python format string. It can contain the following replacement elements: {id[N]}, {fullid},
                        {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be
                        replaced by the Nth element of the id or defline, where '|' is the field separator.

                        The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
                        which is a link to the NCBI nucleotide database.""")

    parser.add_argument('--db-config-dir',
                        help="""The directory where databases are configured in blastdb*.loc files. These files
                        are consulted for creating a gene bank link. The files should conform to the format that
                        Galaxy's BLAST expect, i.e. tab-separated tables (with lines starting with '#' ignored),
                        with two extra fields, for a total of five fields per line instead of three.. The third
                        field of each line should be a database path as used by BLAST. The fourth field is the
                        human readable database name, and the fifth a template link to the gene bank conforming
                        to the syntax for the --genelink-template option. Entries in these config files override
                        links specified using --genelink-template and --dbname.""")
    return parser


def main():
    """Parse the command line and write a placeholder HTML page to the output.

    Reads its arguments from sys.argv. argparse reports invalid or missing
    arguments by printing a message and raising SystemExit. The output goes
    to the -o/--output file, or to standard output when none is given.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # -i/--input and the positional INPUT are two spellings of the same option.
    if args.input is None:
        args.input = args.positional_arg
    if args.input is None:
        parser.error('no input specified')

    # argparse.FileType hands back sys.stdout itself both for the default and
    # for '-o -'. That stream must never be closed or reopened by name.
    to_stdout = args.output is sys.stdout

    # Same test as six.PY2, spelled with the standard library only.
    if sys.version_info[0] == 2:
        # The argparse.FileType wrapper doesn't support an encoding
        # argument, so for python 2 we need to wrap or reopen the
        # output. The input files are already read as utf-8 by the
        # respective libraries.
        #
        # codecs.getwriter() would be an alternative, but its writelines()
        # collects all output and writes it at once instead of streaming
        # (see Python issues #5445 and #21910), so io is used even though
        # it is slightly slower.
        if to_stdout:
            # Wrap the existing descriptor instead of reopening by name:
            # sys.stdout.name is '<stdout>', which would create a regular
            # file with that name. closefd=False keeps stdout itself open.
            args.output = io.open(sys.stdout.fileno(), 'w', encoding='utf-8', closefd=False)
        else:
            args.output.close()
            args.output = io.open(args.output.name, 'w', encoding='utf-8')

    # Only the template's location is needed; the handle argparse opened was
    # just a way to validate the path. Nothing below uses templatedir or
    # templatename yet.
    templatedir, templatename = path.split(args.template.name)
    args.template.close()
    if not templatedir:
        templatedir = '.'

    try:
        args.output.write("<html><title>Hello World!</title><body><h1>Hello World!</h1><p>It works!</p></body></html>\n")
    finally:
        if to_stdout:
            # Leave standard output usable for the rest of the process.
            args.output.flush()
        else:
            args.output.close()
        # Release the input handle argparse opened, unless it is stdin ('-').
        if args.input is not sys.stdin:
            args.input.close()
|
|
96
|
|
# Script entry point: run the converter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|