Mercurial > repos > iuc > data_manager_homer_preparse
comparison data_manager/homer_genome_preparse.py @ 0:5d2e5fddec81 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_homer_preparse commit e49d856e0181edc6120220a1b819cba2466a4289"
author | iuc |
---|---|
date | Sun, 08 Aug 2021 10:57:48 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5d2e5fddec81 |
---|---|
1 #!/usr/bin/env python | |
2 # Dan Blankenberg for bowtie2 | |
3 # Modified by Lucille Delisle for homer | |
4 from __future__ import print_function | |
5 | |
6 import json | |
7 import optparse | |
8 import os | |
9 import subprocess | |
10 import sys | |
11 | |
# Data table that receives the new entry when no --data_table_name option
# is supplied on the command line.
DEFAULT_DATA_TABLE_NAME = "homer_preparse"
13 | |
14 | |
def get_id_name(params, dbkey, fasta_description=None):
    """Resolve the identifier and display name to use for the genome entry.

    Both values are read from ``params['param_dict']``:

    * an empty ``sequence_id`` falls back to ``dbkey``;
    * an empty ``sequence_name`` falls back to ``fasta_description`` and,
      if that is empty as well, to ``dbkey``.

    Returns:
        A ``(sequence_id, sequence_name)`` tuple.

    Raises:
        KeyError: if ``param_dict``, ``sequence_id`` or ``sequence_name`` is
            missing from ``params``.
    """
    # TODO: ensure sequence_id is unique and does not already appear in location file
    user_values = params['param_dict']
    sequence_id = user_values['sequence_id'] or dbkey
    sequence_name = user_values['sequence_name'] or fasta_description or dbkey
    return sequence_id, sequence_name
27 | |
28 | |
def homer_preparse(data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id,
                   sequence_name, size, mask, version,
                   data_table_name=DEFAULT_DATA_TABLE_NAME):
    """Run ``preparseGenome.pl`` and record the result as a data table entry.

    The command is executed with ``target_directory`` as working directory
    and as ``-preparsedDir``; ``-mask`` is appended when ``mask`` is true.
    If the command returns a non-zero status, an error is printed to stderr
    and the process exits with that status.

    On success one entry is appended to ``data_table_name`` inside
    ``data_manager_dict``. Its ``value`` and ``path`` are
    ``<sequence_id>[r]_<size>`` (the ``r`` marks a masked build) and its
    ``name`` is ``<sequence_name>[ masked] (<size>bp)``.

    ``params`` is accepted for interface compatibility but is not used here.
    """
    size_text = str(size)
    command = ['preparseGenome.pl', fasta_filename, '-size', size_text, '-preparsedDir', target_directory]
    if mask:
        command.append('-mask')

    exit_status = subprocess.Popen(args=command, shell=False, cwd=target_directory).wait()
    if exit_status:
        print("Error preparsing genome.", file=sys.stderr)
        sys.exit(exit_status)

    if mask:
        id_suffix, name_suffix = 'r', ' masked'
    else:
        id_suffix, name_suffix = '', ''
    # The same string serves as the entry's unique value and as its path.
    entry_id = sequence_id + id_suffix + '_' + size_text

    data_table_entry = {
        'value': entry_id,
        'dbkey': dbkey,
        'mask': str(mask),
        'size': size_text,
        'name': sequence_name + name_suffix + ' (' + size_text + 'bp)',
        'path': entry_id,
        'path_fasta': fasta_filename,
        'version': version,
    }
    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
48 | |
49 | |
50 def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): | |
51 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) | |
52 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, []) | |
53 data_manager_dict['data_tables'][data_table_name].append(data_table_entry) | |
54 return data_manager_dict | |
55 | |
56 | |
def main():
    """Command-line entry point: preparse a genome and write the result JSON.

    The single positional argument is the path to a JSON parameter file.
    It is read to obtain ``output_data[0].extra_files_path`` (the directory
    the preparsed files are written to) and the ``param_dict`` values, and
    is then overwritten with the resulting data-table JSON.

    Raises:
        Exception: if ``--fasta_dbkey`` is missing, empty or ``'?'``.
        SystemExit: via ``parser.error`` (status 2) when the positional JSON
            path is missing, or from ``homer_preparse`` when the external
            command fails.
    """
    parser = optparse.OptionParser()
    parser.add_option('-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", default=None, help='fasta_filename')
    parser.add_option('-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", default=None, help='fasta_dbkey')
    parser.add_option('-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description')
    parser.add_option('-s', '--size', dest='size', action='store', type="int", default=200, help='fragment size')
    parser.add_option('-m', '--mask', dest='mask', action='store_true', default=False, help='mask the lower case bases (repeats)')
    parser.add_option('-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name')
    parser.add_option('--index_version', dest='index_version', action='store', type="string", default=None, help='index version')
    (options, args) = parser.parse_args()

    # The JSON parameter file is required; report a usage error rather than
    # failing with a bare IndexError on args[0].
    if not args:
        parser.error('the path to the JSON parameter file is required')
    filename = args[0]

    # Validate the dbkey before touching the filesystem so that a bad
    # invocation does not leave an empty output directory behind.
    dbkey = options.fasta_dbkey
    if dbkey in [None, '', '?']:
        raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % (dbkey))

    with open(filename) as fh:
        params = json.load(fh)
    target_directory = params['output_data'][0]['extra_files_path']
    # Create missing parent directories and tolerate a directory that is
    # already present (written without exist_ok to stay Python 2 compatible).
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    data_manager_dict = {}

    sequence_id, sequence_name = get_id_name(params, dbkey=dbkey, fasta_description=options.fasta_description)

    # preparse the genome
    homer_preparse(data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id,
                   sequence_name, options.size, options.mask, options.index_version,
                   data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME)

    # save info to json file
    with open(filename, 'w') as fh:
        json.dump(data_manager_dict, fh, sort_keys=True)
91 | |
92 | |
# Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()