Mercurial > repos > sanbi-uwc > data_manager_novocraft_index_builder
annotate data_manager/novocraft_index_builder.py @ 18:2b89ba1c0057 draft default tip
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit c8c46a5600bb091d701b8cf78f80a50c6b6812f4
author | sanbi-uwc |
---|---|
date | Tue, 21 Jun 2016 10:12:02 -0400 |
parents | db293ee25be5 |
children |
rev | line source |
---|---|
0
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
2 # Z. Mashologu (SANBI-UWC) |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
3 # import dict as dict |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
4 from __future__ import print_function |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
5 import os |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
6 import sys |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
7 import logging |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
8 import argparse |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
9 import shlex |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
10 from subprocess import check_call, CalledProcessError |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
11 |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
12 log = logging.getLogger(__name__) |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
13 |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
14 from json import loads, dumps |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
15 |
6
8dd4b25716d1
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 05004076adb510abc08f6c1e28ba22b2c1a29534
sanbi-uwc
parents:
5
diff
changeset
|
16 DEFAULT_DATA_TABLE_NAME = "novocraft_index" |
4
c276a826fc4b
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 1e4e16c747ca6ef261d3307f47a09ff1d49756a1
sanbi-uwc
parents:
0
diff
changeset
|
17 |
18
2b89ba1c0057
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit c8c46a5600bb091d701b8cf78f80a50c6b6812f4
sanbi-uwc
parents:
17
diff
changeset
|
def get_dbkey_id_name(params):
    """Extract the sequence id, name and description from the tool parameters.

    Reads the 'sequence_id', 'sequence_name' and 'sequence_desc' entries of
    params['param_dict'].  When the description is empty (falsy) the sequence
    name is returned in its place.

    Returns a (sequence_id, sequence_name, sequence_desc) tuple.
    """
    # TODO: ensure sequence_id is unique and does not already appear in location file
    fields = params['param_dict']
    seq_id = fields['sequence_id']
    seq_name = fields['sequence_name']
    # Fall back to the name when no description was supplied.
    seq_desc = fields['sequence_desc'] or seq_name
    return seq_id, seq_name, seq_desc
0
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
26 |
17
db293ee25be5
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 0745f158bbb0d5d190cc4503157d2d3092ab8cc5
sanbi-uwc
parents:
14
diff
changeset
|
def _make_novocraft_index(data_manager_dict, fasta_filename, target_directory, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME):
    """Run novoindex on a FASTA file and register the result in a data table.

    The index is written to <target_directory>/<sequence_id>.nix.  On success
    an entry with the keys 'value', 'dbkey', 'name' and 'path' is appended to
    data_manager_dict['data_tables'][data_table_name].

    Parameters:
        data_manager_dict: dict that collects the data table entries; it is
            modified in place.
        fasta_filename: path of the FASTA file handed to novoindex.
        target_directory: directory that receives the .nix file; created if
            it does not exist.
        sequence_id: used for the index file name and for the 'value' and
            'dbkey' fields of the entry.
        sequence_name: used for the 'name' field of the entry.
        data_table_name: name of the data table to append to.

    Exits with status 1 (after reporting on stderr) when target_directory
    exists but is not a directory, or when novoindex returns a non-zero exit
    status, so that a missing index is never registered.
    """
    if os.path.exists(target_directory) and not os.path.isdir(target_directory):
        print("Output directory path already exists but is not a directory: {}".format(target_directory),
              file=sys.stderr)
        # novoindex cannot write below a non-directory; stop instead of
        # carrying on and registering an index that will never exist.
        sys.exit(1)
    elif not os.path.exists(target_directory):
        os.mkdir(target_directory)

    nix_file = sequence_id + ".nix"
    index_filename = os.path.join(target_directory, nix_file)
    # Pass the arguments as a list: formatting them into a string and
    # re-splitting it breaks on paths that contain whitespace or quotes.
    cmdline = ['novoindex', index_filename, fasta_filename]

    try:
        check_call(cmdline)
    except CalledProcessError:
        print("Error building Novocraft index", file=sys.stderr)
        # Do not fall through and advertise an index that was not built.
        sys.exit(1)

    data_table_entry = dict(value=sequence_id, dbkey=sequence_id, name=sequence_name, path=index_filename)
    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
0
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
46 |
4
c276a826fc4b
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 1e4e16c747ca6ef261d3307f47a09ff1d49756a1
sanbi-uwc
parents:
0
diff
changeset
|
47 def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): |
c276a826fc4b
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 1e4e16c747ca6ef261d3307f47a09ff1d49756a1
sanbi-uwc
parents:
0
diff
changeset
|
48 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) |
c276a826fc4b
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 1e4e16c747ca6ef261d3307f47a09ff1d49756a1
sanbi-uwc
parents:
0
diff
changeset
|
49 data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) |
c276a826fc4b
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 1e4e16c747ca6ef261d3307f47a09ff1d49756a1
sanbi-uwc
parents:
0
diff
changeset
|
50 data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) |
c276a826fc4b
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 1e4e16c747ca6ef261d3307f47a09ff1d49756a1
sanbi-uwc
parents:
0
diff
changeset
|
51 return data_manager_dict |
0
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
52 |
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
def main():
    """Build a Novocraft index and write the JSON describing it.

    Command line:
        output_filename     JSON file that is first read for the parameters
                            and then overwritten with the resulting data
                            table description.
        --input_filename    FASTA file to index (required).
        --data_table_name   data table to add the entry to; defaults to
                            DEFAULT_DATA_TABLE_NAME.

    The index directory is taken from
    params['output_data'][0]['extra_files_path'] of the parameter JSON.
    """
    parser = argparse.ArgumentParser(description="Generate Novo-craft genome index and JSON describing this")
    parser.add_argument('output_filename')
    # Without an input file the literal string "None" would be handed to
    # novoindex, so reject the call up front.
    parser.add_argument('--input_filename', required=True)
    parser.add_argument('--data_table_name', default=DEFAULT_DATA_TABLE_NAME)
    args = parser.parse_args()

    filename = args.output_filename

    # Context managers make sure the handles are closed before the same
    # path is reopened for writing below.
    with open(filename) as params_file:
        params = loads(params_file.read())
    target_directory = params['output_data'][0]['extra_files_path']
    # os.makedirs raises if the directory is already present; only create
    # it when needed.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    data_manager_dict = {}

    # The description is returned as well but is not used here.
    sequence_id, sequence_name, _sequence_desc = get_dbkey_id_name(params)

    # Make novocraft index
    _make_novocraft_index(data_manager_dict, args.input_filename, target_directory, sequence_id, sequence_name, args.data_table_name or DEFAULT_DATA_TABLE_NAME)

    # dumps() returns text, so the file must be opened in text mode; writing
    # it to a 'wb' handle raises TypeError on Python 3.
    with open(filename, 'w') as output_file:
        output_file.write(dumps(data_manager_dict))
0
6cddc1a6e282
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
sanbi-uwc
parents:
diff
changeset
|
73 |
18
2b89ba1c0057
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit c8c46a5600bb091d701b8cf78f80a50c6b6812f4
sanbi-uwc
parents:
17
diff
changeset
|
# Run the data manager only when executed as a script, not on import.
if __name__ == "__main__":
    main()