Mercurial > repos > sanbi-uwc > data_manager_fetch_artic_primers
annotate data_manager/fetch_artic_primers.py @ 3:6114f017f819 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
author | sanbi-uwc |
---|---|
date | Fri, 17 Apr 2020 11:42:59 +0000 |
parents | 59b3556d9ec8 |
children |
rev | line source |
---|---|
0
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
2 |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
3 from __future__ import print_function, division |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
4 |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
5 import argparse |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
6 import json |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
7 import os |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
8 import os.path |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
9 import sys |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
10 |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
11 import requests |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
12 |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
DATA_TABLE_NAME = 'artic_primers'


def fetch_artic_primers(output_filename, output_directory, primers):
    """Download selected ARTIC nCoV-2019 primer BED files and record them.

    Each requested primer scheme is fetched over HTTP, written to
    ``<output_directory>/<name>.bed`` and appended as a row (``value``,
    ``path``, ``description``) to the ``artic_primers`` data table. The
    resulting dictionary is printed and then written back, as JSON, over
    ``output_filename``.

    Args:
        output_filename: Path of a JSON file. It is read first (any existing
            ``data_tables`` entry is kept) and then overwritten with the
            updated data manager dictionary.
        output_directory: Directory the BED files are written to; created if
            it does not exist.
        primers: Names of the primer sets to fetch (``ARTICv1``, ``ARTICv2``,
            ``ARTICv3``). Either a list of names or a single comma separated
            string; names that are not known are ignored.

    Raises:
        SystemExit: If a download returns a status other than 200. The exit
            code is the HTTP status code, as before.
    """
    primer_sets = {
        'ARTICv1': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed',
        'ARTICv2': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed',
        'ARTICv3': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed',
    }

    # A plain string would turn the membership test below into a substring
    # search (e.g. 'ARTICv1' in 'ARTICv10'), so split it into exact names.
    if isinstance(primers, str):
        primers = primers.split(',')

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    # Keep whatever data tables the input file already describes.
    with open(output_filename) as config_file:
        data_tables = json.load(config_file).get('data_tables', {})
    data_manager_dict = {'data_tables': data_tables}
    table_rows = data_tables.setdefault(DATA_TABLE_NAME, [])

    for name, url in primer_sets.items():
        if name not in primers:
            continue
        response = requests.get(url)
        if response.status_code != 200:
            print('Error: download of', url, 'failed with code', response.status_code, file=sys.stderr)
            # sys.exit rather than the site-provided exit() builtin, which is
            # not guaranteed to exist (e.g. when run with python -S).
            sys.exit(response.status_code)
        bed_output_filename = os.path.join(output_directory, name + '.bed')
        with open(bed_output_filename, 'w') as bed_file:
            bed_file.write(response.text)
        # 'ARTICv3' -> 'ARTIC v3 primer set'
        description = name[:-2] + ' ' + name[-2:] + ' primer set'
        table_rows.append(dict(value=name, path=bed_output_filename, description=description))

    print(data_manager_dict)
    with open(output_filename, 'w') as output_file:
        json.dump(data_manager_dict, output_file)
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
43 |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
class SplitArgs(argparse.Action):
    """argparse action that stores a comma separated value as a list."""

    def __call__(self, parser, namespace, values, option_string=None):
        # Split the raw option string on commas and store the pieces under
        # this option's destination attribute.
        pieces = values.split(',')
        setattr(namespace, self.dest, pieces)
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
47 |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
48 |
c401f63b3d12
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command line entry point: read the Galaxy data manager JSON file, work
    # out where to put the BED files, then fetch the requested primer sets.
    parser = argparse.ArgumentParser(description='Fetch ARTIC primer files for Galaxy use')
    parser.add_argument('--output_directory', default='tmp', help='Directory to write output to')
    # Required: the file is opened unconditionally below, so omitting it used
    # to fail with a TypeError from open(None) instead of a usage message.
    parser.add_argument('--galaxy_datamanager_filename', required=True, help='Galaxy JSON format file describing data manager inputs')
    # argparse does not run the action on defaults, so the default has to be
    # a list already (a string default would be substring-matched downstream).
    parser.add_argument('--primers', default=['ARTICv1', 'ARTICv2', 'ARTICv3'], action=SplitArgs, help='Comma separated list of primers to fetch')
    args = parser.parse_args()

    with open(args.galaxy_datamanager_filename) as config_file:
        config = json.load(config_file)
    # Fall back to the command line directory when the JSON file has no
    # (or an empty) 'output_data' list or no 'extra_files_path' entry.
    output_data = config.get('output_data') or [{}]
    output_directory = output_data[0].get('extra_files_path', None)
    if output_directory is None:
        output_directory = args.output_directory
    fetch_artic_primers(args.galaxy_datamanager_filename, output_directory, args.primers)