Mercurial > repos > sanbi-uwc > data_manager_fetch_refseq
annotate data_manager/fetch_artic_primers.py @ 19:d118e256faca draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
author | sanbi-uwc |
---|---|
date | Thu, 16 Apr 2020 10:19:57 +0000 |
parents | |
children |
rev | line source |
---|---|
19
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
2 |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
3 from __future__ import print_function, division |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
4 |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
5 import argparse |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
6 import json |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
7 import os |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
8 import os.path |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
9 import sys |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
10 |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
11 import requests |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
12 |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
DATA_TABLE_NAME = 'artic_primers'


def fetch_artic_primers(output_filename, output_directory, primers):
    """Download ARTIC nCoV-2019 primer BED files and record them in a data manager JSON file.

    The JSON document at ``output_filename`` is read, one entry per fetched
    primer set is appended to its ``data_tables[DATA_TABLE_NAME]`` list, and
    the document is written back to the same path.

    Args:
        output_filename: Path of the Galaxy data manager JSON file; it must
            already exist and is rewritten in place.
        output_directory: Directory that receives the ``<name>.bed`` files;
            created when missing.
        primers: Names of the primer sets to fetch, either as an iterable of
            names or as a comma separated string. ``None`` fetches every
            known set. Unknown names are reported on stderr and skipped.

    Raises:
        SystemExit: with status 1 when a download does not return HTTP 200.
    """
    primer_sets = {
        'ARTICv1': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed',
        'ARTICv2': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed',
        'ARTICv3': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed',
    }

    # Resolve the selection up front so a bad name is reported before any
    # download or file write happens.
    if primers is None:
        requested = sorted(primer_sets)
    else:
        if isinstance(primers, str):
            # argparse hands over an unsplit string when a default is used.
            primers = primers.split(',')
        requested = []
        for candidate in primers:
            candidate = candidate.strip()
            if not candidate or candidate in requested:
                continue
            if candidate not in primer_sets:
                print('Warning: unknown primer set', repr(candidate),
                      'skipped; known sets are', ', '.join(sorted(primer_sets)),
                      file=sys.stderr)
                continue
            requested.append(candidate)

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    with open(output_filename) as handle:
        data_manager_dict = json.load(handle)
    data_tables = data_manager_dict.setdefault('data_tables', {})
    table = data_tables.setdefault(DATA_TABLE_NAME, [])

    for name in requested:
        url = primer_sets[name]
        # A timeout keeps a stalled connection from hanging the job forever.
        response = requests.get(url, timeout=60)
        if response.status_code != 200:
            print('Error: download of', url, 'failed with code', response.status_code, file=sys.stderr)
            # HTTP status codes do not fit in a process exit status (0-255),
            # so signal failure with a plain non-zero code instead.
            sys.exit(1)
        bed_output_filename = os.path.join(output_directory, name + '.bed')
        with open(bed_output_filename, 'w') as bed_file:
            bed_file.write(response.text)
        # e.g. 'ARTICv3' -> 'ARTIC v3 primer set'
        description = name[:-2] + ' ' + name[-2:] + ' primer set'
        table.append(dict(value=name, path=bed_output_filename, description=description))

    print(data_manager_dict)
    with open(output_filename, 'w') as handle:
        json.dump(data_manager_dict, handle)
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
40 |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
class SplitArgs(argparse.Action):
    """argparse action that stores a comma separated option value as a list.

    Items are stripped of surrounding whitespace and empty items are dropped,
    so ``"a, b,"`` is stored as ``['a', 'b']``. A string ``default`` is split
    the same way at registration time, because argparse does not run actions
    on defaults; without this the destination would hold a ``str`` when the
    option is omitted and a ``list`` when it is given.
    """

    def __init__(self, option_strings, dest, default=None, **kwargs):
        if isinstance(default, str):
            default = self._split(default)
        super(SplitArgs, self).__init__(option_strings, dest, default=default, **kwargs)

    @staticmethod
    def _split(text):
        """Return the non-empty, whitespace-stripped items of a comma separated string."""
        return [item.strip() for item in text.split(',') if item.strip()]

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, self._split(values))
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
44 |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
45 |
d118e256faca
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command line entry point: parse the options and fetch the primer files.
    parser = argparse.ArgumentParser(description='Fetch ARTIC primer files for Galaxy use')
    parser.add_argument('--output_directory', default='tmp', help='Directory to write output to')
    # Required: the file is opened unconditionally, so a missing value would
    # otherwise surface as an opaque TypeError from open(None).
    parser.add_argument('--galaxy_datamanager_filename', required=True, help='Galaxy JSON format file describing data manager inputs')
    # The default is a list because argparse does not pass defaults through
    # the action; this keeps args.primers a list in both cases.
    parser.add_argument('--primers', default=['ARTICv1', 'ARTICv2', 'ARTICv3'], action=SplitArgs, help='Comma separated list of primers to fetch')
    args = parser.parse_args()
    fetch_artic_primers(args.galaxy_datamanager_filename, args.output_directory, args.primers)