annotate data_manager/fetch_artic_primers.py @ 19:d118e256faca draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
author sanbi-uwc
date Thu, 16 Apr 2020 10:19:57 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
1 #!/usr/bin/env python
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
2
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
3 from __future__ import print_function, division
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
4
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
5 import argparse
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
6 import json
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
7 import os
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
8 import os.path
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
9 import sys
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
10
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
11 import requests
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
12
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
13 DATA_TABLE_NAME = 'artic_primers'
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
def fetch_artic_primers(output_filename, output_directory, primers):
    """Download ARTIC nCoV-2019 primer BED files and record them in the data manager JSON.

    Each selected primer scheme is downloaded into ``output_directory`` as
    ``<name>.bed`` and an entry with ``value``, ``path`` and ``description``
    keys is appended to the ``DATA_TABLE_NAME`` table inside the JSON
    document stored at ``output_filename``.  That file is read first and
    rewritten in place with the updated tables.

    Args:
        output_filename: Path of the Galaxy data manager JSON file to read
            and then overwrite.
        output_directory: Directory that receives the BED files; it is
            created when it does not exist yet.
        primers: Names of the primer sets to fetch, either as a list or as
            a comma separated string.  ``None`` or an empty selection
            fetches every known primer set.  Names that are not known are
            reported on stderr and skipped.

    Exits the process with the HTTP status code as exit status when a
    download does not return status 200.
    """
    primer_sets = {
        'ARTICv1': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed',
        'ARTICv2': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed',
        'ARTICv3': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed',
    }

    # argparse hands over an untouched default when --primers is omitted, so
    # accept a comma separated string as well as an already split list.
    if isinstance(primers, str):
        primers = primers.split(',')
    requested = [name.strip() for name in (primers or []) if name.strip()]
    for name in requested:
        if name not in primer_sets:
            print('Warning: unknown primer set', name, 'requested, skipping', file=sys.stderr)

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    with open(output_filename) as json_in:
        data_manager_dict = json.load(json_in)
    data_tables = data_manager_dict.setdefault('data_tables', {})
    table_entries = data_tables.setdefault(DATA_TABLE_NAME, [])

    data = []
    for name, url in primer_sets.items():
        # An empty selection means "everything"; otherwise honour the request.
        if requested and name not in requested:
            continue
        # A timeout keeps an unresponsive server from blocking the job forever.
        response = requests.get(url, timeout=60)
        if response.status_code != 200:
            print('Error: download of', url, 'failed with code', response.status_code, file=sys.stderr)
            sys.exit(response.status_code)
        bed_output_filename = os.path.join(output_directory, name + '.bed')
        with open(bed_output_filename, 'w') as bed_out:
            bed_out.write(response.text)
        # 'ARTICv1' -> 'ARTIC v1 primer set'
        description = name[:-2] + ' ' + name[-2:] + ' primer set'
        data.append(dict(value=name, path=bed_output_filename, description=description))

    table_entries.extend(data)
    print(data_manager_dict)
    with open(output_filename, 'w') as json_out:
        json.dump(data_manager_dict, json_out)
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
40
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
class SplitArgs(argparse.Action):
    """argparse action that stores a comma separated option value as a list.

    Whitespace around each item is stripped and empty items (for example
    the one produced by a trailing comma) are dropped, so the value
    ``"a, b,"`` is stored as ``['a', 'b']``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        stripped = [item.strip() for item in values.split(',')]
        setattr(namespace, self.dest, [item for item in stripped if item])
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
44
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
45
d118e256faca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
if __name__ == '__main__':
    # Command line entry point: parse the options and fetch the primer files.
    parser = argparse.ArgumentParser(description='Fetch ARTIC primer files for Galaxy use')
    parser.add_argument('--output_directory', default='tmp', help='Directory to write output to')
    # The file is opened unconditionally by fetch_artic_primers, so a missing
    # option should be reported by argparse rather than fail inside open().
    parser.add_argument('--galaxy_datamanager_filename', required=True, help='Galaxy JSON format file describing data manager inputs')
    # argparse does not run a custom action on the default value, so the
    # default is given as an already split list of primer set names.
    parser.add_argument('--primers', default=['ARTICv1', 'ARTICv2', 'ARTICv3'], action=SplitArgs, help='Comma separated list of primers to fetch')
    args = parser.parse_args()
    fetch_artic_primers(args.galaxy_datamanager_filename, args.output_directory, args.primers)