annotate data_manager/fetch_artic_primers.py @ 3:6114f017f819 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
author sanbi-uwc
date Fri, 17 Apr 2020 11:42:59 +0000
parents 59b3556d9ec8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
1 #!/usr/bin/env python
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
2
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
3 from __future__ import print_function, division
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
4
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
5 import argparse
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
6 import json
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
7 import os
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
8 import os.path
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
9 import sys
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
10
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
11 import requests
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
12
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
DATA_TABLE_NAME = 'artic_primers'


def fetch_artic_primers(output_filename, output_directory, primers):
    """Download the requested ARTIC primer BED files and register them.

    Each requested primer set is downloaded into ``output_directory`` as
    ``<name>.bed`` and an entry for it is appended to the ``artic_primers``
    data table. The resulting data manager dictionary is printed and then
    written, as JSON, over ``output_filename``.

    Args:
        output_filename: Path of a JSON file. It is read first (any existing
            ``data_tables`` content is kept) and then overwritten with the
            updated data manager dictionary.
        output_directory: Directory that receives the BED files; created if
            it does not exist.
        primers: Names of the primer sets to fetch (``ARTICv1``, ``ARTICv2``,
            ``ARTICv3``). Either an iterable of names or a single comma
            separated string. Unknown names are ignored.

    Raises:
        SystemExit: If a download does not return HTTP 200; the exit code is
            the HTTP status code and nothing is written to
            ``output_filename``.
    """
    primer_sets = {
        'ARTICv1': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed',
        'ARTICv2': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed',
        'ARTICv3': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed',
    }

    # A plain string would turn the membership test below into a substring
    # match, so normalise it to a collection of exact names first.
    if isinstance(primers, str):
        primers = [item.strip() for item in primers.split(',') if item.strip()]
    wanted = set(primers)

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    with open(output_filename) as handle:
        existing = json.load(handle)
    data_manager_dict = {'data_tables': existing.get('data_tables', {})}
    table = data_manager_dict['data_tables'].setdefault(DATA_TABLE_NAME, [])

    for name, url in primer_sets.items():
        if name not in wanted:
            continue
        # Without a timeout a stalled connection would block the job forever.
        response = requests.get(url, timeout=60)
        if response.status_code != 200:
            print('Error: download of', url, 'failed with code', response.status_code, file=sys.stderr)
            sys.exit(response.status_code)
        bed_output_filename = os.path.join(output_directory, name + '.bed')
        with open(bed_output_filename, 'w') as bed_file:
            bed_file.write(response.text)
        # 'ARTICv3' -> 'ARTIC v3 primer set'
        description = name[:-2] + ' ' + name[-2:] + ' primer set'
        table.append(dict(value=name, path=bed_output_filename, description=description))

    print(data_manager_dict)
    with open(output_filename, 'w') as handle:
        json.dump(data_manager_dict, handle)
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
43
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
class SplitArgs(argparse.Action):
    """argparse action that stores a comma separated value as a list.

    Whitespace around each item is stripped and empty items (for example
    from a trailing comma) are dropped, so ``"a, b,"`` is stored as
    ``['a', 'b']``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        items = [item.strip() for item in values.split(',')]
        setattr(namespace, self.dest, [item for item in items if item])
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
47
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
48
c401f63b3d12 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 120c6491f4b0888220e432693a9805d8198d7397"
sanbi-uwc
parents:
diff changeset
def main(argv=None):
    """Parse the command line and fetch the requested ARTIC primer sets.

    The file given by ``--galaxy_datamanager_filename`` is read as JSON. If
    its first ``output_data`` entry has an ``extra_files_path``, that path is
    used as the output directory; otherwise ``--output_directory`` is used.
    The same JSON file is then passed to ``fetch_artic_primers`` as the file
    to update.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Fetch ARTIC primer files for Galaxy use')
    parser.add_argument('--output_directory', default='tmp', help='Directory to write output to')
    parser.add_argument('--galaxy_datamanager_filename', required=True,
                        help='Galaxy JSON format file describing data manager inputs')
    # argparse does not run the action on a default value, so the default
    # must already be a list of exact primer names.
    parser.add_argument('--primers', default=['ARTICv1', 'ARTICv2', 'ARTICv3'], action=SplitArgs,
                        help='Comma separated list of primers to fetch')
    args = parser.parse_args(argv)

    with open(args.galaxy_datamanager_filename) as handle:
        config = json.load(handle)
    # Treat a missing or empty 'output_data' list the same way.
    output_data = config.get('output_data') or [{}]
    output_directory = output_data[0].get('extra_files_path', None)
    if output_directory is None:
        output_directory = args.output_directory
    fetch_artic_primers(args.galaxy_datamanager_filename, output_directory, args.primers)


if __name__ == '__main__':
    main()