Mercurial > repos > sanbi-uwc > data_manager_fetch_refseq
comparison data_manager/fetch_refseq.py @ 10:d878e492546c draft
planemo upload for repository https://github.com/pvanheus/refseq_fasta_data_manager commit b682adad2c3c74567d23e1a5cf2bfcc3df1c96ae-dirty
author | sanbi-uwc |
---|---|
date | Fri, 07 Sep 2018 20:03:30 -0400 |
parents | 4852eb1a75e5 |
children | b1e8f8a613e6 |
comparison
equal
deleted
inserted
replaced
9:7cfce83a7f62 | 10:d878e492546c |
---|---|
129 parser.add_argument('--galaxy_datamanager_filename', help='Galaxy JSON format file describing data manager inputs') | 129 parser.add_argument('--galaxy_datamanager_filename', help='Galaxy JSON format file describing data manager inputs') |
130 parser.add_argument('--division_names', nargs='+', help='RefSeq divisions to download') | 130 parser.add_argument('--division_names', nargs='+', help='RefSeq divisions to download') |
131 parser.add_argument('--mol_types', nargs='+', help='Molecule types (genomic, rna, protein) to fetch') | 131 parser.add_argument('--mol_types', nargs='+', help='Molecule types (genomic, rna, protein) to fetch') |
132 parser.add_argument('--pin_date', help='Force download date to this version string') | 132 parser.add_argument('--pin_date', help='Force download date to this version string') |
133 args = parser.parse_args() | 133 args = parser.parse_args() |
| | 134 |
| | 135 division_names = args.division_names.split(',') |
| | 136 mol_types = args.mol_types.split(',') |
134 if args.galaxy_datamanager_filename is not None: | 137 if args.galaxy_datamanager_filename is not None: |
135 dm_opts = json.loads(open(args.galaxy_datamanager_filename).read()) | 138 dm_opts = json.loads(open(args.galaxy_datamanager_filename).read()) |
136 output_directory = dm_opts['output_data'][0]['extra_files_path'] # take the extra_files_path of the first output parameter | 139 output_directory = dm_opts['output_data'][0]['extra_files_path'] # take the extra_files_path of the first output parameter |
137 data_manager_dict = {} | 140 data_manager_dict = {} |
138 else: | 141 else: |
139 output_directory = args.output_directory | 142 output_directory = args.output_directory |
140 for division_name in args.division_names: | 143 for division_name in division_names: |
141 if args.pin_date is not None: | 144 if args.pin_date is not None: |
142 today_str = args.pin_date | 145 today_str = args.pin_date |
143 else: | 146 else: |
144 today_str = date.today().strftime('%Y-%m-%d') # ISO 8601 date format | 147 today_str = date.today().strftime('%Y-%m-%d') # ISO 8601 date format |
145 [release_num, fasta_files] = get_refseq_division(division_name, args.mol_types, output_directory, args.debug, args.compress) | 148 [release_num, fasta_files] = get_refseq_division(division_name, mol_types, output_directory, args.debug, args.compress) |
146 if args.galaxy_datamanager_filename is not None: | 149 if args.galaxy_datamanager_filename is not None: |
147 for i, mol_type in enumerate(args.mol_types): | 150 for i, mol_type in enumerate(args.mol_types): |
148 assert mol_type in fasta_files[i], "Filename does not contain expected mol_type ({}, {})".format(mol_type, fasta_files[i]) | 151 assert mol_type in fasta_files[i], "Filename does not contain expected mol_type ({}, {})".format(mol_type, fasta_files[i]) |
149 unique_key = division_name + '.' + release_num + '.' + mol_type + '.' + today_str | 152 unique_key = division_name + '.' + release_num + '.' + mol_type + '.' + today_str |
150 dbkey = division_name + '.' + release_num + '.' + mol_type | 153 dbkey = division_name + '.' + release_num + '.' + mol_type |