comparison data_manager/fetch_refseq.py @ 10:d878e492546c draft

planemo upload for repository https://github.com/pvanheus/refseq_fasta_data_manager commit b682adad2c3c74567d23e1a5cf2bfcc3df1c96ae-dirty
author sanbi-uwc
date Fri, 07 Sep 2018 20:03:30 -0400
parents 4852eb1a75e5
children b1e8f8a613e6
comparison
equal deleted inserted replaced
9:7cfce83a7f62 10:d878e492546c
129 parser.add_argument('--galaxy_datamanager_filename', help='Galaxy JSON format file describing data manager inputs') 129 parser.add_argument('--galaxy_datamanager_filename', help='Galaxy JSON format file describing data manager inputs')
130 parser.add_argument('--division_names', nargs='+', help='RefSeq divisions to download') 130 parser.add_argument('--division_names', nargs='+', help='RefSeq divisions to download')
131 parser.add_argument('--mol_types', nargs='+', help='Molecule types (genomic, rna, protein) to fetch') 131 parser.add_argument('--mol_types', nargs='+', help='Molecule types (genomic, rna, protein) to fetch')
132 parser.add_argument('--pin_date', help='Force download date to this version string') 132 parser.add_argument('--pin_date', help='Force download date to this version string')
133 args = parser.parse_args() 133 args = parser.parse_args()
134
135 division_names = args.division_names.split(',')
136 mol_types = args.mol_types.split(',')
134 if args.galaxy_datamanager_filename is not None: 137 if args.galaxy_datamanager_filename is not None:
135 dm_opts = json.loads(open(args.galaxy_datamanager_filename).read()) 138 dm_opts = json.loads(open(args.galaxy_datamanager_filename).read())
136 output_directory = dm_opts['output_data'][0]['extra_files_path'] # take the extra_files_path of the first output parameter 139 output_directory = dm_opts['output_data'][0]['extra_files_path'] # take the extra_files_path of the first output parameter
137 data_manager_dict = {} 140 data_manager_dict = {}
138 else: 141 else:
139 output_directory = args.output_directory 142 output_directory = args.output_directory
140 for division_name in args.division_names: 143 for division_name in division_names:
141 if args.pin_date is not None: 144 if args.pin_date is not None:
142 today_str = args.pin_date 145 today_str = args.pin_date
143 else: 146 else:
144 today_str = date.today().strftime('%Y-%m-%d') # ISO 8601 date format 147 today_str = date.today().strftime('%Y-%m-%d') # ISO 8601 date format
145 [release_num, fasta_files] = get_refseq_division(division_name, args.mol_types, output_directory, args.debug, args.compress) 148 [release_num, fasta_files] = get_refseq_division(division_name, mol_types, output_directory, args.debug, args.compress)
146 if args.galaxy_datamanager_filename is not None: 149 if args.galaxy_datamanager_filename is not None:
147 for i, mol_type in enumerate(args.mol_types): 150 for i, mol_type in enumerate(args.mol_types):
148 assert mol_type in fasta_files[i], "Filename does not contain expected mol_type ({}, {})".format(mol_type, fasta_files[i]) 151 assert mol_type in fasta_files[i], "Filename does not contain expected mol_type ({}, {})".format(mol_type, fasta_files[i])
149 unique_key = division_name + '.' + release_num + '.' + mol_type + '.' + today_str 152 unique_key = division_name + '.' + release_num + '.' + mol_type + '.' + today_str
150 dbkey = division_name + '.' + release_num + '.' + mol_type 153 dbkey = division_name + '.' + release_num + '.' + mol_type