Mercurial > repos > sanbi-uwc > data_manager_novocraft_index_builder
diff data_manager/novocraft_index_builder.py @ 17:db293ee25be5 draft
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 0745f158bbb0d5d190cc4503157d2d3092ab8cc5
author | sanbi-uwc |
---|---|
date | Fri, 15 Apr 2016 09:03:40 -0400 |
parents | d053e7b179b5 |
children | 2b89ba1c0057 |
line wrap: on
line diff
--- a/data_manager/novocraft_index_builder.py Thu Mar 31 09:57:08 2016 -0400 +++ b/data_manager/novocraft_index_builder.py Fri Apr 15 09:03:40 2016 -0400 @@ -17,20 +17,14 @@ DEFAULT_DATA_TABLE_NAME = "novocraft_index" def get_dbkey_id_name(params, dbkey_description=None): - dbkey = params['param_dict']['dbkey'] # TODO: ensure sequence_id is unique and does not already appear in location file sequence_id = params['param_dict']['sequence_id'] - if not sequence_id: - sequence_id = dbkey # uuid.uuid4() generate and use an uuid instead? - sequence_name = params['param_dict']['sequence_name'] if not sequence_name: sequence_name = dbkey_description - if not sequence_name: - sequence_name = dbkey - return dbkey, sequence_id, sequence_name + return sequence_id, sequence_name -def _make_novocraft_index(data_manager_dict, fasta_filename, target_directory, dbkey, sequence_id, sequence_name, add_system_module, data_table_name=DEFAULT_DATA_TABLE_NAME): +def _make_novocraft_index(data_manager_dict, fasta_filename, target_directory, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME): if os.path.exists(target_directory) and not os.path.isdir(target_directory): print("Output directory path already exists but is not a directory: {}".format(target_directory), file=sys.stderr) @@ -43,13 +37,11 @@ cmdline = shlex.split(cmdline_str) try: - if add_system_module == 'true': - check_call(['module add novoindex']) check_call(cmdline) except CalledProcessError: print("Error building RNA STAR index", file=sys.stderr) - data_table_entry = dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=index_filename ) + data_table_entry = dict( value=sequence_id, dbkey=sequence_id, name=sequence_name, path=index_filename ) _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): @@ -58,18 +50,18 @@ data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) return data_manager_dict -def download_from_url( data_manager_dict, params, target_directory, dbkey, sequence_id, sequence_name, add_system_module, data_table_name=DEFAULT_DATA_TABLE_NAME ): +def download_from_url( data_manager_dict, params, target_directory, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME ): # TODO: we should automatically do decompression here urls = filter(bool, map(lambda x: x.strip(), params['param_dict']['reference_source']['user_url'].split('\n'))) fasta_reader = [urllib2.urlopen(url) for url in urls] - _make_novocraft_index(data_manager_dict, fasta_reader, target_directory, dbkey, sequence_id, sequence_name, data_table_name, add_system_module) + _make_novocraft_index(data_manager_dict, fasta_reader, target_directory, sequence_id, sequence_name, data_table_name) -def download_from_history( data_manager_dict, params, target_directory, dbkey, sequence_id, sequence_name, add_system_module, data_table_name=DEFAULT_DATA_TABLE_NAME ): +def download_from_history( data_manager_dict, params, target_directory, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME ): # TODO: allow multiple FASTA input files input_filename = params['param_dict']['reference_source']['input_fasta'] - _make_novocraft_index(data_manager_dict, input_filename, target_directory, dbkey, sequence_id, sequence_name, add_system_module, data_table_name ) + _make_novocraft_index(data_manager_dict, input_filename, target_directory, sequence_id, sequence_name, data_table_name ) REFERENCE_SOURCE_TO_DOWNLOAD = dict(url=download_from_url, history=download_from_history) @@ -78,7 +70,6 @@ parser.add_argument('output_filename') parser.add_argument('--dbkey_description') parser.add_argument('--data_table_name', default='novocraft_index') - parser.add_argument('--add_system_module', default=False) args = parser.parse_args() filename = args.output_filename @@ -88,14 +79,11 @@ os.makedirs(target_directory) data_manager_dict = {} - dbkey, sequence_id, sequence_name = get_dbkey_id_name(params, dbkey_description=args.dbkey_description) - - if dbkey in [None, '', '?']: - raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % (dbkey)) + sequence_id, sequence_name = get_dbkey_id_name(params, dbkey_description=args.dbkey_description) # Fetch the FASTA REFERENCE_SOURCE_TO_DOWNLOAD[params['param_dict']['reference_source']['reference_source_selector']]\ - (data_manager_dict, params, target_directory, dbkey, sequence_id, sequence_name, args.add_system_module, data_table_name=args.data_table_name or DEFAULT_DATA_TABLE_NAME ) + (data_manager_dict, params, target_directory, sequence_id, sequence_name, data_table_name=args.data_table_name or DEFAULT_DATA_TABLE_NAME ) open(filename, 'wb').write(dumps( data_manager_dict ))