Mercurial > repos > chrisw > data_manager_monorail_index_fetcher
changeset 18:402c6528fb6f draft
Uploaded
| author | chrisw | 
|---|---|
| date | Thu, 14 Nov 2019 02:12:39 +0000 | 
| parents | aa1eda63b53d | 
| children | e7d440a3b439 | 
| files | data_manager_monorail_index/data_manager/data_manager.py data_manager_monorail_index/data_manager/monorail_index_fetcher.xml data_manager_monorail_index/data_manager_conf.xml data_manager_monorail_index/tool-data/monorail_index.loc.sample | 
| diffstat | 4 files changed, 52 insertions(+), 17 deletions(-) [+] | 
line wrap: on
 line diff
--- a/data_manager_monorail_index/data_manager/data_manager.py Wed Nov 13 23:15:07 2019 +0000 +++ b/data_manager_monorail_index/data_manager/data_manager.py Thu Nov 14 02:12:39 2019 +0000 @@ -10,28 +10,33 @@ import zipfile parser = argparse.ArgumentParser(description='Create data manager json.') -parser.add_argument('--out', dest='output', action='store', help='JSON filename') +parser.add_argument('--config', dest='config', action='store', help='JSON filename') parser.add_argument('--name', dest='name', action='store', default=None, help='Data table entry unique ID') -parser.add_argument('--url', dest='url', action='store', help='Monorail Indexes Base URL', default="http://snaptron.cs.jhu.edu/data/monorail/ath10") +parser.add_argument('--url', dest='url', action='store', help='Monorail Indexes Base URL', default="http://snaptron.cs.jhu.edu/data/monorail/1.0.0/ath10") +parser.add_argument('--subdir', dest='subdir', action='store', help='subdirectory where the indexes are temporarily downloaded to') +#parser.add_argument('--version', dest='version', action='store', help='Version of Monorail as a whole (not a particular index/program)') args = parser.parse_args() def main(args): workdir = os.path.join(os.getcwd(), 'monorail_index') data_manager_entry = {} - ref = args.url.split('/')[-1] - #data_manager_entry['dbkey'] = 'mrail.'+ref + #URL syntax assumes that the last two components of the URL are: <version>/<ref> + #e.g. 1.0.0/ath10 + url_comps = args.url.split('/') + ref = url_comps[-1] + version = url_comps[-2] data_manager_entry['dbkey'] = ref.lower() - data_manager_entry['value'] = 'v'+ref.lower() - jsonin = open(args.output).read() + jsonin = open(args.config).read() params = json.loads(jsonin) - target_directory = params['output_data'][0]['extra_files_path'] + #target_directory = params['output_data'][0]['extra_files_path'] #data_manager_entry['path'] = params['output_data'][0]['extra_files_path'] - data_manager_entry['path'] = target_directory - data_manager_entry['name'] = 'mrail.'+ref + data_manager_entry['path'] = args.subdir + data_manager_entry['exons_path'] = args.subdir + data_manager_entry['version'] = args.version #data_manager_entry['exons_path'] = data_manager_entry['path'] + os.sep + 'gtf' + os.sep + 'exons.bed' - data_manager_json = dict(data_tables={'monorail_index': [data_manager_entry]}) - file(args.output, 'wb').write(json.dumps(data_manager_json)) + data_manager_json = {'data_tables':{'monorail_index': [data_manager_entry]}} + file(args.output, 'w').write(json.dumps(data_manager_json)) if __name__ == '__main__': main(args)
--- a/data_manager_monorail_index/data_manager/monorail_index_fetcher.xml Wed Nov 13 23:15:07 2019 +0000 +++ b/data_manager_monorail_index/data_manager/monorail_index_fetcher.xml Thu Nov 14 02:12:39 2019 +0000 @@ -10,13 +10,13 @@ #set $subdir = os.path.basename($target_directory) mkdir '${target_directory}' && bash '${__tool_directory__}/download_and_extract_monorail_index.sh' '${target_directory}' '${monorail_index_url}' && - python '${__tool_directory__}/data_manager.py' --out "${out_file}" + python '${__tool_directory__}/data_manager.py' --config "${out_file}" --subdir ${subdir} #if $monorail_index_url: --url "${monorail_index_url}" #end if ]]></command> <inputs> - <param label="Enter base URL for Monorail index directories (e.g. http://snaptron.cs.jhu.edu/data/monorail/ath10)" name="monorail_index_url" type="text" /> + <param label="Enter base URL for Monorail index directories (e.g. http://snaptron.cs.jhu.edu/data/monorail/1.0.0/ath10)" name="monorail_index_url" type="text" /> </inputs> <outputs> <data format="data_manager_json" name="out_file" />
--- a/data_manager_monorail_index/data_manager_conf.xml Wed Nov 13 23:15:07 2019 +0000 +++ b/data_manager_monorail_index/data_manager_conf.xml Thu Nov 14 02:12:39 2019 +0000 @@ -3,14 +3,19 @@ <data_manager tool_file="data_manager/monorail_index_fetcher.xml" id="monorail_index_fetcher" version="1.0.0"> <data_table name="monorail_index"> <output> - <column name="value" /> - <column name="dbkey" /> + <column name="value"/> + <column name="dbkey"/> + <column name="version"/> <column name="path" output_ref="out_file"> <move relative_symlinks="True" type="directory"> <!-- no need to set source, gets taken from out_dir.extra_files_path --> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/mri/${value}</target> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">monorail/${version}/${dbkey}</target> </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/{$dbkey}/mri/${value}/${path}</value_translation> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/monorail/${version}/${dbkey}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + <column name="exons_path"> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/monorail/${version}/${dbkey}/gtf/exons.bed</value_translation> <value_translation type="function">abspath</value_translation> </column> </output>
--- a/data_manager_monorail_index/tool-data/monorail_index.loc.sample Wed Nov 13 23:15:07 2019 +0000 +++ b/data_manager_monorail_index/tool-data/monorail_index.loc.sample Thu Nov 14 02:12:39 2019 +0000 @@ -0,0 +1,25 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of rna-star indexed sequences data files. You will +#need to create these data files and then create a rnastar_index2.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The rnastar_index2.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> <with_gene_model> <version> +# +#The <with_gene_model> column should be 1 or 0, indicating whether the index +#was built with annotations (i.e., --sjdbGTFfile and --sjdbOverhang were used) +#or not. +# +#The <version> column indicates the STAR version that introduced the format of +#the index, i.e., the oldest STAR version that could make use of the index. +# +#Note that STAR indices can become quite large. Consequently, it is only +#advisable to create indices with annotations if it's known ahead of time that +#(A) the annotations won't be frequently updated and (B) the read lengths used +#will also rarely vary. If either of these is not the case, it's advisable to +#create indices without annotations and then specify an annotation file and +#maximum read length (minus 1) when running STAR. +# +#hg19 hg19 hg19 full /mnt/galaxyIndices/genomes/hg19/rnastar 0 2.7.1a +#hg19Ensembl hg19Ensembl hg19 full with Ensembl annotation /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1 2.7.1a
