Mercurial > repos > wolma > rnastar_index_builder
changeset 4:039890bdcb62 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit 93138516106c74b1ccc70d2d946e6eaa29c7c1a3-dirty
author | wolma |
---|---|
date | Wed, 16 Oct 2019 03:33:53 -0400 |
parents | 80bbca356b76 |
children | bb74adafc98f |
files | data_manager/macros.xml data_manager/rna_star_index_builder.py data_manager/rna_star_index_builder.xml data_manager_conf.xml |
diffstat | 4 files changed, 132 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/macros.xml Mon Jun 24 12:27:02 2019 -0400 +++ b/data_manager/macros.xml Wed Oct 16 03:33:53 2019 -0400 @@ -1,10 +1,11 @@ <macros> <!-- REMEMBER to bump the version of rna_star_index_builder_data_manager whenever you make changes to the following two version tokens! - The data manager uses a symlink to this macro file to keep the versions in - sync. --> + The data manager uses a symlink to this macro file to keep the STAR and + the index versions in sync, but you should manually adjust the +galaxy + version number. --> <!-- STAR version to be used --> - <token name="@VERSION@">2.7.1a</token> + <token name="@VERSION@">2.7.2b</token> <!-- STAR index version compatible with this version of STAR This is the STAR version that introduced the index structure expected by the current version. @@ -21,7 +22,7 @@ </requirements> </xml> - <xml name="index_selection" token_with_gene_model="1"> + <xml name="index_selection" token_with_gene_model="0"> <param argument="--genomeDir" name="genomeDir" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> @@ -65,4 +66,100 @@ </conditional> </actions> </xml> + <token name="@TEMPINDEX@"><![CDATA[ + ## Create temporary index for custom reference + #if str($refGenomeSource.geneSource) == 'history': + mkdir -p tempstargenomedir && + STAR + --runMode genomeGenerate + --genomeDir 'tempstargenomedir' + --genomeFastaFiles '${refGenomeSource.genomeFastaFiles}' + ## Handle difference between indices with/without annotations + #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf': + --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}' + --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}' + #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3': + --sjdbGTFtagExonParentTranscript Parent + #end if + #end if + #if str($refGenomeSource.genomeSAindexNbases): + --genomeSAindexNbases ${refGenomeSource.genomeSAindexNbases} + #end 
if + --runThreadN \${GALAXY_SLOTS:-4} + && + #end if + ]]></token> + <token name="@REFGENOMEHANDLING@" ><![CDATA[ + --runThreadN \${GALAXY_SLOTS:-4} + --genomeLoad NoSharedMemory + --genomeDir + #if str($refGenomeSource.geneSource) == 'history': + tempstargenomedir + #else: + '${refGenomeSource.GTFconditional.genomeDir.fields.path}' + ## Handle difference between indices with/without annotations + #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf': + #if $refGenomeSource.GTFconditional.sjdbGTFfile: + --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang + --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}' + #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3': + --sjdbGTFtagExonParentTranscript Parent + #end if + #end if + #end if + #end if + ]]></token> + <xml name="stdio" > + <stdio> + <regex match="FATAL error" source="both" level="fatal"/> + <regex match="EXITING: FATAL INPUT ERROR:" source="both" level="fatal"/> + <regex match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" source="both" level="fatal"/> + <regex match="\[sam_read1\] missing header\? Abort!" source="both" level="fatal"/> + <yield /> + </stdio> + </xml> + <xml name="refgenomehandling" > + <conditional name="refGenomeSource"> + <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options"> + <option value="indexed" selected="true">Use a built-in index</option> + <option value="history">Use reference genome from history and create temporary index</option> + </param> + <when value="indexed"> + <conditional name="GTFconditional"> + <param name="GTFselect" type="select" + label="Reference genome with or without an annotation" + help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... 
without builtin gene-model' option to select from the list of available indexes without annotated splice junctions."> + <option value="without-gtf" selected='true'>use genome reference without builtin gene-model</option> + <option value="with-gtf">use genome reference with builtin gene-model</option> + </param> + <when value="with-gtf"> + <expand macro="index_selection" with_gene_model="1" /> + </when> + <when value="without-gtf"> + <expand macro="index_selection" with_gene_model="0" /> + <expand macro="@SJDBOPTIONS@" /> + </when> + </conditional> + </when> + <when value="history"> + <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" /> + <!-- Currently, this parameter is not exposed in the wrapper, + but used only in the tests to avoid excessive index sizes for + the tiny test genomes. --> + <param name="genomeSAindexNbases" type="hidden" value="" /> + <conditional name="GTFconditional"> + <param name="GTFselect" type="select" + label="Build index with our without known splice junctions annotation" + help="To build an index with known splice junctions annotated, you will have to provide a GTF or GFF3 dataset that describes the gene models (the location of genes, transcripts and exons) known for the reference genome."> + <option value="without-gtf">build index without gene-model</option> + <option value="with-gtf">build index with gene-model</option> + </param> + <when value="with-gtf"> + <expand macro="@SJDBOPTIONS@" optional="false"/> + </when> + <when value="without-gtf" /> + </conditional> + </when> + </conditional> + </xml> </macros>
--- a/data_manager/rna_star_index_builder.py Mon Jun 24 12:27:02 2019 -0400
+++ b/data_manager/rna_star_index_builder.py Wed Oct 16 03:33:53 2019 -0400
@@ -18,13 +18,29 @@
     args = parser.parse_args()

     if args.dbkey in [ None, '', '?' ]:
-        raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( args.dbkey ) )
+        raise Exception(
+            '"%s" is not a valid dbkey. You must specify a valid dbkey.'
+            % ( args.dbkey )
+        )

     with_gene_model = "0"
     if args.with_gene_model:
         with_gene_model = "1"

-    data_manager_dict = {'data_tables': {args.data_table: [dict({"value": args.value, "dbkey": args.dbkey, "name": args.name, "path": args.subdir, "with_gene_model": with_gene_model, "version": args.index_version} )]}}
+    data_manager_dict = {
+        'data_tables': {
+            args.data_table: [
+                {
+                    "value": args.value,
+                    "dbkey": args.dbkey,
+                    "name": args.name,
+                    "subdir": args.subdir,
+                    "with_gene_model": with_gene_model,
+                    "version": args.index_version
+                }
+            ]
+        }
+    }
     open( args.config_file, 'w' ).write( json.dumps( data_manager_dict ) )
--- a/data_manager/rna_star_index_builder.xml Mon Jun 24 12:27:02 2019 -0400 +++ b/data_manager/rna_star_index_builder.xml Wed Oct 16 03:33:53 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="2.7.1a" profile="17.01"> +<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="@IDX_VERSION@+galaxy1" profile="17.01"> <description>builder</description> <macros> @@ -14,17 +14,16 @@ GALAXY_MEMORY_BYTES=\$((GALAXY_MEMORY_MB * 1000000)) ; fi ; -#import json, os -#set params = json.loads(open(str($out_file)).read()) -#set target_directory = $params['output_data'][0]['extra_files_path'].encode('ascii', 'replace') -#set subdir = os.path.basename(target_directory) +#import os +#set $target_directory = str($out_file.extra_files_path) +#set $subdir = os.path.basename($target_directory) -mkdir -p '${target_directory}/${subdir}' && +mkdir '${target_directory}' && STAR --runMode genomeGenerate --genomeFastaFiles '${all_fasta_source.fields.path}' ---genomeDir '${target_directory}/${subdir}' +--genomeDir '${target_directory}' --limitGenomeGenerateRAM \${GALAXY_MEMORY_BYTES} #if $GTFconditional.GTFselect == "withGTF": --sjdbGTFfile '${GTFconditional.sjdbGTFfile}' @@ -118,7 +117,10 @@ *What it does* -This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner. +This is a Galaxy data manager tool for the gap-aware RNA aligner STAR. + +This version of the tool builds STAR indices of the format first introduced +with STAR version @IDX_VERSION@. Please read the fine manual - that and the google group are the places to learn about the options above.
--- a/data_manager_conf.xml Mon Jun 24 12:27:02 2019 -0400 +++ b/data_manager_conf.xml Wed Oct 16 03:33:53 2019 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <data_managers> - <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder"> + <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder" version="0.0.6"> <data_table name="rnastar_index2_versioned"> <output> <column name="value" /> @@ -12,9 +12,9 @@ out_file.extra_files_path is used as base by default if no source, eg for type=directory, then refers to base --> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rnastar/${version}/${dbkey}/${value}</target> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rnastar/${version}/${dbkey}/${value}/${subdir}</target> </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}</value_translation> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}/${subdir}</value_translation> <value_translation type="function">abspath</value_translation> </column> <column name="with_gene_model" />