Mercurial > repos > iuc > data_manager_star_index_builder
changeset 5:1bd6dc820044 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit 39b9b154845ecf517f7d9e9d76c386b473c3ebd2
author | iuc |
---|---|
date | Wed, 18 Jul 2018 13:25:56 -0400 |
parents | e17bb3d69770 |
children | 54cddd903437 |
files | data_manager/macros.xml data_manager/rna_star_index_builder.xml |
diffstat | 2 files changed, 95 insertions(+), 46 deletions(-) |
line wrap: on
line diff
--- a/data_manager/macros.xml Mon Jan 15 15:39:06 2018 -0500 +++ b/data_manager/macros.xml Wed Jul 18 13:25:56 2018 -0400 @@ -1,8 +1,8 @@ <macros> <xml name="requirements"> <requirements> - <requirement type="package" version="2.5.2b">star</requirement> - <requirement type="package" version="0.1.19">samtools</requirement> + <requirement type="package" version="2.6.0b">star</requirement> + <requirement type="package" version="1.8">samtools</requirement> </requirements> </xml> <token name="@FASTQ_GZ_OPTION@"> @@ -17,4 +17,23 @@ <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="true" help="Exon junction information for mapping splices"/> <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/> </xml> + <xml name="dbKeyActions"> + <actions> + <conditional name="refGenomeSource.geneSource"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="rnastar_index2" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </xml> </macros>
--- a/data_manager/rna_star_index_builder.xml Mon Jan 15 15:39:06 2018 -0500 +++ b/data_manager/rna_star_index_builder.xml Wed Jul 18 13:25:56 2018 -0400 @@ -1,54 +1,63 @@ -<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="0.0.4" profile="17.01"> +<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="0.0.5" profile="17.01"> <description>builder</description> - + <macros> <import>macros.xml</import> </macros> - + <expand macro="requirements" /> - + <command><![CDATA[ - #import json, os - #set params = json.loads( open( str($out_file) ).read() ) - #set target_directory = $params[ 'output_data' ][0]['extra_files_path'].encode('ascii', 'replace') - #set subdir = os.path.basename(target_directory) +if [ -z "\$GALAXY_MEMORY_MB" ] ; then + GALAXY_MEMORY_BYTES=31000000000 ; +else + GALAXY_MEMORY_BYTES=\$((GALAXY_MEMORY_MB * 1000000)) ; +fi ; - mkdir -p '${target_directory}/${subdir}' && +#import json, os +#set params = json.loads(open(str($out_file)).read()) +#set target_directory = $params['output_data'][0]['extra_files_path'].encode('ascii', 'replace') +#set subdir = os.path.basename(target_directory) + +mkdir -p '${target_directory}/${subdir}' && - STAR - --runMode genomeGenerate - --genomeFastaFiles '${all_fasta_source.fields.path}' - --genomeDir '${target_directory}/${subdir}' - #if str($GTFconditional.GTFselect) == "withGTF": - --sjdbGTFfile '${GTFconditional.sjdbGTFfile}' - --sjdbOverhang '${GTFconditional.sjdbOverhang}' - #end if - --runThreadN \${GALAXY_SLOTS:-2} && +STAR +--runMode genomeGenerate +--genomeFastaFiles '${all_fasta_source.fields.path}' +--genomeDir '${target_directory}/${subdir}' +--limitGenomeGenerateRAM \${GALAXY_MEMORY_BYTES} +#if $GTFconditional.GTFselect == "withGTF": + --sjdbGTFfile '${GTFconditional.sjdbGTFfile}' + --sjdbOverhang ${GTFconditional.sjdbOverhang} +#end if +#if $advanced_options.advanced_options_selector == "advanced": + 
--genomeSAindexNbases ${advanced_options.genomeSAindexNbases} + --genomeChrBinNbits ${advanced_options.genomeChrBinNbits} + --genomeSAsparseD ${advanced_options.genomeSAsparseD} +#end if +--runThreadN \${GALAXY_SLOTS:-2} && - python ${__tool_directory__}/rna_star_index_builder.py - --config-file '${out_file}' - --value '${all_fasta_source.fields.value}' - --dbkey '${all_fasta_source.fields.dbkey}' - #if $name: - --name '$name' - #else - --name '${all_fasta_source.fields.name}' - #end if - #if str($GTFconditional.GTFselect) == "withGTF": - --withGTF 1 - #end if - --data-table 'rnastar_index2' - --subdir '${subdir}' +python '${__tool_directory__}/rna_star_index_builder.py' +--config-file '${out_file}' +--value '${all_fasta_source.fields.value}' +--dbkey '${all_fasta_source.fields.dbkey}' +#if $name: + --name '$name' +#else + --name '${all_fasta_source.fields.name}' +#end if +#if str($GTFconditional.GTFselect) == "withGTF": + --withGTF 1 +#end if +--data-table rnastar_index2 +--subdir '${subdir}' ]]></command> <inputs> <param name="all_fasta_source" type="select" label="Source FASTA Sequence"> <options from_data_table="all_fasta"/> </param> - <param name="name" - type="text" - value="" - label="Informative name for sequence index" - help="By using different settings, you may have several indices per reference genome. Give an appropriate description to the index to distinguish between indices"/> + <param name="name" type="text" value="" label="Informative name for sequence index" + help="By using different settings, you may have several indices per reference genome. 
Give an appropriate description to the index to distinguish between indices"/> <conditional name="GTFconditional"> <param name="GTFselect" type="select" label="Reference genome with or without an annotation" help="Must the index have been created WITH a GTF file (if not you can specify one afterward)."> <option value="withoutGTF">use genome reference without builtin gene-model</option> @@ -60,6 +69,30 @@ </when> <when value="withoutGTF" /> </conditional> + <conditional name="advanced_options"> + <param name="advanced_options_selector" type="select" label="Advanced options"> + <option value="default" selected="true">Use default options</option> + <option value="advanced">Set advanced options</option> + </param> + <when value="default" /> + <when value="advanced"> + <param argument="--genomeSAindexNbases" type="integer" min="1" value="14" + label="Length (bases) of the SA pre-indexing string" + help="Typically between 10 and 15. Longer strings will use much more memory, but allow + faster searches. For small genomes, the parameter –genomeSAindexNbases must be scaled + down to min(14, log2(GenomeLength)/2 - 1). For example, for 1 megaBase genome, this is + equal to 9, for 100 kiloBase genome, this is equal to 7."/> + <param argument="--genomeChrBinNbits" type="integer" min="1" value="18" + label="Log2(chrBin), where chrBin is the size of the bins for genome storage" + help="Each chromosome will occupy an integer number of bins. For a genome with large number + of contigs, it is recommended to scale this parameter as min(18, + log2[max(GenomeLength/NumberOfReferences,ReadLength)]). 
For example, for 3 gigaBase + genome with 100,000 chromosomes/scaffolds, this is equal to 15."/> + <param argument="--genomeSAsparseD" type="integer" min="1" value="1" label="Suffix array sparsity" + help="The distance between indices: use bigger numbers to decrease needed RAM at the cost of + mapping speed reduction"/> + </when> + </conditional> </inputs> <outputs> @@ -79,11 +112,9 @@ </tests> --> - <help> - + <help><![CDATA[ .. class:: infomark -<![CDATA[ *What it does* This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner. @@ -95,7 +126,7 @@ To run efficiently, RNA-STAR requires enough free memory to hold the SA-indexed reference genome in RAM. For Human Genome hg19 this index is about 27GB and running RNA-STAR requires approximately ~30GB of RAM. -For custom genomes, the rule of thub is to multiply the size of the +For custom genomes, the rule of thumb is to multiply the size of the reference FASTA file by 9 to estimated required amount of RAM. *Note on sjdbOverhang* @@ -126,8 +157,7 @@ Cheers Alex -**Notice:** If you leave name, description, or id blank, it will be generated automatically. -]]> - </help> +**Notice:** If you leave name, description, or id blank, it will be generated automatically. + ]]></help> <expand macro="citations" /> </tool>