Mercurial > repos > iuc > data_manager_star_index_builder

--- a/data_manager/macros.xml	Mon Jan 15 15:39:06 2018 -0500
+++ b/data_manager/macros.xml	Wed Jul 18 13:25:56 2018 -0400
@@ -1,8 +1,8 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="2.5.2b">star</requirement>
-            <requirement type="package" version="0.1.19">samtools</requirement>
+            <requirement type="package" version="2.6.0b">star</requirement>
+            <requirement type="package" version="1.8">samtools</requirement>
         </requirements>
     </xml>
     <token name="@FASTQ_GZ_OPTION@">
@@ -17,4 +17,23 @@
          <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="true" help="Exon junction information for mapping splices"/>
          <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>
     </xml>
+    <xml name="dbKeyActions"> <!-- Output actions that set the dbkey metadata according to refGenomeSource.geneSource -->
+        <actions>
+            <conditional name="refGenomeSource.geneSource">
+                <when value="indexed"> <!-- dbkey is read from column 1 of the rnastar_index2 data table, filtered on column 0 by refGenomeSource.GTFconditional.genomeDir -->
+                    <action type="metadata" name="dbkey">
+                        <option type="from_data_table" name="rnastar_index2" column="1" offset="0">
+                            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <!-- keep="False": rows whose column 0 starts with "#" are not kept -->
+                            <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/>
+                        </option>
+                    </action>
+                </when>
+                <when value="history"> <!-- dbkey is taken from the dbkey attribute of refGenomeSource.genomeFastaFiles -->
+                    <action type="metadata" name="dbkey">
+                        <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey" />
+                    </action>
+                </when>
+            </conditional>
+        </actions>
+    </xml>
 </macros>
--- a/data_manager/rna_star_index_builder.xml	Mon Jan 15 15:39:06 2018 -0500
+++ b/data_manager/rna_star_index_builder.xml	Wed Jul 18 13:25:56 2018 -0400
@@ -1,54 +1,63 @@
-<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="0.0.4" profile="17.01">
+<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="0.0.5" profile="17.01">
     <description>builder</description>
-
+
     <macros>
         <import>macros.xml</import>
     </macros>
-
+
     <expand macro="requirements" />
-
+
     <command><![CDATA[
-        #import json, os
-        #set params = json.loads( open( str($out_file) ).read() )
-        #set target_directory = $params[ 'output_data' ][0]['extra_files_path'].encode('ascii', 'replace')
-        #set subdir = os.path.basename(target_directory)
+if [ -z "\$GALAXY_MEMORY_MB" ] ; then
+    GALAXY_MEMORY_BYTES=31000000000 ;
+else
+    GALAXY_MEMORY_BYTES=\$((GALAXY_MEMORY_MB * 1000000)) ;
+fi ;

-        mkdir -p '${target_directory}/${subdir}' &&
+#import json, os
+#set params = json.loads(open(str($out_file)).read())
+#set target_directory = $params['output_data'][0]['extra_files_path'].encode('ascii', 'replace')
+#set subdir = os.path.basename(target_directory)
+
+mkdir -p '${target_directory}/${subdir}' &&

-        STAR
-            --runMode genomeGenerate
-            --genomeFastaFiles '${all_fasta_source.fields.path}'
-            --genomeDir '${target_directory}/${subdir}'
-            #if str($GTFconditional.GTFselect) == "withGTF":
-                --sjdbGTFfile '${GTFconditional.sjdbGTFfile}'
-                --sjdbOverhang '${GTFconditional.sjdbOverhang}'
-            #end if
-            --runThreadN \${GALAXY_SLOTS:-2} &&
+STAR
+--runMode genomeGenerate
+--genomeFastaFiles '${all_fasta_source.fields.path}'
+--genomeDir '${target_directory}/${subdir}'
+--limitGenomeGenerateRAM \${GALAXY_MEMORY_BYTES}
+#if str($GTFconditional.GTFselect) == "withGTF":
+    --sjdbGTFfile '${GTFconditional.sjdbGTFfile}'
+    --sjdbOverhang ${GTFconditional.sjdbOverhang}
+#end if
+#if str($advanced_options.advanced_options_selector) == "advanced":
+    --genomeSAindexNbases ${advanced_options.genomeSAindexNbases}
+    --genomeChrBinNbits ${advanced_options.genomeChrBinNbits}
+    --genomeSAsparseD ${advanced_options.genomeSAsparseD}
+#end if
+--runThreadN \${GALAXY_SLOTS:-2} &&

-        python ${__tool_directory__}/rna_star_index_builder.py
-            --config-file '${out_file}'
-            --value '${all_fasta_source.fields.value}'
-            --dbkey '${all_fasta_source.fields.dbkey}'
-            #if $name:
-                --name '$name'
-            #else
-                --name '${all_fasta_source.fields.name}'
-            #end if
-            #if str($GTFconditional.GTFselect) == "withGTF":
-                --withGTF 1
-            #end if
-            --data-table 'rnastar_index2'
-            --subdir '${subdir}'
+python '${__tool_directory__}/rna_star_index_builder.py'
+--config-file '${out_file}'
+--value '${all_fasta_source.fields.value}'
+--dbkey '${all_fasta_source.fields.dbkey}'
+#if $name:
+    --name '$name'
+#else
+    --name '${all_fasta_source.fields.name}'
+#end if
+#if str($GTFconditional.GTFselect) == "withGTF":
+    --withGTF 1
+#end if
+--data-table rnastar_index2
+--subdir '${subdir}'
     ]]></command>
     <inputs>
         <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
             <options from_data_table="all_fasta"/>
         </param>
-        <param name="name"
-               type="text"
-               value=""
-               label="Informative name for sequence index"
-               help="By using different settings, you may have several indices per reference genome. Give an appropriate description to the index to distinguish between indices"/>
+        <param name="name" type="text" value="" label="Informative name for sequence index"
+            help="By using different settings, you may have several indices per reference genome. Give an appropriate description to the index to distinguish between indices"/>
         <conditional name="GTFconditional">
             <param name="GTFselect" type="select" label="Reference genome with or without an annotation" help="Must the index have been created WITH a GTF file (if not you can specify one afterward).">
                 <option value="withoutGTF">use genome reference without builtin gene-model</option>
@@ -60,6 +69,30 @@
             </when>
             <when value="withoutGTF" />
         </conditional>
+        <conditional name="advanced_options">
+            <param name="advanced_options_selector" type="select" label="Advanced options">
+                <option value="default" selected="true">Use default options</option>
+                <option value="advanced">Set advanced options</option>
+            </param>
+            <when value="default" />
+            <when value="advanced">
+                <param argument="--genomeSAindexNbases" type="integer" min="1" value="14"
+                    label="Length (bases) of the SA pre-indexing string"
+                    help="Typically between 10 and 15. Longer strings will use much more memory, but allow
+                        faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled
+                        down to min(14, log2(GenomeLength)/2 - 1). For example, for 1 megaBase genome, this is
+                        equal to 9, for 100 kiloBase genome, this is equal to 7."/>
+                <param argument="--genomeChrBinNbits" type="integer" min="1" value="18"
+                    label="Log2(chrBin), where chrBin is the size of the bins for genome storage"
+                    help="Each chromosome will occupy an integer number of bins. For a genome with large number
+                        of contigs, it is recommended to scale this parameter as min(18,
+                        log2[max(GenomeLength/NumberOfReferences,ReadLength)]). For example, for 3 gigaBase
+                        genome with 100,000 chromosomes/scaffolds, this is equal to 15."/>
+                <param argument="--genomeSAsparseD" type="integer" min="1" value="1" label="Suffix array sparsity"
+                    help="The distance between indices: use bigger numbers to decrease needed RAM at the cost of
+                        mapping speed reduction"/>
+            </when>
+        </conditional>
     </inputs>

     <outputs>
@@ -79,11 +112,9 @@
     </tests>
     -->

-    <help>
-
+    <help><![CDATA[
 .. class:: infomark

-<![CDATA[
 *What it does*

 This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner.
@@ -95,7 +126,7 @@
 To run efficiently, RNA-STAR requires enough free memory to
 hold the SA-indexed reference genome in RAM. For Human Genome hg19 this
 index is about 27GB and running RNA-STAR requires approximately ~30GB of RAM.
-For custom genomes, the rule of thub is to multiply the size of the
+For custom genomes, the rule of thumb is to multiply the size of the
 reference FASTA file by 9 to estimated required amount of RAM.

 *Note on sjdbOverhang*
@@ -126,8 +157,7 @@
     Cheers
     Alex

-**Notice:** If you leave name, description, or id blank, it will be generated automatically.
-]]>
-    </help>
+**Notice:** If you leave name, description, or id blank, it will be generated automatically.
+    ]]></help>
     <expand macro="citations" />
 </tool>