changeset 4:039890bdcb62 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit 93138516106c74b1ccc70d2d946e6eaa29c7c1a3-dirty
author wolma
date Wed, 16 Oct 2019 03:33:53 -0400
parents 80bbca356b76
children bb74adafc98f
files data_manager/macros.xml data_manager/rna_star_index_builder.py data_manager/rna_star_index_builder.xml data_manager_conf.xml
diffstat 4 files changed, 132 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/macros.xml	Mon Jun 24 12:27:02 2019 -0400
+++ b/data_manager/macros.xml	Wed Oct 16 03:33:53 2019 -0400
@@ -1,10 +1,11 @@
 <macros>
     <!-- REMEMBER to bump the version of rna_star_index_builder_data_manager
     whenever you make changes to the following two version tokens!
-    The data manager uses a symlink to this macro file to keep the versions in
-    sync. -->
+    The data manager uses a symlink to this macro file to keep the STAR and
+    the index versions in sync, but you should manually adjust the +galaxy
+    version number. -->
     <!-- STAR version to be used -->
-    <token name="@VERSION@">2.7.1a</token>
+    <token name="@VERSION@">2.7.2b</token>
     <!-- STAR index version compatible with this version of STAR
     This is the STAR version that introduced the index structure expected
     by the current version.
@@ -21,7 +22,7 @@
         </requirements>
     </xml>
 
-    <xml name="index_selection" token_with_gene_model="1">
+    <xml name="index_selection" token_with_gene_model="0">
         <param argument="--genomeDir" name="genomeDir" type="select"
         label="Select reference genome"
         help="If your genome of interest is not listed, contact the Galaxy team">
@@ -65,4 +66,100 @@
             </conditional>
         </actions>
     </xml>
+    <token name="@TEMPINDEX@"><![CDATA[
+    ## History reference selected: build a temporary STAR index in ./tempstargenomedir before mapping
+    #if str($refGenomeSource.geneSource) == 'history':
+        mkdir -p tempstargenomedir &&
+        STAR
+            --runMode genomeGenerate
+            --genomeDir 'tempstargenomedir'
+            --genomeFastaFiles '${refGenomeSource.genomeFastaFiles}'
+            ## Annotation options are passed only for with-gtf; a gff3 dataset additionally needs the Parent tag
+            #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
+                --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}'
+                --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
+                #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
+                    --sjdbGTFtagExonParentTranscript Parent
+                #end if
+            #end if
+            #if str($refGenomeSource.genomeSAindexNbases):
+                --genomeSAindexNbases '${refGenomeSource.genomeSAindexNbases}'
+            #end if
+            --runThreadN \${GALAXY_SLOTS:-4}
+        &&
+    #end if
+    ]]></token>
+    <token name="@REFGENOMEHANDLING@"><![CDATA[
+    --runThreadN \${GALAXY_SLOTS:-4}
+    --genomeLoad NoSharedMemory
+    --genomeDir
+    #if str($refGenomeSource.geneSource) == 'history':
+        tempstargenomedir
+    #else:
+        '${refGenomeSource.GTFconditional.genomeDir.fields.path}'
+        ## Built-in index without gene model: pass the annotation options only if a GTF/GFF3 dataset was supplied
+        #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf':
+            #if $refGenomeSource.GTFconditional.sjdbGTFfile:
+                --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}'
+                --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
+                #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
+                    --sjdbGTFtagExonParentTranscript Parent
+                #end if
+            #end if
+        #end if
+    #end if
+    ]]></token>
+    <xml name="stdio">
+        <stdio>
+            <regex level="fatal" source="both" match="FATAL error" />
+            <regex level="fatal" source="both" match="EXITING: FATAL INPUT ERROR:" />
+            <regex level="fatal" source="both" match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" />
+            <regex level="fatal" source="both" match="\[sam_read1\] missing header\? Abort!" />
+            <yield/>
+        </stdio>
+    </xml>
+    <xml name="refgenomehandling" >
+        <conditional name="refGenomeSource">
+            <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">
+                <option value="indexed" selected="true">Use a built-in index</option>
+                <option value="history">Use reference genome from history and create temporary index</option>
+            </param>
+            <when value="indexed">
+                <conditional name="GTFconditional">
+                    <param name="GTFselect" type="select"
+                           label="Reference genome with or without an annotation"
+                           help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions.">
+                        <option value="without-gtf" selected="true">use genome reference without builtin gene-model</option>
+                        <option value="with-gtf">use genome reference with builtin gene-model</option>
+                    </param>
+                    <when value="with-gtf">
+                        <expand macro="index_selection" with_gene_model="1" />
+                    </when>
+                    <when value="without-gtf">
+                        <expand macro="index_selection" with_gene_model="0" />
+                        <expand macro="@SJDBOPTIONS@" />
+                    </when>
+                </conditional>
+            </when>
+            <when value="history">
+                <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" />
+                <!-- Currently, this parameter is not exposed in the wrapper,
+                     but used only in the tests to avoid excessive index sizes for
+                     the tiny test genomes. -->
+                <param name="genomeSAindexNbases" type="hidden" value="" />
+                <conditional name="GTFconditional">
+                    <param name="GTFselect" type="select"
+                           label="Build index with or without known splice junctions annotation"
+                           help="To build an index with known splice junctions annotated, you will have to provide a GTF or GFF3 dataset that describes the gene models (the location of genes, transcripts and exons) known for the reference genome.">
+                        <option value="without-gtf">build index without gene-model</option>
+                        <option value="with-gtf">build index with gene-model</option>
+                    </param>
+                    <when value="with-gtf">
+                        <expand macro="@SJDBOPTIONS@" optional="false"/>
+                    </when>
+                    <when value="without-gtf" />
+                </conditional>
+            </when>
+        </conditional>
+    </xml>
 </macros>
--- a/data_manager/rna_star_index_builder.py	Mon Jun 24 12:27:02 2019 -0400
+++ b/data_manager/rna_star_index_builder.py	Wed Oct 16 03:33:53 2019 -0400
@@ -18,13 +18,29 @@
     args = parser.parse_args()
 
     if args.dbkey in [ None, '', '?' ]:
-        raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( args.dbkey ) )
+        raise Exception(
+            '"%s" is not a valid dbkey. You must specify a valid dbkey.'
+            % ( args.dbkey )
+        )
 
     with_gene_model = "0"
     if args.with_gene_model:
         with_gene_model = "1"
 
-    data_manager_dict = {'data_tables': {args.data_table: [dict({"value": args.value, "dbkey": args.dbkey, "name": args.name, "path": args.subdir, "with_gene_model": with_gene_model, "version": args.index_version} )]}}
+    data_manager_dict = {
+        'data_tables': {
+            args.data_table: [
+                {
+                    "value": args.value,
+                    "dbkey": args.dbkey,
+                    "name": args.name,
+                    "subdir": args.subdir,
+                    "with_gene_model": with_gene_model,
+                    "version": args.index_version
+                }
+            ]
+        }
+    }
     open( args.config_file, 'w' ).write( json.dumps( data_manager_dict ) )
 
 
--- a/data_manager/rna_star_index_builder.xml	Mon Jun 24 12:27:02 2019 -0400
+++ b/data_manager/rna_star_index_builder.xml	Wed Oct 16 03:33:53 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="2.7.1a" profile="17.01">
+<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="@IDX_VERSION@+galaxy1" profile="17.01">
     <description>builder</description>
 
     <macros>
@@ -14,17 +14,16 @@
     GALAXY_MEMORY_BYTES=\$((GALAXY_MEMORY_MB * 1000000)) ;
 fi ;
 
-#import json, os
-#set params = json.loads(open(str($out_file)).read())
-#set target_directory = $params['output_data'][0]['extra_files_path'].encode('ascii', 'replace')
-#set subdir = os.path.basename(target_directory)
+#import os
+#set $target_directory = str($out_file.extra_files_path)
+#set $subdir = os.path.basename($target_directory)
 
-mkdir -p '${target_directory}/${subdir}' &&
+mkdir '${target_directory}' &&
 
 STAR
 --runMode genomeGenerate
 --genomeFastaFiles '${all_fasta_source.fields.path}'
---genomeDir '${target_directory}/${subdir}'
+--genomeDir '${target_directory}'
 --limitGenomeGenerateRAM \${GALAXY_MEMORY_BYTES}
 #if $GTFconditional.GTFselect == "withGTF":
     --sjdbGTFfile '${GTFconditional.sjdbGTFfile}'
@@ -118,7 +117,10 @@
 
 *What it does*
 
-This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner.
+This is a Galaxy data manager tool for the gap-aware RNA aligner STAR.
+
+This version of the tool builds STAR indices of the format first introduced
+with STAR version @IDX_VERSION@.
 
 Please read the fine manual - that and the google group are the places to learn about the options above.
 
--- a/data_manager_conf.xml	Mon Jun 24 12:27:02 2019 -0400
+++ b/data_manager_conf.xml	Wed Oct 16 03:33:53 2019 -0400
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <data_managers>
-    <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder">
+    <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder" version="0.0.6">
         <data_table name="rnastar_index2_versioned">
             <output>
                 <column name="value" />
@@ -12,9 +12,9 @@
                             out_file.extra_files_path is used as base by default
                             if no source, eg for type=directory, then refers to base 
                         -->
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rnastar/${version}/${dbkey}/${value}</target>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rnastar/${version}/${dbkey}/${value}/${subdir}</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}/${subdir}</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
                 <column name="with_gene_model" />