changeset 0:624238a8440f draft default tip

Uploaded
author fubar
date Mon, 29 Sep 2014 20:50:45 -0400
parents
children
files rg_rnaStar.xml tool-data/rnastar_indexes.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 4 files changed, 322 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rg_rnaStar.xml	Mon Sep 29 20:50:45 2014 -0400
@@ -0,0 +1,289 @@
+<tool id="rna_star" name="rnastar" version="2.4.0d">
+    <description>Gapped-read mapper for RNA-seq data</description>
+    <requirements>
+        <!-- Packages needed at run time: the STAR aligner and samtools, which the command uses for SAM to BAM conversion. -->
+        <requirement version="2.4.0d" type="package">rnastar</requirement>
+        <requirement version="0.1.19" type="package">samtools</requirement>
+    </requirements>
+    <command>
+    ##
+    ## Cheetah template for the job command line. Steps are chained with a
+    ## logical AND so that a failing step stops the job instead of letting
+    ## samtools run on missing or partial output.
+    ##
+
+    ##
+    ## Reference from the history: no pre-built index exists, so build a
+    ## temporary one in the job working directory first.
+    ##
+    #if str($refGenomeSource.genomeSource) == "history":
+        mkdir -p tempstargenomedir &amp;&amp;
+        STAR
+        --runMode genomeGenerate
+        --genomeDir tempstargenomedir
+        --genomeFastaFiles "$refGenomeSource.ownFile"
+        --runThreadN \${GALAXY_SLOTS:-4}
+        &amp;&amp;
+    #end if
+
+    ##
+    ## Run STAR.
+    ##
+
+    STAR
+    ## Can adjust this as appropriate for the system.
+    --genomeLoad NoSharedMemory
+
+    ## refGenomeSource.index only exists in the "indexed" branch of the conditional.
+    #if str($refGenomeSource.genomeSource) == "history":
+        --genomeDir tempstargenomedir
+    #else:
+        --genomeDir "${refGenomeSource.index.fields.path}"
+    #end if
+
+    ## input1 is declared inside the singlePaired conditional, so address it there.
+    --readFilesIn "$singlePaired.input1"
+    #if str($singlePaired.sPaired) == "paired":
+        "$singlePaired.input2"
+    #end if
+
+    ## Use the slots allotted by Galaxy; fall back to 4 threads when unset.
+    --runThreadN \${GALAXY_SLOTS:-4}
+
+    #if str($params.settingsType) == "full":
+        --chimSegmentMin $params.chim_segment_min
+        --chimScoreMin $params.chim_score_min
+    #end if
+
+    ## may or may not need to generate SAM tags and handle non-canonicals for Cufflinks tools.
+    ${outSAMstrandField} ${outFilterIntronMotifs} ${outSAMattributes}
+
+    &amp;&amp;
+
+    ##
+    ## BAM conversion.
+    ##
+
+    ## Convert aligned reads.
+    samtools view -Shb Aligned.out.sam | samtools sort - Aligned.out
+
+    ## Convert chimeric reads. The value is converted to an int explicitly so the
+    ## comparison is numeric; chim_segment_min is only read in the "full" branch.
+    #if str($params.settingsType) == "full" and int(str($params.chim_segment_min)) > 0:
+        &amp;&amp; samtools view -Shb Chimeric.out.sam | samtools sort - Chimeric.out
+    #end if
+    </command>
+
+    <stdio>
+        <!-- Treat any non-zero exit status as a job failure. -->
+        <exit_code range="1:" level="fatal" description="STAR or samtools exited with a non-zero status"/>
+        <!-- Any text on stdout or stderr is surfaced as a warning only. -->
+        <regex match=".*" source="both" level="warning" description="Some stderr/stdout text"/>
+    </stdio>
+
+    <inputs>
+        <!-- Free-text run label; the output dataset labels reference it as jobName. -->
+        <param name="jobName"
+               type="text"
+               size="120"
+               value="rna-star run"
+               label="Job narrative (added to output names)"
+               help="Only letters, numbers and underscores (_) will be retained in this field">
+            <!-- Characters outside the whitelist are replaced by the empty string. -->
+            <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits">
+                    <add value="_"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <!-- Read library layout: one read dataset for single-end data, two (forward and reverse) for paired-end. -->
+        <conditional name="singlePaired">
+            <param name="sPaired" type="select" label="Single ended or mate-pair ended reads in this library?">
+                <option value="single" selected="true">Single-end</option>
+                <option value="paired">Paired-end</option>
+            </param>
+            <when value="single">
+                <param name="input1"
+                       type="data"
+                       format="fastqsanger,fastq,fasta"
+                       label="RNA-Seq FASTQ file"
+                       help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
+            </when>
+            <when value="paired">
+                <param name="input1"
+                       type="data"
+                       format="fastqsanger,fastq,fasta"
+                       label="RNA-Seq FASTQ file, forward reads"
+                       help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
+                <param name="input2"
+                       type="data"
+                       format="fastqsanger,fastq,fasta"
+                       label="RNA-Seq FASTQ file, reverse reads"
+                       help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
+            </when>
+        </conditional>
+
+        <!-- Reference genome: either an entry of the rnastar_indexes data table or a FASTA dataset from the history. -->
+        <conditional name="refGenomeSource">
+            <param name="genomeSource"
+                   type="select"
+                   label="Will you select a reference genome from your history or use a built-in index?"
+                   help="Built-ins were indexed using default options">
+                <option value="indexed">Use a built-in index</option>
+                <option value="history">Use one from the history</option>
+            </param>
+            <when value="indexed">
+                <param name="index" type="select" label="Select a reference genome">
+                    <options from_data_table="rnastar_indexes">
+                        <!-- Column 2 of the table is the display name. -->
+                        <filter type="sort_by" column="2"/>
+                        <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ownFile"
+                       type="data"
+                       format="fasta"
+                       metadata_name="dbkey"
+                       label="Select the reference genome"/>
+            </when>
+        </conditional>
+        <!-- The selected option value is the literal STAR command-line fragment inserted into the command. -->
+        <param name="outSAMattributes"
+               type="select"
+               label="Include extra sam attributes for downstream processing">
+            <option value="--outSAMattributes Standard">Standard - eg for old Samtools downstream</option>
+            <option value="--outSAMattributes All" selected="true">All modern Samtools attributes - see below</option>
+        </param>
+        <!-- Controls whether STAR adds the XS strand attribute. The option value is the literal
+             command-line fragment inserted into the command; the empty value adds nothing.
+             The label now names the strand flag; it used to repeat the label of the
+             SAM attributes selector, leaving two identically labelled fields in the form. -->
+        <param name="outSAMstrandField"
+               type="select"
+               label="Include strand field flag XS">
+            <option value="--outSAMstrandField intronMotif" selected="true">Add XS for cufflinks</option>
+            <option value="">No XS added to sam output</option>
+        </param>
+        <!-- Junction filtering. The option value is the literal command-line fragment inserted into the command; the empty value adds nothing. -->
+        <param name="outFilterIntronMotifs"
+               type="select"
+               label="Canonical junction preparation for unstranded data">
+            <option value="">No special handling - all non-canonical junctions passed through</option>
+            <option value="--outFilterIntronMotifs RemoveNoncanonical" selected="true">Remove all non-canonical junctions for eg cufflinks</option>
+            <option value="--outFilterIntronMotifs RemoveNoncanonicalUnannotated">Remove only unannotated non-canonical junctions for eg cufflinks</option>
+        </param>
+        <!-- Parameter settings: defaults, or the chimeric detection thresholds exposed in the full list. -->
+        <conditional name="params">
+            <param name="settingsType"
+                   type="select"
+                   label="Settings to use"
+                   help="You can use the default settings or set custom values for any STAR parameter.">
+                <option value="preSet" selected="true">Use Defaults</option>
+                <option value="full">Full parameter list</option>
+            </param>
+            <!-- Defaults: no extra parameters are exposed. -->
+            <when value="preSet"/>
+            <!-- Full/advanced params. -->
+            <when value="full">
+                <param name="chim_segment_min"
+                       type="integer"
+                       min="0"
+                       value="0"
+                       label="Minimum chimeric segment length"/>
+                <param name="chim_score_min"
+                       type="integer"
+                       min="0"
+                       value="0"
+                       label="Minimum total (summed) score of the chimeric segments"/>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+       <!-- STAR run summary collected from the working directory. The label substitutes the
+            jobName parameter; without the leading dollar sign the literal text {jobName} was emitted. -->
+       <data format="txt" name="output_log" label="${on_string}_${jobName}.log" from_work_dir="Log.final.out"/>
+       <!-- Chimeric junction table. Kept only when the full parameter list is used with a
+            minimum chimeric segment length above zero. The label substitutes the jobName parameter. -->
+       <data format="interval" name="chimeric_junctions" label="${on_string}_${jobName}_starchimjunc.bed" from_work_dir="Chimeric.out.junction">
+          <filter>(params['settingsType'] == 'full' and params['chim_segment_min'] > 0)</filter>
+          <actions>
+             <!-- Set the dbkey metadata from whichever reference source was chosen. -->
+             <conditional name="refGenomeSource.genomeSource">
+             <when value="indexed">
+               <action type="metadata" name="dbkey">
+                <!-- The table name must match the one the index selector reads (rnastar_indexes);
+                     column 0 is matched against the selected value and column 1 supplies the dbkey. -->
+                <option type="from_data_table" name="rnastar_indexes" column="1" offset="0">
+                 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+                  <filter type="param_value" ref="refGenomeSource.index" column="0"/>
+                </option>
+               </action>
+             </when>
+             <when value="history">
+               <action type="metadata" name="dbkey">
+                 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
+               </action>
+             </when>
+             </conditional>
+          </actions>
+       </data>
+       <!-- Sorted BAM of chimeric reads. Kept only when the full parameter list is used with a
+            minimum chimeric segment length above zero. -->
+       <data format="bam" name="chimeric_reads" label="${on_string}_${jobName}_starmappedchim.bam" 
+                    from_work_dir="Chimeric.out.bam">
+         <filter>(params['settingsType'] == 'full' and params['chim_segment_min'] > 0)</filter>
+          <actions>
+             <!-- Set the dbkey metadata from whichever reference source was chosen. -->
+             <conditional name="refGenomeSource.genomeSource">
+             <when value="indexed">
+               <action type="metadata" name="dbkey">
+                <!-- The table name must match the one the index selector reads (rnastar_indexes);
+                     column 0 is matched against the selected value and column 1 supplies the dbkey. -->
+                <option type="from_data_table" name="rnastar_indexes" column="1" offset="0">
+                 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+                  <filter type="param_value" ref="refGenomeSource.index" column="0"/>
+                </option>
+               </action>
+             </when>
+             <when value="history">
+               <action type="metadata" name="dbkey">
+                 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
+               </action>
+             </when>
+             </conditional>
+          </actions>
+        </data>
+        <!-- Splice junction table collected from SJ.out.tab in the working directory. -->
+        <data format="interval" name="splice_junctions" label="${on_string}_${jobName}_starsplicejunct.bed" 
+                   from_work_dir="SJ.out.tab">
+          <actions>
+             <!-- Set the dbkey metadata from whichever reference source was chosen. -->
+             <conditional name="refGenomeSource.genomeSource">
+             <when value="indexed">
+               <action type="metadata" name="dbkey">
+                <!-- The table name must match the one the index selector reads (rnastar_indexes);
+                     column 0 is matched against the selected value and column 1 supplies the dbkey. -->
+                <option type="from_data_table" name="rnastar_indexes" column="1" offset="0">
+                 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+                  <filter type="param_value" ref="refGenomeSource.index" column="0"/>
+                </option>
+               </action>
+             </when>
+             <when value="history">
+               <action type="metadata" name="dbkey">
+                 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
+               </action>
+             </when>
+             </conditional>
+          </actions>
+        </data>
+        <!-- Sorted BAM of aligned reads collected from Aligned.out.bam in the working directory. -->
+        <data format="bam" name="mapped_reads" label="${on_string}_${jobName}_starmapped.bam" 
+                    from_work_dir="Aligned.out.bam">
+          <actions>
+             <!-- Set the dbkey metadata from whichever reference source was chosen. -->
+             <conditional name="refGenomeSource.genomeSource">
+             <when value="indexed">
+               <action type="metadata" name="dbkey">
+                <!-- The table name must match the one the index selector reads (rnastar_indexes);
+                     column 0 is matched against the selected value and column 1 supplies the dbkey. -->
+                <option type="from_data_table" name="rnastar_indexes" column="1" offset="0">
+                 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+                  <filter type="param_value" ref="refGenomeSource.index" column="0"/>
+                </option>
+               </action>
+             </when>
+             <when value="history">
+               <action type="metadata" name="dbkey">
+                 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
+               </action>
+             </when>
+             </conditional>
+          </actions>
+        </data>
+    </outputs>
+<help>
+
+**What it does**
+Runs the rna star gapped aligner. Suited to paired or single end rna-seq.
+
+8.2: SAM alignments
+
+The number of loci Nmap a read maps to (multi-mapping) is given by NH:i: field.
+The mapping quality MAPQ (column 5) is 255 for uniquely mapping reads, and int(-10*log10(1-1/Nmap)) for
+multi-mapping reads. This scheme is the same as the one used by Tophat and is compatible with Cufflinks.
+
+For multi-mappers, all alignments except one are marked with 0x100 (secondary alignment) in the FLAG
+column 2. The un-marked alignment is either the best one (i.e. highest scoring), or is randomly selected from
+the alignments of equal quality.
+
+8.2.1: Standard SAM attributes
+With default --outSAMattributes Standard option the following SAM attributes will be generated:
+
+Column 12: NH: number of loci a read (pair) maps to
+Column 13: IH: alignment index for all alignments of a read
+Column 14: aS: alignment score
+Column 15: nM: number of mismatches (does not include indels)
+
+8.2.2: Extra SAM attributes
+If --outSAMattributes All option is used, the following additional attributes will be output:
+
+Column 16: jM:B:c,M1,M2,... Intron motifs for all junctions (i.e. N in CIGAR):
+0: non-canonical; 1:GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT.
+
+If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.
+Column 17: jI:B:I,Start1,End1,Start2,End2,... Start and End of introns for all junctions (1-based)
+
+Note that samtools 0.1.18 or later has to be used with these extra attributes.
+
+
+8.2.3: XS SAM strand attribute for Cufflinks/Cuffdiff
+
+If you have un-stranded RNA-seq data, and wish to run Cufflinks/Cuffdiff on STAR alignments, you will
+need to run STAR with --outSAMstrandField intronMotif option, which will generate the XS
+strand attribute for all alignments that contain splice junctions. The spliced alignments that have undefined
+strand (i.e. containing only non-canonical junctions) will be suppressed.
+
+If you have stranded RNA-seq data, you do not need to use any specific STAR options. Instead, you need
+to run Cufflinks with the --library-type option. For example, cufflinks with
+library-type fr-firststrand should be used for the “standard” dUTP protocol.
+This option has to be used only for Cufflinks runs and not for STAR runs.
+
+It is recommended to remove the non-canonical junctions for Cufflinks runs using –
+
+--outFilterIntronMotifs RemoveNoncanonical
+filter out alignments that contain non-canonical junctions
+
+OR
+
+--outFilterIntronMotifs RemoveNoncanonicalUnannotated
+filter out alignments that contain non-canonical unannotated junctions
+when using annotated splice junctions database. The annotated non-
+canonical junctions will be kept.
+ 
+    
+**Attributions**
+
+Note that each component has its own license. Good luck with figuring out your obligations.
+
+rna_star - see the web site at rna_star_
+
+For details, please see the rna_starMS_
+"STAR: ultrafast universal RNA-seq aligner"
+A. Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635
+
+Galaxy_ (that's what you are using right now!) for gluing everything together 
+
+Most of the work for this wrapper XML is Jeremy Goecks' original STAR_ wrapper 
+
+Minor tweaks to output names to suit our downstream purposes, toolshed automated dependencies 
+and odds and ends of other code and documentation comprising this tool was 
+written by Ross Lazarus and that part is licensed_ the same way as other rgenetics artefacts
+
+.. _STAR: https://bitbucket.org/jgoecks/jeremys-code/raw/fa1930a689b8e2f6b59cc1706e5ba0ed8ad357be/galaxy/tool-wrappers/star.xml
+.. _licensed: http://creativecommons.org/licenses/by-nc-nd/3.0/
+.. _rna_star: http://code.google.com/p/rna-star/
+.. _rna_starMS: http://bioinformatics.oxfordjournals.org/content/29/1/15.full
+.. _Galaxy: http://getgalaxy.org
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rnastar_indexes.loc.sample	Mon Sep 29 20:50:45 2014 -0400
@@ -0,0 +1,11 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of rna-star indexed sequences data files. You will
+#need to create these data files and then create an rnastar_indexes.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The rnastar_indexes.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Mon Sep 29 20:50:45 2014 -0400
@@ -0,0 +1,7 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Data table backing the built-in index selector; lines starting with the comment character are ignored. -->
+    <table comment_char="#" name="rnastar_indexes">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/rnastar_indexes.loc"/>
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Sep 29 20:50:45 2014 -0400
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- Each package is provided by a separate tool shed repository pinned to a specific changeset. -->
+    <package name="rnastar" version="2.4.0d">
+        <repository name="package_rnastar_240d"
+                    owner="fubar"
+                    changeset_revision="2ab1b6182b63"
+                    toolshed="https://testtoolshed.g2.bx.psu.edu"
+                    prior_installation_required="True"/>
+    </package>
+    <package name="samtools" version="0.1.19">
+        <repository name="package_samtools_0_1_19"
+                    owner="iuc"
+                    changeset_revision="632f1a03db92"
+                    toolshed="https://testtoolshed.g2.bx.psu.edu"
+                    prior_installation_required="True"/>
+    </package>
+
+    <readme>
+Installs the STAR wrapper and dependency packages samtools and star - see https://code.google.com/p/rna-star/ 
+STAR is a very fast mapper for rna-seq giving junctions if the indexes are constructed with a junction library
+    </readme>
+
+</tool_dependency>