25
|
1 <tool id="rna_star" name="rnastar" version="2.4.0d">
|
15
|
2 <description>Gapped-read mapper for RNA-seq data</description>
|
|
<requirements>
    <!-- Packages this wrapper needs on the PATH: STAR for the alignment
         and samtools for the SAM to sorted-BAM conversion in the command. -->
    <requirement version="2.4.0d" type="package">rnastar</requirement>
    <requirement version="0.1.19" type="package">samtools</requirement>
</requirements>
|
|
<command>
    ##
    ## Cheetah template for the job command line. Steps, in order:
    ##   1. (genome from history only) build a temporary STAR index in the job directory,
    ##   2. run the STAR alignment,
    ##   3. convert the SAM output(s) to sorted BAM with samtools.
    ## Thread counts follow GALAXY_SLOTS when the job runner sets it and
    ## otherwise fall back to the values this wrapper used to hard-code.
    ##

    ##
    ## Run STAR.
    ##
    #if str($refGenomeSource.genomeSource) == 'history':
        mkdir -p tempstargenomedir; STAR --runMode genomeGenerate --genomeDir "tempstargenomedir" --genomeFastaFiles "$refGenomeSource.ownFile" --runThreadN \${GALAXY_SLOTS:-2}
        ## An unset optional dataset renders as the string 'None'.
        #if str($refGenomeSource.geneModel) != 'None':
            --sjdbOverhang "100" --sjdbGTFfile "$refGenomeSource.geneModel"
            #if str($refGenomeSource.geneModel.ext) == 'gff3':
                --sjdbGTFtagExonParentTranscript Parent
            #end if
        #end if
        ;
    #end if
    STAR
    ## Can adjust this as appropriate for the system.
    --genomeLoad NoSharedMemory
    #if str($refGenomeSource.genomeSource) == 'history':
        --genomeDir "tempstargenomedir"
    #else
        ## Directory of the selected built-in index, taken from the path column
        ## of the rnastar_index data table.
        --genomeDir "$refGenomeSource.index.fields.path"
    #end if
    --readFilesIn "$singlePaired.input1"
    #if str($singlePaired.sPaired) == "paired":
        "$singlePaired.input2"
    #end if
    --runThreadN \${GALAXY_SLOTS:-4}
    #if str($params.settingsType) == "full":
        --chimSegmentMin $params.chim_segment_min
        --chimScoreMin $params.chim_score_min
    #end if

    ## may or may not need to generate SAM tags and handle non-canonicals for Cufflinks tools.
    $outSAMstrandField $outFilterIntronMotifs $outSAMattributes

    ;
    ##
    ## BAM conversion.
    ##

    ## Convert aligned reads.
    samtools view -Shb Aligned.out.sam | samtools sort - AlignedSorted 2>/dev/null

    ## Convert chimeric reads.
    ## The parameter is cast explicitly so the numeric comparison does not depend
    ## on how the parameter wrapper object compares with an int.
    #if str($params.settingsType) == "full" and int(str($params.chim_segment_min)) > 0:
        ; samtools view -Shb Chimeric.out.sam | samtools sort - ChimericSorted 2>/dev/null
    #end if
</command>
|
|
55
|
|
<stdio>
    <!-- Catch-all rule: any text on stdout or stderr is reported at
         warning level. -->
    <regex source="both"
           match=".*"
           level="warning"
           description="Some stderr/stdout text"/>
</stdio>
|
|
59
|
|
<inputs>
    <!-- Free-text run label; it is substituted into every output dataset label.
         The sanitizer drops every character other than letters, digits and "_". -->
    <param name="jobName" type="text" size="120" value="rna-star run" label="Job narrative (added to output names)"
           help="Only letters, numbers and underscores (_) will be retained in this field">
        <sanitizer invalid_char="">
            <valid initial="string.letters,string.digits"><add value="_" /> </valid>
        </sanitizer>
    </param>

    <!-- FASTQ input(s) and options specifically for paired-end data. -->
    <conditional name="singlePaired">
        <param name="sPaired" type="select" label="Single ended or mate-pair ended reads in this library?">
            <option value="single" selected="true">Single-end</option>
            <option value="paired">Paired-end</option>
        </param>
        <when value="single">
            <param format="fastqsanger,fastq,fasta" name="input1" type="data" label="RNA-Seq FASTQ file"
                   help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
        </when>
        <when value="paired">
            <param format="fastqsanger,fastq,fasta" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads"
                   help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
            <param format="fastqsanger,fastq,fasta" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads"
                   help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
        </when>
    </conditional>

    <!-- Genome source: a pre-built index from the rnastar_index data table, or a
         FASTA from the history that the command indexes before aligning. -->
    <conditional name="refGenomeSource">
        <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
            <option value="indexed" selected="true">Use a built-in index</option>
            <option value="history">Index and use a genome fasta file from my current history</option>
        </param>
        <when value="indexed">
            <param name="index" type="select" label="Select a reference genome">
                <options from_data_table="rnastar_index">
                    <filter type="sort_by" column="2"/>
                    <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                </options>
            </param>
        </when>
        <when value="history">
            <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
            <param name="geneModel" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions. Leave blank for none"
                   optional="true" help="Optional. If supplied, the index file will retain exon junction information for mapping splices" />
        </when>
    </conditional>

    <!-- The three selects below carry complete STAR command-line fragments as
         their option values; the command template inserts them verbatim. -->
    <param name="outSAMattributes" type="select" label="Include extra sam attributes for downstream processing">
        <option value="--outSAMattributes Standard">Standard - eg for old Samtools downstream</option>
        <option value="--outSAMattributes All" selected="true">All modern Samtools attributes - see below</option>
    </param>
    <!-- Label distinguishes this select from outSAMattributes above; the two
         previously showed the same text in the form. -->
    <param name="outSAMstrandField" type="select" label="Include strand field flag XS">
        <option value="--outSAMstrandField intronMotif" selected="true">Add XS for cufflinks</option>
        <option value="">No XS added to sam output</option>
    </param>
    <param name="outFilterIntronMotifs" type="select" label="Canonical junction preparation for unstranded data">
        <option value="">No special handling - all non-canonical junctions passed through</option>
        <option value="--outFilterIntronMotifs RemoveNoncanonical" selected="true">Remove all non-canonical junctions for eg cufflinks</option>
        <option value="--outFilterIntronMotifs RemoveNoncanonicalUnannotated">Remove only unannotated non-canonical junctions for eg cufflinks</option>
    </param>

    <!-- Parameter settings. -->
    <conditional name="params">
        <param name="settingsType" type="select" label="Settings to use" help="You can use the default settings or set custom values for any STAR parameter.">
            <option value="preSet" selected="true">Use Defaults</option>
            <option value="full">Full parameter list</option>
        </param>
        <when value="preSet" />
        <!-- Full/advanced params. -->
        <when value="full">
            <param name="chim_segment_min" type="integer" min="0" value="0" label="Minimum chimeric segment length" />
            <param name="chim_score_min" type="integer" min="0" value="0" label="Minimum total (summed) score of the chimeric segments" />
        </when>
    </conditional>
</inputs>
|
|
132
|
|
<outputs>
    <!-- Every dataset is collected from a fixed file name in the job working
         directory. Each output except the log carries the same dbkey action:
         for a built-in index the dbkey is read from column 1 of the
         rnastar_index row whose column 0 matches the selected index; for a
         history genome it is copied from the reference FASTA dataset. -->

    <!-- STAR final log. -->
    <data name="output_log" format="txt" label="${jobName}.log" from_work_dir="Log.final.out"/>

    <!-- Chimeric junctions; only created when chimeric detection is switched on. -->
    <data name="chimeric_junctions" format="interval"
          label="${jobName}_starchimjunc.bed"
          from_work_dir="Chimeric.out.junction">
        <filter>(params['settingsType'] == 'full' and params['chim_segment_min'] > 0)</filter>
        <actions>
            <conditional name="refGenomeSource.genomeSource">
                <when value="indexed">
                    <action type="metadata" name="dbkey">
                        <option type="from_data_table" name="rnastar_index" column="1" offset="0">
                            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                            <filter type="param_value" ref="refGenomeSource.index" column="0"/>
                        </option>
                    </action>
                </when>
                <when value="history">
                    <action type="metadata" name="dbkey">
                        <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
                    </action>
                </when>
            </conditional>
        </actions>
    </data>

    <!-- Sorted BAM of chimeric reads; same creation condition as the junctions above. -->
    <data name="chimeric_reads" format="bam"
          label="${jobName}_starmappedchim.bam"
          from_work_dir="ChimericSorted.bam">
        <filter>(params['settingsType'] == 'full' and params['chim_segment_min'] > 0)</filter>
        <actions>
            <conditional name="refGenomeSource.genomeSource">
                <when value="indexed">
                    <action type="metadata" name="dbkey">
                        <option type="from_data_table" name="rnastar_index" column="1" offset="0">
                            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                            <filter type="param_value" ref="refGenomeSource.index" column="0"/>
                        </option>
                    </action>
                </when>
                <when value="history">
                    <action type="metadata" name="dbkey">
                        <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
                    </action>
                </when>
            </conditional>
        </actions>
    </data>

    <!-- Splice junction table written by STAR. -->
    <data name="splice_junctions" format="interval"
          label="${jobName}_starsplicejunct.bed"
          from_work_dir="SJ.out.tab">
        <actions>
            <conditional name="refGenomeSource.genomeSource">
                <when value="indexed">
                    <action type="metadata" name="dbkey">
                        <option type="from_data_table" name="rnastar_index" column="1" offset="0">
                            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                            <filter type="param_value" ref="refGenomeSource.index" column="0"/>
                        </option>
                    </action>
                </when>
                <when value="history">
                    <action type="metadata" name="dbkey">
                        <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
                    </action>
                </when>
            </conditional>
        </actions>
    </data>

    <!-- Sorted BAM of the aligned reads. -->
    <data name="mapped_reads" format="bam"
          label="${jobName}_starmapped.bam"
          from_work_dir="AlignedSorted.bam">
        <actions>
            <conditional name="refGenomeSource.genomeSource">
                <when value="indexed">
                    <action type="metadata" name="dbkey">
                        <option type="from_data_table" name="rnastar_index" column="1" offset="0">
                            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                            <filter type="param_value" ref="refGenomeSource.index" column="0"/>
                        </option>
                    </action>
                </when>
                <when value="history">
                    <action type="metadata" name="dbkey">
                        <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
                    </action>
                </when>
            </conditional>
        </actions>
    </data>
</outputs>
|
25
|
<tests>
    <!-- Single-end run against a genome FASTA from the history, using the
         default parameter settings. The log is compared by diff with up to
         10 differing lines allowed; the junction table and BAM are compared
         by size only (delta 200). -->
    <test>
        <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger" />
        <param name="jobName" value="rnastar_test" />
        <param name="genomeSource" value="history" />
        <param name="ownFile" value="tophat_test.fa" />
        <param name="sPaired" value="single" />
        <param name="outSAMattributes" value="--outSAMattributes All" />
        <param name="outSAMstrandField" value="--outSAMstrandField intronMotif" />
        <param name="outFilterIntronMotifs" value="--outFilterIntronMotifs RemoveNoncanonical" />
        <output name="output_log" file="rnastar_test.log" compare="diff" lines_diff="10"/>
        <output name="splice_junctions" file="rnastar_test_splicejunctions.bed" compare="sim_size" delta="200"/>
        <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="200" />
    </test>
</tests>
|
15
|
232 <help>
|
|
233
|
|
234 **What it does**
|
|
235 Runs the rna star gapped aligner. Suited to paired or single end rna-seq.
|
|
236
|
|
237 8.2: SAM alignments
|
|
238
|
|
239 The number of loci Nmap a read maps to (multi-mapping) is given by NH:i: field.
|
|
240 The mapping quality MAPQ (column 5) is 255 for uniquely mapping reads, and int(-10*log10(1-1/Nmap)) for
|
|
241 multi-mapping reads. This scheme is same as the one used by Tophat and is compatible with Cufflinks.
|
|
242
|
|
243 For multi-mappers, all alignments except one are marked with 0x100 (secondary alignment) in the FLAG
|
|
244 column 2. The un-marked alignment is either the best one (i.e. highest scoring), or is randomly selected from
|
|
245 the alignments of equal quality.
|
|
246
|
|
247 8.2.1: Standard SAM attributes
|
|
248 With default --outSAMattributes Standard option the following SAM attributes will be generated:
|
|
249
|
|
250 Column 12: NH: number of loci a read (pair) maps to
|
|
251 Column 13: IH: alignment index for all alignments of a read
|
|
252 Column 14: aS: alignment score
|
|
253 Column 15: nM: number of mismatches (does not include indels)
|
|
254
|
|
255 8.2.2: Extra SAM attributes
|
|
256 If --outSAMattributes All option is used, the following additional attributes will be output:
|
|
257
|
|
258 Column 16: jM:B:c,M1,M2,... Intron motifs for all junctions (i.e. N in CIGAR):
|
|
259 0: non-canonical; 1:GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT.
|
|
260
|
|
261 If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.
|
|
262 Column 17: jI:B:I,Start1,End1,Start2,End2,... Start and End of introns for all junctions (1-based)
|
|
263
|
|
264 Note, that samtools 0.1.18 or later have to be used with these extra attributes.
|
|
265
|
|
266
|
|
267 8.2.3: XS SAM strand attribute for Cufflinks/Cuffdiff
|
|
268
|
|
269 If you have un-stranded RNA-seq data, and wish to run Cufflinks/Cuffdiff on STAR alignments, you will
|
|
270 need to run STAR with --outSAMstrandField intronMotif option, which will generate the XS
|
|
271 strand attribute for all alignments that contain splice junctions. The spliced alignments that have undefined
|
|
272 strand (i.e. containing only non-canonical junctions) will be suppressed.
|
|
273
|
|
274 If you have stranded RNA-seq data, you do not need to use any specific STAR options. Instead, you need
|
|
275 to run Cufflinks with the library option --library-type options. For example, cufflinks with
|
25
|
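276 library-type fr-firststrand should be used for the standard dUTP protocol, including Illumina's stranded Tru-Seq. This option is needed only for the Cufflinks run, not for the STAR run.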
|
15
|
277
|
25
|
278 It is recommended to remove the non-canonical junctions for Cufflinks runs using one of the following options:
|
|
279
|
15
|
280
|
|
281 --outFilterIntronMotifs RemoveNoncanonical
|
|
282 filter out alignments that contain non-canonical junctions
|
|
283
|
|
284 OR
|
|
285
|
|
286 --outFilterIntronMotifs RemoveNoncanonicalUnannotated
|
|
287 filter out alignments that contain non-canonical unannotated junctions
|
|
288 when using annotated splice junctions database. The annotated non-
|
|
289 canonical junctions will be kept.
|
|
290
|
|
291
|
|
292 **Attributions**
|
|
293
|
|
294 Note that each component has its own license. Good luck with figuring out your obligations.
|
|
295
|
|
296 rna_star - see the web site at rna_star_
|
|
297
|
|
298 For details, please see the rna_starMS_
|
|
299 "STAR: ultrafast universal RNA-seq aligner"
|
|
300 A. Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635
|
|
301
|
|
302 Galaxy_ (that's what you are using right now!) for gluing everything together
|
|
303
|
|
304 Most of the work for this wrapper XML is Jeremy Goecks' original STAR_ wrapper
|
|
305
|
|
306 Minor tweaks to output names to suit our downstream purposes, toolshed automated dependencies
|
|
307 and odds and ends of other code and documentation comprising this tool was
|
|
308 written by Ross Lazarus and that part is licensed_ the same way as other rgenetics artefacts
|
|
309
|
|
310 .. _STAR: https://bitbucket.org/jgoecks/jeremys-code/raw/fa1930a689b8e2f6b59cc1706e5ba0ed8ad357be/galaxy/tool-wrappers/star.xml
|
|
311 .. _licensed: http://creativecommons.org/licenses/by-nc-nd/3.0/
|
|
312 .. _rna_star: http://code.google.com/p/rna-star/
|
|
313 .. _rna_starMS: http://bioinformatics.oxfordjournals.org/content/29/1/15.full
|
|
314 .. _Galaxy: http://getgalaxy.org
|
|
315
|
|
316 </help>
|
25
|
<citations>
    <!-- STAR paper (Dobin et al., Bioinformatics). The element body must be the
         bare DOI with no "doi:" prefix, because it is used as the identifier
         when the citation is looked up. -->
    <citation type="doi">10.1093/bioinformatics/bts635</citation>
</citations>
|
15
|
320 </tool>
|
25
|
321
|