0
|
1 <tool id="rsem_calculate_expression" name="RSEM calculate expression" version="1.1.17">
|
|
2 <description>RNA-Seq by Expectation-Maximization</description>
|
|
<!-- Packages (name and pinned version) that this tool declares as dependencies. -->
<requirements>
  <requirement type="package" version="1.1.17">rsem</requirement>
  <requirement type="package" version="0.1.19">samtools</requirement>
  <requirement type="package" version="1.0.0">bowtie</requirement>
</requirements>
|
|
<!--
  Cheetah template for the job command line.

  The shared options are emitted first. One of the four branches below
  (FASTQ or FASTA, single-end or paired-end) then appends the
  rsem-wrapper-1.1.17.pl arguments that match the selected input layout.

  Dataset parameters that live inside the "input" conditional are referenced by
  their fully qualified path (for example $input.fastqmatepair.fastq1), the same
  way $input.fastq_select is referenced, instead of relying on unqualified
  name lookup.

  NOTE(review): the template begins with "rsem-calculate-expression" while the
  wrapper script name only appears inside each branch. The token order is kept
  exactly as it was; confirm against the wrapper script that this is intended.
  NOTE(review): the "cimem" input is not passed anywhere in this template.

  Do not add XML comments inside the command element; use Cheetah "##" comments.
-->
<command interpreter="perl">
  rsem-calculate-expression
  --calc-ci $useci.ci
  --fragment-length-mean $fraglenmean
  --fragment-length-min $fraglenmin
  --fragment-length-sd $fraglensd
  --fragment-length-max $fraglenmax
  --bowtie-e $bowtie_e
  --bowtie-m $bowtie_m

  #if $input.format == "fastq"
    ## FASTQ, single-end reads
    #if $input.fastqmatepair.matepair == "single"
      rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype
      --seed-length $seedlength $input.fastq_select --estimate-rspd $rspd --forward-prob $fprob
      -p $cpus --bowtie-n $bowtie_mis --output-genome-bam
      --single_fastq $input.fastqmatepair.singlefastq
      --output $output --isoformfile $isoforms --bamfile $bam_res --log $log
      --sampling-for-bam $sampling_for_bam --reference ${index.fields.path}
    #end if
    ## FASTQ, paired-end reads
    #if $input.fastqmatepair.matepair == "paired"
      rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype
      --paired-end --seed-length $seedlength --estimate-rspd $rspd $input.fastq_select
      --forward-prob $fprob -p $cpus --bowtie-n $bowtie_mis --output-genome-bam
      --fastq1 $input.fastqmatepair.fastq1 --fastq2 $input.fastqmatepair.fastq2
      --output $output --isoformfile $isoforms --bamfile $bam_res --log $log
      --sampling-for-bam $sampling_for_bam --reference ${index.fields.path}
    #end if
  #end if
  #if $input.format == "fasta"
    ## FASTA, single-end reads
    #if $input.fastamatepair.matepair == "single"
      rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype
      --no-qualities --seed-length $seedlength --estimate-rspd $rspd --forward-prob $fprob
      -p $cpus --bowtie-n $bowtie_mis --output-genome-bam
      --single_fasta $input.fastamatepair.single_fasta
      --output $output --isoformfile $isoforms --bamfile $bam_res --log $log
      --sampling-for-bam $sampling_for_bam --reference ${index.fields.path}
    #end if
    ## FASTA, paired-end reads
    #if $input.fastamatepair.matepair == "paired"
      rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype
      --no-qualities --paired-end --seed-length $seedlength --estimate-rspd $rspd
      --forward-prob $fprob -p $cpus --bowtie-n $bowtie_mis --output-genome-bam
      --fasta1 $input.fastamatepair.fasta1 --fasta2 $input.fastamatepair.fasta2
      --output $output --isoformfile $isoforms --bamfile $bam_res --log $log
      --sampling-for-bam $sampling_for_bam --reference ${index.fields.path}
    #end if
  #end if
</command>
|
|
52
|
|
53 <inputs>
|
|
<!-- Free-text sample label; it is interpolated as ${sample} into the output dataset labels. -->
<param name="sample"
       type="text"
       format="txt"
       label="Sample label" />
|
|
<!--
  Read input selection.

  "format" chooses between FASTQ and FASTA reads. Each format then carries its own
  nested conditional ("fastqmatepair" / "fastamatepair") whose "matepair" test
  parameter selects single-end or paired-end datasets.

  The test parameter is declared first in both nested conditionals so the two
  definitions read the same way. A former value-less "when" case, which could never
  be selected and re-declared "fastamatepair" with FASTQ datasets, has been removed.
-->
<conditional name="input">
  <param name="format" type="select" label="Input file type">
    <option value="fastq">FASTQ</option>
    <option value="fasta">FASTA</option>
  </param>

  <when value="fastq">
    <!-- The option value is passed verbatim on the command line as the quality-encoding flag. -->
    <param name="fastq_select" size="15" type="select" label="FASTQ type">
      <option value="--phred33-quals">phred33 qualities</option>
      <option value="--solexa-quals">solexa qualities</option>
      <option value="--phred64-quals">phred64 qualities</option>
    </param>

    <conditional name="fastqmatepair">
      <param name="matepair" type="select" label="Library type">
        <option value="single">Single End Reads</option>
        <option value="paired">Paired End Reads</option>
      </param>
      <when value="single">
        <param name="singlefastq" type="data" checked="yes" format="fastq" label="FASTQ file" />
      </when>
      <when value="paired">
        <param name="fastq1" type="data" format="fastq" label="Read 1 fastq file" />
        <param name="fastq2" type="data" format="fastq" label="Read 2 fastq file" />
      </when>
    </conditional>
  </when>

  <when value="fasta">
    <conditional name="fastamatepair">
      <param name="matepair" type="select" label="Library Type">
        <option value="single">Single End Reads</option>
        <option value="paired">Paired End Reads</option>
      </param>
      <when value="single">
        <param name="single_fasta" type="data" checked="yes" format="fasta" label="fasta file" />
      </when>
      <when value="paired">
        <param name="fasta1" type="data" format="fasta" label="Read 1 fasta file" />
        <param name="fasta2" type="data" format="fasta" label="Read 2 fasta file" />
      </when>
    </conditional>
  </when>
</conditional>
|
|
<!-- Strand specificity; the selected value is what the command template passes as the forward-prob argument. -->
<param name="fprob" type="select">
  <label>Is the library strand specific?</label>
  <option value="0.5">No</option>
  <option value="1">Yes, the reads (or first reads from paired-end libraries) are only in the forward orientation</option>
  <option value="0">Yes, the reads (or first reads from paired-end libraries) are only in the reverse orientation</option>
</param>
|
|
118
|
|
<!--
  RSEM reference chooser. Options come from the "rsem_indexes" data table, sorted by
  column 2; the command template reads the chosen entry's "path" field.
-->
<param name="index"
       type="select"
       label="Select RSEM reference"
       help="Select from a list of pre-indexed references. If you don't see anything consult the wrapper's documentation on how to create or download a reference">
  <options from_data_table="rsem_indexes">
    <filter type="sort_by" column="2" />
    <validator type="no_options" message="No indexes are available" />
  </options>
</param>
|
|
<!-- Fragment length distribution settings (single-end data only, per the labels below). -->
<param name="fraglenmean"
       type="text"
       size="4"
       value="-1"
       label="Fragment length mean (single-end data only)"
       help="The mean of the fragment length distribution, which is assumed to be a Gaussian. (Default: -1, which disables use of the fragment length distribution)" />
<param name="fraglensd"
       type="text"
       size="4"
       value="0"
       label="The standard deviation of the fragment length distribution (single-end data only)"
       help="Default 0, which assumes that all fragments are of the same length, given by the rounded value of fragment length mean. " />
|
|
127
|
|
<!-- Controls the optional genome BAM: the "bam_genome" output is filtered on this value being "yes". -->
<param name="bamtype"
       type="select"
       label="Create genome bam file"
       help="In addition to the transcript-coordinate-based BAM file output, also output a BAM file with the read alignments in genomic coordinates">
  <option value="no">no</option>
  <option value="yes">yes</option>
</param>
|
|
<!-- yes/no switch handed to the wrapper as the value of its sampling-for-bam argument. -->
<param name="sampling_for_bam"
       type="select"
       format="text"
       help="When RSEM generates a BAM file, instead of outputing all alignments a read has with their posterior probabilities, one alignment is sampled and outputed according to the posterior probabilities. If the sampling result is that the read comes from the noise transcript, nothing is outputed. (Default: off)">
  <label>Sample Bam File</label>
  <option value="no">no</option>
  <option value="yes">yes</option>
</param>
|
|
<!-- yes/no switch handed to the wrapper as the value of its estimate-rspd argument. -->
<param name="rspd"
       type="select"
       format="text"
       help="Set this option if you want to estimate the read start position distribution (RSPD) from data. Otherwise, RSEM will use a uniform RSPD. (Default: off)">
  <label>Estimate and correct for a non-uniform read start position distribution (RSPD)</label>
  <option value="no">no</option>
  <option value="yes">yes</option>
</param>
|
|
142
|
|
143 <!-- <conditional name="fullpar">
|
|
144 <param name="fullpar" type="select" label="Full list of parameters" help="use
|
|
145 full list for linting all the parameters in RSEM">
|
|
146 <option value="default">Default</option>
|
|
147 <option value="fullset">Full Set</option>
|
|
148 </param>
|
|
149 <when value="fullset"> -->
|
|
150 <!-- <param name="testing" size="4" type="text" value="" label="Advanced Parameters" />
|
|
151 -->
|
|
<!--
  Credibility interval switch. The selected value of "ci" is passed to the command
  as the calc-ci argument. Both selectable values now have an explicit case; the
  "no" case intentionally defines no extra inputs.
  NOTE(review): "cimem" is collected here but is not referenced by the command
  template in this file; confirm whether it should be forwarded to the wrapper.
-->
<conditional name="useci">
  <param name="ci" type="select" label="Calculate 95% Credibility Intervals">
    <option value="no">no</option>
    <option value="yes">yes</option>
  </param>
  <when value="no" />
  <when value="yes">
    <param name="cimem" size="4" type="text" value="1024" label="Amount of memory in (MB) for computing CI" />
  </when>
</conditional>
|
|
<!--
  Allowed read/insert length range. Labels corrected to agree with the option
  reference in this tool's help text: the minimum corresponds to the bowtie -I
  option and the maximum to the bowtie -X option.
-->
<param name="fraglenmin" size="4" type="text" value="1" label="Minimum read/insert length. Minimum read/insert length allowed. This is also the value for the bowtie -I option" />
<param name="fraglenmax" size="4" type="text" value="1000" label="Maximum read/insert length. Maximum read/insert length allowed. This is also the value for the bowtie -X option" />
|
|
<!-- Aligner tuning values and thread count; each is substituted into the command template by name. -->
<param name="bowtie_mis"
       type="text"
       size="2"
       value="2"
       label="Bowtie mismatches"
       help="Bowtie parameter max # of mismatches in the seed. (Range: 0-3, Default: 2) " />
<param name="bowtie_e"
       type="text"
       size="4"
       value="99999999"
       label="Maximum sum of quality scores at mismatched positions in read alignments. This is also the value for the Bowtie -e option" />
<param name="bowtie_m"
       type="text"
       size="4"
       value="200"
       label="Discard alignments for reads with number of alignments greater than" />
<param name="seedlength"
       type="text"
       size="2"
       value="25"
       label="Seed length used by the read aligner"
       help="Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. RSEM uses this value for Bowtie's seed length parameter. The minimum value is 25. (Default:25)" />
<param name="cpus"
       type="integer"
       size="2"
       value="1"
       label="Number of threads to use"
       help="Number of threads to use. Both Bowtie and expression estimation will use this many threads. (Default: 1)" />
|
|
168 <!-- </when> --> <!-- </conditional> -->
|
|
169 </inputs>
|
|
<!-- Any exit code of 1 or higher is reported as a fatal error. -->
<stdio>
  <exit_code range="1:"
             level="fatal"
             description="Error Running RSEM" />
</stdio>
|
|
<!--
  Result datasets. Every label is prefixed with the user-supplied sample label.
  The genome BAM is only produced when the "bamtype" input is set to "yes".
-->
<outputs>
  <data name="output" format="tabular" label="${sample}.gene_abundances" />
  <data name="isoforms" format="tabular" label="${sample}.isoform_abundances" />
  <data name="bam_res" format="bam" label="${sample}.transcript.bam" />
  <data name="bam_genome" format="bam" label="${sample}.genome.bam">
    <filter>bamtype == "yes"</filter>
  </data>
  <data name="log" format="txt" label="${sample}.rsem_log" />
</outputs>
|
|
183 <help>
|
|
184
|
|
185
|
|
186 RSEM HOME PAGE - http://deweylab.biostat.wisc.edu/rsem/
|
|
187
|
|
188 NAME
|
|
189 rsem-calculate-expression
|
|
190
|
|
191 SYNOPSIS
|
|
192 rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name
|
|
193 rsem-calculate-expression [options] --paired-end upstream_read_file/s downstream_read_file/s reference_name sample_name
|
|
194 rsem-calculate-expression [options] --sam/--bam [--paired-end] input reference_name sample_name
|
|
195
|
|
196 ARGUMENTS
|
|
197 upstream_read_files/s
|
|
198 Comma-separated list of files containing single-end reads or
|
|
199 upstream reads for paired-end data. By default, these files are
|
|
200 assumed to be in FASTQ format. If the --no-qualities option is
|
|
201 specified, then FASTA format is expected.
|
|
202
|
|
203 downstream_read_file/s
|
|
204 Comma-separated list of files containing downstream reads which are
|
|
205 paired with the upstream reads. By default, these files are assumed
|
|
206 to be in FASTQ format. If the --no-qualities option is specified,
|
|
207 then FASTA format is expected.
|
|
208
|
|
209 input
|
|
210 SAM/BAM formatted input file. If "-" is specified for the filename,
|
|
211 SAM/BAM input is instead assumed to come from standard input. RSEM
|
|
212 requires all alignments of the same read group together. For
|
|
213 paired-end reads, RSEM also requires the two mates of any alignment
|
|
214 be adjacent. See Description section for how to make input file obey
|
|
215 RSEM's requirements.
|
|
216
|
|
217 reference_name
|
|
218 The name of the reference used. The user must have run
|
|
219 'rsem-prepare-reference' with this reference_name before running
|
|
220 this program.
|
|
221
|
|
222 sample_name
|
|
223 The name of the sample analyzed. All output files are prefixed by
|
|
224 this name (e.g., sample_name.genes.results)
|
|
225
|
|
226 OPTIONS
|
|
227
|
|
228 --paired-end
|
|
229 Input reads are paired-end reads. (Default: off)
|
|
230
|
|
231 --no-qualities
|
|
232 Input reads do not contain quality scores. (Default: off)
|
|
233
|
|
234 --strand-specific
|
|
235 The RNA-Seq protocol used to generate the reads is strand specific,
|
|
236 i.e., all (upstream) reads are derived from the forward strand. This
|
|
237 option is equivalent to --forward-prob=1.0. With this option set, if
|
|
238 RSEM runs the Bowtie aligner, the '--norc' Bowtie option will be
|
|
239 used, which disables alignment to the reverse strand of transcripts.
|
|
240 (Default: off)
|
|
241
|
|
242 --sam
|
|
243 Input file is in SAM format. (Default: off)
|
|
244
|
|
245 --bam
|
|
246 Input file is in BAM format. (Default: off)
|
|
247
|
|
248 --sam-header-info [file]
|
|
249 RSEM reads header information from input by default. If this option
|
|
250 is on, header information is read from the specified file. For the
|
|
251 format of the file, please see SAM official website. (Default: "")
|
|
252
|
|
253 -p/--num-threads [int]
|
|
254 Number of threads to use. Both Bowtie and expression estimation will
|
|
255 use this many threads. (Default: 1)
|
|
256
|
|
257 --no-bam-output
|
|
258 Do not output any BAM file. (Default: off)
|
|
259
|
|
260 --output-genome-bam
|
|
261 Generate a BAM file, 'sample_name.genome.bam', with alignments
|
|
262 mapped to genomic coordinates and annotated with their posterior
|
|
263 probabilities. In addition, RSEM will call samtools (included in
|
|
264 RSEM package) to sort and index the bam file.
|
|
265 'sample_name.genome.sorted.bam' and
|
|
266 'sample_name.genome.sorted.bam.bai' will be generated. (Default:
|
|
267 off)
|
|
268
|
|
269 --sampling-for-bam
|
|
270 When RSEM generates a BAM file, instead of outputting all alignments
|
|
271 a read has with their posterior probabilities, one alignment is
|
|
272 sampled and output according to the posterior probabilities. If
|
|
273 the sampling result is that the read comes from the "noise"
|
|
274 transcript, nothing is output. (Default: off)
|
|
275
|
|
276 --calc-ci
|
|
277 Calculate 95% credibility intervals and posterior mean estimates.
|
|
278 (Default: off)
|
|
279
|
|
280 --seed-length [int]
|
|
281 Seed length used by the read aligner. Providing the correct value is
|
|
282 important for RSEM. If RSEM runs Bowtie, it uses this value for
|
|
283 Bowtie's seed length parameter. Any read with its or at least one of
|
|
284 its mates' (for paired-end reads) length less than this value will
|
|
285 be ignored. If poly(A) tails are not added to the references, the
|
|
286 minimum allowed value is 5, otherwise, the minimum allowed value is
|
|
287 25. Note that this script will only check that the value is greater than or equal to
|
|
288 5 and give a warning message if the value is less than 25 but greater than or equal to
|
|
289 5. (Default: 25)
|
|
290
|
|
291 --tag [string]
|
|
292 The name of the optional field used in the SAM input for identifying
|
|
293 a read with too many valid alignments. The field should have the
|
|
294 format [tagName]:i:[value], where a [value] bigger than 0 indicates
|
|
295 a read with too many alignments. (Default: "")
|
|
296
|
|
297 --bowtie-path [path]
|
|
298 The path to the bowtie executables. (Default: the path to the bowtie
|
|
299 executables is assumed to be in the user's PATH environment
|
|
300 variable)
|
|
301
|
|
302 --bowtie-n [int]
|
|
303 (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,
|
|
304 Default: 2)
|
|
305
|
|
306 --bowtie-e [int]
|
|
307 (Bowtie parameter) max sum of mismatch quality scores across the
|
|
308 alignment. (Default: 99999999)
|
|
309
|
|
310 --bowtie-m [int]
|
|
311 (Bowtie parameter) suppress all alignments for a read if greater than [int]
|
|
312 valid alignments exist. (Default: 200)
|
|
313
|
|
314 --bowtie-chunkmbs [int]
|
|
315 (Bowtie parameter) memory allocated for best first alignment
|
|
316 calculation (Default: 0 - use bowtie's default)
|
|
317
|
|
318 --phred33-quals
|
|
319 Input quality scores are encoded as Phred+33. (Default: on)
|
|
320
|
|
321 --phred64-quals
|
|
322 Input quality scores are encoded as Phred+64 (default for GA
|
|
323 Pipeline ver. less than 1.3). (Default: off)
|
|
324
|
|
325 --solexa-quals
|
|
326 Input quality scores are solexa encoded (from GA Pipeline ver. less
|
|
327 than 1.3). (Default: off)
|
|
328
|
|
329 --forward-prob [double]
|
|
330 Probability of generating a read from the forward strand of a
|
|
331 transcript. Set to 1 for a strand-specific protocol where all
|
|
332 (upstream) reads are derived from the forward strand, 0 for a
|
|
333 strand-specific protocol where all (upstream) read are derived from
|
|
334 the reverse strand, or 0.5 for a non-strand-specific protocol.
|
|
335 (Default: 0.5)
|
|
336
|
|
337 --fragment-length-min [int]
|
|
338 Minimum read/insert length allowed. This is also the value for the
|
|
339 bowtie -I option. (Default: 1)
|
|
340
|
|
341 --fragment-length-max [int]
|
|
342 Maximum read/insert length allowed. This is also the value for the
|
|
343 bowtie -X option. (Default: 1000)
|
|
344
|
|
345 --fragment-length-mean [double]
|
|
346 (single-end data only) The mean of the fragment length distribution,
|
|
347 which is assumed to be a Gaussian. (Default: -1, which disables use
|
|
348 of the fragment length distribution)
|
|
349
|
|
350 --fragment-length-sd [double]
|
|
351 (single-end data only) The standard deviation of the fragment length
|
|
352 distribution, which is assumed to be a Gaussian. (Default: 0, which
|
|
353 assumes that all fragments are of the same length, given by the
|
|
354 rounded value of --fragment-length-mean)
|
|
355
|
|
356 --estimate-rspd
|
|
357 Set this option if you want to estimate the read start position
|
|
358 distribution (RSPD) from data. Otherwise, RSEM will use a uniform
|
|
359 RSPD. (Default: off)
|
|
360
|
|
361 --num-rspd-bins [int]
|
|
362 Number of bins in the RSPD. Only relevant when '--estimate-rspd' is
|
|
363 specified. Use of the default setting is recommended. (Default: 20)
|
|
364
|
|
365 --ci-memory [int]
|
|
366 Maximum size (in memory, MB) of the auxiliary buffer used for
|
|
367 computing credibility intervals (CI). Set it larger for a faster CI
|
|
368 calculation. However, leaving 2 GB memory free for other usage is
|
|
369 recommended. (Default: 1024)
|
|
370
|
|
371 --keep-intermediate-files
|
|
372 Keep temporary files generated by RSEM. RSEM creates a temporary
|
|
373 directory, 'sample_name.temp', into which it puts all intermediate
|
|
374 output files. If this directory already exists, RSEM overwrites all
|
|
375 files generated by previous RSEM runs inside of it. By default,
|
|
376 after RSEM finishes, the temporary directory is deleted. Set this
|
|
377 option to prevent the deletion of this directory and the
|
|
378 intermediate files inside of it. (Default: off)
|
|
379
|
|
380 --time
|
|
381 Output time consumed by each step of RSEM to 'sample_name.time'.
|
|
382 (Default: off)
|
|
383
|
|
384 -q/--quiet
|
|
385 Suppress the output of logging information. (Default: off)
|
|
386
|
|
387 -h/--help
|
|
388 Show help information.
|
|
389
|
|
390 DESCRIPTION
|
|
391 In its default mode, this program aligns input reads against a reference
|
|
392 transcriptome with Bowtie and calculates expression values using the
|
|
393 alignments. RSEM assumes the data are single-end reads with quality
|
|
394 scores, unless the '--paired-end' or '--no-qualities' options are
|
|
395 specified. Users may use an alternative aligner by specifying one of the
|
|
396 --sam and --bam options, and providing an alignment file in the
|
|
397 specified format. However, users should make sure that they align
|
|
398 against the indices generated by 'rsem-prepare-reference' and the
|
|
399 alignment file satisfies the requirements mentioned in ARGUMENTS
|
|
400 section.
|
|
401
|
|
402 One simple way to make the alignment file satisfy RSEM's requirements
|
|
403 (assuming the aligner used put mates in a paired-end read adjacent) is
|
|
404 to use the 'convert-sam-for-rsem' script. This script only accepts SAM format
|
|
405 files as input. If a BAM format file is obtained, please use samtools to
|
|
406 convert it to a SAM file first. For example, if '/ref/mouse_125' is the
|
|
407 'reference_name' and the SAM file is named 'input.sam', you can run the
|
|
408 following command:
|
|
409
|
|
410 convert-sam-for-rsem /ref/mouse_125 input.sam -o input_for_rsem.sam
|
|
411
|
|
412 For details, please refer to 'convert-sam-for-rsem's documentation page.
|
|
413
|
|
414 The SAM/BAM format RSEM uses is v1.4. However, it is compatible with old
|
|
415 SAM/BAM format. However, RSEM cannot recognize 0x100 in the FLAG field.
|
|
416 In addition, RSEM requires SEQ and QUAL are not '*'.
|
|
417
|
|
418 The user must run 'rsem-prepare-reference' with the appropriate
|
|
419 reference before using this program.
|
|
420
|
|
421 For single-end data, it is strongly recommended that the user provide
|
|
422 the fragment length distribution parameters (--fragment-length-mean and
|
|
423 --fragment-length-sd). For paired-end data, RSEM will automatically
|
|
424 learn a fragment length distribution from the data.
|
|
425
|
|
426 Please note that some of the default values for the Bowtie parameters
|
|
427 are not the same as those defined for Bowtie itself.
|
|
428
|
|
429 The temporary directory and all intermediate files will be removed when
|
|
430 RSEM finishes unless '--keep-intermediate-files' is specified.
|
|
431
|
|
432 With the '--calc-ci' option, 95% credibility intervals and posterior
|
|
433 mean estimates will be calculated in addition to maximum likelihood
|
|
434 estimates.
|
|
435
|
|
436 OUTPUT
|
|
437 sample_name.genes.results
|
|
438 File containing gene level expression estimates. The format of each
|
|
439 line in this file is:
|
|
440
|
|
441 gene_id expected_counts tau_value [pmc_value tau_pme_value
|
|
442 tau_ci_lower_bound tau_ci_upper_bound] transcript_id_list
|
|
443
|
|
444 Fields are separated by the tab character. Fields within "[]" are
|
|
445 only presented if '--calc-ci' is set. pme stands for posterior mean
|
|
446 estimation. pmc stands for posterior mean counts. ci_lower_bound(l)
|
|
447 means the lower bound of the credibility intervals,
|
|
448 ci_upper_bound(u) means the upper bound of the credibility
|
|
449 intervals. So the credibility interval is [l, u].
|
|
450 'transcript_id_list' is a space-separated list of transcript_ids
|
|
451 belonging to the gene. If no gene information is provided, this file
|
|
452 has the same content as 'sample_name.isoforms.results'.
|
|
453
|
|
454 sample_name.isoforms.results
|
|
455 File containing isoform level expression values. The format of each
|
|
456 line in this file is:
|
|
457
|
|
458 transcript_id expected_counts tau_value [pmc_value tau_pme_value
|
|
459 tau_ci_lower_bound tau_ci_upper_bound] gene_id
|
|
460
|
|
461 Fields are separated by the tab character. 'gene_id' is the gene_id
|
|
462 of the gene which this transcript belongs to. If no gene information
|
|
463 is provided, 'gene_id' and 'transcript_id' are the same.
|
|
464
|
|
465 sample_name.transcript.bam, sample_name.transcript.sorted.bam and
|
|
466 sample_name.transcript.sorted.bam.bai
|
|
467 Only generated when --no-bam-output is not specified.
|
|
468
|
|
469 'sample_name.transcript.bam' is a BAM-formatted file of read
|
|
470 alignments in transcript coordinates. The MAPQ field of each
|
|
471 alignment is set to min(100, floor(-10 * log10(1.0 - w) + 0.5)),
|
|
472 where w is the posterior probability of that alignment being the
|
|
473 true mapping of a read. In addition, RSEM pads a new tag ZW:f:value,
|
|
474 where value is a single precision floating number representing the
|
|
475 posterior probability.
|
|
476
|
|
477 'sample_name.transcript.sorted.bam' and
|
|
478 'sample_name.transcript.sorted.bam.bai' are the sorted BAM file and
|
|
479 indices generated by samtools (included in RSEM package).
|
|
480
|
|
481 sample_name.genome.bam, sample_name.genome.sorted.bam and
|
|
482 sample_name.genome.sorted.bam.bai
|
|
483 Only generated when --no-bam-output is not specified and
|
|
484 --output-genome-bam is specified.
|
|
485
|
|
486 'sample_name.genome.bam' is a BAM-formatted file of read alignments
|
|
487 in genomic coordinates. Alignments of reads that have identical
|
|
488 genomic coordinates (i.e., alignments to different isoforms that
|
|
489 share the same genomic region) are collapsed into one alignment. The
|
|
490 MAPQ field of each alignment is set to min(100, floor(-10 *
|
|
491 log10(1.0 - w) + 0.5)), where w is the posterior probability of that
|
|
492 alignment being the true mapping of a read. In addition, RSEM pads a
|
|
493 new tag ZW:f:value, where value is a single precision floating
|
|
494 number representing the posterior probability. If an alignment is
|
|
495 spliced, a XS:A:value tag is also added, where value is either '+'
|
|
496 or '-' indicating the strand of the transcript it aligns to.
|
|
497
|
|
498 'sample_name.genome.sorted.bam' and
|
|
499 'sample_name.genome.sorted.bam.bai' are the sorted BAM file and
|
|
500 indices generated by samtools (included in RSEM package).
|
|
501
|
|
502 sample_name.sam.gz
|
|
503 Only generated when the input files are raw reads instead of SAM/BAM
|
|
504 format files
|
|
505
|
|
506 It is the gzipped SAM output produced by bowtie aligner.
|
|
507
|
|
508 sample_name.time
|
|
509 Only generated when --time is specified.
|
|
510
|
|
511 It contains time (in seconds) consumed by aligning reads, estimating
|
|
512 expression levels and calculating credibility intervals.
|
|
513
|
|
514 sample_name.stat
|
|
515 This is a folder instead of a file. All model related statistics are
|
|
516 stored in this folder. 'rsem-plot-model' can generate plots
|
|
517 using this folder.
|
|
518
|
|
519 EXAMPLES
|
|
520 Assume the path to the bowtie executables is in the user's PATH
|
|
521 environment variable. Reference files are under '/ref' with name
|
|
522 'mouse_125'.
|
|
523
|
|
524 1) '/data/mmliver.fq', single-end reads with quality scores. Quality
|
|
525 scores are encoded as for 'GA pipeline version >= 1.3'. We want to use 8
|
|
526 threads and generate a genome BAM file:
|
|
527
|
|
528 rsem-calculate-expression --phred64-quals \
|
|
529 -p 8 \
|
|
530 --output-genome-bam \
|
|
531 /data/mmliver.fq \
|
|
532 /ref/mouse_125 \
|
|
533 mmliver_single_quals
|
|
534
|
|
535 2) '/data/mmliver_1.fq' and '/data/mmliver_2.fq', paired-end reads with
|
|
536 quality scores. Quality scores are in SANGER format. We want to use 8
|
|
537 threads and do not generate a genome BAM file:
|
|
538
|
|
539 rsem-calculate-expression -p 8 \
|
|
540 --paired-end \
|
|
541 /data/mmliver_1.fq \
|
|
542 /data/mmliver_2.fq \
|
|
543 /ref/mouse_125 \
|
|
544 mmliver_paired_end_quals
|
|
545
|
|
546 3) '/data/mmliver.fa', single-end reads without quality scores. We want
|
|
547 to use 8 threads:
|
|
548
|
|
549 rsem-calculate-expression -p 8 \
|
|
550 --no-qualities \
|
|
551 /data/mmliver.fa \
|
|
552 /ref/mouse_125 \
|
|
553 mmliver_single_without_quals
|
|
554
|
|
555 4) Data are the same as 1). We want to take a fragment length
|
|
556 distribution into consideration. We set the fragment length mean to 150
|
|
557 and the standard deviation to 35. In addition to a BAM file, we also
|
|
558 want to generate credibility intervals. We allow RSEM to use 1GB of
|
|
559 memory for CI calculation:
|
|
560
|
|
561 rsem-calculate-expression --bowtie-path /sw/bowtie \
|
|
562 --phred64-quals \
|
|
563 --fragment-length-mean 150.0 \
|
|
564 --fragment-length-sd 35.0 \
|
|
565 -p 8 \
|
|
566 --output-genome-bam \
|
|
567 --calc-ci \
|
|
568 --ci-memory 1024 \
|
|
569 /data/mmliver.fq \
|
|
570 /ref/mouse_125 \
|
|
571 mmliver_single_quals
|
|
572
|
|
573 5) '/data/mmliver_paired_end_quals.bam', paired-end reads with quality
|
|
574 scores. We want to use 8 threads:
|
|
575
|
|
576 rsem-calculate-expression --paired-end \
|
|
577 --bam \
|
|
578 -p 8 \
|
|
579 /data/mmliver_paired_end_quals.bam \
|
|
580 /ref/mouse_125 \
|
|
581 mmliver_paired_end_quals
|
|
582 </help>
|
|
583 </tool>
|