comparison rsem_calculate_expression.xml @ 0:64d45f959303

Uploaded
author jjohnson
date Mon, 11 Nov 2013 13:54:43 -0500
parents
children 1ff2fc8da328
comparison
equal deleted inserted replaced
-1:000000000000 0:64d45f959303
1 <tool id="rsem_calculate_expression" name="RSEM calculate expression" version="1.1.17">
2 <description>RNA-Seq by Expectation-Maximization</description>
3 <requirements> <!-- packages this tool declares as dependencies, each pinned to a version -->
4 <requirement version="1.1.17" type="package">rsem</requirement>
5 <requirement version="0.1.19" type="package">samtools</requirement>
6 <requirement version="1.0.0" type="package">bowtie</requirement>
7 </requirements>
8 <command interpreter="perl">
9 rsem-calculate-expression
10 --calc-ci $useci.ci
11 --fragment-length-mean $fraglenmean
12 --fragment-length-min $fraglenmin
13 --fragment-length-sd $fraglensd
14 --fragment-length-max $fraglenmax
15 --bowtie-e $bowtie_e
16 --bowtie-m $bowtie_m
17 ## The options above are always emitted; the branches below append the format- and library-specific options.
18 #if $input.format=="fastq"
19 ## FASTQ input, single-end library: reads come from $singlefastq; $input.fastq_select supplies the quality-encoding flag.
20 #if $input.fastqmatepair.matepair=="single" #rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype
21 --seed-length $seedlength $input.fastq_select --estimate-rspd $rspd --forward-prob
22 $fprob -p $cpus --bowtie-n $bowtie_mis --output-genome-bam --single_fastq $singlefastq
23 --output $output --isoformfile $isoforms --bamfile $bam_res --log $log
24 --sampling-for-bam $sampling_for_bam --reference ${index.fields.path}
25 #end if
26 ## FASTQ input, paired-end library: adds --paired-end and takes the two mate files from $fastq1 and $fastq2.
27 #if $input.fastqmatepair.matepair=="paired" #rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype
28 --paired-end --seed-length $seedlength --estimate-rspd $rspd $input.fastq_select --forward-prob $fprob -p $cpus
29 --bowtie-n $bowtie_mis --output-genome-bam --fastq1 $fastq1 --fastq2 $fastq2 --output
30 $output --isoformfile $isoforms --bamfile $bam_res --log $log --sampling-for-bam
31 $sampling_for_bam --reference ${index.fields.path}
32 #end if
33 #end if
34 #if $input.format=="fasta"
35 ## FASTA input, single-end library: adds --no-qualities and reads from $single_fasta.
36 #if $input.fastamatepair.matepair=="single" #rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype
37 --no-qualities --seed-length $seedlength --estimate-rspd $rspd --forward-prob $fprob -p $cpus --bowtie-n $bowtie_mis
38 --output-genome-bam --single_fasta $single_fasta --output $output --isoformfile
39 $isoforms --bamfile $bam_res --log $log --sampling-for-bam $sampling_for_bam --reference
40 ${index.fields.path}
41 #end if
42 ## FASTA input, paired-end library: adds --no-qualities and --paired-end; mate files are $fasta1 and $fasta2.
43 #if $input.fastamatepair.matepair=="paired" #rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype
44 --no-qualities --paired-end --seed-length $seedlength --estimate-rspd $rspd --forward-prob $fprob -p $cpus
45 --bowtie-n $bowtie_mis --output-genome-bam --fasta1 $fasta1 --fasta2 $fasta2 --output
46 $output --isoformfile $isoforms --bamfile $bam_res --log $log --sampling-for-bam
47 $sampling_for_bam --reference ${index.fields.path}
48 #end if
49 #end if
50 ## NOTE(review): $useci.cimem is declared under inputs but never referenced here, and the dataset variables are used without their $input.* conditional prefix - confirm both are intended.
51 </command>
52
53 <inputs>
54 <param name="sample" type="text" format="txt" label="Sample label" /> <!-- free-text sample label; referenced as ${sample} in the output dataset labels -->
55 <conditional name="input"> <!-- read file format; each branch nests its own single/paired-end conditional -->
56 <param name="format" type="select" label="Input file type">
57 <option value="fastq">FASTQ</option>
58 <option value="fasta">FASTA</option>
59 </param>
60 <when value="fastq">
61 <param name="fastq_select" size="15" type="select" label="FASTQ type" > <!-- option values are the literal quality-encoding flags inserted into the command -->
62 <option value="--phred33-quals">phred33 qualities</option>
63 <option value="--solexa-quals">solexa qualities</option>
64 <option value="--phred64-quals">phred64 qualities</option>
65 </param>
66
67 <conditional name="fastqmatepair">
68 <param name="matepair" type="select" label="Library type"> <!-- test parameter listed before its when branches -->
69 <option value="single">Single End Reads</option>
70 <option value="paired">Paired End Reads</option>
71 </param>
72 <when value="single">
73 <param name="singlefastq" type="data" format="fastq" label="FASTQ file" />
74 </when>
75 <when value="paired">
76 <param name="fastq1" type="data" format="fastq" label="Read 1 fastq file" />
77 <param name="fastq2" type="data" format="fastq" label="Read 2 fastq file" />
78 </when>
79 </conditional>
80 </when>
81 <when value="fasta">
82 <conditional name="fastamatepair">
83 <param name="matepair" type="select" label="Library Type">
84 <option value="single">Single End Reads</option>
85 <option value="paired">Paired End Reads</option>
86 </param>
87 <when value="single">
88 <param name="single_fasta" type="data" format="fasta" label="fasta file" />
89 </when>
90 <when value="paired">
91 <param name="fasta1" type="data" format="fasta" label="Read 1 fasta file" />
92 <param name="fasta2" type="data" format="fasta" label="Read 2 fasta file" />
93 </when>
94 </conditional>
95 </when>
111 </conditional>
112 <param name="fprob" type="select"
113 label="Is the library strand specific?"> <!-- the selected value is passed after the forward-prob flag -->
114 <option value="0.5">No</option>
115 <option value="1">Yes, the reads (or first reads from paired-end libraries) are only in the forward orientation</option>
116 <option value="0">Yes, the reads (or first reads from paired-end libraries) are only in the reverse orientation</option>
117 </param>
118
119 <param name="index" type="select" label="Select RSEM reference" help="Select from a list of pre-indexed references. If you don't see anything consult the wrapper's documentation on how to create or download a reference">
120 <options from_data_table="rsem_indexes"> <!-- choices are read from the rsem_indexes data table; the command template uses ${index.fields.path} -->
121 <filter type="sort_by" column="2" />
122 <validator type="no_options" message="No indexes are available" />
123 </options>
124 </param>
125 <param name="fraglenmean" size="4" type="text" value="-1" label="Fragment length mean (single-end data only)" help="The mean of the fragment length distribution, which is assumed to be a Gaussian. (Default: -1, which disables use of the fragment length distribution)" /> <!-- passed after the fragment-length-mean flag -->
126 <param name="fraglensd" size="4" type="text" value="0" label="The standard deviation of the fragment length distribution (single-end data only)" help="Default 0, which assumes that all fragments are of the same length, given by the rounded value of fragment length mean. " /> <!-- passed after the fragment-length-sd flag -->
127
128 <param type="select" name="bamtype" label="Create genome bam file" help="In addition to the transcript-coordinate-based BAM file output, also output a BAM file with the read alignments in genomic coordinates"> <!-- also gates the bam_genome output through its filter -->
129 <option value="no">no</option>
130 <option value="yes">yes</option>
131 </param>
132 <param type="select" name="sampling_for_bam" format="text" label="Sample Bam File"
133 help="When RSEM generates a BAM file, instead of outputing all alignments a read has with their posterior probabilities, one alignment is sampled and outputed according to the posterior probabilities. If the sampling result is that the read comes from the noise transcript, nothing is outputed. (Default: off)">
134 <option value="no">no</option>
135 <option value="yes">yes</option>
136 </param> <!-- the yes/no value is passed after the sampling-for-bam flag -->
137 <param type="select" name="rspd" format="text" label="Estimate and correct for a non-uniform read start position distribution (RSPD)"
138 help="Set this option if you want to estimate the read start position distribution (RSPD) from data. Otherwise, RSEM will use a uniform RSPD. (Default: off)">
139 <option value="no">no</option>
140 <option value="yes">yes</option>
141 </param> <!-- the yes/no value is passed after the estimate-rspd flag -->
142
143 <!-- <conditional name="fullpar">
144 <param name="fullpar" type="select" label="Full list of parameters" help="use
145 full list for linting all the parameters in RSEM">
146 <option value="default">Default</option>
147 <option value="fullset">Full Set</option>
148 </param>
149 <when value="fullset"> -->
150 <!-- <param name="testing" size="4" type="text" value="" label="Advanced Parameters" />
151 -->
152 <conditional name="useci"> <!-- credibility-interval options; the command template reads $useci.ci -->
153 <param name="ci" type="select" label="Calculate 95% Credibility Intervals">
154 <option value="no">no</option>
155 <option value="yes">yes</option>
156 </param>
157 <when value="yes">
158 <param name="cimem" size="4" type="text" value="1024" label="Amount of memory in (MB) for computing CI" /> <!-- NOTE(review): cimem is not referenced by the command template in this file; confirm whether it should be forwarded -->
159 </when>
160 </conditional>
161 <param name="fraglenmin" size="4" type="text" value="1" label="Minimum read/insert length allowed. This is also the value for the bowtie -I option" /> <!-- passed after the fragment-length-min flag -->
162 <param name="fraglenmax" size="4" type="text" value="1000" label="Maximum read/insert length allowed. This is also the value for the bowtie -X option" /> <!-- passed after the fragment-length-max flag -->
163 <param name="bowtie_mis" size="2" type="text" value="2" label="Bowtie mismatches" help="Bowtie parameter max # of mismatches in the seed. (Range: 0-3, Default: 2) "/> <!-- passed after the bowtie-n flag in every branch of the command template -->
164 <param name="bowtie_e" size="4" type="text" value="99999999" label="Maximum sum of quality scores at mismatched positions in read alignments. This is also the value for the Bowtie -e option" /> <!-- passed after the bowtie-e flag -->
165 <param name="bowtie_m" size="4" type="text" value="200" label="Discard alignments for reads with number of alignments greater than" /> <!-- passed after the bowtie-m flag -->
166 <param name="seedlength" size="2" type="text" value="25" label="Seed length used by the read aligner" help="Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. RSEM uses this value for Bowtie's seed length parameter. The minimum value is 25. (Default:25)" /> <!-- passed after the seed-length flag -->
167 <param name="cpus" size="2" type="integer" value="1" label="Number of threads to use" help="Number of threads to use. Both Bowtie and expression estimation will use this many threads. (Default: 1)" /> <!-- passed after the -p flag -->
168 <!-- </when> --> <!-- </conditional> -->
169 </inputs>
170 <stdio> <!-- exit codes in the range 1 and above are reported as a fatal error -->
171 <exit_code level="fatal" range="1:" description="Error Running RSEM" />
172 </stdio>
173 <outputs> <!-- every dataset label is prefixed with the user-supplied sample text -->
174 <data name="output" format="tabular" label="${sample}.gene_abundances"/>
175 <data name="isoforms" format="tabular" label="${sample}.isoform_abundances"/>
176 <data name="bam_res" format="bam" label="${sample}.transcript.bam"/>
177 <data name="bam_genome" format="bam" label="${sample}.genome.bam"> <!-- kept only when the bamtype select is set to yes -->
178 <filter>bamtype == "yes"</filter>
179 </data>
180
181 <data name="log" format="txt" label="${sample}.rsem_log"/>
182 </outputs>
183 <help>
184
185
186 RSEM HOME PAGE - http://deweylab.biostat.wisc.edu/rsem/
187
188 NAME
189 rsem-calculate-expression
190
191 SYNOPSIS
192 rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name
193 rsem-calculate-expression [options] --paired-end upstream_read_file/s downstream_read_file/s reference_name sample_name
194 rsem-calculate-expression [options] --sam/--bam [--paired-end] input reference_name sample_name
195
196 ARGUMENTS
197 upstream_read_file/s
198 Comma-separated list of files containing single-end reads or
199 upstream reads for paired-end data. By default, these files are
200 assumed to be in FASTQ format. If the --no-qualities option is
201 specified, then FASTA format is expected.
202
203 downstream_read_file/s
204 Comma-separated list of files containing downstream reads which are
205 paired with the upstream reads. By default, these files are assumed
206 to be in FASTQ format. If the --no-qualities option is specified,
207 then FASTA format is expected.
208
209 input
210 SAM/BAM formatted input file. If "-" is specified for the filename,
211 SAM/BAM input is instead assumed to come from standard input. RSEM
212 requires all alignments of the same read to be grouped together. For
213 paired-end reads, RSEM also requires the two mates of any alignment
214 be adjacent. See Description section for how to make input file obey
215 RSEM's requirements.
216
217 reference_name
218 The name of the reference used. The user must have run
219 'rsem-prepare-reference' with this reference_name before running
220 this program.
221
222 sample_name
223 The name of the sample analyzed. All output files are prefixed by
224 this name (e.g., sample_name.genes.results)
225
226 OPTIONS
227
228 --paired-end
229 Input reads are paired-end reads. (Default: off)
230
231 --no-qualities
232 Input reads do not contain quality scores. (Default: off)
233
234 --strand-specific
235 The RNA-Seq protocol used to generate the reads is strand specific,
236 i.e., all (upstream) reads are derived from the forward strand. This
237 option is equivalent to --forward-prob=1.0. With this option set, if
238 RSEM runs the Bowtie aligner, the '--norc' Bowtie option will be
239 used, which disables alignment to the reverse strand of transcripts.
240 (Default: off)
241
242 --sam
243 Input file is in SAM format. (Default: off)
244
245 --bam
246 Input file is in BAM format. (Default: off)
247
248 --sam-header-info [file]
249 RSEM reads header information from input by default. If this option
250 is on, header information is read from the specified file. For the
251 format of the file, please see SAM official website. (Default: "")
252
253 -p/--num-threads [int]
254 Number of threads to use. Both Bowtie and expression estimation will
255 use this many threads. (Default: 1)
256
257 --no-bam-output
258 Do not output any BAM file. (Default: off)
259
260 --output-genome-bam
261 Generate a BAM file, 'sample_name.genome.bam', with alignments
262 mapped to genomic coordinates and annotated with their posterior
263 probabilities. In addition, RSEM will call samtools (included in
264 RSEM package) to sort and index the bam file.
265 'sample_name.genome.sorted.bam' and
266 'sample_name.genome.sorted.bam.bai' will be generated. (Default:
267 off)
268
269 --sampling-for-bam
270 When RSEM generates a BAM file, instead of outputting all alignments
271 a read has with their posterior probabilities, one alignment is
272 sampled and output according to the posterior probabilities. If
273 the sampling result is that the read comes from the "noise"
274 transcript, nothing is output. (Default: off)
275
276 --calc-ci
277 Calculate 95% credibility intervals and posterior mean estimates.
278 (Default: off)
279
280 --seed-length [int]
281 Seed length used by the read aligner. Providing the correct value is
282 important for RSEM. If RSEM runs Bowtie, it uses this value for
283 Bowtie's seed length parameter. Any read with its or at least one of
284 its mates' (for paired-end reads) length less than this value will
285 be ignored. If poly(A) tails were not added to the references, the
286 minimum allowed value is 5; otherwise, the minimum allowed value is
287 25. Note that this script only checks that the value is at least 5,
288 and gives a warning message if the value is less than 25 but greater
289 than or equal to 5. (Default: 25)
290
291 --tag [string]
292 The name of the optional field used in the SAM input for identifying
293 a read with too many valid alignments. The field should have the
294 format [tagName]:i:[value], where a [value] bigger than 0 indicates
295 a read with too many alignments. (Default: "")
296
297 --bowtie-path [path]
298 The path to the bowtie executables. (Default: the path to the bowtie
299 executables is assumed to be in the user's PATH environment
300 variable)
301
302 --bowtie-n [int]
303 (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,
304 Default: 2)
305
306 --bowtie-e [int]
307 (Bowtie parameter) max sum of mismatch quality scores across the
308 alignment. (Default: 99999999)
309
310 --bowtie-m [int]
311 (Bowtie parameter) suppress all alignments for a read if greater than [int]
312 valid alignments exist. (Default: 200)
313
314 --bowtie-chunkmbs [int]
315 (Bowtie parameter) memory allocated for best first alignment
316 calculation (Default: 0 - use bowtie's default)
317
318 --phred33-quals
319 Input quality scores are encoded as Phred+33. (Default: on)
320
321 --phred64-quals
322 Input quality scores are encoded as Phred+64 (default for GA
323 Pipeline ver. less than 1.3). (Default: off)
324
325 --solexa-quals
326 Input quality scores are solexa encoded (from GA Pipeline ver. less
327 than 1.3). (Default: off)
328
329 --forward-prob [double]
330 Probability of generating a read from the forward strand of a
331 transcript. Set to 1 for a strand-specific protocol where all
332 (upstream) reads are derived from the forward strand, 0 for a
333 strand-specific protocol where all (upstream) read are derived from
334 the reverse strand, or 0.5 for a non-strand-specific protocol.
335 (Default: 0.5)
336
337 --fragment-length-min [int]
338 Minimum read/insert length allowed. This is also the value for the
339 bowtie -I option. (Default: 1)
340
341 --fragment-length-max [int]
342 Maximum read/insert length allowed. This is also the value for the
343 bowtie -X option. (Default: 1000)
344
345 --fragment-length-mean [double]
346 (single-end data only) The mean of the fragment length distribution,
347 which is assumed to be a Gaussian. (Default: -1, which disables use
348 of the fragment length distribution)
349
350 --fragment-length-sd [double]
351 (single-end data only) The standard deviation of the fragment length
352 distribution, which is assumed to be a Gaussian. (Default: 0, which
353 assumes that all fragments are of the same length, given by the
354 rounded value of --fragment-length-mean)
355
356 --estimate-rspd
357 Set this option if you want to estimate the read start position
358 distribution (RSPD) from data. Otherwise, RSEM will use a uniform
359 RSPD. (Default: off)
360
361 --num-rspd-bins [int]
362 Number of bins in the RSPD. Only relevant when '--estimate-rspd' is
363 specified. Use of the default setting is recommended. (Default: 20)
364
365 --ci-memory [int]
366 Maximum size (in memory, MB) of the auxiliary buffer used for
367 computing credibility intervals (CI). Set it larger for a faster CI
368 calculation. However, leaving 2 GB memory free for other usage is
369 recommended. (Default: 1024)
370
371 --keep-intermediate-files
372 Keep temporary files generated by RSEM. RSEM creates a temporary
373 directory, 'sample_name.temp', into which it puts all intermediate
374 output files. If this directory already exists, RSEM overwrites all
375 files generated by previous RSEM runs inside of it. By default,
376 after RSEM finishes, the temporary directory is deleted. Set this
377 option to prevent the deletion of this directory and the
378 intermediate files inside of it. (Default: off)
379
380 --time
381 Output time consumed by each step of RSEM to 'sample_name.time'.
382 (Default: off)
383
384 -q/--quiet
385 Suppress the output of logging information. (Default: off)
386
387 -h/--help
388 Show help information.
389
390 DESCRIPTION
391 In its default mode, this program aligns input reads against a reference
392 transcriptome with Bowtie and calculates expression values using the
393 alignments. RSEM assumes the data are single-end reads with quality
394 scores, unless the '--paired-end' or '--no-qualities' options are
395 specified. Users may use an alternative aligner by specifying one of the
396 --sam and --bam options, and providing an alignment file in the
397 specified format. However, users should make sure that they align
398 against the indices generated by 'rsem-prepare-reference' and the
399 alignment file satisfies the requirements mentioned in ARGUMENTS
400 section.
401
402 One simple way to make the alignment file satisfying RSEM's requirements
403 (assuming the aligner used put mates in a paired-end read adjacent) is
404 to use 'convert-sam-for-rsem' script. This script only accept SAM format
405 files as input. If a BAM format file is obtained, please use samtools to
406 convert it to a SAM file first. For example, if '/ref/mouse_125' is the
407 'reference_name' and the SAM file is named 'input.sam', you can run the
408 following command:
409
410 convert-sam-for-rsem /ref/mouse_125 input.sam -o input_for_rsem.sam
411
412 For details, please refer to 'convert-sam-for-rsem's documentation page.
413
414 The SAM/BAM format RSEM uses is v1.4, and it is compatible with the older
415 SAM/BAM format. However, RSEM cannot recognize 0x100 in the FLAG field.
416 In addition, RSEM requires that SEQ and QUAL are not '*'.
417
418 The user must run 'rsem-prepare-reference' with the appropriate
419 reference before using this program.
420
421 For single-end data, it is strongly recommended that the user provide
422 the fragment length distribution parameters (--fragment-length-mean and
423 --fragment-length-sd). For paired-end data, RSEM will automatically
424 learn a fragment length distribution from the data.
425
426 Please note that some of the default values for the Bowtie parameters
427 are not the same as those defined for Bowtie itself.
428
429 The temporary directory and all intermediate files will be removed when
430 RSEM finishes unless '--keep-intermediate-files' is specified.
431
432 With the '--calc-ci' option, 95% credibility intervals and posterior
433 mean estimates will be calculated in addition to maximum likelihood
434 estimates.
435
436 OUTPUT
437 sample_name.genes.results
438 File containing gene level expression estimates. The format of each
439 line in this file is:
440
441 gene_id expected_counts tau_value [pmc_value tau_pme_value
442 tau_ci_lower_bound tau_ci_upper_bound] transcript_id_list
443
444 Fields are separated by the tab character. Fields within "[]" are
445 only presented if '--calc-ci' is set. pme stands for posterior mean
446 estimation. pmc stands for posterior mean counts. ci_lower_bound(l)
447 means the lower bound of the credibility intervals,
448 ci_upper_bound(u) means the upper bound of the credibility
449 intervals. So the credibility interval is [l, u].
450 'transcript_id_list' is a space-separated list of transcript_ids
451 belonging to the gene. If no gene information is provided, this file
452 has the same content as 'sample_name.isoforms.results'.
453
454 sample_name.isoforms.results
455 File containing isoform level expression values. The format of each
456 line in this file is:
457
458 transcript_id expected_counts tau_value [pmc_value tau_pme_value
459 tau_ci_lower_bound tau_ci_upper_bound] gene_id
460
461 Fields are separated by the tab character. 'gene_id' is the gene_id
462 of the gene which this transcript belongs to. If no gene information
463 is provided, 'gene_id' and 'transcript_id' are the same.
464
465 sample_name.transcript.bam, sample_name.transcript.sorted.bam and
466 sample_name.transcript.sorted.bam.bai
467 Only generated when --no-bam-output is not specified.
468
469 'sample_name.transcript.bam' is a BAM-formatted file of read
470 alignments in transcript coordinates. The MAPQ field of each
471 alignment is set to min(100, floor(-10 * log10(1.0 - w) + 0.5)),
472 where w is the posterior probability of that alignment being the
473 true mapping of a read. In addition, RSEM pads a new tag ZW:f:value,
474 where value is a single precision floating number representing the
475 posterior probability.
476
477 'sample_name.transcript.sorted.bam' and
478 'sample_name.transcript.sorted.bam.bai' are the sorted BAM file and
479 indices generated by samtools (included in RSEM package).
480
481 sample_name.genome.bam, sample_name.genome.sorted.bam and
482 sample_name.genome.sorted.bam.bai
483 Only generated when --no-bam-output is not specified and
484 --output-genome-bam is specified.
485
486 'sample_name.genome.bam' is a BAM-formatted file of read alignments
487 in genomic coordinates. Alignments of reads that have identical
488 genomic coordinates (i.e., alignments to different isoforms that
489 share the same genomic region) are collapsed into one alignment. The
490 MAPQ field of each alignment is set to min(100, floor(-10 *
491 log10(1.0 - w) + 0.5)), where w is the posterior probability of that
492 alignment being the true mapping of a read. In addition, RSEM pads a
493 new tag ZW:f:value, where value is a single precision floating
494 number representing the posterior probability. If an alignment is
495 spliced, a XS:A:value tag is also added, where value is either '+'
496 or '-' indicating the strand of the transcript it aligns to.
497
498 'sample_name.genome.sorted.bam' and
499 'sample_name.genome.sorted.bam.bai' are the sorted BAM file and
500 indices generated by samtools (included in RSEM package).
501
502 sample_name.sam.gz
503 Only generated when the input files are raw reads instead of SAM/BAM
504 format files
505
506 It is the gzipped SAM output produced by bowtie aligner.
507
508 sample_name.time
509 Only generated when --time is specified.
510
511 It contains time (in seconds) consumed by aligning reads, estimating
512 expression levels and calculating credibility intervals.
513
514 sample_name.stat
515 This is a folder instead of a file. All model related statistics are
516 stored in this folder. 'rsem-plot-model' can generate plots
517 using this folder.
518
519 EXAMPLES
520 Assume the path to the bowtie executables is in the user's PATH
521 environment variable. Reference files are under '/ref' with name
522 'mouse_125'.
523
524 1) '/data/mmliver.fq', single-end reads with quality scores. Quality
525 scores are encoded as for 'GA pipeline version >= 1.3'. We want to use 8
526 threads and generate a genome BAM file:
527
528 rsem-calculate-expression --phred64-quals \
529 -p 8 \
530 --output-genome-bam \
531 /data/mmliver.fq \
532 /ref/mouse_125 \
533 mmliver_single_quals
534
535 2) '/data/mmliver_1.fq' and '/data/mmliver_2.fq', paired-end reads with
536 quality scores. Quality scores are in SANGER format. We want to use 8
537 threads and do not generate a genome BAM file:
538
539 rsem-calculate-expression -p 8 \
540 --paired-end \
541 /data/mmliver_1.fq \
542 /data/mmliver_2.fq \
543 /ref/mouse_125 \
544 mmliver_paired_end_quals
545
546 3) '/data/mmliver.fa', single-end reads without quality scores. We want
547 to use 8 threads:
548
549 rsem-calculate-expression -p 8 \
550 --no-qualities \
551 /data/mmliver.fa \
552 /ref/mouse_125 \
553 mmliver_single_without_quals
554
555 4) Data are the same as 1). We want to take a fragment length
556 distribution into consideration. We set the fragment length mean to 150
557 and the standard deviation to 35. In addition to a BAM file, we also
558 want to generate credibility intervals. We allow RSEM to use 1GB of
559 memory for CI calculation:
560
561 rsem-calculate-expression --bowtie-path /sw/bowtie \
562 --phred64-quals \
563 --fragment-length-mean 150.0 \
564 --fragment-length-sd 35.0 \
565 -p 8 \
566 --output-genome-bam \
567 --calc-ci \
568 --ci-memory 1024 \
569 /data/mmliver.fq \
570 /ref/mouse_125 \
571 mmliver_single_quals
572
573 5) '/data/mmliver_paired_end_quals.bam', paired-end reads with quality
574 scores. We want to use 8 threads:
575
576 rsem-calculate-expression --paired-end \
577 --bam \
578 -p 8 \
579 /data/mmliver_paired_end_quals.bam \
580 /ref/mouse_125 \
581 mmliver_paired_end_quals
582 </help>
583 </tool>