comparison rsem_calculate_expression.xml @ 1:1ff2fc8da328

Updates to rsem_calculate_expression.xml
author Jim Johnson <jj@umn.edu>
date Thu, 05 Dec 2013 10:54:28 -0600 (2013-12-05)
parents 64d45f959303
children f6b8155ab12a
comparison
equal deleted inserted replaced
0:64d45f959303 1:1ff2fc8da328
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.1.17">rsem</requirement> 4 <requirement type="package" version="1.1.17">rsem</requirement>
5 <requirement type="package" version="0.1.19">samtools</requirement> 5 <requirement type="package" version="0.1.19">samtools</requirement>
6 <requirement type="package" version="1.0.0">bowtie</requirement> 6 <requirement type="package" version="1.0.0">bowtie</requirement>
7 </requirements> 7 </requirements>
8 <command interpreter="perl"> 8 <command>
9 rsem-calculate-expression 9 rsem-calculate-expression
10 --calc-ci $useci.ci 10 ## --tag string
11 --fragment-length-mean $fraglenmean 11 #if $seedlength:
12 --fragment-length-min $fraglenmin 12 --seed-length $seedlength
13 --fragment-length-sd $fraglensd 13 #end if
14 --fragment-length-max $fraglenmax 14 --forward-prob $forward_prob
15 --bowtie-e $bowtie_e 15 #if $rsem_options.fullparams == 'fullset':
16 --bowtie-m $bowtie_m 16 ## Fragment info
17 17 #if $rsem_options.fragment_length_mean:
18 #if $input.format=="fastq" 18 --fragment-length-mean $rsem_options.fragment_length_mean
19 ## IF FASTQ AND SINGLE END READS (DEFAULTS) 19 #end if
20 #if $input.fastqmatepair.matepair=="single" #rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype 20 #if $rsem_options.fragment_length_min:
21 --seed-length $seedlength $input.fastq_select --estimate-rspd $rspd --forward-prob 21 --fragment-length-min $rsem_options.fragment_length_min
22 $fprob -p $cpus --bowtie-n $bowtie_mis --output-genome-bam --single_fastq $singlefastq 22 #end if
23 --output $output --isoformfile $isoforms --bamfile $bam_res --log $log 23 #if $rsem_options.fragment_length_sd:
24 --sampling-for-bam $sampling_for_bam --reference ${index.fields.path} 24 --fragment-length-sd $rsem_options.fragment_length_sd
25 #end if 25 #end if
26 ## IF FASTQ AND PAIRED END READS (DEFAULTS) 26 #if $rsem_options.fragment_length_max:
27 #if $input.fastqmatepair.matepair=="paired" #rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype 27 --fragment-length-max $rsem_options.fragment_length_max
28 --paired-end --seed-length $seedlength --estimate-rspd $rspd $input.fastq_select --forward-prob $fprob -p $cpus 28 #end if
29 --bowtie-n $bowtie_mis --output-genome-bam --fastq1 $fastq1 --fastq2 $fastq2 --output 29 ## RSPD
30 $output --isoformfile $isoforms --bamfile $bam_res --log $log --sampling-for-bam 30 #if $rsem_options.rspd.estimate == 'yes':
31 $sampling_for_bam --reference ${index.fields.path} 31 --estimate-rspd
32 #end if 32 #if $rsem_options.rspd.num_rspd_bins:
33 #end if 33 --num-rspd-bins $rsem_options.rspd.num_rspd_bins
34 #if $input.format=="fasta"
35 ## IF FASTA AND SINGLE END READS (DEFAULTS)
36 #if $input.fastamatepair.matepair=="single" #rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype
37 --no-qualities --seed-length $seedlength --estimate-rspd $rspd --forward-prob $fprob -p $cpus --bowtie-n $bowtie_mis
38 --output-genome-bam --single_fasta $single_fasta --output $output --isoformfile
39 $isoforms --bamfile $bam_res --log $log --sampling-for-bam $sampling_for_bam --reference
40 ${index.fields.path}
41 #end if 34 #end if
42 ## IF FASTA AND PAIRED END READS (DEFAULTS) 35 #end if
43 #if $input.fastamatepair.matepair=="paired" #rsem-wrapper-1.1.17.pl --bam_genome $bam_genome --bamtype $bamtype 36 ## Calculate 95% credibility intervals and posterior mean estimates.
44 --no-qualities --paired-end --seed-length $seedlength --estimate-rspd $rspd --forward-prob $fprob -p $cpus 37 #if $rsem_options.useci.ci == 'yes':
45 --bowtie-n $bowtie_mis --output-genome-bam --fasta1 $fasta1 --fasta2 $fasta2 --output 38 --calc-ci
46 $output --isoformfile $isoforms --bamfile $bam_res --log $log --sampling-for-bam 39 #if $rsem_options.useci.cimem:
47 $sampling_for_bam --reference ${index.fields.path} 40 --ci-memory $rsem_options.useci.cimem
48 #end if 41 #end if
42 #end if
49 #end if 43 #end if
50 44 ## --num-threads $GALAXY_SLOTS
51 </command> 45 #if $input.format != 'bam' and $input.bowtie_options.fullparams == 'fullset':
52 46 ## Bowtie params
53 <inputs> 47 #if $bowtie_options.bowtie_e:
54 <param name="sample" type="text" format="txt" label="Sample label" /> 48 --bowtie-e $bowtie_options.bowtie_e
55 <conditional name="input"> 49 #end if
56 <param name="format" type="select" label="Input file type"> 50 #if $bowtie_options.bowtie_m:
57 <option value="fastq">FASTQ</option> 51 --bowtie-m $bowtie_options.bowtie_m
58 <option value="fasta">FASTA</option> 52 #end if
59 </param> 53 #if $bowtie_options.bowtie_n:
60 <when value="fastq"> 54 --bowtie-n $bowtie_options.bowtie_n
61 <param name="fastq_select" size="15" type="select" label="FASTQ type" > 55 #end if
62 <option value="--phred33-quals">phred33 qualities</option> 56 #end if
63 <option value="--solexa-quals">solexa qualities</option> 57 ## Outputs
64 <option value="--phred64-quals">phred64 qualities</option> 58 #if $rsem_outputs.result_bams == 'none':
65 </param> 59 --no-bam-output
66 60 #else
67 <conditional name="fastqmatepair"> 61 #if $rsem_outputs.result_bams == 'both':
68 <when value="single"> 62 --output-genome-bam
69 <param name="singlefastq" type="data" checked="yes" format="fastq" label="FASTQ file" /> 63 #end if
70 </when> 64 $rsem_outputs.sampling_for_bam
71 <when value="paired"> 65 #end if
72 <param name="fastq1" type="data" format="fastq" label="Read 1 fastq file" /> 66 ## Input data
73 <param name="fastq2" type="data" format="fastq" label="Read 2 fastq file" /> 67 #if $input.format=="fastq"
74 </when> 68 $input.fastq_select
75 <param name="matepair" type="select" label="Library type"> 69 #if $input.fastq.matepair=="single":
76 <option value="single">Single End Reads</option> 70 $input.fastq.singlefastq
77 <option value="paired">Paired End Reads</option> 71 #elif $input.fastq.matepair=="paired":
78 </param> 72 --paired-end
79 </conditional> 73 $input.fastq.fastq1
80 </when> 74 $input.fastq.fastq2
81 <when value="fasta"> 75 #end if
82 <conditional name="fastamatepair"> 76 #elif $input.format=="fasta"
83 <param name="matepair" type="select" label="Library Type"> 77 --no-qualities
84 <option value="single">Single End Reads</option> 78 #if $input.fasta.matepair=="single":
85 <option value="paired">Paired End Reads</option> 79 $input.fasta.singlefasta
86 </param> 80 #elif $input.fasta.matepair=="paired":
87 <when value="single"> 81 --paired-end
88 <param name="single_fasta" type="data" checked="yes" format="fasta" label="fasta file" /> 82 $input.fasta.fasta1
89 </when> 83 $input.fasta.fasta2
90 <when value="paired"> 84 #end if
91 <param name="fasta1" type="data" format="fasta" label="Read 1 fasta file" /> 85 #elif $input.format=="sam"
92 <param name="fasta2" type="data" format="fasta" label="Read 2 fasta file" /> 86 #if $input.matepair=="paired":
93 </when> 87 --paired-end
94 </conditional> 88 #end if
95 </when> 89 #if $input.rsem_sam._extension == 'sam':
96 <when> 90 --sam
97 <conditional name="fastamatepair"> 91 #elif $input.rsem_sam._extension == 'bam':
98 <param name="matepair" type="select" label="Library Type" > 92 --bam
99 <option value="single">Single End Reads</option> 93 #end if
100 <option value="paired">Paired End Reads</option> 94 $input.rsem_sam
101 </param> 95 #end if
102 <when value="single"> 96 ## RSEM reference
103 <param name="singlefastq" type="data" checked="yes" format="fastq" label="FASTQ file" /> 97 #if $reference.refSrc == 'history':
104 </when> 98 ${reference.rsem_ref.extra_files_path}/${reference.rsem_ref.metadata.reference_name}
105 <when value="paired"> 99 #elif $reference.refSrc == 'cached':
106 <param name="fastq1" type="data" format="fastq" label="Read 1 FASTQ file" /> 100 ${reference.index.fields.path}
107 <param name="fastq2" type="data" format="fastq" label="Read 2 FASTQ file" /> 101 #end if
108 </when> 102 ## sample_name: use a hard coded name so we can pull out galaxy outputs
109 </conditional> 103 rsem_output
110 </when> 104 ## direct output into logfile
111 </conditional> 105 > $log
112 <param name="fprob" type="select" > 106 </command>
113 <label>Is the library strand specific?</label> 107 <macros>
114 <option value="0.5">No</option> 108 <macro name="rsem_options">
115 <option value="1">Yes, the reads (or first reads from paired-end libraries) are only in the forward orientation</option> 109 <param name="seedlength" type="integer" value="25" optional="true" label="Seed length used by the read aligner" help="Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. RSEM uses this value for Bowtie's seed length parameter. The minimum value is 25. (Default:25)">
116 <option value="0">Yes, the reads (or first reads from paired-end libraries) are only in the reverse orientation</option> 110 </param>
117 </param> 111 <param name="forward_prob" type="select" label="Is the library strand specific?">
118 112 <option value="0.5" selected="true">No</option>
119 <param name="index" type="select" label="Select RSEM reference" help="Select from a list of pre-indexed references. If you don't see anything consult the wrapper's documentation on how to create or download a reference"> 113 <option value="1">Yes, the reads (or first reads from paired-end libraries) are only in the forward orientation</option>
120 <options from_data_table="rsem_indexes"> 114 <option value="0">Yes, the reads (or first reads from paired-end libraries) are only in the reverse orientation</option>
121 <filter type="sort_by" column="2" /> 115 </param>
122 <validator type="no_options" message="No indexes are available" /> 116 <conditional name="rsem_options">
123 </options> 117 <param name="fullparams" type="select" label="Additional RSEM options">
124 </param> 118 <option value="default">Use RSEM Defaults</option>
125 <param name="fraglenmean" size="4" type="text" value="-1" label="Fragment length mean (single-end data only)" help="The mean of the fragment length distribution, which is assumed to be a Gaussian. (Default: -1, which disables use of the fragment length distribution)" /> 119 <option value="fullset">Set Additional RSEM Options</option>
126 <param name="fraglensd" size="4" type="text" value="0" label="The standard deviation of the fragment length distribution (single-end data only)" help="Default 0, which assumes that all fragments are of the same length, given by the rounded value of fragment length mean. " /> 120 </param>
127 121 <when value="default"/>
128 <param name="bamtype" type="select" label="Create genome bam file" help="In addition to the transcript-coordinate-based BAM file output, also output a BAM file with the read alignments in genomic coordinates" > 122 <when value="fullset">
129 <option value="no">no</option> 123 <param name="fragment_length_min" type="integer" value="1" optional="true" label="Minimum read/insert length." help=" This is also the value for the bowtie -I option">
130 <option value="yes">yes</option> 124 <validator type="in_range" message="0 or greater" min="0" />
131 </param> 125 </param>
132 <param name="sampling_for_bam" type="select" format="text" help="When RSEM generates a BAM file, instead of outputing all alignments a read has with their posterior probabilities, one alignment is sampled and outputed according to the posterior probabilities. If the sampling result is that the read comes from the noise transcript, nothing is outputed. (Default: off)"> 126 <param name="fragment_length_max" type="integer" value="1000" optional="true" label="Maximum read/insert length." help=" This is also the value for the bowtie -X option">
133 <label>Sample Bam File</label> 127 <validator type="in_range" message="between 0 and 1000000" min="0" max="1000000"/>
134 <option value="no">no</option> 128 </param>
135 <option value="yes">yes</option> 129 <param name="fragment_length_mean" type="float" value="" optional="true" label="Fragment length mean (single-end data only)" help="The mean of the fragment length distribution, which is assumed to be a Gaussian. (Default: -1, which disables use of the fragment length distribution)">
136 </param> 130 </param>
137 <param name="rspd" type="select" format="text" help="Set this option if you want to estimate the read start position distribution (RSPD) from data. Otherwise, RSEM will use a uniform RSPD. (Default: off)"> 131 <param name="fragment_length_sd" type="float" value="" optional="true" label="The standard deviation of the fragment length distribution (single-end data only)" help="Default 0, which assumes that all fragments are of the same length, given by the rounded value of fragment length mean. ">
138 <label>Estimate and correct for a non-uniform read start position distribution (RSPD)</label> 132 </param>
139 <option value="no">no</option> 133 <conditional name="rspd">
140 <option value="yes">yes</option> 134 <param name="estimate" type="select" lanel="Read Start Position Distribution (RSPD)"
141 </param> 135 help="Set this option if you want to estimate the read start position distribution (RSPD) from data. Otherwise, RSEM will use a uniform RSPD.">
142 136 <option value="no" selected="true">Use a uniform RSPD</option>
143 <!-- <conditional name="fullpar"> 137 <option value="yes">Estimate and correct for a non-uniform RSPD</option>
144 <param name="fullpar" type="select" label="Full list of parameters" help="use 138 </param>
145 full list for linting all the parameters in RSEM"> 139 <when value="no"/>
146 <option value="default">Default</option> 140 <when value="yes">
147 <option value="fullset">Full Set</option> 141 <param name="num_rspd_bins" type="integer" value="20" optional="true" label="Number of bins in the RSPD." help="Use of the default setting of 20 is recommended.">
148 </param> 142 <validator type="in_range" message="" min="0" max="100"/>
149 <when value="fullset"> --> 143 </param>
150 <!-- <param name="testing" size="4" type="text" value="" label="Advanced Parameters" /> 144 </when>
151 --> 145 </conditional>
152 <conditional name="useci"> 146 <conditional name="useci">
153 <param name="ci" type="select" label="Calculate 95% Credibility Intervals"> 147 <param name="ci" type="select" label="Calculate 95% Credibility Intervals">
154 <option value="no">no</option> 148 <option value="no" selected="true">no</option>
155 <option value="yes">yes</option> 149 <option value="yes">yes</option>
156 </param> 150 </param>
157 <when value="yes"> 151 <when value="no"/>
158 <param name="cimem" size="4" type="text" value="1024" label="Amount of memory in (MB) for computing CI" /> 152 <when value="yes">
159 </when> 153 <param name="cimem" size="4" type="text" value="1024" label="Amount of memory in (MB) for computing CI" />
154 </when>
155 </conditional>
156 </when>
157 </conditional>
158 </macro>
159 <macro name="bowtie_options">
160 <conditional name="bowtie_options">
161 <param name="fullparams" type="select" label="bowtie settings">
162 <option value="default">use bowtie defaults</option>
163 <option value="fullset">set bowtie options</option>
164 </param>
165 <when value="default"/>
166 <when value="fullset">
167 <param name="bowtie_n" type="integer" value="2" optional="true" label="Bowtie mismatches" help="Bowtie parameter max # of mismatches in the seed. (Range: 0-3, Default: 2) ">
168 <validator type="in_range" message="max # of mismatches in the seed between 0 and 3" min="0" max="3"/>
169 </param>
170 <param name="bowtie_e" type="integer" value="99999999" label="Maximum sum of quality scores at mismatched positions in read alignments. This is also the value for the Bowtie -e option">
171 </param>
172 <param name="bowtie_m" type="integer" value="200" label="Discard alignments for reads with number of alignments greater than">
173 </param>
174 </when>
175 </conditional>
176 </macro>
177 <macro name="sampling_for_bam">
178 <param name="sampling_for_bam" type="boolean" truevalue="--sampling-for-bam" falsevalue="" checked="false" label="Use sampling for BAM">
179 <help> When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior probabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure includes the alignment to the "noise" transcript, which does not appear in the BAM file. Only the sampled alignment has a weight of 1. All other alignments have weight 0. If the "noise" transcript is sampled, all alignments appearing in the BAM file should have weight 0. (Default: off)
180 </help>
181 </param>
182 </macro>
183 </macros>
184
185 <inputs>
186 <param name="sample" type="text" value="rsem_sample" label="Sample name" />
187 <conditional name="reference">
188 <param name="refSrc" type="select" label="RSEM Reference">
189 <option value="cached">Locally cached</option>
190 <option value="history">From your history</option>
191 </param>
192 <when value="cached">
193 <param name="index" type="select" label="Select RSEM reference" help="Select from a list of pre-indexed references. If you don't see anything consult the wrapper's documentation on how to create or download a reference">
194 <options from_data_table="rsem_indexes">
195 <filter type="sort_by" column="2" />
196 <validator type="no_options" message="No indexes are available" />
197 </options>
198 </param>
199 </when>
200 <when value="history">
201 <param name="rsem_ref" type="data" format="rsem_ref" label="RSEM reference" />
202 </when>
160 </conditional> 203 </conditional>
161 <param name="fraglenmin" size="4" type="text" value="1" label="Minimum read/insert length. Minimum read/insert length allowed. This is also the value for the bowtie -X option" /> 204 <conditional name="input">
162 <param name="fraglenmax" size="4" type="text" value="1000" label="Maximum read/insert length. Minimum read/insert length allowed. This is also the value for the bowtie -l option" /> 205 <param name="format" type="select" label="RSEM Input file type">
163 <param name="bowtie_mis" size="2" type="text" value="2" label="Bowtie mismatches" help="Bowtie parameter max # of mismatches in the seed. (Range: 0-3, Default: 2) "/> 206 <option value="fastq">FASTQ</option>
164 <param name="bowtie_e" size="4" type="text" value="99999999" label="Maximum sum of quality scores at mismatched positions in read alignments. This is also the value for the Bowtie -e option" /> 207 <option value="fasta">FASTA</option>
165 <param name="bowtie_m" size="4" type="text" value="200" label="Discard alignments for reads with number of alignments greater than" /> 208 <option value="sam">SAM/BAM</option>
166 <param name="seedlength" size="2" type="text" value="25" label="Seed length used by the read aligner" help="Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. RSEM uses this value for Bowtie's seed length parameter. The minimum value is 25. (Default:25)" /> 209 </param>
167 <param name="cpus" size="2" type="integer" value="1" label="Number of threads to use" help="Number of threads to use. Both Bowtie and expression estimation will use this many threads. (Default: 1)" /> 210 <when value="fastq">
168 <!-- </when> --> <!-- </conditional> --> 211 <param name="fastq_select" size="15" type="select" label="FASTQ type" >
212 <option value="--phred33-quals">phred33 qualities (default for sanger)</option>
213 <option value="--solexa-quals">solexa qualities</option>
214 <option value="--phred64-quals">phred64 qualities</option>
215 </param>
216 <conditional name="fastq">
217 <param name="matepair" type="select" label="Library type">
218 <option value="single">Single End Reads</option>
219 <option value="paired">Paired End Reads</option>
220 </param>
221 <when value="single">
222 <param name="singlefastq" type="data" format="fastq" label="FASTQ file" />
223 </when>
224 <when value="paired">
225 <param name="fastq1" type="data" format="fastq" label="Read 1 fastq file" />
226 <param name="fastq2" type="data" format="fastq" label="Read 2 fastq file" />
227 </when>
228 </conditional>
229 <expand macro="bowtie_options"/>
230 </when>
231 <when value="fasta">
232 <conditional name="fasta">
233 <param name="matepair" type="select" label="Library Type">
234 <option value="single">Single End Reads</option>
235 <option value="paired">Paired End Reads</option>
236 </param>
237 <when value="single">
238 <param name="singlefasta" type="data" format="fasta" label="fasta file" />
239 </when>
240 <when value="paired">
241 <param name="fasta1" type="data" format="fasta" label="Read 1 fasta file" />
242 <param name="fasta2" type="data" format="fasta" label="Read 2 fasta file" />
243 </when>
244 </conditional>
245 <expand macro="bowtie_options"/>
246 </when>
247 <when value="sam">
248 <!-- convert-sam-for-rsem /ref/mouse_125 input.sam -o input_for_rsem.sam -->
249 <param name="matepair" type="select" label="Library Type">
250 <option value="single">Single End Reads</option>
251 <option value="paired">Paired End Reads</option>
252 </param>
253 <param name="rsem_sam" type="data" format="rsem_sam" label="RSEM formatted SAM file" />
254 </when>
255 </conditional>
256 <expand macro="rsem_options"/>
257 <conditional name="rsem_outputs">
258 <param name="result_bams" type="select" label="Create bam results files"
259 help="In addition to the transcript-coordinate-based BAM file output, also output a BAM file with the read alignments in genomic coordinates" >
260 <option value="none">No BAM results files</option>
261 <option value="default" selected="true">Transcript BAM results file</option>
262 <option value="both">Transcript and genome BAM results files</option>
263 </param>
264 <when value="none"/>
265 <when value="default">
266 <expand macro="sampling_for_bam"/>
267 </when>
268 <when value="both">
269 <expand macro="sampling_for_bam"/>
270 </when>
271 </conditional>
169 </inputs> 272 </inputs>
170 <stdio> 273 <stdio>
171 <exit_code range="1:" level="fatal" description="Error Running RSEM" /> 274 <exit_code range="1:" level="fatal" description="Error Running RSEM" />
172 </stdio> 275 </stdio>
173 <outputs> 276 <outputs>
174 <data format="tabular" name="output" label="${sample}.gene_abundances"/> 277 <data format="tabular" name="output" label="${sample}.gene_abundances" from_work_dir="rsem_output.genes.results"/>
175 <data format="tabular" name="isoforms" label="${sample}.isoform_abundances"/> 278 <data format="tabular" name="isoforms" label="${sample}.isoform_abundances" from_work_dir="rsem_output.isoforms.results"/>
176 <data format="bam" name="bam_res" label="${sample}.transcript.bam"/> 279 <data format="bam" name="transcript_bam" label="${sample}.transcript.bam" from_work_dir="rsem_output.transcript.bam" >
177 <data format="bam" name="bam_genome" label="${sample}.genome.bam"> 280 <filter>rsem_outputs['result_bams'] != "none"</filter>
178 <filter>bamtype == "yes"</filter>
179 </data> 281 </data>
180 282 <data format="bam" name="transcript__sorted_bam" label="${sample}.transcript.bam" from_work_dir="rsem_output.transcript.sorted.bam" >
283 <filter>rsem_outputs['result_bams'] != "none"</filter>
284 </data>
285 <data format="bam" name="genome_bam" label="${sample}.genome.bam" from_work_dir="rsem_output.genome.bam">
286 <filter>rsem_outputs['result_bams'] == "both"</filter>
287 </data>
288 <data format="bam" name="genome_sorted_bam" label="${sample}.genome.bam" from_work_dir="rsem_output.genome.sorted.bam">
289 <filter>rsem_outputs['result_bams'] == "both"</filter>
290 </data>
181 <data format="txt" name="log" label="${sample}.rsem_log"/> 291 <data format="txt" name="log" label="${sample}.rsem_log"/>
182 </outputs> 292 </outputs>
183 <help> 293 <help>
184 294
185 295