comparison graphprot_predict_profile.xml @ 0:0bc573ec2010 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit 9bb5f3c8ed8e87ec5652b5bc8bf9c774d5534a1a
author bgruening
date Fri, 25 May 2018 11:48:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0bc573ec2010
1 <tool id="graphprot_predict_profile" name="GraphProt predict profile" version="1.1.7">
2 <description>- Predict RBP binding profiles</description>
3 <requirements> <!-- Runtime dependency of the wrapper; the package version is the same as the tool's own version attribute (1.1.7) -->
4 <requirement type="package" version="1.1.7">graphprot</requirement>
5 </requirements>
6 <command><![CDATA[ ## Unpack a bundled model if one was chosen, then call the wrapper script. Optional numeric inputs are tested with str(...) != '' instead of plain truthiness, because the parameter wrapper is falsy for 0 and an explicit zero (e.g. -distr-xi 0) would otherwise be dropped silently.
7 #if $select_model.model_selector == 'select_model_from_repo':
8 mkdir -p ./model &&
9 tar -zxvf '$__tool_directory__/data/${select_model.repo_model}.tar.gz' -C ./model &&
10 #end if
11 perl '$__tool_directory__/graphprot_predict_profile_wrapper.pl'
12 -fasta '$fasta_file'
13 #if $select_model.model_selector == 'select_model_from_history':
14 -model '$select_model.model_file'
15 #if $select_model.set_params.set_params_selector == 'supply_params_file':
16 -params '$select_model.set_params.params_file'
17 #elif $select_model.set_params.set_params_selector == 'manual_params_setting':
18 #if $select_model.set_params.model_type.model_type_selector == 'sequence':
19 -onlyseq
20 #elif $select_model.set_params.model_type.model_type_selector == 'structure':
21 -abstraction $select_model.set_params.model_type.gp_abstraction
22 #end if
23 -R $select_model.set_params.gp_r
24 -D $select_model.set_params.gp_d
25 -bitsize $select_model.set_params.gp_bitsize
26 -lambda $select_model.set_params.gp_lambda
27 -epochs $select_model.set_params.gp_epochs
28 #if str($select_model.set_params.gev_options.distr_my) != '':
29 -distr-my $select_model.set_params.gev_options.distr_my
30 #end if
31 #if str($select_model.set_params.gev_options.distr_sigma) != '':
32 -distr-sigma $select_model.set_params.gev_options.distr_sigma
33 #end if
34 #if str($select_model.set_params.gev_options.distr_xi) != '':
35 -distr-xi $select_model.set_params.gev_options.distr_xi
36 #end if
37 #end if
38 #elif $select_model.model_selector == 'select_model_from_repo':
39 -model './model/${select_model.repo_model}.model'
40 -params './model/${select_model.repo_model}.params'
41 #end if
42 $peak_region_options.p50_output
43 #if str($peak_region_options.merge_dist) != '':
44 -merge-dist $peak_region_options.merge_dist
45 #end if
46 #if str($peak_region_options.p_val_thr) != '':
47 -thr-p $peak_region_options.p_val_thr
48 #end if
49 #if str($peak_region_options.score_thr) != '':
50 -thr-sc $peak_region_options.score_thr
51 #end if
52 ]]></command>
53 <inputs>
54 <param name="fasta_file" type="data" format="fasta" label="Input FASTA file" argument="-fasta"
55 help="FASTA file containing sequences to predict binding profiles on"/>
56
57 <conditional name="select_model"> <!-- Model source: a model dataset from the history, or one of the pre-trained models unpacked from the tool's data directory by the command -->
58 <param name="model_selector" type="select" label="Select GraphProt model"
59 help="Select GraphProt model for binding profile prediction">
60 <option value="select_model_from_history" selected="true">Select model from history</option>
61 <option value="select_model_from_repo">Select model from repository</option>
62 </param>
63 <when value="select_model_from_history">
64 <param name="model_file" type="data" format="data" label="GraphProt model file" argument="-model"
65 help="Predict binding profile for the given GraphProt RBP model"/>
66 <conditional name="set_params"> <!-- Model parameters come either from a .params dataset or from the manual inputs below -->
67 <param name="set_params_selector" type="select" label="Set model parameters">
68 <option value="supply_params_file" selected="true">Select parameter file from history</option>
69 <option value="manual_params_setting">Manually set model parameters</option>
70 </param>
71 <when value="supply_params_file">
72 <param name="params_file" type="data" format="txt"
73 label="Model parameter file" argument="-params"
74 help="Parameter file containing model parameters"/>
75 </when>
76 <when value="manual_params_setting">
77 <conditional name="model_type">
78 <param name="model_type_selector" type="select" label="Select model type">
79 <option value="sequence" selected="true">Supplied model is a sequence model</option>
80 <option value="structure">Supplied model is a structure model</option>
81 </param>
82 <when value="sequence"/> <!-- no extra inputs; the command adds -onlyseq for sequence models -->
83 <when value="structure">
84 <param name="gp_abstraction" type="integer" value="3" argument="-abstraction"
85 label="RNAshapes abstraction level for RNA structure graphs"/>
86 </when>
87 </conditional>
88 <param name="gp_r" type="integer" value="1" min="0" max="4"
89 label="GraphProt radius" argument="-R"/>
90 <param name="gp_d" type="integer" value="4" min="0" max="6"
91 label="GraphProt distance" argument="-D"/>
92 <param name="gp_bitsize" type="integer" value="14"
93 label="GraphProt bitsize used for feature encoding" argument="-bitsize"/>
94 <param name="gp_lambda" type="float" value="10e-6"
95 label="SGD parameter lambda for classification" argument="-lambda"/>
96 <param name="gp_epochs" type="integer" value="10"
97 label="SGD parameter epochs for classification" argument="-epochs"/>
98 <section name="gev_options" title="GEV distribution parameters for signifying GraphProt scores"> <!-- all three are optional and are only passed to the wrapper when set -->
99 <param name="distr_my" type="float" optional="True"
100 label="GEV distribution parameter my" argument="-distr-my"/>
101 <param name="distr_sigma" type="float" optional="True"
102 label="GEV distribution parameter sigma" argument="-distr-sigma"/>
103 <param name="distr_xi" type="float" optional="True"
104 label="GEV distribution parameter xi" argument="-distr-xi"/>
105 </section>
106 </when>
107 </conditional>
108 </when>
109 <when value="select_model_from_repo">
110 <param name="repo_model" type="select" label="Select model" help="Select model from a list of pre-trained GraphProt models"> <!-- each option value is the basename of the data/VALUE.tar.gz archive and of the VALUE.model / VALUE.params files the command reads from it; the label must show the same name -->
111 <option value="EWSR1_eCLIP_K562_ENCSR887LPK" selected="true">EWSR1_eCLIP_K562_ENCSR887LPK.model</option>
112 <option value="FMR1_eCLIP_K562_ENCSR331VNX">FMR1_eCLIP_K562_ENCSR331VNX.model</option>
113 <option value="HNRNPC_eCLIP_HepG2_ENCSR550DVK">HNRNPC_eCLIP_HepG2_ENCSR550DVK.model</option>
114 <option value="HUR_PAR-CLIP_HEK293_Mukherjee">HUR_PAR-CLIP_HEK293_Mukherjee.model</option>
115 <option value="IGF2BP1-3_PAR-CLIP_HEK293_Hafner">IGF2BP1-3_PAR-CLIP_HEK293_Hafner.model</option>
116 <option value="IGF2BP1-3_PAR-CLIP_HEK293_Hafner_structure">IGF2BP1-3_PAR-CLIP_HEK293_Hafner_structure.model</option>
117 <option value="KHDRBS1_eCLIP_K562_ENCSR628IDK">KHDRBS1_eCLIP_K562_ENCSR628IDK.model</option>
118 <option value="KHDRBS1_eCLIP_K562_ENCSR628IDK_structure">KHDRBS1_eCLIP_K562_ENCSR628IDK_structure.model</option>
119 <option value="PUM2_PAR-CLIP_HEK293_Hafner">PUM2_PAR-CLIP_HEK293_Hafner.model</option>
120 <option value="PUM2_eCLIP_K562_exonized_3utr">PUM2_eCLIP_K562_exonized_3utr.model</option>
121 <option value="QKI_PAR-CLIP_HEK293_Hafner">QKI_PAR-CLIP_HEK293_Hafner.model</option>
122 <option value="QKI_eCLIP_HepG2_ENCSR570WLM">QKI_eCLIP_HepG2_ENCSR570WLM.model</option>
123 <option value="QKI_eCLIP_HepG2_ENCSR570WLM_structure">QKI_eCLIP_HepG2_ENCSR570WLM_structure.model</option>
124 </param>
125 </when>
126 </conditional>
127 <section name="peak_region_options" title="Peak region options"> <!-- The three optional thresholds are only passed to the wrapper when set; p50_output expands to -p50-out or to nothing -->
128 <param name="merge_dist" type="integer" optional="True"
129 label="Maximum distance between two peak regions for merging" argument="-merge-dist"
130 help="By default all non-overlapping regions will be reported. E.g. a distance of 1 means that two regions above the set threshold (score or p-value) will be merged if there is 1 nucleotide that separates the two regions"/>
131 <param name="p_val_thr" type="float" optional="True"
132 label="Set p-value threshold for reporting peak regions" argument="-thr-p"
133 help="Regions with p-value lower than or equal to the given value are reported. If no distribution parameters are given (either manually or in .params file), no p-values will be calculated and peak regions will be filtered by their peak scores instead (default: 0.05)"/>
134 <param name="score_thr" type="float" optional="True"
135 label="Set GraphProt peak score threshold for reporting peak regions" argument="-thr-sc"
136 help="Regions with peak score higher than or equal to the given value are reported. If distribution parameters are given (either manually or in .params file), p-value threshold will be used instead of the peak score threshold for filtering (default: 0)"/>
137 <param name="p50_output" label="Also output p50 score filtered peak regions file" type="boolean"
138 truevalue="-p50-out" falsevalue="" checked="False"
139 help="Output is a peak regions BED file using the best average score found in at least 50 % of the positive training sites (p50) as threshold for defining peak regions. NOTE that if the score is not given in .params file, an empty file will be output if selected."/>
140 </section>
141 </inputs>
142 <outputs> <!-- Outputs are collected from fixed file names in the job working directory; the p50 peak regions dataset is only created when the p50_output option is checked -->
143 <data format="txt" name="average_profile_outfile" label="${tool.name} on ${on_string} (average profile)" from_work_dir="GraphProt.average_profile"/>
144 <data format="bed" name="peak_regions_outfile" label="${tool.name} on ${on_string} (peak regions)" from_work_dir="GraphProt.peak_regions.bed"/>
145 <data format="bed" name="peak_regions_p50_outfile" label="${tool.name} on ${on_string} (peak regions p50)" from_work_dir="GraphProt.peak_regions_p50.bed"><filter>peak_region_options['p50_output']</filter></data>
146 </outputs>
147 <tests>
148 <test> <!-- History model with a supplied .params file and p50 output enabled; all three outputs are compared -->
149 <param name="fasta_file" value="test.fa" ftype="fasta"/>
150 <param name="model_selector" value="select_model_from_history"/>
151 <param name="model_file" value="test.model"/>
152 <param name="set_params_selector" value="supply_params_file"/>
153 <param name="params_file" value="test.params"/>
154 <param name="p50_output" value="True"/>
155 <output name="average_profile_outfile" file="GraphProt_predict_profile_test_out1.average_profile"/>
156 <output name="peak_regions_outfile" file="GraphProt_predict_profile_test_out1.peak_regions.bed"/>
157 <output name="peak_regions_p50_outfile" file="GraphProt_predict_profile_test_out1.peak_regions_p50.bed"/>
158 </test>
159 <test> <!-- History model with manually entered parameters (sequence model), merge distance 1 and score threshold 1; only the average profile and peak regions outputs are compared -->
160 <param name="fasta_file" value="test.fa" ftype="fasta"/>
161 <param name="model_selector" value="select_model_from_history"/>
162 <param name="model_file" value="test.model"/>
163 <param name="set_params_selector" value="manual_params_setting"/>
164 <param name="model_type_selector" value="sequence"/>
165 <param name="gp_r" value="1"/>
166 <param name="gp_d" value="4"/>
167 <param name="gp_epochs" value="20"/>
168 <param name="gp_lambda" value="0.001"/>
169 <param name="gp_bitsize" value="14"/>
170 <param name="merge_dist" value="1"/>
171 <param name="score_thr" value="1"/>
172 <output name="average_profile_outfile" file="GraphProt_predict_profile_test_out2.average_profile"/>
173 <output name="peak_regions_outfile" file="GraphProt_predict_profile_test_out2.peak_regions.bed"/>
174 </test>
175 <test> <!-- History structure model with a supplied .params file and score threshold 2. NOTE(review): model_type_selector and gp_abstraction are only declared under set_params_selector = manual_params_setting, so with supply_params_file they appear to be unused here; confirm and drop them if so -->
176 <param name="fasta_file" value="test.fa" ftype="fasta"/>
177 <param name="model_selector" value="select_model_from_history"/>
178 <param name="model_file" value="structure_test.model"/>
179 <param name="set_params_selector" value="supply_params_file"/>
180 <param name="params_file" value="structure_test.params"/>
181 <param name="model_type_selector" value="structure"/>
182 <param name="gp_abstraction" value="3"/>
183 <param name="score_thr" value="2"/>
184 <output name="average_profile_outfile" file="GraphProt_predict_profile_test_out3.average_profile"/>
185 <output name="peak_regions_outfile" file="GraphProt_predict_profile_test_out3.peak_regions.bed"/>
186 </test>
187 <test> <!-- Pre-trained repository model FMR1_eCLIP_K562_ENCSR331VNX with p50 output enabled; all three outputs are compared -->
188 <param name="fasta_file" value="test.fa" ftype="fasta"/>
189 <param name="model_selector" value="select_model_from_repo"/>
190 <param name="repo_model" value="FMR1_eCLIP_K562_ENCSR331VNX"/>
191 <param name="p50_output" value="True"/>
192 <output name="average_profile_outfile" file="GraphProt_predict_profile_test_out4.average_profile"/>
193 <output name="peak_regions_outfile" file="GraphProt_predict_profile_test_out4.peak_regions.bed"/>
194 <output name="peak_regions_p50_outfile" file="GraphProt_predict_profile_test_out4.peak_regions_p50.bed"/>
195 </test>
196
197 </tests>
198 <help>
199
200 Use GraphProt (-action predict_profile) to predict binding profiles for a given RBP model (supplied as .model and .params file) on a given set of FASTA sequences. After predicting position-wise scores, the scores are averaged over small windows (11 nt with averaged score position in center) to smooth out the profiles and peak regions are extracted based on the set thresholds (p-value or score) and merge distance.
201
202 **Output files**
203
204 The procedure has three output files (third is optional):
205
206 1) An average_profile file containing averaged position-scores over all supplied sequences
207
208 2) A peak regions BED file which contains peak-scoring regions above the supplied threshold (p-value default: 0.05, score default: 0)
209
210 3) A peak regions BED file using the best average score found in at least 50 % of the positive training sites (p50). NOTE that this requires the p50 score to be given in the .params file; otherwise, if this output is selected, an empty file will be produced.
211
212 **Model selection**
213
214 The GraphProt model used for profile prediction can either be uploaded to history or chosen from an example collection of models (Select model from repository). For the repository models, the corresponding parameter file is selected automatically, providing all model parameters necessary for prediction and p-value calculation. If you choose to upload a model to the history, it is recommended to use the corresponding .params file for automatically setting the model parameters. Otherwise the model parameters have to be entered manually.
215
216 **p-value calculation**
217
218 The significance of the GraphProt scores is assessed by fitting a generalized extreme value (GEV) distribution to a set of scores derived from 10000 transcript sequences for each GraphProt model. The GEV distribution has three parameters: my (location), sigma (scale), and xi (shape). The fitted parameter values are usually read from the .params file, but can also be entered manually. Parameter fitting was done in R using the minpack.lm_ package, using the probability density function (PDF) described here_. If no GEV parameter values are specified (either in .params file or manually), p-value calculation for the scores will be skipped and the peak regions will be extracted based on the set threshold score.
219
220 .. _here: https://en.wikipedia.org/wiki/Generalized_extreme_value_distribution
221 .. _minpack.lm: https://cran.r-project.org/web/packages/minpack.lm
222
223 </help>
224 <citations>
225 <citation type="doi">10.1186/gb-2014-15-1-r17</citation>
226 </citations>
227 </tool>