comparison selection.xml @ 6:7dc6ce39fb89 default tip

add selection tool
author blanck
date Wed, 29 Apr 2015 10:08:52 +0200
parents
children
comparison
equal deleted inserted replaced
5:b7f3854e08f8 6:7dc6ce39fb89
1 <tool id="selection" name="Markers selection" force_history_refresh="True" version="0.1.0">
2 <command interpreter="python">
3 selection.py '$input' '$response' '$chromosome' '$__new_file_path__' '$settingsSNP.signal'
4 #if $settingsSNP.signal == "CN":
5 '$settingsSNP.snp'
6 #end if
7 #if $settingsSNP.signal == "fracB":
8 'none'
9 #end if
10 '$settings.settingsType'
11 #if $settings.settingsType == "tumor":
12 '$settings.tumorcsv'
13 #end if
14 #if $settings.settingsType == "standard":
15 'none'
16 #end if
17 '$folds' '$settingsLoss.loss' '$outputgraph' '$output' '$pdffigures' '$outputlog' '$log' '$__user_id__'
18 #if $settingsLoss.loss == "linear":
19 '$settingsLoss.package'
20 #end if
21 #if $settingsLoss.loss == "logistic":
22 'HDPenReg'
23 #end if
24 </command> <!-- Builds the argument list for selection.py (presumably read positionally, so keep the order). Options that exist only in one conditional branch are replaced by a fixed value in the other branch: 'none' for snp and tumorcsv, 'HDPenReg' as the package for logistic loss. Parameters declared inside a conditional are referenced through the conditional name, e.g. $settings.tumorcsv. -->
25 <inputs>
26 <param name="input" type="data" format="dsf" label="Dataset summary file" help="Summary text file generated by the Data normalization tool"/>
27
28 <param name="response" type="data" format="csv" label="Data response" help="Data response csv file. See below for more information on file format" />
29
30 <param name="chromosome" type="select" size="6" multiple="true" label="Chromosomes"> <!-- Multi-select of chromosome numbers 1 to 25; the selection is passed to selection.py as '$chromosome'. -->
31 <option value="1">chr 1</option>
32 <option value="2">chr 2</option>
33 <option value="3">chr 3</option>
34 <option value="4">chr 4</option>
35 <option value="5">chr 5</option>
36 <option value="6">chr 6</option>
37 <option value="7">chr 7</option>
38 <option value="8">chr 8</option>
39 <option value="9">chr 9</option>
40 <option value="10">chr 10</option>
41 <option value="11">chr 11</option>
42 <option value="12">chr 12</option>
43 <option value="13">chr 13</option>
44 <option value="14">chr 14</option>
45 <option value="15">chr 15</option>
46 <option value="16">chr 16</option>
47 <option value="17">chr 17</option>
48 <option value="18">chr 18</option>
49 <option value="19">chr 19</option>
50 <option value="20">chr 20</option>
51 <option value="21">chr 21</option>
52 <option value="22">chr 22</option>
53 <option value="23">chr 23</option>
54 <option value="24">chr 24</option>
55 <option value="25">chr 25</option>
56 </param>
57 <conditional name="settingsSNP"> <!-- Signal choice. The 'snp' probe filter exists only in the CN case; for fracB the command passes the literal 'none' in its place. -->
58 <param name="signal" type="select" multiple="false" label="Signal you want to work on">
59 <option value="CN">CN</option>
60 <option value="fracB">fracB</option>
61 </param>
62 <when value="fracB"/>
63 <when value="CN">
64 <param name="snp" type="select" label="Select Probes">
65 <option value="FALSE">CN and SNP probes</option>
66 <option value="TRUE">Only SNP probes</option>
67 </param>
68 </when>
69 </conditional>
70 <conditional name="settings"> <!-- Reference choice. The 'tumorcsv' dataset exists only in the tumor case; for standard the command passes the literal 'none' in its place. -->
71 <param name="settingsType" type="select" label="Reference" help="">
72 <option value="standard">Study without reference</option>
73 <option value="tumor">Normal-tumor study</option>
74 </param>
75 <when value="standard" />
76 <when value="tumor">
77 <param name="tumorcsv" type="data" format="csv" label="tumor boost csv file" help="Normal-tumor csv file. See below for more information."/>
78 </when>
79 </conditional>
80
81 <param name="folds" type="integer" min="1" value="10" label ="Number of folds for cross validation" help="Integer between 1 and number of file in the .cel file dataset"/>
82 <conditional name="settingsLoss"> <!-- Response type. The 'package' choice exists only in the linear case; for logistic the command passes the fixed value 'HDPenReg'. -->
83 <param name="loss" type="select" multiple="false" label="Response type">
84 <option value="linear">Linear</option>
85 <option value="logistic">Logistic</option>
86 </param>
87 <when value="logistic" />
88 <when value="linear">
89 <param name="package" type="select" multiple="false" label="Method" help="Either “HDPenReg” or “spikeslab”. Used package in linear case">
90 <option value="HDPenReg">HDPenReg</option>
91 <option value="spikeslab">spikeslab</option>
92 </param>
93 </when>
94 </conditional>
95 <param name="outputgraph" type="select" multiple="false" label="Plot figures">
96 <option value="TRUE">Yes</option>
97 <option value="FALSE">No</option>
98 </param>
99 <param name="outputlog" type="select" label="Output log">
100 <option value="TRUE">Yes</option>
101 <option value="FALSE">No</option>
102 </param>
103
104 </inputs>
105 <outputs> <!-- 'output' is always produced; 'pdffigures' and 'log' are produced only when their filters pass. -->
106 <data format="txt" name="output" label="selection of ${input.name}" />
107 <data format="pdf" name="pdffigures" label="figures of SNPs selection of ${input.name}">
108 <filter>outputgraph == "TRUE"</filter>
109 <filter>settingsLoss['loss'] == 'logistic' or settingsLoss['package'] != 'spikeslab'</filter> <!-- 'package' is defined only in the linear branch, so test 'loss' first to avoid a KeyError when logistic is selected. -->
110 </data>
111 <data format="log" name="log" label="log of SNPs selection of ${input.name}">
112 <filter>outputlog == "TRUE"</filter>
113 </data>
114 </outputs>
115 <stdio> <!-- Exit codes in the range "1:" (1 and above) are reported as fatal errors. -->
116 <exit_code range="1:" level="fatal" description="See logs for more details" />
117 </stdio>
118 <help>
119 .. class:: warningmark
120
121 Data normalization must be run with the Data Normalization tool prior to SNP selection. Otherwise, the standalone version can be used to perform marker selection from matrices containing data normalized with tools other than the one provided in this instance.
122
123 -----
124
125 **What it does**
126
127 This tool selects some relevant markers according to a response using penalized regressions.
128
129 Output:
130
131 A tabular text file containing 5 columns which describe all the selected SNPs (1 line per SNP):
132
133 - chr: Chromosome containing the selected SNP.
134 - position: Position of the selected SNP.
135 - index: Index of the selected SNP.
136 - names: Name of the selected SNP.
137 - coefficient: Regression coefficient of the selected SNP.
138
139 -----
140
141 **Data Response csv file**
142
143 Data response csv file format:
144
145 - The first column contains the names of the different files of the data-set.
146
147 - The second column contains the response associated with each file.
148
149 - Column names of these two columns are respectively files and response.
150
151 - Columns are separated by a comma.
152
153 - *Extensions of the files (.CEL for example) should be removed*
154
155
156
157 **Example**
158
159 Suppose the studied dataset contains 3 .cel files ::
160
161 patient1.cel
162 patient2.cel
163 patient3.cel
164
165 The csv file should look like this ::
166
167 files,response
168 patient1,1.92145
169 patient2,2.12481
170 patient3,1.23545
171
172
173 -----
174
175 **Normal-tumor study**
176
177 When normal (control) samples are matched to tumor samples, they are used as references to extract the copy number profile. In this case, a normal-tumor csv file must be provided:
178
179 - The first column contains the names of the files corresponding to normal samples of the dataset.
180
181 - The second column contains the names of the tumor samples files.
182
183 - Column names of these two columns are respectively normal and tumor.
184
185 - Columns are separated by a comma.
186
187 - *Extensions of the files (.CEL for example) should be removed*
188
189
190 **Example**
191
192 Suppose the studied dataset contains 6 .cel files (3 patients, each represented by a pair of normal and tumor .cel files) ::
193
194 patient1_normal.cel
195 patient1_tumor.cel
196 patient2_normal.cel
197 patient2_tumor.cel
198 patient3_normal.cel
199 patient3_tumor.cel
200
201
202 The csv file should look like this ::
203
204 normal,tumor
205 patient1_normal,patient1_tumor
206 patient2_normal,patient2_tumor
207 patient3_normal,patient3_tumor
208
209 -----
210
211
212
213 **Citation**
214
215 If you use this tool, please cite:
216
217 `Q. Grimonprez, A. Celisse, M. Cheok, M. Figeac, and G. Marot. MPAgenomics : An R package for multi-patients analysis of genomic markers, 2014. Preprint &lt;http://fr.arxiv.org/abs/1401.5035&gt;`_
218
219 </help>
220 </tool>