comparison variant_effect_predictor/variant_effect_predictor.xml @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children a5976b2dce6f
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 <tool id="ensembl" name="ENSEMBL variant effect predictor">
2 <description>to annotate variants using an ENSEMBL database</description>
3 <!-- Cheetah template for the VEP command line. Dataset paths and user-editable text values are single-quoted so the shell passes each one as exactly one argument. NOTE(review): the database password is still passed on the command line, where it may show up in process listings and job logs; confirm this is acceptable. --> <command interpreter="perl">
4 variant_effect_predictor.pl -i='$input' -o='$output' -species='$species'
5 #if $database_options.database_options_selector == "advanced"
6 --host='$database_options.host' --user='$database_options.username' --port='$database_options.portnum'
7 #if $database_options.password
8 --password='$database_options.password'
9 #end if
10 #else
11 ## Basic mode: hardcoded connection settings (the same host, user and port the advanced form offers as defaults).
12 --host=www.ebi.edu.au --user=anonymous --port=3306
13 #end if
14 #if $parameters.everything
15 --everything
16 #else
17 #if $parameters.sift_options.sift
18 --sift $parameters.sift_options.sift_value.value
19 #end if
20 #if $parameters.polyphen_options.polyphen
21 --polyphen $parameters.polyphen_options.polyphen_value.value
22 #end if
23 #if $parameters.ccds
24 --ccds
25 #end if
26 #if $parameters.hgvs
27 --hgvs
28 #end if
29 #if $parameters.hgnc
30 --hgnc
31 #end if
32 #if $parameters.numbers
33 --numbers
34 #end if
35 #if $parameters.domains
36 --domains
37 #end if
38 #if $parameters.regulatory
39 --regulatory
40 #end if
41 #if $parameters.canonical
42 --canonical
43 #end if
44 #if $parameters.protein
45 --protein
46 #end if
47 #if $parameters.gmaf
48 --gmaf
49 #end if
50 #end if
51 </command>
52 <inputs> <!-- Tool form. Initial values of text fields are declared with the "value" attribute, which is the attribute Galaxy reads for a param's default. -->
53 <param format="vcf" name="input" type="data" label="Input variants file" help="This should be a variant file in vcf format."/> <!-- TODO: allow other variant format types? -->
54 <param name="species" label="Name of the species being annotated" type="text" value="human" help="Species for your data. This can be the latin name e.g. 'homo_sapiens' or any Ensembl alias e.g. 'mouse'. Specifying the latin name can speed up initial database connection as the registry does not have to load all available database aliases on the server."/> <!-- TODO: files in galaxy have a reference genome specified. We should probably try to use that instead. -->
55 <conditional name="database_options">
56 <param name="database_options_selector" type="select" label="Database Options">
57 <option value="basic" selected="true">Use Default Database</option>
58 <option value="advanced">Choose Database Manually</option>
59 </param>
60 <when value="basic">
61 <!-- no options: the command template supplies fixed host, user and port in this mode -->
62 </when>
63 <when value="advanced">
64 <param name="host" label="Database host address" type="text" value="www.ebi.edu.au" help="By default connects to the EMBL Australia database at www.ebi.edu.au"/> <!-- TODO: may want a drop-down list with the main EMBL database listed too and with an other field, with Australian as default? In this case should state that there is a cap. -->
65 <param name="username" label="Username" value="anonymous" type="text" help="Default='anonymous'"/>
66 <param name="password" label="Password, if required" value="" type="text" help="Most public ENSEMBL databases do not require a password for access"/>
67 <param name="portnum" label="Database port" type="text" value="3306" help="The default for EMBL Australia's ENSEMBL is 3306."/>
68 </when>
69 </conditional>
70 <conditional name="parameters">
71 <param name="everything" label="everything" type="boolean" checked="true"
72 help="shortcut to switch on all the following parameters"/>
73 <when value="true"></when>
74 <when value="false">
75 <conditional name="sift_options">
76 <param name="sift" label="Sift" type="boolean"
77 help="Human only SIFT predicts whether an amino acid substitution affects protein function based on sequence homology and the physical properties of amino acids. The VEP can output the prediction term, score or both. Not used by default"/>
78 <when value="true">
79 <param name="sift_value" label="options" type="select">
80 <option value="b" selected="true">Both (prediction term and score)</option>
81 <option value="s">score</option>
82 <option value="p">prediction term</option>
83 </param>
84 </when>
85 <when value="false"></when>
86 </conditional>
87 <conditional name="polyphen_options">
88 <param name="polyphen" label="PolyPhen" type="boolean"
89 help="Human only PolyPhen is a tool which predicts possible impact of an amino acid substitution on the structure and function of a human protein using straightforward physical and comparative considerations. The VEP can output the prediction term, score or both. Not used by default"/>
90 <when value="true">
91 <param name="polyphen_value" label="options" type="select">
92 <option value="b" selected="true">Both (prediction term and score)</option>
93 <option value="s">score</option>
94 <option value="p">prediction term</option>
95 </param>
96 </when>
97 <when value="false"></when>
98 </conditional>
99 <param name="ccds" label="Add CCDS transcript identifier" type="boolean"/>
100 <param name="hgvs" label="Add HGVS nomenclature" type="boolean"/>
101 <param name="hgnc" label="Add HGNC gene Identifier" type="boolean"/>
102 <param name="numbers" label="Add affected exon and intron Numbers" type="boolean"/>
103 <param name="domains" label="Add (overlapping) protein Domains" type="boolean"/>
104 <param name="regulatory" label="Overlaps Regulatory regions" type="boolean" />
105 <param name="canonical" label="Add flag for Canonical transcript" type="boolean"/>
106 <param name="protein" label="Ensembl Protein identifier" type="boolean"/>
107 <param name="gmaf" label="Add GMAF (Global Minor Allele Frequency)" type="boolean" />
108 </when>
109 </conditional>
110 </inputs>
111 <outputs>
112 <!-- Single tabular result dataset. TODO: offer VCF as an optional output format. -->
113 <data name="output" format="tabular" label="${tool.name} on ${on_string}"/>
114 </outputs>
115
116 <help>
117 ============
118 Description
119 ============
120 This tool connects to the ENSEMBL database using ENSEMBL's Variant Effect Predictor script and retrieves annotations for an input variants file.
121
122 ============
123 Parameters
124 ============
125 everything
126 Shortcut flag to switch on all of the following:
127 ``sift b
128 polyphen b
129 ccds
130 hgvs
131 hgnc
132 numbers
133 domains
134 regulatory
135 cell_type
136 canonical
137 protein
138 gmaf``
139
140 sift [both|score|prediction term]
141 **Human only** SIFT predicts whether an amino acid substitution affects protein function based on sequence homology and the physical properties of amino acids. The VEP can output the prediction term, score or both. *Not used by default*
142
143 polyphen [both|score|prediction term]
144 **Human only** PolyPhen is a tool which predicts possible impact of an amino acid substitution on the structure and function of a human protein using straightforward physical and comparative considerations. The VEP can output the prediction term, score or both. *Not used by default*
145
146 ccds
147 Adds the CCDS transcript identifier (where available) to the output. *Not used by default*
148
149 hgvs
150 Add HGVS nomenclature based on Ensembl stable identifiers to the output. Both coding and protein sequence names are added where appropriate. Currently it is not possible to generate HGVS identifiers from the cache; a database connection must be made. *Not used by default*
151
152 hgnc
153 Adds the HGNC gene identifier (where available) to the output. *Not used by default*
154
155 numbers
156 Adds affected exon and intron numbering to the output. Format is Number/Total. *Not used by default*
157
158 domains
159 Adds names of overlapping protein domains to output. *Not used by default*
160
161 regulatory
162 Look for overlaps with regulatory regions. The script can also call if a variant falls in a high information position within a transcription factor binding site. Output lines have a Feature type of RegulatoryFeature or MotifFeature. *Not used by default*
163
164 cell_type
165 Report only regulatory regions that are found in the given cell type(s). Can be a single cell type or a comma-separated list. The functional type in each cell type is reported under CELL_TYPE in the output. To retrieve a list of cell types, use ``--cell_type list``. *Not used by default*
166
167 canonical
168 Adds a flag indicating if the transcript is the canonical transcript for the gene. *Not used by default*
169
170 protein
171 Add the Ensembl protein identifier to the output where appropriate. *Not used by default*
172
173 gmaf
174 Add the global minor allele frequency (MAF) from 1000 Genomes Phase 1 data for any existing variant to the output. *Not used by default*
175 </help>
176 </tool>
177