annotate variant_effect_predictor/variant_effect_predictor.xml @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children a5976b2dce6f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 <tool id="ensembl" name="ENSEMBL variant effect predictor">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 <description>to annotate variants using an ENSEMBL database</description>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 <command interpreter="perl">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 variant_effect_predictor.pl -i=$input -o=$output -species=$species
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 #if $database_options.database_options_selector == "advanced"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 --host=$database_options.host --user=$database_options.username --port=$database_options.portnum
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 #if $database_options.password
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 --password=$database_options.password
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 #else
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 ## hardcoded default values - bad?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 --host=www.ebi.edu.au --user=anonymous --port=3306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 #if $parameters.everything
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 --everything
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 #else
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 #if $parameters.sift_options.sift
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 --sift $parameters.sift_options.sift_value.value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 #if $parameters.polyphen_options.polyphen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 --polyphen $parameters.polyphen_options.polyphen_value.value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 #if $parameters.ccds
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 --ccds
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 #if $parameters.hgvs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 --hgvs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 #if $parameters.hgnc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 --hgnc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 #if $parameters.numbers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 --numbers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 #if $parameters.domains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 --domains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 #if $parameters.regulatory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 --regulatory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 #if $parameters.canonical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 --canonical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 #if $parameters.protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 --protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 #if $parameters.gmaf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 --gmaf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 #end if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 </command>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 <inputs>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 <param format="vcf" name="input" type="data" label="Input variants file" help="This should be a variant file in vcf format."/> <!-- TODO: allow other variant format types? -->
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 <param name="species" label="Name of the species being annotated" type="text" value="human" help="Species for your data. This can be the latin name e.g. 'homo_sapiens' or any Ensembl alias e.g. 'mouse'. Specifying the latin name can speed up initial database connection as the registry does not have to load all available database aliases on the server."/> <!-- The initial text of a Galaxy text param comes from the "value" attribute. TODO: files in galaxy have a reference genome specified. We should probably try to use that instead. -->
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 <conditional name="database_options">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 <param name="database_options_selector" type="select" label="Database Options">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 <option value="basic" selected="True">Use Default Database</option>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 <option value="advanced">Choose Database Manually</option>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 </param>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 <when value="basic">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 <!-- no options -->
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 </when>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 <when value="advanced">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 <param name="host" label="Database host address" type="text" value="www.ebi.edu.au" help="By default connects to the EMBL Australia database at www.ebi.edu.au"/> <!-- "value" pre-fills the field so the command template never emits an empty host option. TODO: may want a drop-down list with the main EMBL database listed too and with an other field, with Australian as default? In this case should state that there is a cap. -->
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 <param name="username" label="Username" value="anonymous" type="text" help="Default='anonymous'"/> <!-- "value" supplies the pre-filled default advertised in the help text -->
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 <param name="password" label="Password, if required" value="" type="text" help="Most public ENSEMBL databases do not require a password for access"/> <!-- left empty by default; the command template only passes a password option when this is non-empty -->
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 <param name="portnum" label="Database port" type="text" value="3306" help="The default for EMBL Australia's ENSEMBL is 3306."/> <!-- "value" pre-fills the port so the command template never emits an empty port option -->
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 </when>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 </conditional>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 <conditional name="parameters">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 <param name="everything" label="everything" type="boolean" checked="true"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 help="shortcut to switch on all the following parameters"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 <when value="true"></when>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 <when value="false">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 <conditional name="sift_options">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 <param name="sift" label="Sift" type="boolean"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 help="Human only SIFT predicts whether an amino acid substitution affects protein function based on sequence homology and the physical properties of amino acids. The VEP can output the prediction term, score or both. Not used by default"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 <when value="true">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 <param name="sift_value" label="options" type="select">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 <option value="b" selected="true">Both (prediction term and score)</option>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 <option value="s">score</option>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 <option value="p">prediction term</option>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 </param>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 </when>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 <when value="false"></when>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 </conditional>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 <conditional name="polyphen_options">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 <param name="polyphen" label="PolyPhen" type="boolean"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 help="Human only PolyPhen is a tool which predicts possible impact of an amino acid substitution on the structure and function of a human protein using straightforward physical and comparative considerations. The VEP can output the prediction term, score or both. Not used by default"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 <when value="true">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 <param name="polyphen_value" label="options" type="select">
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 <option value="b" selected="true">Both (prediction term and score)</option>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 <option value="s">score</option>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 <option value="p">prediction term</option>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 </param>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 </when>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 <when value="false"></when>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 </conditional>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 <param name="ccds" label="Add CCDS transcript identifier" type="boolean"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 <param name="hgvs" label="Add HGVS nomenclature" type="boolean"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 <param name="hgnc" label="Add HGNC gene Identifier" type="boolean"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 <param name="numbers" label="Add affected exon and intron Numbers" type="boolean"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 <param name="domains" label="Add (overlapping) protein Domains" type="boolean"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 <param name="regulatory" label="Overlaps Regulatory regions" type="boolean" />
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 <param name="canonical" label="Add flag for Canonical transcript" type="boolean"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 <param name="protein" label="Ensembl Protein identifier" type="boolean"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 <param name="gmaf" label="Add GMAF (Global Minor Allele Frequency)" type="boolean" />
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 </when>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 </conditional>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 </inputs>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 <outputs>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 <!-- TODO: Can we optionally make it vcf? -->
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 <data format="tabular" name="output" label="${tool.name} on ${on_string}"/>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 </outputs>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 <help>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 ============
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 Description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 ============
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 This tool connects to the ENSEMBL database using ENSEMBL's Variant Effect Predictor script and retrieves annotations for an input variants file.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 ============
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 Parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 ============
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 everything
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 Shortcut flag to switch on all of the following:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 ``sift b
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 polyphen b
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 ccds
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 hgvs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 hgnc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 numbers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 domains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 regulatory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 cell_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 canonical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 gmaf``
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 sift [both|score|prediction term]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 **Human only** SIFT predicts whether an amino acid substitution affects protein function based on sequence homology and the physical properties of amino acids. The VEP can output the prediction term, score or both. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 polyphen [both|score|prediction term]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 **Human only** PolyPhen is a tool which predicts possible impact of an amino acid substitution on the structure and function of a human protein using straightforward physical and comparative considerations. The VEP can output the prediction term, score or both. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 ccds
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 Adds the CCDS transcript identifier (where available) to the output. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 hgvs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 Add HGVS nomenclature based on Ensembl stable identifiers to the output. Both coding and protein sequence names are added where appropriate. Currently it is not possible to generate HGVS identifiers from the cache; a database connection must be made. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 hgnc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 Adds the HGNC gene identifier (where available) to the output. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 numbers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 Adds affected exon and intron numbering to the output. Format is Number/Total. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 domains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 Adds names of overlapping protein domains to output. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 regulatory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 Look for overlaps with regulatory regions. The script can also call if a variant falls in a high information position within a transcription factor binding site. Output lines have a Feature type of RegulatoryFeature or MotifFeature. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 cell_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 Report only regulatory regions that are found in the given cell type(s). Can be a single cell type or a comma-separated list. The functional type in each cell type is reported under CELL_TYPE in the output. To retrieve a list of cell types, use ``--cell_type list``. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 canonical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 Adds a flag indicating if the transcript is the canonical transcript for the gene. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 Add the Ensembl protein identifier to the output where appropriate. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 gmaf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 Add the global minor allele frequency (MAF) from 1000 Genomes Phase 1 data for any existing variant to the output. *Not used by default*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 </help>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 </tool>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177