0
|
1 <tool id="ensembl_cdna_translate" name="Ensembl cDNA Translations" version="0.1.0">
|
|
2 <description>using Ensembl REST API</description>
|
7
|
3 <macros>
|
|
4 <import>macros.xml</import>
|
|
5 </macros>
|
0
|
6 <requirements>
|
|
7 <requirement type="package" version="0.4.10">requests-cache</requirement>
|
|
8 <requirement type="package" version="1.62">biopython</requirement>
|
8
|
9 <requirement type="package" version="3.1.4">twobitreader</requirement>
|
0
|
10 </requirements>
|
|
11 <stdio>
|
|
12 <exit_code range="1:" />
|
|
13 </stdio>
|
|
<!--
  Cheetah template that builds a one- or two-stage shell pipeline.
  Stage 1 emits a transcript BED: either `cat` of the history BED, or
  ensembl_cdna_translate.py retrieving features, optionally restricted by
  species, biotypes and regions.
  Stage 2 is appended only when a translation output is selected: the same
  script reads the BED from stdin (-i '-') and writes the requested
  translation BED and/or FASTA. A twobit path is passed only for the
  'cached' and 'history' reference sources.
  When features come from Ensembl and both transcript and translation outputs
  are selected, stage 1 writes transcripts to stdout and `tee` stores a copy
  in $transcript_bed on the way to stage 2.
  str($output_choice).find('translation') matches both 'translation_bed' and
  'translation_fasta'.
  NOTE(review): with feature_src == 'history_bed' and only 'transcript_bed'
  selected, the command is a bare `cat` with no redirect, so nothing appears
  to write $transcript_bed. Confirm whether that is intended.
-->
14 <command><![CDATA[
|
7
|
15 #if $features.feature_src == 'history_bed':
|
|
16 cat '$features.input'
|
0
|
17 #else
|
|
18 python '$__tool_directory__/ensembl_cdna_translate.py'
|
|
19 #if $species:
|
|
20 --species '$species'
|
|
21 #end if
|
7
|
22 $features.transcript_raw
|
|
23 #if $features.biotypes:
|
|
24 --biotypes '$features.biotypes'
|
|
25 #end if
|
|
26 #if $features.regions:
|
|
27 --regions '$features.regions'
|
0
|
28 #end if
|
|
29 #if str($output_choice).find('transcript_bed') >= 0:
|
|
30 --transcripts
|
|
31 #if str($output_choice).find('translation') >= 0:
|
|
32 '-' | tee '$transcript_bed'
|
|
33 #else
|
|
34 '$transcript_bed'
|
|
35 #end if
|
|
36 #elif str($output_choice).find('translation') >= 0:
|
|
37 --transcripts '-'
|
|
38 #end if
|
|
39 #end if
|
|
40 #if str($output_choice).find('translation') >= 0:
|
|
41 | python '$__tool_directory__/ensembl_cdna_translate.py' -i '-'
|
8
|
42 #if $ref.ref_source == 'cached':
|
|
43 --twobit='$ref.ref_loc.fields.path'
|
|
44 #elif $ref.ref_source == 'history':
|
|
45 --twobit='$ref.ref_file'
|
4
|
46 #end if
|
8
|
47 --min_length $translations.min_length
|
|
48 #if $translations.enzyme:
|
|
49 --enzyme '$translations.enzyme'
|
|
50 #end if
|
|
51 $translations.translate_all
|
|
52 #if $features.feature_src == 'history_bed' and str($output_choice).find('transcript_bed') >= 0:
|
2
|
53 --transcripts '$transcript_bed'
|
0
|
54 #end if
|
|
55 #if str($output_choice).find('translation_bed') >= 0:
|
|
56 --bed '$translation_bed'
|
|
57 #end if
|
|
58 #if str($output_choice).find('translation_fasta') >= 0:
|
|
59 --fasta '$translation_fasta'
|
|
60 #end if
|
8
|
61 #if $features.biotypes:
|
|
62 --biotypes '$features.biotypes'
|
|
63 #end if
|
0
|
64 #end if
|
|
65 ]]></command>
|
|
66 <inputs>
|
|
<!--
  Free-text species name. The command passes it as the species option only
  when it is non-empty, and every output label interpolates it.
  The help element below is currently empty.
  NOTE(review): species_options is expanded from the imported macros;
  its content is not visible in this file.
-->
67 <param name="species" type="text" value="" label="Ensembl species" >
|
|
68 <help>
|
|
69 </help>
|
7
|
70 <expand macro="species_options" />
|
|
71 </param>
|
|
72 <conditional name="features">
|
|
73 <param name="feature_src" type="select" label="Features to translate">
|
|
74 <option value="ensembl_rest">Retrieve from Ensembl</option>
|
|
75 <option value="history_bed">Use Ensembl BED file</option>
|
|
76 </param>
|
|
77 <when value="ensembl_rest">
|
|
78 <param name="transcript_raw" type="boolean" truevalue="--raw" falsevalue="" checked="true"
|
|
79 label="Keep extra columns from ensembl BED"/>
|
|
80 <param name="biotypes" type="text" value="" optional="true" label="Restrict Feature retrieval to these biotypes" >
|
|
81 <expand macro="biotypes_help" />
|
|
82 </param>
|
|
83 <param name="regions" type="text" value="" optional="true" label="Restrict Feature retrieval to comma-separated list of regions" >
|
|
84 <help>Each region is specified as: chr or chr:pos or chr:from-to</help>
|
|
<!-- Accepts an empty value or a comma-separated list of chr, chr:pos or chr:from-to tokens; the message is shown when the value does not match. -->
85 <validator type="regex" message="Regions must be a comma-separated list of chr, chr:pos or chr:from-to">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
|
|
86 </param>
|
|
87 </when>
|
|
88 <when value="history_bed">
|
|
89 <param name="input" type="data" format="bed" optional="true" label="A BED file with 12 columns"
|
|
90 help="thickStart and thickEnd define protein coding region, blocks define exon regions"/>
|
|
91 <param name="biotypes" type="text" value="" optional="true" label="Restrict Feature translation to these biotypes" >
|
|
92 <expand macro="biotypes_help" />
|
|
93 </param>
|
|
94 </when>
|
|
95 </conditional>
|
8
|
<!--
  Source of genomic sequence for the translation stage.
  'cached' selects an entry from the twobit data table and 'history' takes a
  twobit dataset from the history; the command passes either one as the
  twobit option. 'ensembl_rest' has no parameters, and for it the command
  passes no twobit option at all.
-->
96 <conditional name="ref">
|
|
97 <param name="ref_source" type="select" label="Source for Genomic Sequence Data">
|
|
98 <option value="cached">Locally cached twobit</option>
|
|
99 <option value="history">History dataset twobit</option>
|
|
100 <option value="ensembl_rest">Retrieve sequences from Ensembl (Slow and only for Ensembl Transcripts)</option>
|
|
101 </param>
|
|
102 <when value="cached">
|
|
103 <param name="ref_loc" type="select" label="Select reference 2bit file">
|
|
104 <options from_data_table="twobit" />
|
|
105 </param>
|
|
106 </when>
|
|
107 <when value="history">
|
|
108 <param name="ref_file" type="data" format="twobit" label="reference 2bit file" />
|
|
109 </when>
|
|
110 <when value="ensembl_rest"/>
|
7
|
111 </conditional>
|
8
|
112 <section name="translations" expanded="false" title="Translation Options">
|
|
113 <param name="min_length" type="integer" value="10" min="1" label="Minimum length of protein translation to report"/>
|
|
114 <param name="translate_all" type="boolean" truevalue="--all" falsevalue="" checked="false"
|
|
115 label="Report all translations (Default is non reference protein sequences)"/>
|
|
116 <param name="enzyme" type="select" optional="true" label="Digest enzyme"
|
|
117 help="Remove frags that are in a reference protein">
|
|
<!-- Display label shows the trypsin cleavage regex; the '<' of each lookbehind group is written as &lt; so the element text stays well-formed XML. -->
118 <option value="trypsin">trypsin: ([KR](?=[^P]))|((?&lt;=W)K(?=P))|((?&lt;=M)R(?=P))</option>
|
|
119 </param>
|
|
120 </section>
|
0
|
<!--
  Multi-select (checkboxes) of the datasets to produce. The command template
  tests the string form of this value with substring searches, and each
  output's filter tests membership with `in`, so the option values here must
  stay in sync with both.
-->
121 <param name="output_choice" type="select" multiple="true" display="checkboxes" label="Outputs">
|
|
122 <option value="transcript_bed">transcripts.bed</option>
|
|
123 <option value="translation_bed">translation.bed</option>
|
|
124 <option value="translation_fasta">translation.fasta</option>
|
|
125 </param>
|
|
126 </inputs>
|
|
<!--
  One dataset per selectable output. Each is filtered on whether its own
  value is present in output_choice, and each label interpolates the
  species parameter.
-->
127 <outputs>
|
8
|
128 <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed">
|
0
|
129 <filter>'transcript_bed' in output_choice</filter>
|
|
130 </data>
|
8
|
131 <data name="translation_bed" format="bed" label="Ensembl ${species} translation.bed">
|
0
|
132 <filter>'translation_bed' in output_choice</filter>
|
|
133 </data>
|
8
|
134 <data name="translation_fasta" format="fasta" label="Ensembl ${species} translation.fasta">
|
0
|
135 <filter>'translation_fasta' in output_choice</filter>
|
|
136 </data>
|
|
137 </outputs>
|
|
<!--
  Single test: translate the history BED human_transcripts.bed with the
  reference source set to ensembl_rest, requesting both translation outputs.
  It passes when transcript id ENST00000641515 appears in the translation BED
  and as a '>' header in the translation FASTA.
  NOTE(review): ref_source is ensembl_rest, so this test presumably needs
  network access to Ensembl. Confirm for offline CI.
-->
138 <tests>
|
|
139 <test>
|
|
140 <param name="species" value="human"/>
|
8
|
141 <param name="feature_src" value="history_bed"/>
|
0
|
142 <param name="input" value="human_transcripts.bed" ftype="bed"/>
|
8
|
143 <param name="ref_source" value="ensembl_rest"/>
|
0
|
144 <param name="output_choice" value="translation_bed,translation_fasta"/>
|
|
145 <output name="translation_bed">
|
|
146 <assert_contents>
|
|
147 <has_text text="ENST00000641515" />
|
|
148 </assert_contents>
|
|
149 </output>
|
|
150 <output name="translation_fasta">
|
|
151 <assert_contents>
|
|
152 <has_text text=">ENST00000641515" />
|
|
153 </assert_contents>
|
|
154 </output>
|
|
155 </test>
|
|
156 </tests>
|
|
157 <help><![CDATA[
|
|
158 usage: ensembl_cdna_translate.py [-h] [-s SPECIES] [-i INPUT] [-t TRANSCRIPTS]
|
|
159 [-r] [-f FASTA] [-b BED] [-m MIN_LENGTH] [-a]
|
|
160 [-v] [-d]
|
|
161
|
|
162 Retrieve Ensembl cDNAs and three frame translate
|
|
163
|
|
164 optional arguments:
|
|
165 -h, --help show this help message and exit
|
|
166 -s SPECIES, --species SPECIES
|
|
167 Ensembl Species to retrieve
|
|
168 -i INPUT, --input INPUT
|
|
169 Use this bed instead of retrieving cDNA from ensembl
|
|
170 (-) for stdin
|
|
171 -t TRANSCRIPTS, --transcripts TRANSCRIPTS
|
|
172 Path to output cDNA transcripts.bed (-) for stdout
|
|
173 -r, --raw Report transcript exactly as returned from Ensembl
|
|
174 -f FASTA, --fasta FASTA
|
|
175 Path to output translations.fasta
|
|
176 -b BED, --bed BED Path to output translations.bed
|
|
177 -m MIN_LENGTH, --min_length MIN_LENGTH
|
|
178 Minimum length of protein translation to report
|
|
179 -a, --all Report all translations (Default is non reference
|
|
180 protein sequences)
|
|
181 -v, --verbose Verbose
|
|
182 -d, --debug Debug
|
|
183
|
|
184 The Ensembl REST API returns a 20-column BED format with these additional columns::
|
|
185
|
|
186 second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type
|
|
187
|
|
188 ]]></help>
|
|
189 <citations>
|
|
190 <citation type="doi">10.1093/bioinformatics/btu613</citation>
|
|
191 <citation type="doi">10.1093/nar/gku1010</citation>
|
|
192 </citations>
|
|
193 </tool>
|