annotate ensembl_cdna_translate.xml @ 8:5c92d0be6514 draft

Uploaded
author jjohnson
date Thu, 14 Dec 2017 13:32:00 -0500
parents d59e3ce10e74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
1 <tool id="ensembl_cdna_translate" name="Ensembl cDNA Translations" version="0.1.0">
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
2 <description>using Ensembl REST API</description>
7
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
3 <macros>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
4 <import>macros.xml</import>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
5 </macros>
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
6 <requirements>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
7 <requirement type="package" version="0.4.10">requests-cache</requirement>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
8 <requirement type="package" version="1.62">biopython</requirement>
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
9 <requirement type="package" version="3.1.4">twobitreader</requirement>
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
10 </requirements>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
11 <stdio>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
12 <exit_code range="1:" />
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
13 </stdio>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
14 <command><![CDATA[
7
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
15 #if $features.feature_src == 'history_bed':
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
16 cat '$features.input'
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
17 #else
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
18 python '$__tool_directory__/ensembl_cdna_translate.py'
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
19 #if $species:
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
20 --species '$species'
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
21 #end if
7
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
22 $features.transcript_raw
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
23 #if $features.biotypes:
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
24 --biotypes '$features.biotypes'
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
25 #end if
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
26 #if $features.regions:
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
27 --regions '$features.regions'
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
28 #end if
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
29 #if str($output_choice).find('transcript_bed') >= 0:
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
30 --transcripts
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
31 #if str($output_choice).find('translation') >= 0:
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
32 '-' | tee '$transcript_bed'
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
33 #else
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
34 '$transcript_bed'
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
35 #end if
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
36 #elif str($output_choice).find('translation') >= 0:
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
37 --transcripts '-'
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
38 #end if
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
39 #end if
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
40 #if str($output_choice).find('translation') >= 0:
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
41 | python '$__tool_directory__/ensembl_cdna_translate.py' -i '-'
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
42 #if $ref.ref_source == 'cached':
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
43 --twobit='$ref.ref_loc.fields.path'
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
44 #elif $ref.ref_source == 'history':
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
45 --twobit='$ref.ref_file'
4
d1055a763560 Uploaded
jjohnson
parents: 2
diff changeset
46 #end if
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
47 --min_length $translations.min_length
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
48 #if $translations.enzyme:
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
49 --enzyme '$translations.enzyme'
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
50 #end if
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
51 $translations.translate_all
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
52 #if $features.feature_src == 'history_bed' and str($output_choice).find('transcript_bed') >= 0:
2
b7f2f5e3390c Uploaded
jjohnson
parents: 0
diff changeset
53 --transcripts '$transcript_bed'
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
54 #end if
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
55 #if str($output_choice).find('translation_bed') >= 0:
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
56 --bed '$translation_bed'
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
57 #end if
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
58 #if str($output_choice).find('translation_fasta') >= 0:
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
59 --fasta '$translation_fasta'
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
60 #end if
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
61 #if $features.biotypes:
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
62 --biotypes '$features.biotypes'
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
63 #end if
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
64 #end if
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
65 ]]></command>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
66 <inputs>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
67 <param name="species" type="text" value="" label="Ensembl species" >
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
68 <help>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
69 </help>
7
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
70 <expand macro="species_options" />
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
71 </param>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
72 <conditional name="features">
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
73 <param name="feature_src" type="select" label="Features to translate">
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
74 <option value="ensembl_rest">Retrieve from Ensembl</option>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
75 <option value="history_bed">Use Ensembl BED file</option>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
76 </param>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
77 <when value="ensembl_rest">
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
78 <param name="transcript_raw" type="boolean" truevalue="--raw" falsevalue="" checked="true"
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
79 label="Keep extra columns from ensembl BED"/>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
80 <param name="biotypes" type="text" value="" optional="true" label="Restrict Feature retrieval to these biotypes" >
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
81 <expand macro="biotypes_help" />
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
82 </param>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
83 <param name="regions" type="text" value="" optional="true" label="Restrict Feature retrieval to comma-separated list of regions" >
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
84 <help>Each region is specified as: chr or chr:pos or chr:from-to</help>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
85 <validator type="regex" message="Enter a comma-separated list of regions, each given as chr, chr:pos or chr:from-to">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
86 </param>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
87 </when>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
88 <when value="history_bed">
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
89 <param name="input" type="data" format="bed" optional="true" label="A BED file with 12 columns"
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
90 help="thickStart and thickEnd define protein coding region, blocks define exon regions"/>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
91 <param name="biotypes" type="text" value="" optional="true" label="Restrict Feature translation to these biotypes" >
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
92 <expand macro="biotypes_help" />
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
93 </param>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
94 </when>
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
95 </conditional>
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
96 <conditional name="ref">
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
97 <param name="ref_source" type="select" label="Source for Genomic Sequence Data">
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
98 <option value="cached">Locally cached twobit</option>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
99 <option value="history">History dataset twobit</option>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
100 <option value="ensembl_rest">Retrieve sequences from Ensembl (Slow and only for Ensembl Transcripts)</option>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
101 </param>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
102 <when value="cached">
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
103 <param name="ref_loc" type="select" label="Select reference 2bit file">
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
104 <options from_data_table="twobit" />
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
105 </param>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
106 </when>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
107 <when value="history">
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
108 <param name="ref_file" type="data" format="twobit" label="reference 2bit file" />
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
109 </when>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
110 <when value="ensembl_rest"/>
7
d59e3ce10e74 Uploaded
jjohnson
parents: 6
diff changeset
111 </conditional>
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
112 <section name="translations" expanded="false" title="Translation Options">
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
113 <param name="min_length" type="integer" value="10" min="1" label="Minimum length of protein translation to report"/>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
114 <param name="translate_all" type="boolean" truevalue="--all" falsevalue="" checked="false"
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
115 label="Report all translations (Default is non reference protein sequences)"/>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
116 <param name="enzyme" type="select" optional="true" label="Digest enzyme"
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
117 help="Remove frags that are in a reference protein">
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
118 <option value="trypsin">trypsin: ([KR](?=[^P]))|((?&lt;=W)K(?=P))|((?&lt;=M)R(?=P))</option>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
119 </param>
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
120 </section>
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
121 <param name="output_choice" type="select" multiple="true" display="checkboxes" label="Outputs">
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
122 <option value="transcript_bed">transcripts.bed</option>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
123 <option value="translation_bed">translation.bed</option>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
124 <option value="translation_fasta">translation.fasta</option>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
125 </param>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
126 </inputs>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
127 <outputs>
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
128 <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed">
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
129 <filter>'transcript_bed' in output_choice</filter>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
130 </data>
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
131 <data name="translation_bed" format="bed" label="Ensembl ${species} translation.bed">
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
132 <filter>'translation_bed' in output_choice</filter>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
133 </data>
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
134 <data name="translation_fasta" format="fasta" label="Ensembl ${species} translation.fasta">
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
135 <filter>'translation_fasta' in output_choice</filter>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
136 </data>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
137 </outputs>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
138 <tests>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
139 <test>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
140 <param name="species" value="human"/>
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
141 <param name="feature_src" value="history_bed"/>
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
142 <param name="input" value="human_transcripts.bed" ftype="bed"/>
8
5c92d0be6514 Uploaded
jjohnson
parents: 7
diff changeset
143 <param name="ref_source" value="ensembl_rest"/>
0
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
144 <param name="output_choice" value="translation_bed,translation_fasta"/>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
145 <output name="translation_bed">
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
146 <assert_contents>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
147 <has_text text="ENST00000641515" />
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
148 </assert_contents>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
149 </output>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
150 <output name="translation_fasta">
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
151 <assert_contents>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
152 <has_text text=">ENST00000641515" />
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
153 </assert_contents>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
154 </output>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
155 </test>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
156 </tests>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
157 <help><![CDATA[
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
158 usage: ensembl_cdna_translate.py [-h] [-s SPECIES] [-i INPUT] [-t TRANSCRIPTS]
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
159 [-r] [-f FASTA] [-b BED] [-m MIN_LENGTH] [-a]
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
160 [-v] [-d]
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
161
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
162 Retrieve Ensembl cDNAs and three frame translate
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
163
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
164 optional arguments:
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
165 -h, --help show this help message and exit
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
166 -s SPECIES, --species SPECIES
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
167 Ensembl Species to retrieve
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
168 -i INPUT, --input INPUT
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
169 Use this bed instead of retrieving cDNA from ensembl
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
170 (-) for stdin
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
171 -t TRANSCRIPTS, --transcripts TRANSCRIPTS
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
172 Path to output cDNA transcripts.bed (-) for stdout
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
173 -r, --raw Report transcript exactly as returned from Ensembl
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
174 -f FASTA, --fasta FASTA
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
175 Path to output translations.fasta
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
176 -b BED, --bed BED Path to output translations.bed
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
177 -m MIN_LENGTH, --min_length MIN_LENGTH
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
178 Minimum length of protein translation to report
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
179 -a, --all Report all translations (Default is non reference
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
180 protein sequences)
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
181 -v, --verbose Verbose
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
182 -d, --debug Debug
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
183
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
184 Ensembl REST API returns a 20-column BED format with these additional columns::
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
185
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
186 second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
187
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
188 ]]></help>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
189 <citations>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
190 <citation type="doi">10.1093/bioinformatics/btu613</citation>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
191 <citation type="doi">10.1093/nar/gku1010</citation>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
192 </citations>
a8218b11216f Uploaded
jjohnson
parents:
diff changeset
193 </tool>