diff ensembl_cdna_translate.xml @ 0:a8218b11216f draft

Uploaded
author jjohnson
date Wed, 29 Nov 2017 15:55:59 -0500
parents
children b7f2f5e3390c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ensembl_cdna_translate.xml	Wed Nov 29 15:55:59 2017 -0500
@@ -0,0 +1,248 @@
+<tool id="ensembl_cdna_translate" name="Ensembl cDNA Translations" version="0.1.0">
+    <description>using Ensembl REST API</description>
+    <!-- Package dependencies for the tool's runtime environment, each pinned to an exact version.
+         NOTE(review): presumably both are imported by ensembl_cdna_translate.py; the script is not visible here. -->
+    <requirements>
+        <requirement type="package" version="0.4.10">requests-cache</requirement>
+        <requirement type="package" version="1.62">biopython</requirement>
+    </requirements>
+    <!-- Job status rule: matches every process exit status of 1 or greater. -->
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+        ## Stage 1: put a transcripts BED on stdout, either by echoing the
+        ## user-supplied BED or by running the retrieval script against Ensembl.
+        #if $input:
+            cat '$input'
+        #else
+            python '$__tool_directory__/ensembl_cdna_translate.py'
+            #if $species:
+                --species '$species'
+            #end if
+            $transcript_raw
+            #if $biotypes:
+                --biotypes '$biotypes'
+            #end if
+            #if str($output_choice).find('transcript_bed') >= 0:
+                --transcripts
+                #if str($output_choice).find('translation') >= 0:
+                    ## A translation output is wanted as well: stream the BED to
+                    ## stdout for stage 2 and keep a copy in the transcripts dataset.
+                    '-' | tee '$transcript_bed'
+                #else
+                    '$transcript_bed'
+                #end if
+            #elif str($output_choice).find('translation') >= 0:
+                --transcripts '-'
+            #end if
+        #end if
+        ## NOTE(review): with a BED input and only transcript_bed selected, stage 1
+        ## writes to stdout and nothing fills the transcripts dataset; confirm intent.
+        ## Stage 2: translate the BED arriving on stdin whenever any translation
+        ## output (translation_bed or translation_fasta) was selected.
+        #if str($output_choice).find('translation') >= 0:
+          | python '$__tool_directory__/ensembl_cdna_translate.py' -i '-'
+            #if $input and str($output_choice).find('transcript_bed') >= 0:
+                ## Closing quote restored; it was missing and left the shell string unterminated.
+                --transcripts '$transcript_bed'
+            #end if
+            #if str($output_choice).find('translation_bed') >= 0:
+                --bed '$translation_bed'
+            #end if
+            #if str($output_choice).find('translation_fasta') >= 0:
+                --fasta '$translation_fasta'
+            #end if
+            ## Forward the two form inputs that were declared but never passed on.
+            --min_length $min_length
+            $translate_all
+            #if $enzyme:
+                --enzyme '$enzyme'
+            #end if
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Text parameter naming the Ensembl species; the command template forwards a non-empty value through its species option.
+             Each option value below is a lower-case species identifier, and the option text repeats it with a common name and taxon_id. -->
+        <param name="species" type="text" value="" label="Ensembl species" >
+            <help>
+            </help>
+            <option value="homo_sapiens">homo_sapiens  (Human) taxon_id: 9606</option>
+            <option value="mus_musculus">mus_musculus  (Mouse) taxon_id: 10090</option>
+            <option value="ailuropoda_melanoleuca">ailuropoda_melanoleuca  (Panda) taxon_id: 9646</option>
+            <option value="anas_platyrhynchos">anas_platyrhynchos  (Duck) taxon_id: 8839</option>
+            <option value="anolis_carolinensis">anolis_carolinensis  (Anole lizard) taxon_id: 28377</option>
+            <option value="astyanax_mexicanus">astyanax_mexicanus  (Cave fish) taxon_id: 7994</option>
+            <option value="bos_taurus">bos_taurus  (Cow) taxon_id: 9913</option>
+            <option value="caenorhabditis_elegans">caenorhabditis_elegans  (Caenorhabditis elegans) taxon_id: 6239</option>
+            <option value="callithrix_jacchus">callithrix_jacchus  (Marmoset) taxon_id: 9483</option>
+            <option value="canis_familiaris">canis_familiaris  (Dog) taxon_id: 9615</option>
+            <option value="carlito_syrichta">carlito_syrichta  (Tarsier) taxon_id: 1868482</option>
+            <option value="cavia_aperea">cavia_aperea  (Brazilian guinea pig) taxon_id: 37548</option>
+            <option value="cavia_porcellus">cavia_porcellus  (Guinea Pig) taxon_id: 10141</option>
+            <option value="chinchilla_lanigera">chinchilla_lanigera  (Long-tailed chinchilla) taxon_id: 34839</option>
+            <option value="chlorocebus_sabaeus">chlorocebus_sabaeus  (Vervet-AGM) taxon_id: 60711</option>
+            <option value="choloepus_hoffmanni">choloepus_hoffmanni  (Sloth) taxon_id: 9358</option>
+            <option value="ciona_intestinalis">ciona_intestinalis  (C.intestinalis) taxon_id: 7719</option>
+            <option value="ciona_savignyi">ciona_savignyi  (C.savignyi) taxon_id: 51511</option>
+            <option value="cricetulus_griseus_chok1gshd">cricetulus_griseus_chok1gshd  (Chinese hamster CHOK1GS) taxon_id: 10029</option>
+            <option value="cricetulus_griseus_crigri">cricetulus_griseus_crigri  (Chinese hamster CriGri) taxon_id: 10029</option>
+            <option value="danio_rerio">danio_rerio  (Zebrafish) taxon_id: 7955</option>
+            <option value="dasypus_novemcinctus">dasypus_novemcinctus  (Armadillo) taxon_id: 9361</option>
+            <option value="dipodomys_ordii">dipodomys_ordii  (Kangaroo rat) taxon_id: 10020</option>
+            <option value="drosophila_melanogaster">drosophila_melanogaster  (Fruitfly) taxon_id: 7227</option>
+            <option value="echinops_telfairi">echinops_telfairi  (Lesser hedgehog tenrec) taxon_id: 9371</option>
+            <option value="equus_caballus">equus_caballus  (Horse) taxon_id: 9796</option>
+            <option value="erinaceus_europaeus">erinaceus_europaeus  (Hedgehog) taxon_id: 9365</option>
+            <option value="felis_catus">felis_catus  (Cat) taxon_id: 9685</option>
+            <option value="ficedula_albicollis">ficedula_albicollis  (Flycatcher) taxon_id: 59894</option>
+            <option value="fukomys_damarensis">fukomys_damarensis  (Damara mole rat) taxon_id: 885580</option>
+            <option value="gadus_morhua">gadus_morhua  (Cod) taxon_id: 8049</option>
+            <option value="gallus_gallus">gallus_gallus  (Chicken) taxon_id: 9031</option>
+            <option value="gasterosteus_aculeatus">gasterosteus_aculeatus  (Stickleback) taxon_id: 69293</option>
+            <option value="gorilla_gorilla">gorilla_gorilla  (Gorilla) taxon_id: 9595</option>
+            <option value="heterocephalus_glaber_female">heterocephalus_glaber_female  (Naked mole-rat female) taxon_id: 10181</option>
+            <option value="heterocephalus_glaber_male">heterocephalus_glaber_male  (Naked mole-rat male) taxon_id: 10181</option>
+            <option value="ictidomys_tridecemlineatus">ictidomys_tridecemlineatus  (Squirrel) taxon_id: 43179</option>
+            <option value="jaculus_jaculus">jaculus_jaculus  (Lesser Egyptian jerboa) taxon_id: 51337</option>
+            <option value="latimeria_chalumnae">latimeria_chalumnae  (Coelacanth) taxon_id: 7897</option>
+            <option value="lepisosteus_oculatus">lepisosteus_oculatus  (Spotted gar) taxon_id: 7918</option>
+            <option value="loxodonta_africana">loxodonta_africana  (Elephant) taxon_id: 9785</option>
+            <option value="macaca_mulatta">macaca_mulatta  (Macaque) taxon_id: 9544</option>
+            <option value="meleagris_gallopavo">meleagris_gallopavo  (Turkey) taxon_id: 9103</option>
+            <option value="mesocricetus_auratus">mesocricetus_auratus  (Golden Hamster) taxon_id: 10036</option>
+            <option value="microcebus_murinus">microcebus_murinus  (Mouse Lemur) taxon_id: 30608</option>
+            <option value="microtus_ochrogaster">microtus_ochrogaster  (Prairie vole) taxon_id: 79684</option>
+            <option value="monodelphis_domestica">monodelphis_domestica  (Opossum) taxon_id: 13616</option>
+            <option value="mus_caroli">mus_caroli  (Ryukyu mouse) taxon_id: 10089</option>
+            <option value="mus_musculus_129s1svimj">mus_musculus_129s1svimj  (Mouse 129S1/SvImJ) taxon_id: 10090</option>
+            <option value="mus_musculus_aj">mus_musculus_aj  (Mouse A/J) taxon_id: 10090</option>
+            <option value="mus_musculus_akrj">mus_musculus_akrj  (Mouse AKR/J) taxon_id: 10090</option>
+            <option value="mus_musculus_balbcj">mus_musculus_balbcj  (Mouse BALB/cJ) taxon_id: 10090</option>
+            <option value="mus_musculus_c3hhej">mus_musculus_c3hhej  (Mouse C3H/HeJ) taxon_id: 10090</option>
+            <option value="mus_musculus_c57bl6nj">mus_musculus_c57bl6nj  (Mouse C57BL/6NJ) taxon_id: 10090</option>
+            <option value="mus_musculus_casteij">mus_musculus_casteij  (Mouse CAST/EiJ) taxon_id: 10091</option>
+            <option value="mus_musculus_cbaj">mus_musculus_cbaj  (Mouse CBA/J) taxon_id: 10090</option>
+            <option value="mus_musculus_dba2j">mus_musculus_dba2j  (Mouse DBA/2J) taxon_id: 10090</option>
+            <option value="mus_musculus_fvbnj">mus_musculus_fvbnj  (Mouse FVB/NJ) taxon_id: 10090</option>
+            <option value="mus_musculus_lpj">mus_musculus_lpj  (Mouse LP/J) taxon_id: 10090</option>
+            <option value="mus_musculus_nodshiltj">mus_musculus_nodshiltj  (Mouse NOD/ShiLtJ) taxon_id: 10090</option>
+            <option value="mus_musculus_nzohlltj">mus_musculus_nzohlltj  (Mouse NZO/HlLtJ) taxon_id: 10090</option>
+            <option value="mus_musculus_pwkphj">mus_musculus_pwkphj  (Mouse PWK/PhJ) taxon_id: 39442</option>
+            <option value="mus_musculus_wsbeij">mus_musculus_wsbeij  (Mouse WSB/EiJ) taxon_id: 10092</option>
+            <option value="mus_pahari">mus_pahari  (Shrew mouse) taxon_id: 10093</option>
+            <option value="mus_spretus_spreteij">mus_spretus_spreteij  (Algerian mouse) taxon_id: 10096</option>
+            <option value="mustela_putorius_furo">mustela_putorius_furo  (Ferret) taxon_id: 9669</option>
+            <option value="myotis_lucifugus">myotis_lucifugus  (Microbat) taxon_id: 59463</option>
+            <option value="nannospalax_galili">nannospalax_galili  (Upper Galilee mountains blind mole rat) taxon_id: 1026970</option>
+            <option value="nomascus_leucogenys">nomascus_leucogenys  (Gibbon) taxon_id: 61853</option>
+            <option value="notamacropus_eugenii">notamacropus_eugenii  (Wallaby) taxon_id: 9315</option>
+            <option value="ochotona_princeps">ochotona_princeps  (Pika) taxon_id: 9978</option>
+            <option value="octodon_degus">octodon_degus  (Degu) taxon_id: 10160</option>
+            <option value="oreochromis_niloticus">oreochromis_niloticus  (Tilapia) taxon_id: 8128</option>
+            <option value="ornithorhynchus_anatinus">ornithorhynchus_anatinus  (Platypus) taxon_id: 9258</option>
+            <option value="oryctolagus_cuniculus">oryctolagus_cuniculus  (Rabbit) taxon_id: 9986</option>
+            <option value="oryzias_latipes">oryzias_latipes  (Medaka) taxon_id: 8090</option>
+            <option value="otolemur_garnettii">otolemur_garnettii  (Bushbaby) taxon_id: 30611</option>
+            <option value="ovis_aries">ovis_aries  (Sheep) taxon_id: 9940</option>
+            <option value="pan_troglodytes">pan_troglodytes  (Chimpanzee) taxon_id: 9598</option>
+            <option value="papio_anubis">papio_anubis  (Olive baboon) taxon_id: 9555</option>
+            <option value="pelodiscus_sinensis">pelodiscus_sinensis  (Chinese softshell turtle) taxon_id: 13735</option>
+            <option value="peromyscus_maniculatus_bairdii">peromyscus_maniculatus_bairdii  (Northern American deer mouse) taxon_id: 230844</option>
+            <option value="petromyzon_marinus">petromyzon_marinus  (Lamprey) taxon_id: 7757</option>
+            <option value="poecilia_formosa">poecilia_formosa  (Amazon molly) taxon_id: 48698</option>
+            <option value="pongo_abelii">pongo_abelii  (Orangutan) taxon_id: 9601</option>
+            <option value="procavia_capensis">procavia_capensis  (Hyrax) taxon_id: 9813</option>
+            <option value="pteropus_vampyrus">pteropus_vampyrus  (Megabat) taxon_id: 132908</option>
+            <option value="rattus_norvegicus">rattus_norvegicus  (Rat) taxon_id: 10116</option>
+            <option value="saccharomyces_cerevisiae">saccharomyces_cerevisiae  (Saccharomyces cerevisiae) taxon_id: 4932</option>
+            <option value="sarcophilus_harrisii">sarcophilus_harrisii  (Tasmanian devil) taxon_id: 9305</option>
+            <option value="sorex_araneus">sorex_araneus  (Shrew) taxon_id: 42254</option>
+            <option value="sus_scrofa">sus_scrofa  (Pig) taxon_id: 9823</option>
+            <option value="taeniopygia_guttata">taeniopygia_guttata  (Zebra Finch) taxon_id: 59729</option>
+            <option value="takifugu_rubripes">takifugu_rubripes  (Fugu) taxon_id: 31033</option>
+            <option value="tetraodon_nigroviridis">tetraodon_nigroviridis  (Tetraodon) taxon_id: 99883</option>
+            <option value="tupaia_belangeri">tupaia_belangeri  (Tree Shrew) taxon_id: 37347</option>
+            <option value="tursiops_truncatus">tursiops_truncatus  (Dolphin) taxon_id: 9739</option>
+            <option value="vicugna_pacos">vicugna_pacos  (Alpaca) taxon_id: 30538</option>
+            <option value="xenopus_tropicalis">xenopus_tropicalis  (Xenopus) taxon_id: 8364</option>
+            <option value="xiphophorus_maculatus">xiphophorus_maculatus  (Platyfish) taxon_id: 8083</option>
+
+        </param>
+        <!-- Optional free-text biotype restriction; a non-empty value is forwarded by the command template through its biotypes option. -->
+        <param name="biotypes" type="text" value="" optional="true" label="Restrict to these biotypes" >
+            <help><![CDATA[
+Example biotypes: 
+protein_coding, non_coding, pseudogene, nonsense_mediated_decay, non_stop_decay, 
+translated_processed_pseudogene, transcribed_processed_pseudogene, transcribed_unitary_pseudogene, transcribed_unprocessed_pseudogene, 
+polymorphic_pseudogene, processed_pseudogene, unprocessed_pseudogene, unitary_pseudogene, processed_transcript, 
+retained_intron, ccds_gene, sense_overlapping, sense_intronic, cdna_update, antisense, 
+LRG_gene, IG_C_gene, IG_D_gene, IG_J_gene, IG_LV_gene, IG_V_gene, TR_C_gene, TR_D_gene, TR_J_gene, TR_V_gene, 
+IG_pseudogene, IG_C_pseudogene, IG_D_pseudogene, IG_J_pseudogene, IG_V_pseudogene, TR_J_pseudogene, TR_V_pseudogene, TEC, 
+ribozyme, RNase_P_RNA, guide_RNA, macro_lncRNA, bidirectional_promoter_lncRNA, 3prime_overlapping_ncRNA, antisense_RNA, vaultRNA, Y_RNA, SRP_RNA, RNase_MRP_RNA, IG_C_pseudogene, lncRNA, lincRNA, miRNA, snRNA, sRNA, telomerase_RNA, Mt_tRNA, Mt_rRNA, scaRNA, misc_RNA, rRNA, tRNA, scRNA, snoRNA, other
+            ]]></help>
+        </param>
+        <!-- Optional BED dataset; when one is supplied the command template pipes it onward with cat instead of running the retrieval step. -->
+        <param name="input"
+               type="data"
+               format="bed"
+               optional="true"
+               label="A BED file with 12 columns, thickStart and thickEnd define protein coding region"/>
+        <!-- Checkbox, unticked by default, whose ticked value is the report-all switch text and whose unticked value is an empty string. -->
+        <param name="translate_all"
+               type="boolean"
+               truevalue="--all"
+               falsevalue=""
+               checked="false"
+               label="Report all translations (Default is non reference protein sequences)"/>
+        <!-- Checkbox, ticked by default, whose ticked value is the raw switch text that the command template places on the retrieval call. -->
+        <param name="transcript_raw"
+               type="boolean"
+               truevalue="--raw"
+               falsevalue=""
+               checked="true"
+               label="Keep extra columns from ensembl BED"/>
+        <!-- Multi-select rendered as checkboxes; the chosen values drive both the command template branches and the output filters. -->
+        <param name="output_choice"
+               type="select"
+               multiple="true"
+               display="checkboxes"
+               label="Outputs">
+            <option value="transcript_bed">transcripts.bed</option>
+            <option value="translation_bed">translation.bed</option>
+            <option value="translation_fasta">translation.fasta</option>
+        </param>
+        <!-- Shortest protein translation to report: an integer of at least 1, preset to 7. -->
+        <param name="min_length"
+               type="integer"
+               value="7"
+               min="1"
+               label="Minimum length of protein translation to report"/>
+        <!-- Optional digest enzyme; when one is chosen the command template passes its value to the translation call. -->
+        <param name="enzyme"
+               type="select"
+               optional="true"
+               label="Digest enzyme"
+               help="Remove frags that are in a reference protein">
+            <option value="trypsin">trypsin:       ([KR](?=[^P]))|((?&lt;=W)K(?=P))|((?&lt;=M)R(?=P))</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- Each dataset below is kept only when its own name was ticked in output_choice. -->
+        <data name="transcript_bed"
+              format="bed"
+              label="Ensembl $species transcripts.bed">
+            <filter>'transcript_bed' in output_choice</filter>
+        </data>
+        <data name="translation_bed"
+              format="bed"
+              label="Ensembl $species translation.bed">
+            <filter>'translation_bed' in output_choice</filter>
+        </data>
+        <data name="translation_fasta"
+              format="fasta"
+              label="Ensembl $species translation.fasta">
+            <filter>'translation_fasta' in output_choice</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Translates a supplied BED dataset (the command template takes its cat branch because input is set)
+             and requests both translation outputs. -->
+        <test>
+            <param name="species" value="human"/>
+            <param name="input" value="human_transcripts.bed" ftype="bed"/>
+            <param name="output_choice" value="translation_bed,translation_fasta"/>
+            <!-- The translations BED must mention transcript ENST00000641515. -->
+            <output name="translation_bed">
+                <assert_contents>
+                    <has_text text="ENST00000641515" />
+                </assert_contents>
+            </output>
+            <!-- The FASTA must contain a header that starts with the same transcript id. -->
+            <output name="translation_fasta">
+                <assert_contents>
+                    <has_text text=">ENST00000641515" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        usage: ensembl_cdna_translate.py [-h] [-s SPECIES] [-i INPUT] [-t TRANSCRIPTS]
+                                 [-r] [-f FASTA] [-b BED] [-m MIN_LENGTH] [-a]
+                                 [-v] [-d]
+
+Retrieve Ensembl cDNAs and three frame translate
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -s SPECIES, --species SPECIES
+                        Ensembl Species to retrieve
+  -i INPUT, --input INPUT
+                        Use this bed instead of retrieving cDNA from ensembl
+                        (-) for stdin
+  -t TRANSCRIPTS, --transcripts TRANSCRIPTS
+                        Path to output cDNA transcripts.bed (-) for stdout
+  -r, --raw             Report transcript exactly as returned from Ensembl
+  -f FASTA, --fasta FASTA
+                        Path to output translations.fasta
+  -b BED, --bed BED     Path to output translations.bed
+  -m MIN_LENGTH, --min_length MIN_LENGTH
+                        Minimum length of protein translation to report
+  -a, --all             Report all translations (Default is non reference
+                        protein sequences)
+  -v, --verbose         Verbose
+  -d, --debug           Debug
+
+The Ensembl REST API returns a 20-column BED format with these additional columns::
+
+  second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type
+
+    ]]></help>
+    <!-- Publications to cite, referenced by DOI.
+         NOTE(review): presumably the Ensembl REST API paper and an Ensembl database release paper; confirm by resolving the DOIs. -->
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btu613</citation>
+        <citation type="doi">10.1093/nar/gku1010</citation>
+    </citations>
+</tool>