Mercurial > repos > jjohnson > ensembl_cdna_translate
view ensembl_cdna_translate.xml @ 8:5c92d0be6514 draft
Uploaded
author | jjohnson |
---|---|
date | Thu, 14 Dec 2017 13:32:00 -0500 |
parents | d59e3ce10e74 |
children |
line wrap: on
line source
<tool id="ensembl_cdna_translate" name="Ensembl cDNA Translations" version="0.1.0">
    <!--
        Galaxy wrapper around ensembl_cdna_translate.py.

        The command runs in up to two stages joined by a shell pipe:
          1. produce a BED stream of transcripts, either by copying a history
             BED dataset or by calling the script to retrieve features;
          2. (only when a translation output is requested) feed that stream to
             a second invocation of the script that writes the translation BED
             and/or FASTA outputs.
    -->
    <description>using Ensembl REST API</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="0.4.10">requests-cache</requirement>
        <requirement type="package" version="1.62">biopython</requirement>
        <requirement type="package" version="3.1.4">twobitreader</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <command><![CDATA[
## Stage 1: emit the transcript BED stream.
#if $features.feature_src == 'history_bed'
    cat '$features.input'
    ## When no translation stage follows, nothing else writes the transcripts
    ## output, so copy the BED into it instead of dumping it on the job stdout.
    #if str($output_choice).find('transcript_bed') >= 0 and str($output_choice).find('translation') < 0
        > '$transcript_bed'
    #end if
#else
    python '$__tool_directory__/ensembl_cdna_translate.py'
    #if $species
        --species '$species'
    #end if
    $features.transcript_raw
    #if $features.biotypes
        --biotypes '$features.biotypes'
    #end if
    #if $features.regions
        --regions '$features.regions'
    #end if
    #if str($output_choice).find('transcript_bed') >= 0
        --transcripts
        #if str($output_choice).find('translation') >= 0
            ## Both outputs wanted: stream to stdout and keep a copy with tee.
            '-' | tee '$transcript_bed'
        #else
            '$transcript_bed'
        #end if
    #elif str($output_choice).find('translation') >= 0
        --transcripts '-'
    #end if
#end if
## Stage 2: translate the BED stream read from stdin.
## 'translation' matches both translation_bed and translation_fasta.
#if str($output_choice).find('translation') >= 0
    | python '$__tool_directory__/ensembl_cdna_translate.py' -i '-'
    #if $ref.ref_source == 'cached'
        --twobit='$ref.ref_loc.fields.path'
    #elif $ref.ref_source == 'history'
        --twobit='$ref.ref_file'
    #end if
    --min_length $translations.min_length
    #if $translations.enzyme
        --enzyme '$translations.enzyme'
    #end if
    $translations.translate_all
    #if $features.feature_src == 'history_bed' and str($output_choice).find('transcript_bed') >= 0
        --transcripts '$transcript_bed'
    #end if
    #if str($output_choice).find('translation_bed') >= 0
        --bed '$translation_bed'
    #end if
    #if str($output_choice).find('translation_fasta') >= 0
        --fasta '$translation_fasta'
    #end if
    #if $features.biotypes
        --biotypes '$features.biotypes'
    #end if
#end if
    ]]></command>
    <inputs>
        <param name="species" type="text" value="" label="Ensembl species" >
            <help> </help>
            <expand macro="species_options" />
        </param>
        <!-- Where the transcript features come from. -->
        <conditional name="features">
            <param name="feature_src" type="select" label="Features to translate">
                <option value="ensembl_rest">Retrieve from Ensembl</option>
                <option value="history_bed">Use Ensembl BED file</option>
            </param>
            <when value="ensembl_rest">
                <param name="transcript_raw" type="boolean" truevalue="--raw" falsevalue="" checked="true"
                       label="Keep extra columns from ensembl BED"/>
                <param name="biotypes" type="text" value="" optional="true"
                       label="Restrict Feature retrieval to these biotypes" >
                    <expand macro="biotypes_help" />
                </param>
                <param name="regions" type="text" value="" optional="true"
                       label="Restrict Feature retrieval to comma-separated list of regions" >
                    <help>Each region is specified as: chr or chr:pos or chr:from-to</help>
                    <validator type="regex" message="Regions must be a comma-separated list of chr, chr:pos or chr:from-to">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
                </param>
            </when>
            <when value="history_bed">
                <param name="input" type="data" format="bed" optional="true"
                       label="A BED file with 12 columns"
                       help="thickStart and thickEnd define protein coding region, blocks define exon regions"/>
                <param name="biotypes" type="text" value="" optional="true"
                       label="Restrict Feature translation to these biotypes" >
                    <expand macro="biotypes_help" />
                </param>
            </when>
        </conditional>
        <!-- Where the genomic sequence comes from; ensembl_rest passes no twobit option. -->
        <conditional name="ref">
            <param name="ref_source" type="select" label="Source for Genomic Sequence Data">
                <option value="cached">Locally cached twobit</option>
                <option value="history">History dataset twobit</option>
                <option value="ensembl_rest">Retrieve sequences from Ensembl (Slow and only for Ensembl Transcripts)</option>
            </param>
            <when value="cached">
                <param name="ref_loc" type="select" label="Select reference 2bit file">
                    <options from_data_table="twobit" />
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="twobit" label="reference 2bit file" />
            </when>
            <when value="ensembl_rest"/>
        </conditional>
        <section name="translations" expanded="false" title="Translation Options">
            <param name="min_length" type="integer" value="10" min="1"
                   label="Minimum length of protein translation to report"/>
            <param name="translate_all" type="boolean" truevalue="--all" falsevalue="" checked="false"
                   label="Report all translations (Default is non reference protein sequences)"/>
            <param name="enzyme" type="select" optional="true" label="Digest enzyme"
                   help="Remove frags that are in a reference protein">
                <!-- The look-behind "(?<=" must be written with an escaped "<" to keep the XML well-formed. -->
                <option value="trypsin">trypsin: ([KR](?=[^P]))|((?&lt;=W)K(?=P))|((?&lt;=M)R(?=P))</option>
            </param>
        </section>
        <param name="output_choice" type="select" multiple="true" display="checkboxes" label="Outputs">
            <option value="transcript_bed">transcripts.bed</option>
            <option value="translation_bed">translation.bed</option>
            <option value="translation_fasta">translation.fasta</option>
        </param>
    </inputs>
    <outputs>
        <!-- Each output is created only when its checkbox is ticked in output_choice. -->
        <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed">
            <filter>'transcript_bed' in output_choice</filter>
        </data>
        <data name="translation_bed" format="bed" label="Ensembl ${species} translation.bed">
            <filter>'translation_bed' in output_choice</filter>
        </data>
        <data name="translation_fasta" format="fasta" label="Ensembl ${species} translation.fasta">
            <filter>'translation_fasta' in output_choice</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="species" value="human"/>
            <param name="feature_src" value="history_bed"/>
            <param name="input" value="human_transcripts.bed" ftype="bed"/>
            <param name="ref_source" value="ensembl_rest"/>
            <param name="output_choice" value="translation_bed,translation_fasta"/>
            <output name="translation_bed">
                <assert_contents>
                    <has_text text="ENST00000641515" />
                </assert_contents>
            </output>
            <output name="translation_fasta">
                <assert_contents>
                    <has_text text="&gt;ENST00000641515" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
::

    usage: ensembl_cdna_translate.py [-h] [-s SPECIES] [-i INPUT] [-t TRANSCRIPTS]
                                     [-r] [-f FASTA] [-b BED] [-m MIN_LENGTH] [-a]
                                     [-v] [-d]

    Retrieve Ensembl cDNAs and three frame translate

    optional arguments:
      -h, --help            show this help message and exit
      -s SPECIES, --species SPECIES
                            Ensembl Species to retrieve
      -i INPUT, --input INPUT
                            Use this bed instead of retrieving cDNA from ensembl
                            (-) for stdin
      -t TRANSCRIPTS, --transcripts TRANSCRIPTS
                            Path to output cDNA transcripts.bed (-) for stdout
      -r, --raw             Report transcript exactly as returned from Ensembl
      -f FASTA, --fasta FASTA
                            Path to output translations.fasta
      -b BED, --bed BED     Path to output translations.bed
      -m MIN_LENGTH, --min_length MIN_LENGTH
                            Minimum length of protein translation to report
      -a, --all             Report all translations (Default is non reference
                            protein sequences)
      -v, --verbose         Verbose
      -d, --debug           Debug

The Ensembl REST API returns a 20-column BED format with these additional columns::

    second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btu613</citation>
        <citation type="doi">10.1093/nar/gku1010</citation>
    </citations>
</tool>