<!--
view ensembl_variant_report.xml @ 10:d367a472e8a1 draft default tip

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/ensembl_variant_report commit d3b7ff1c2f0a1eed7a65af7208e987a35627402c-dirty
author jjohnson
date Wed, 20 Mar 2019 15:27:32 -0400
parents 0ef485da6ba6
children
line wrap: on
line source
-->

<tool id="ensembl_variant_report" name="Ensembl Variant Report" version="0.3.0">
    <!-- Packages, pinned to exact versions, that must be resolvable before the
         command below is run. -->
    <requirements>
        <requirement type="package" version="1.40">gtf_to_genes</requirement>
        <requirement type="package" version="3.1.4">twobitreader</requirement>
        <requirement type="package" version="1.62">biopython</requirement>
    </requirements>
    <stdio>
        <!-- Any exit status of 1 or above is treated as a failed job. -->
        <exit_code range="1:" level="fatal"/>
    </stdio>
    <command><![CDATA[
        ## Build the command line for the bundled report script.
        ## Every interpolated path is wrapped in single quotes so the shell
        ## passes it through verbatim (no expansion, no word splitting).
        python '$__tool_directory__/ensembl_variant_report.py'
        ## Reference genome: either a data-table entry or a history dataset.
        #if $ref.ref_source == 'cached':
            --twobit='$ref.ref_loc.fields.path'
        #else
            --twobit='$ref.ref_file'
        #end if
        --gene_model='$gtf_file'
        ## Variant source: a VCF, or a tabular file plus its column indexes.
        #if $variant.fmt == 'vcf':
            --input='$variant.input_vcf'
            --format=snpeff
        #else
            --input='$variant.input_tsv'
            --pos_column=$variant.pos_column
            --ref_column=$variant.ref_column
            --alt_column=$variant.alt_column
            --transcript_column=$variant.transcript_column
            --dp_column=$variant.dp_column
            --dpr_column=$variant.dpr_column
        #end if
        ## Optional settings are only passed when the user supplied a value.
        #if str($filter.min_depth) != '':
            --min_depth=$filter.min_depth
        #end if
        #if str($filter.min_freq) != '':
            --min_freq=$filter.min_freq
        #end if
        #if str($report.readthrough) != '':
            --readthrough=$report.readthrough
        #end if
        #if str($report.leading_aa) != '':
            --leading_aa=$report.leading_aa
        #end if
        #if str($report.trailing_aa) != '':
            --trailing_aa=$report.trailing_aa
        #end if
        --output='$output'
    ]]></command>
    <inputs>
        <!-- Variant source: a snpEff VCF, or a tabular file whose relevant
             columns are picked by the user. -->
        <conditional name="variant">
            <param name="fmt" type="select" label="Input format for variants">
                <option value="vcf">snpEff vcf</option>
                <option value="tsv">tabular from snpsift extract</option>
            </param>
            <when value="vcf">
                <param name="input_vcf" type="data" format="vcf" label="snpEff VCF file"/>
            </when>
            <when value="tsv">
                <param name="input_tsv" type="data" format="tabular" label="tabular file"/>
                <param name="pos_column" type="data_column" data_ref="input_tsv" label="POS column"/>
                <param name="ref_column" type="data_column" data_ref="input_tsv" label="REF column"/>
                <param name="alt_column" type="data_column" data_ref="input_tsv" label="ALT column"/>
                <param name="transcript_column" type="data_column" data_ref="input_tsv" label="Transcript ID column"/>
                <param name="dp_column" type="data_column" data_ref="input_tsv" label="Read Depth (DP) column"/>
                <param name="dpr_column" type="data_column" data_ref="input_tsv" label="Allele Count (DPR or AN) column"/>
            </when>
        </conditional>
        <!-- Reference genome in 2bit format: from the "twobit" data table or
             from a dataset in the history. -->
        <conditional name="ref">
            <param name="ref_source" type="select" label="Source for Genomic Data">
                <option value="cached">Locally cached</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param name="ref_loc" type="select" label="Select reference 2bit file">
                    <options from_data_table="twobit" />
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="twobit" label="reference 2bit file" />
            </when>
        </conditional>
        <param name="gtf_file" type="data" format="gtf" label="Ensembl GTF file"/>
        <!-- Optional filters; an empty value means the option is not passed
             to the script. -->
        <section name="filter" expanded="false" title="Filter Options">
            <param name="min_depth" type="integer" value="" optional="true" min="0" label="Minimum Read Depth to report"
                   help="If a value is entered, ignore variants with DP INFO field less than this value"/>
            <param name="min_freq" type="float" value="" optional="true" min="0.0" max="1.0" label="Minimum Alt frequency to report"
                   help="If a value is entered, ignore variant alleles whose DPR count divided by the sum of DPR counts is less than this value"/>
        </section>
        <!-- Controls how much peptide context is shown in the report. -->
        <section name="report" expanded="false" title="Report Options">
            <param name="readthrough" type="integer" value="0" optional="true" min="0" max="4" label="Number of readthrough stop codons to display"/>
            <param name="leading_aa" type="integer" value="10" optional="true" min="0" label="Number of Amino Acids prior to variant to display"
                   help="Ignored for frame shifts"/>
            <param name="trailing_aa" type="integer" value="10" optional="true" min="0" label="Number of Amino Acids following the variant to display"
                   help="Ignored for frame shifts"/>
        </section>
    </inputs>
    <outputs>
        <!-- One tabular report; the column_names metadata labels its columns. -->
        <data name="output" format="tabular">
            <actions>
                <action type="metadata"
                        name="column_names"
                        default="Gene,Ref_location,Ref_seq,Var_seq,Frequency,DP,Ensemble_Gene_transcript,AA_pos,AA_var,Protein_len,Stop_Codon,Variant_Peptide,Transcript_type"/>
            </actions>
        </data>
    </outputs>
    <tests>
        <!-- Parameters are nested in the same conditional/section layout as
             the inputs, so each test names its parameters unambiguously. -->

        <!-- VCF input (GRCh38_X400k_ANN.vcf), default options: the stop-codon
             readthrough peptide must not be reported. -->
        <test>
            <conditional name="variant">
                <param name="fmt" value="vcf"/>
                <param name="input_vcf" value="GRCh38_X400k_ANN.vcf" ftype="vcf"/>
            </conditional>
            <conditional name="ref">
                <param name="ref_source" value="history"/>
                <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            </conditional>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <output name="output">
                <assert_contents>
                    <has_text text="MGGQ_A_SASNSFSRLH" />
                    <not_has_text text="*REDHAAGPEA" />
                </assert_contents>
            </output>
        </test>

        <!-- Same expectations with the GRCh38_X400k_EFF.vcf input. -->
        <test>
            <conditional name="variant">
                <param name="fmt" value="vcf"/>
                <param name="input_vcf" value="GRCh38_X400k_EFF.vcf" ftype="vcf"/>
            </conditional>
            <conditional name="ref">
                <param name="ref_source" value="history"/>
                <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            </conditional>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <output name="output">
                <assert_contents>
                    <has_text text="MGGQ_A_SASNSFSRLH" />
                    <not_has_text text="*REDHAAGPEA" />
                </assert_contents>
            </output>
        </test>

        <!-- readthrough=1: the peptide following the stop codon must now
             appear in the report. -->
        <test>
            <conditional name="variant">
                <param name="fmt" value="vcf"/>
                <param name="input_vcf" value="GRCh38_X400k_ANN.vcf" ftype="vcf"/>
            </conditional>
            <conditional name="ref">
                <param name="ref_source" value="history"/>
                <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            </conditional>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <section name="report">
                <param name="readthrough" value="1"/>
            </section>
            <output name="output">
                <assert_contents>
                    <has_text text="MGGQ_A_SASNSFSRLH" />
                    <has_text text="*REDHAAGPEA" />
                </assert_contents>
            </output>
        </test>

        <!-- Depth and frequency filters: one peptide is kept, another is
             filtered out. -->
        <test>
            <conditional name="variant">
                <param name="fmt" value="vcf"/>
                <param name="input_vcf" value="GRCh38_X400k_ANN.vcf" ftype="vcf"/>
            </conditional>
            <conditional name="ref">
                <param name="ref_source" value="history"/>
                <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            </conditional>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <section name="filter">
                <param name="min_depth" value="100"/>
                <param name="min_freq" value=".80"/>
            </section>
            <output name="output">
                <assert_contents>
                    <has_text text="EQLDAGVRYL_E_LRIAHMLEGS" />
                    <not_has_text text="EYDILVAEET_V_GEPWEDGFEA" />
                </assert_contents>
            </output>
        </test>

        <!-- Tabular input with explicit column selections. -->
        <test>
            <conditional name="variant">
                <param name="fmt" value="tsv"/>
                <param name="input_tsv" value="GRCh38_X400k.tsv" ftype="tabular"/>
                <param name="pos_column" value="2"/>
                <param name="ref_column" value="3"/>
                <param name="alt_column" value="4"/>
                <param name="transcript_column" value="7"/>
                <param name="dp_column" value="8"/>
                <param name="dpr_column" value="9"/>
            </conditional>
            <conditional name="ref">
                <param name="ref_source" value="history"/>
                <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            </conditional>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <output name="output">
                <assert_contents>
                    <has_text text="MGGQ_A_SASNSFSRLH" />
                    <not_has_text text="*REDHAAGPEA" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Uses an Ensembl GTF and a genome 2bit reference to report variant peptides from snpEff reported missense and frameshift variants.
Allows readthrough of stop codons, and reports the stop codons.  Translation readthrough is known to occur with some antibiotics.

The variant peptides can be converted to a fasta file with text and fasta tools, then used as input to epitope binding prediction 
applications such as netMHC or IEDB.

**Input**

Input can be a snpEff vcf file using either ANN or EFF annotations.

Alternatively, the input can be a tabular file that has columns:

  - genomic_location
  - reference_bases
  - variant_bases
  - Ensembl Transcript ID
  - Read Depth (DP)
  - AlleleDepth (DPR)

**Output**

Sample Output ::

  ====== ============= ======= ======= ========= === =============================== ====== ====== =========== =============== ======================= =======================
  Gene   Ref_location  Ref_seq Var_seq Frequency DP  Ensemble_Gene_transcript        AA_pos AA_var Protein_len Stop_Codon      Variant_Peptide         Transcript_type
  ====== ============= ======= ======= ========= === =============================== ====== ====== =========== =============== ======================= =======================
  ACTL8  1:18149510 +  G       T       1.00      12  ENSG00000117148|ENST00000375406 3      A3S    367         G-TGA           MA_S_RTVIIDHGSG         protein_coding
  BDH2   4:104013796 - A       G       0.47      159 ENSG00000164039|ENST00000511354 70     N70S   91          c-tag           TKKKQIDQFA_S_EVERLDVLFN nonsense_mediated_decay
  CENPE  4:104061993 - G       C       0.83      6   ENSG00000138778|ENST00000265148 1911   S1911T 2702        G-TAG           LKLERDQLKE_T_LQETKARDLE protein_coding
  CCHCR1 6:31110391 -  C       G       0.40      65  ENSG00000204536|ENST00000396268 865    S865C  872         C-TAA           QGDNLDRCSS_C_NPQMSS*    protein_coding
  NPRL3  16:138772 -   CT      CCT     0.58      123 ENSG00000103148|ENST00000399953 489    S489L  569         A-TGA-C,C-TGA-G LGA*TRSHPQCTRSPEP*      protein_coding
  ====== ============= ======= ======= ========= === =============================== ====== ====== =========== =============== ======================= =======================

The Variant_Peptide column:
  - missense:  prior amino acids _ variant amino acid _ following amino acids
  - frameshift: variant amino acids with stop codons indicated by *


    ]]></help>
    <!-- Publication to cite for this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btt385</citation>
    </citations>
</tool>