view summarize_poliovirus_alignment.xml @ 4:eae57366bc55 draft

planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
author sanbi-uwc
date Fri, 22 Jul 2022 10:01:30 +0000
parents 09b98e40ab59
children 7bcfc97b9284
line wrap: on
line source

<tool id="summarize_poliovirus_alignment" name="Summarize poliovirus alignment" version="0.2.0+galaxy1" profile="21.05">
    <requirements>
        <!-- The command runs summarize_alignment.py from the tool directory with `python`; a Python 3.9 interpreter is the only declared dependency. -->
        <requirement type="package" version="3.9">python</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Build one summary table plus one variant list per input assessment.
        ## The script path is quoted so the command still works when the tool
        ## directory contains spaces or other shell-special characters.
        python '$__tool_directory__/summarize_alignment.py'
            --summary_output_filename '$output1'
            ## One output path per element of the variant_list output collection.
            ## NOTE(review): presumably the script pairs these with the datasets
            ## below by position; confirm in summarize_alignment.py.
            --variant_list_outputs
        #for $key in $variant_list.keys()
            '$variant_list[$key]'
        #end for
            ## One input path per element of the alignment_assessments collection.
            --datasets
        #for $dataset in $alignment_assessments
            '$dataset'
        #end for
    ]]></command>
    <inputs>
        <!-- Input is a list collection of JSON reports (for poliovirus Sabin 1, 2 and 3)
             as produced by the assess_poliovirus_alignment tool. -->
        <param name="alignment_assessments"
               type="data_collection"
               collection_type="list"
               format="json"
               label="Poliovirus alignment assessments"
               help="Input is a list of JSON reports from the assess_poliovirus_alignment tool" />
    </inputs>
    <outputs>
        <!-- Summary table written to the path passed as the summary output filename in the command. -->
        <data name="output1"
              format="tabular"
              label="Poliovirus variant summary on ${on_string}">
            <actions>
                <!-- Column headers attached to the summary table as metadata. -->
                <action name="column_names"
                        type="metadata"
                        default="sample name,best matching reference,num variants,perc variants,quality,variant list" />
            </actions>
        </data>
        <!-- Tabular variant lists; the collection is structured like the alignment_assessments input. -->
        <collection name="variant_list"
                    type="list"
                    format="tabular"
                    structured_like="alignment_assessments"
                    label="Poliovirus variant lists on ${on_string}">
            <!-- TODO: find a way to set the metadata columns for the tabular datasets -->
        </collection>
    </outputs>
    <tests>
        <!-- Single-element run: one JSON assessment in; a summary table and a one-element variant_list collection out. -->
        <!-- NOTE(review): expect_num_outputs="3" presumably counts output1, the collection and its one element; confirm against Galaxy's counting rules. -->
        <test expect_num_outputs="3">
            <param name="alignment_assessments" ftype="json">
                <collection type="list">
                    <element name="sample1" value="sample1_output.json" />
                </collection>
            </param>
            <!-- The summary table must contain this semicolon-separated variant string. -->
            <output name="output1" ftype="tabular">
                <assert_contents>
                    <has_text text="23:G:A;26:G:A;38:A:G;40:T:C;59:C:T;65:T:C;72:A:G;" />
                </assert_contents>
            </output>
            <!-- The output element keeps the input element's name (sample1) and is compared against the expected file. -->
            <output_collection name="variant_list">
                <element name="sample1" file="sample1_variants.tabular" ftype="tabular" />
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
        Given a list of outputs from the assess_poliovirus_alignment tool, produce a summary table and a per-sample variant list.

        In the summary file the columns are:

        1. sample name

        2. best matching reference: the workflow compares sequences against the Sabin 1, 2 and 3 poliovirus reference sequences and selects the one that is the closest match

        3. number of variants relative to the best matching reference

        4. percentage of variation: variants are only counted in the VP1 gene and this percentage is calculated by comparing the number of variants to the length of the VP1 gene in the best matching reference.

        5. quality: this is an attempt at a quality score for the sequences

        6. variant list: a semicolon-separated list of variants. Variant positions are in 1-based coordinates counting from the first base of the VP1 gene. A variant list is also produced for each sample; these might be more readable.

        In the per-sample variant lists the columns are as follows:

        1. variant position in the genome

        2. variant position in VP1 (in 1-based coordinates)

        3. base in reference

        4. base in sequenced sample

    ]]></help>
    <citations>
        <!-- BibTeX entry for the upstream polio_report scripts. -->
        <!-- NOTE(review): the entry cites version 0.1.0 while this tool is versioned 0.2.0+galaxy1; confirm whether the citation should be bumped. -->
        <citation type="bibtex"><![CDATA[
            @software{van_Heusden_Poliovirus_variation_reporting_2022,
                author = {van Heusden, Peter},
                month = {7},
                title = {{Poliovirus variation reporting scripts}},
                url = {https://github.com/pvanheus/polio_report},
                version = {0.1.0},
                year = {2022}
                }
        ]]></citation>
    </citations>
</tool>