view uniprotxml_downloader.xml @ 5:7be8e30d536f draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
author galaxyp
date Thu, 06 Jul 2023 21:15:29 +0000
parents e1ffb00a0436
children
line wrap: on
line source

<tool id="uniprotxml_downloader" name="UniProt" version="2.4.0" profile="21.01">
    <!-- One-line description of what the tool does. -->
    <description>download proteome as XML or fasta</description>
    <!-- Shared macro definitions. The query_field macro expanded in the inputs
         section is presumably defined in macros.xml (not visible here). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Runtime dependency: the requests package, pinned to version 2.25.1. -->
    <requirements>
        <requirement type="package" version="2.25.1">requests</requirement>
    </requirements>
    <!-- Any exit code of 1 or higher is reported as a fatal error. -->
    <stdio>
        <exit_code range="1:"  level="fatal" description="Error downloading proteome." />
    </stdio>
    <command>
<![CDATA[
## Run the downloader script that ships in the tool directory.
## Every interpolated parameter is single-quoted so the shell receives each
## value as exactly one argument.
python '$__tool_directory__/uniprotxml_downloader.py'
#if $input_method.input_choice == 'common':
    ## Organism chosen from the uniprot_taxons.loc list, searched as a taxonomy_id.
    --search-id '$input_method.organism'
    --field taxonomy_id
    ## The reviewed filter is optional, so it is passed only when a value is set.
    #if $input_method.reviewed:
        --reviewed='$input_method.reviewed'
    #end if
#elif $input_method.input_choice == 'enter_ids':
    --field '$input_method.field'
    ## One --search-id argument per comma-separated entry.
    #for $id in $input_method.ids.split(','):
        --search-id '$id'
    #end for
#elif $input_method.input_choice == 'history':
    --field '$input_method.field'
    --input='${input_method.id_file}'
    ## Pass the selected column number minus one
    ## (presumably the script expects a 0-based column index).
    --column=#echo int(str($input_method.column)) - 1#
#end if
--format '$format'
--output '${proteome}'
]]>
    </command>
    <inputs>
        <!-- Three ways of supplying search IDs: a predefined organism list,
             manually typed IDs, or a column of a history dataset. -->
        <conditional name="input_method">
            <param name="input_choice" type="select" label="Select">
                <option value="common">A Common Organism</option>
                <option value="enter_ids">A manually entered list of Uniprot IDs</option>
                <option value="history">A history dataset with a column containing Uniprot IDs</option>
            </param>
            <when value="common">
                <!-- Options are read from uniprot_taxons.loc: column 0 is the
                     displayed name, column 1 the value passed to the script. -->
                <param name="organism" type="select" label="Common Organisms"
                       help="select species for protein database">
                    <options from_file="uniprot_taxons.loc">
                        <column name="name" index="0" />
                        <column name="value" index="1" />
                    </options>
                </param>
                <!-- Optional filter; the command adds the reviewed argument only when set. -->
                <param name="reviewed" type="select" label="filter by reviewed status" optional="true">
                    <help><![CDATA[
                    UniProtKB/TrEMBL (unreviewed) is a large, automatically annotated database that may contain
                    redundant sequences, but there is a higher chance peptides will be identified.
                    UniProtKB/Swiss-Prot (reviewed) is a smaller, manually annotated database with
                    less of a chance peptides will be identified but less sequence redundancy.
                    ]]>
                    </help>
                    <option value="yes">UniProtKB/Swiss-Prot (reviewed only)</option>
                    <option value="no">UniProtKB/TrEMBL (unreviewed only)</option>
                </param>
            </when>
            <when value="enter_ids">
                <!-- Comma-separated list; each entry is one or more word tokens
                     separated by single spaces, as enforced by the validator. -->
                <param name="ids" type="text" label="Search ID values"
                       help="Enter one or more IDs (separated by commas) from http://www.uniprot.org/proteomes/">
                    <validator type="regex" message="OrganismID[,OrganismID]">^\w+( \w+)*(,\w+( \w+)*)*$</validator>
                </param>
                <!-- Presumably supplies the "field" parameter read by the command
                     (macro defined outside this file). -->
                <expand macro="query_field"/>
            </when>
            <when value="history">
                <param name="id_file" type="data" format="tabular,txt" label="Dataset (tab separated) with ID column"/>
                <!-- Column selector bound to the id_file dataset. -->
                <param name="column" type="data_column" data_ref="id_file" label="Column with ID"/>
                <!-- Presumably supplies the "field" parameter read by the command
                     (macro defined outside this file). -->
                <expand macro="query_field"/>
            </when>
        </conditional>
        <!-- Output format; also drives the datatype of the output dataset. -->
        <param name="format" type="select" label="uniprot output format">
            <option value="xml">xml</option>
            <option value="fasta">fasta</option>
        </param>
    </inputs>
    <outputs>
        <!-- Single output dataset: uniprotxml by default, switched to fasta
             when the "format" parameter is set to fasta. -->
        <data format="uniprotxml" name="proteome" label="UniProt.${format}">
            <change_format>
                <when input="format" value="fasta" format="fasta" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Manually entered single ID with XML output. The "field" parameter is
             not set here, so its default applies. Checks for the closing
             uniprot tag in the output. -->
        <test>
            <param name="input_choice" value="enter_ids"/>
            <param name="ids" value="1566990"/>
            <param name="format" value="xml"/>
            <output name="proteome">
                <assert_contents>
                    <has_text text="&lt;/uniprot&gt;" />
                </assert_contents>
            </output>
        </test>
        <!-- Manually entered IDs searched as taxonomy_id, fasta output. -->
        <test>
            <param name="input_choice" value="enter_ids"/>
            <param name="ids" value="765963,512562"/>
            <param name="field" value="taxonomy_id"/>
            <param name="format" value="fasta"/>
            <output name="proteome">
                <assert_contents>
                    <has_text text="Shi470" />
                    <has_text text="PeCan4" />
                </assert_contents>
            </output>
        </test>
        <!-- Manually entered names searched as taxonomy_name, fasta output. -->
        <test>
            <param name="input_choice" value="enter_ids"/>
            <param name="ids" value="Shi470,PeCan4"/>
            <param name="field" value="taxonomy_name"/>
            <param name="format" value="fasta"/>
            <output name="proteome">
                <assert_contents>
                    <has_text text="Shi470" />
                    <has_text text="PeCan4" />
                </assert_contents>
            </output>
        </test>
        <!-- Manually entered protein accessions, fasta output. -->
        <test>
            <param name="input_choice" value="enter_ids"/>
            <param name="ids" value="E1Q2I0,E1Q3C4"/>
            <param name="field" value="accession"/>
            <param name="format" value="fasta"/>
            <output name="proteome">
                <assert_contents>
                    <has_text text="E1Q2I0" />
                    <has_text text="E1Q3C4" />
                </assert_contents>
            </output>
        </test>
        <!-- History dataset, column 1, searched as taxonomy_name. -->
        <test>
            <param name="input_choice" value="history"/>
            <param name="id_file" value="Helicobacter_strains.tsv" ftype="tabular"/>
            <param name="column" value="1"/>
            <param name="field" value="taxonomy_name"/>
            <param name="format" value="fasta"/>
            <output name="proteome">
                <assert_contents>
                    <has_text text="Shi470" />
                    <has_text text="PeCan4" />
                </assert_contents>
            </output>
        </test>
        <!-- History dataset, column 2, searched as taxonomy_id. -->
        <test>
            <param name="input_choice" value="history"/>
            <param name="id_file" value="Helicobacter_strains_ids.tsv" ftype="tabular"/>
            <param name="column" value="2"/>
            <param name="field" value="taxonomy_id"/>
            <param name="format" value="fasta"/>
            <output name="proteome">
                <assert_contents>
                    <has_text text="Shi470" />
                    <has_text text="PeCan4" />
                </assert_contents>
            </output>
        </test>
        <!-- History dataset, column 1, searched as protein accession. -->
        <test>
            <param name="input_choice" value="history"/>
            <param name="id_file" value="Helicobacter_protein_accessions.tsv" ftype="tabular"/>
            <param name="column" value="1"/>
            <param name="field" value="accession"/>
            <param name="format" value="fasta"/>
            <output name="proteome">
                <assert_contents>
                    <has_text text="E1Q2I0" />
                    <has_text text="E1Q3C4" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[
**UniProt Downloader**

Downloads either a UniProtXML file or a fasta file from UniProtKB

The Morpheus proteomics search algorithm can use the UniProtXML format as a search database.

Available proteomes: http://www.uniprot.org/proteomes/

Available taxon names: http://www.uniprot.org/taxonomy/

Example taxon: http://www.uniprot.org/taxonomy/512562

Example protein: https://www.uniprot.org/uniprotkb/E1Q2I0/entry

Description of query fields: https://www.uniprot.org/help/query-fields

IDs can be entered as text or read from a column in a tabular dataset from your history.

Example IDs and names related to the bacterium Helicobacter pylori (strain Shi470) ::


 - 512562
 - Shi470
 - Helicobacter pylori
 - Helicobacter
 - Helicobacteraceae

 Example protein accession numbers from Helicobacter pylori:

 - E1Q2I0
 - E1Q3C4


UniProtKB help: http://www.uniprot.org/help/uniprotkb

]]>
    </help>
    <!-- Publication to cite for this tool, given as a DOI. -->
    <citations>
      <citation type="doi">10.1093/nar/gku989</citation>
    </citations>
</tool>