view unipept.xml @ 1:b65ee881ca64 draft

planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95
author galaxyp
date Tue, 22 Jan 2019 20:58:28 -0500
parents b33376bf2290
children dca8a1fe0bf3
line wrap: on
line source

<tool id="unipept" name="Unipept" version="4.0.0">
    <description>retrieve taxonomy for peptides</description>
    <macros>
        <!-- Reusable parameter snippets that are expanded inside the per-API <when> branches
             of the "unipept" conditional. Each boolean contributes its truevalue to the
             command line when checked and an empty string otherwise. -->

        <!-- Adds "-e" when checked (checked by default). -->
        <xml name="equate_il">
            <param name="equate_il" type="boolean" truevalue="-e" falsevalue="" checked="true" label="Equate isoleucine and leucine">
                <help>isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records</help>
            </param>
        </xml>

        <!-- Adds "-x" when checked; unchecked by default. The <yield/> is replaced by the
             children of the <expand> element, so every call site supplies its own <help>. -->
        <xml name="extra">
            <param name="extra" type="boolean" truevalue="-x" falsevalue="" checked="false" label="retrieve extra information">
                <yield/>
            </param>
        </xml>

        <!-- Same parameter as the "extra" macro, but checked by default. -->
        <xml name="extra_true">
            <param name="extra" type="boolean" truevalue="-x" falsevalue="" checked="true" label="retrieve extra information">
                <yield/>
            </param>
        </xml>

        <!-- Taxonomy-related switches: "-n" (names, checked by default) and "-A" (allfields). -->
        <xml name="names">
            <param name="names" type="boolean" truevalue="-n" falsevalue="" checked="true" label="names">
                <help>return the names in complete taxonomic lineage</help>
            </param>
            <param name="allfields" type="boolean" truevalue="-A" falsevalue="" checked="false" label="allfields">
                <help>include fields for most specific taxonomic classification: taxon_rank,taxon_id,taxon_name before lineage</help>
            </param>
        </xml>

        <!-- Adds "-D" when checked; the call site may supply extra children through <yield/>. -->
        <xml name="domains">
            <param name="domains" type="boolean" truevalue="-D" falsevalue="" checked="false" label="group responses by GO namespace (biological process, molecular function, cellular component)">
                <yield/>
            </param>
        </xml>

        <!-- Output selector with a <yield/> slot for additional options.
             NOTE(review): no <expand macro="selected_outputs"> appears in this file; the
             <inputs> section declares its own "selected_outputs" param instead. -->
        <xml name="selected_outputs">
            <param name="selected_outputs" type="select" multiple="true" display="checkboxes" label="Choose outputs">
                <option value="tsv" selected="true">Tabular with one line per peptide</option>
                <option value="csv">Comma Separated Values (.csv) with one line per peptide</option>
                <option value="json">JSON Taxonomy Tree (for pept2lca, pept2taxa, and peptinfo)</option>
                <yield/>
                <option value="unmatched">Unmatched peptides</option>
            </param>
        </xml>
    </macros>
    <!-- Interpreter dependency: the <command> section runs unipept.py with "python". -->
    <requirements>
        <requirement type="package" version="2.7">python</requirement>
    </requirements>
    <!-- Error detection is based on the exit code: this rule matches every exit code of 1 or
         higher. No explicit level is given, so Galaxy's default level for exit_code rules applies. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <command><![CDATA[
      ## Run the bundled unipept.py script for the selected Unipept API.
      ## The select parameters are converted to plain strings once, so that every
      ## membership test below compares str with str rather than a parameter wrapper.
      #set $unipept_api = str($unipept.api)
      #set $chosen_outputs = str($selected_outputs).split(',')
      python '$__tool_directory__/unipept.py'
      ## --url 'http://morty.ugent.be/api/v1' -g -M 1
      --api='$unipept_api'
      $unipept.equate_il $unipept.extra
      ## The taxonomy switches exist only in the pept2lca, pept2taxa and peptinfo branches.
      #if $unipept_api in ['pept2lca', 'pept2taxa', 'peptinfo']:
        $unipept.names $unipept.allfields
      #end if
      ## The GO namespace grouping switch exists only in the pept2go, pept2funct and peptinfo branches.
      #if $unipept_api in ['pept2go', 'pept2funct', 'peptinfo']:
        $unipept.domains
      #end if
      $strict
      ## Peptide source: exactly one input option is emitted, chosen by the input format.
      #if str($peptide_src.fmt) == 'proteomic':
        ## is_of_type() also matches subtypes of the accepted datatypes, which an
        ## exact comparison of the datatype extension would silently skip.
        #if $peptide_src.input.is_of_type('fasta'):
          --fasta='$peptide_src.input'
        #elif $peptide_src.input.is_of_type('mzid'):
          --mzid='$peptide_src.input'
        #elif $peptide_src.input.is_of_type('pepxml'):
          --pepxml='$peptide_src.input'
        #end if
      #elif str($peptide_src.fmt) == 'tabular':
        --tabular='$peptide_src.input_tsv'
        ## Galaxy column numbers start at 1; the value passed to the script is one less.
        #set $col = int(str($peptide_src.column)) - 1
        --column=$col
      #elif str($peptide_src.fmt) == 'fasta':
        --fasta='$peptide_src.input_fasta'
      #elif str($peptide_src.fmt) == 'mzid':
        --mzid='$peptide_src.input_mzid'
      #elif str($peptide_src.fmt) == 'pepxml':
        --pepxml='$peptide_src.input_pepxml'
      #end if
      ## Outputs: each option is emitted only when the output was selected and, for the
      ## API specific ones, when the chosen API is in that output's list (mirrors the <filter>s).
      #if 'json' in $chosen_outputs and $unipept_api in ['pept2lca', 'pept2taxa', 'peptinfo']:
        --json '$output_json'
      #end if
      #if 'ec_json' in $chosen_outputs and $unipept_api in ['pept2ec', 'pept2funct', 'peptinfo']:
        --ec_json '$output_ec_json'
      #end if
      #if 'tsv' in $chosen_outputs:
        --tsv '$output_tsv'
      #end if
      #if 'csv' in $chosen_outputs:
        --csv '$output_csv'
      #end if
      #if 'ec_tsv' in $chosen_outputs and $unipept_api in ['pept2ec', 'pept2funct', 'peptinfo']:
        --ec_tsv '$output_ec_tsv'
      #end if
      #if 'go_tsv' in $chosen_outputs and $unipept_api in ['pept2go', 'pept2funct', 'peptinfo']:
        --go_tsv '$output_go_tsv'
      #end if
      #if 'unmatched' in $chosen_outputs:
        --unmatched '$output_unmatched'
      #end if
    ]]></command>
    <inputs>
      <!-- Unipept API selection. Every branch offers equate_il and extra; the taxonomy
           switches (names) and the GO grouping switch (domains) are added per API. -->
      <conditional name="unipept">
          <param name="api" type="select" label="Unipept application" >
              <option value="pept2lca" selected="true">pept2lca: lowest common ancestor</option>
              <option value="pept2taxa">pept2taxa: organisms associated with the UniProt entries containing a given tryptic peptide</option>
              <option value="pept2prot">pept2prot: UniProt entries containing a given tryptic peptide</option>
              <option value="pept2ec">pept2ec: Tryptic peptides and associated EC terms</option>
              <option value="pept2go">pept2go: Tryptic peptides and associated GO terms</option>
              <option value="pept2funct">pept2funct: Tryptic peptides and associated EC and GO terms</option>
              <option value="peptinfo">peptinfo: Tryptic peptides and associated EC and GO terms and lowest common ancestor taxonomy</option>
          </param>
          <when value="pept2lca">
              <expand macro="equate_il" />
              <expand macro="extra">
                  <help>Return the complete lineage of the taxonomic lowest common ancestor, and include ID fields.</help>
              </expand>
              <expand macro="names" />
          </when>
          <when value="pept2taxa">
              <expand macro="equate_il" />
              <expand macro="extra_true">
                  <help>Return the complete lineage of each organism, and include ID fields.</help>
              </expand>
              <expand macro="names" />
          </when>
          <when value="pept2prot">
              <expand macro="equate_il" />
              <expand macro="extra">
                  <help>Return additional information fields: taxon_name, ec_references, go_references, refseq_ids, refseq_protein_ids, insdc_ids, insdc_protein_ids
                        WARNING: Huge performance penalty!  Only use for small number of peptides when the extra information is required.
                  </help>
              </expand>
          </when>
          <when value="pept2ec">
              <expand macro="equate_il" />
              <expand macro="extra_true">
                  <help>Return the name of the EC-number.
                  </help>
              </expand>
          </when>
          <when value="pept2go">
              <expand macro="equate_il" />
              <expand macro="extra_true">
                  <help>Return the name of the GO-term.
                  </help>
              </expand>
              <expand macro="domains" />
          </when>
          <when value="pept2funct">
              <expand macro="equate_il" />
              <expand macro="extra_true">
                  <help>Return the name of the  EC-number and GO-term.
                  </help>
              </expand>
              <expand macro="domains" />
          </when>
          <when value="peptinfo">
              <expand macro="equate_il" />
              <expand macro="extra_true">
                  <help>Return the name of the  EC-number and GO-term.
                  </help>
              </expand>
              <expand macro="domains" />
              <expand macro="names" />
          </when>
      </conditional>
      <!-- Peptide source. "proteomic" accepts any of the three proteomics formats in one
           dataset selector; the remaining choices restrict the selector to a single format. -->
      <conditional name="peptide_src">
        <param name="fmt" type="select" label="Peptides input format" >
          <option value="proteomic">proteomics formats:  mzid, pepxml, fasta</option>
          <option value="tabular">tabular</option>
          <option value="fasta">fasta</option>
          <option value="mzid">mzid</option>
          <option value="pepxml">pepxml</option>
        </param>
        <when value="proteomic">
          <param name="input" type="data" format="mzid,pepxml,fasta" label="Peptide Input" />
        </when>
        <when value="tabular">
          <param name="input_tsv" type="data" format="tabular" label="Tabular Input Containing Peptide column" />
          <param name="column" label="Select column with peptides" type="data_column" numerical="false" data_ref="input_tsv" />
        </when>
        <when value="fasta">
          <param name="input_fasta" type="data" format="fasta" label="Peptide Fasta Input" />
        </when>
        <when value="mzid">
          <param name="input_mzid" type="data" format="mzid" label="mzIdentML Input" />
        </when>
        <when value="pepxml">
          <param name="input_pepxml" type="data" format="pepxml" label="pepXML Input" />
        </when>
      </conditional>
      <!-- Output selection. The option values are matched by name in the <command> template
           and in the <filter> of each output, so they must stay in sync with both. -->
      <param name="selected_outputs" type="select" multiple="true" display="checkboxes" label="Choose outputs">
        <option value="tsv" selected="true">Tabular with one line per peptide</option>
        <option value="csv">Comma Separated Values (.csv) with one line per peptide</option>
        <option value="json">JSON Taxonomy Tree (for pept2lca, pept2taxa, and peptinfo)</option>
        <option value="go_tsv">Peptide GO terms in normalized tabular (for pept2go, pept2funct, and peptinfo)</option>
        <option value="ec_tsv">Peptide EC terms in normalized tabular (for pept2ec, pept2funct, and peptinfo)</option>
        <option value="ec_json">JSON EC Coverage Tree (for pept2ec, pept2funct, and peptinfo)</option>
        <option value="unmatched">Unmatched peptides</option>
      </param>
      <!-- Adds the strict option to the command line when checked. -->
      <param name="strict" type="boolean" truevalue="--strict" falsevalue="" checked="false" label="Exit with error on invalid peptides, otherwise ignore them"/>
    </inputs>
    <outputs>
      <!-- Every output is optional: its <filter> mirrors the condition under which the
           <command> template passes the matching output option to unipept.py. -->

      <!-- Taxonomy tree. The former <change_format> for api "pept2prot" was removed: the
           filter below never creates this output for pept2prot, so it could not apply. -->
      <data name="output_json" format="d3_hierarchy" label="${tool.name} ${unipept.api} on ${on_string} Taxonomy json">
        <filter>'json' in selected_outputs and unipept['api'] in ('pept2lca', 'pept2taxa', 'peptinfo')</filter>
      </data>
      <!-- EC tree. -->
      <data name="output_ec_json" format="d3_hierarchy" label="${tool.name} ${unipept.api} on ${on_string} EC json">
        <filter>'ec_json' in selected_outputs and unipept['api'] in ('pept2ec', 'pept2funct', 'peptinfo')</filter>
      </data>
      <!-- Main tabular result; one comment (header) line is recorded in the metadata. -->
      <data name="output_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} tsv">
        <filter>'tsv' in selected_outputs</filter>
        <actions>
            <action name="comment_lines" type="metadata" default="1" />
            <!-- Disabled: per-API column names for the main tabular output. -->
            <!--
            <conditional name="unipept.api">
                <when value="pept2funct">
                    <action name="column_names" type="metadata" default="peptide,total_protein_count,ec_numbers,ec_protein_counts,ec_names,go_terms,go_protein_counts,go_names" />
                </when>
                <when value="pept2go">
                    <action name="column_names" type="metadata" default="peptide,total_protein_count,go_terms,go_protein_counts,go_names" />
                </when>
                <when value="pept2ec">
                    <action name="column_names" type="metadata" default="peptide,total_protein_count,ec_numbers,ec_protein_counts,ec_names" />
                </when>
            </conditional>
            -->
        </actions>
      </data>
      <data name="output_csv" format="csv" label="${tool.name} ${unipept.api} on ${on_string} csv">
        <filter>'csv' in selected_outputs</filter>
      </data>
      <!-- Normalized EC table with fixed column names. -->
      <data name="output_ec_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} EC tsv">
        <filter>'ec_tsv' in selected_outputs and unipept['api'] in ('pept2ec', 'pept2funct', 'peptinfo')</filter>
        <actions>
            <action name="column_names" type="metadata" default="Peptide,Total Protein Count,EC Number,Protein Count,EC Name" />
        </actions>
      </data>
      <!-- Normalized GO table with fixed column names. -->
      <data name="output_go_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} GO tsv">
        <filter>'go_tsv' in selected_outputs and unipept['api'] in ('pept2go', 'pept2funct', 'peptinfo')</filter>
        <actions>
            <action name="column_names" type="metadata" default="Peptide,Total Protein Count,GO Term,Protein Count,GO Name" />
        </actions>
      </data>
      <!-- Single-column list of unmatched peptides. -->
      <data name="output_unmatched" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} unmatched">
        <filter>'unmatched' in selected_outputs</filter>
        <actions>
            <action name="column_names" type="metadata" default="Unmatched Peptide" />
        </actions>
      </data>
    </outputs>
    <!-- Functional tests. Each one checks the outputs for expected text fragments only.
         NOTE(review): these presumably need network access to the Unipept service, since
         no canned responses are referenced here; confirm before running offline. -->
    <tests>
      <!-- pept2lca on column 2 of a tabular input; checks the tsv and unmatched outputs. -->
      <test>
        <param name="api" value="pept2lca"/>
        <param name="fmt" value="tabular"/>
        <param name="input_tsv" value="tryptic.tsv"/>
        <param name="column" value="2"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="selected_outputs" value="tsv,unmatched"/>
        <output name="output_tsv">
            <assert_contents>
              <has_text text="Homininae" />
            </assert_contents>
        </output>
        <output name="output_unmatched">
            <assert_contents>
              <has_text text="QTAMAV" />
            </assert_contents>
        </output>
      </test>
      <!-- pept2lca on a fasta input; checks the taxonomy json and the tsv outputs. -->
      <test>
        <param name="api" value="pept2lca"/>
        <param name="fmt" value="fasta"/>
        <param name="input_fasta" value="peptide.fa"/>
        <param name="equate_il" value="True"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="selected_outputs" value="json,tsv"/>
        <output name="output_json">
            <assert_contents>
              <has_text text="VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPHIPIDDLTMVVYDPDKGSNGTFLLSLGGPDAEAFSVSPERAAGSASVQVLVRVSALVDYERQTAMAV" />
            </assert_contents>
        </output>
        <output name="output_tsv">
            <assert_contents>
              <has_text text="9606" />
              <has_text text="9596" />
            </assert_contents>
        </output>
      </test>
      <!-- pept2taxa on a fasta input with extra and names switched off; checks the tsv output. -->
      <test>
        <param name="api" value="pept2taxa"/>
        <param name="fmt" value="fasta"/>
        <param name="input_fasta" value="peptide.fa"/>
        <param name="equate_il" value="True"/>
        <param name="extra" value="False"/>
        <param name="names" value="False"/>
        <param name="selected_outputs" value="tsv"/>
        <output name="output_tsv">
            <assert_contents>
              <has_text text="sapiens" />
              <has_text text="paniscus" />
              <has_text text="Gorilla" />
            </assert_contents>
        </output>
      </test>
      <!-- pept2funct on column 2 of a tabular input; checks the tsv, EC tsv and GO tsv outputs.
           The unmatched output is requested but has no assertion. -->
      <test>
        <param name="api" value="pept2funct"/>
        <param name="fmt" value="tabular"/>
        <param name="input_tsv" value="input.tsv"/>
        <param name="column" value="2"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="selected_outputs" value="tsv,ec_tsv,go_tsv,unmatched"/>
        <output name="output_tsv">
            <assert_contents>
              <has_text text="GO:0004802" />
              <has_text text="2.2.1.1" />
            </assert_contents>
        </output>
        <output name="output_ec_tsv">
            <assert_contents>
              <has_text text="2.2.1.1" />
            </assert_contents>
        </output>
        <output name="output_go_tsv">
            <assert_contents>
              <has_text text="GO:0004802" />
            </assert_contents>
        </output>
      </test>
    </tests>
    <help><![CDATA[
    **Unipept** 

    Retrieve UniProt and taxonomic information for tryptic peptides.
    
    Unipept API documentation - http://unipept.ugent.be/apidocs 

    **Input**

    Input peptides can be retrieved from tabular, fasta, mzid, or pepxml datasets.  
 
    Processing details::

        The input peptides are split into tryptic peptide fragments in order to match the Unipept records.
        Only fragments that are complete tryptic peptides between 5 and 50 amino acids in length will be matched by Unipept.
        The match to the most specific tryptic fragment is reported.


    **Unipept APIs**

    **pept2prot**  - http://unipept.ugent.be/apidocs/pept2prot

    Returns the list of UniProt entries containing a given tryptic peptide. This is the same information as provided on the Protein matches tab when performing a search with the Tryptic Peptide Analysis in the web interface. 

    By default, each object contains the following information fields extracted from the UniProt record::

        peptide: the peptide that matched this record
        uniprot_id: the UniProt accession number of the matching record
        taxon_id: the NCBI taxon id of the organism associated with the matching record

    When the extra parameter is set to true, objects contain the following additional fields extracted from the UniProt record::

        taxon_name: the name of the organism associated with the matching UniProt record
        ec_references: a space separated list of associated EC numbers
        go_references: a space separated list of associated GO terms
        refseq_ids: a space separated list of associated RefSeq accession numbers
        refseq_protein_ids: a space separated list of associated RefSeq protein accession numbers
        insdc_ids: a space separated list of associated insdc accession numbers
        insdc_protein_ids: a space separated list of associated insdc protein accession numbers


    **pept2taxa**  - http://unipept.ugent.be/apidocs/pept2taxa

    Returns the set of organisms associated with the UniProt entries containing a given tryptic peptide. This is the same information as provided on the Lineage table tab when performing a search with the Tryptic Peptide Analysis in the web interface.

    By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

        peptide: the peptide that matched this record
        taxon_id: the NCBI taxon id of the organism associated with the matching record
        taxon_name: the name of the organism associated with the matching record
        taxon_rank: the taxonomic rank of the organism associated with the matching record

    When the extra parameter is set to true, objects contain additional information about the lineages of the organism extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::

        superkingdom_id
        kingdom_id
        subkingdom_id
        superphylum_id
        phylum_id
        subphylum_id
        superclass_id
        class_id
        subclass_id
        infraclass_id
        superorder_id
        order_id
        suborder_id
        infraorder_id
        parvorder_id
        superfamily_id
        family_id
        subfamily_id
        tribe_id
        subtribe_id
        genus_id
        subgenus_id
        species_group_id
        species_subgroup_id
        species_id
        subspecies_id
        varietas_id
        forma_id


    **pept2lca**  - http://unipept.ugent.be/apidocs/pept2lca

    Returns the taxonomic lowest common ancestor for a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.

    By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

        peptide: the peptide that matched this record
        taxon_id: the NCBI taxon id of the organism associated with the matching record
        taxon_name: the name of the organism associated with the matching record
        taxon_rank: the taxonomic rank of the organism associated with the matching record

    When the extra parameter is set to true, objects contain additional information about the lineage of the taxonomic lowest common ancestor extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::

        superkingdom_id
        kingdom_id
        subkingdom_id
        superphylum_id
        phylum_id
        subphylum_id
        superclass_id
        class_id
        subclass_id
        infraclass_id
        superorder_id
        order_id
        suborder_id
        infraorder_id
        parvorder_id
        superfamily_id
        family_id
        subfamily_id
        tribe_id
        subtribe_id
        genus_id
        subgenus_id
        species_group_id
        species_subgroup_id
        species_id
        subspecies_id
        varietas_id
        forma_id

    **pept2ec**  - http://unipept.ugent.be/apidocs/pept2ec

    Returns the functional EC-numbers associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.

    By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

        peptide: the peptide that matched this record
        total_protein_count: Total amount of proteins matched with the given peptide
        ec_number: EC-number associated with the current tryptic peptide.
        protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current EC-number.
        name: Optional, name of the EC-number. Included when the extra parameter is set to true.


    **pept2go**  - http://unipept.ugent.be/apidocs/pept2go

    Returns the functional GO-terms associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.

    By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

        peptide: the peptide that matched this record
        total_protein_count: Total amount of proteins matched with the given peptide
        go_term: The GO-term associated with the current tryptic peptide.
        protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current GO-term.
        name: Optional, name of the GO-term. Included when the extra parameter is set to true.


    **pept2funct**  - http://unipept.ugent.be/apidocs/pept2funct

    Returns the functional EC-numbers and GO-terms associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.

    By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

        peptide: the peptide that matched this record
        total_protein_count: Total amount of proteins matched with the given peptide
        ec_number: EC-number associated with the current tryptic peptide.
        protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current EC-number.
        name: Optional, name of the EC-number. Included when the extra parameter is set to true.
        go_term: The GO-term associated with the current tryptic peptide.
        protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current GO-term.
        name: Optional, name of the GO-term. Included when the extra parameter is set to true.


    **Attributions**

    The Unipept metaproteomics analysis pipeline
    Bart Mesuere1,*, Griet Debyser2, Maarten Aerts3, Bart Devreese2, Peter Vandamme3 and Peter Dawyndt1
    Article first published online: 11 FEB 2015
    DOI: 10.1002/pmic.201400361
    http://onlinelibrary.wiley.com/doi/10.1002/pmic.201400361/abstract;jsessionid=BFF1994E4C14DA73D7C907EB208AD710.f04t04

    ]]></help>
  <!-- DOI citations take the bare DOI (no "doi:" scheme prefix), as in Galaxy's documented examples. -->
  <citations>
    <citation type="doi">10.1002/pmic.201400361</citation>
  </citations>

</tool>