view data_manager/customProDB_annotation.xml @ 12:1ebf39dd0dca draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot/custom_pro_db commit e9e2b5f5d422c1ff2f4db1ef83eb585ae3815517
author galaxyp
date Mon, 26 Jun 2017 17:13:02 -0400
parents 63c48a6eddb6
children
line wrap: on
line source

<!--
  Galaxy data manager tool (tool_type="manage_data"). It runs customProDB_annotation.R
  to build the transcriptome annotation used by customProDB and reports the result
  through a single data_manager_json output.
-->
<tool id="custom_pro_db_annotation_data_manager" name="CustomProDB Annotation" tool_type="manage_data" version="1.16.1.0">
  <description>builder</description>
  <!-- R and Bioconductor packages, each pinned to an exact version, requested for the tool's runtime environment. -->
  <requirements>
    <requirement type="package" version="3.3.1">r-base</requirement>
    <!-- The customProDB requirement below is commented out, so that package is not requested here.
         NOTE(review): presumably customProDB_annotation.R obtains it some other way; confirm against the script. -->
    <!--<requirement type="package" version="1.14.0">bioconductor-customprodb</requirement>-->
    <requirement type="package" version="1.18.0">bioconductor-rgalaxy</requirement>
    <requirement type="package" version="1.21.0">bioconductor-biocinstaller</requirement>
    <requirement type="package" version="1.20.3">bioconductor-variantannotation</requirement>
    <requirement type="package" version="1.26.4">bioconductor-genomicfeatures</requirement>
    <requirement type="package" version="1.11.1">r-devtools</requirement>
    <requirement type="package" version="3.98_1.4">r-xml</requirement>
    <requirement type="package" version="0.10.11">r-rmysql</requirement>
    <requirement type="package" version="1.0.2">r-testthat</requirement>
    <requirement type="package" version="0.1.0">r-getoptlong</requirement>
    <requirement type="package" version="1.1.2">r-stringi</requirement>
    <requirement type="package" version="1.1.0">r-stringr</requirement>
    <requirement type="package" version="1.10.0">r-data.table</requirement>
    <requirement type="package" version="0.4_10">r-sqldf</requirement>
    <requirement type="package" version="0.6_6">r-gsubfn</requirement>
    <requirement type="package" version="2.3_47">r-chron</requirement>
    <requirement type="package" version="0.3_10">r-proto</requirement>
    <requirement type="package" version="1.8.4">r-plyr</requirement>
  </requirements>
  <!-- Exit-code rules: ":-1" matches negative codes and "1:" matches positive codes, so together
       they cover every non-zero exit status. No level attribute is set on either rule, so
       Galaxy's default severity for exit_code rules applies. -->
  <stdio>
    <exit_code range=":-1" />
    <exit_code range="1:" />
  </stdio>
  <!--
    Command line (Cheetah template). Runs customProDB_annotation.R from the tool directory
    with Rscript and passes it:
      * outputFile: the path of the out_file dataset;
      * when source is "refseq": the selected dbkey, plus its display text with leading and
        trailing double/single quote characters stripped, as dbkey_description;
      * otherwise (the only other declared source is "ensembl"): the dataset and host fields
        of the chosen ensembl_dataset option, plus that option's name field as dbkey_description;
      * dbsnp: the free-text dbSNP identifier, passed even when it is empty;
      * cosmic: the boolean's truevalue when checked, otherwise an empty string.
    The script's stderr is merged into stdout.
  -->
  <command><![CDATA[
    Rscript --vanilla '$__tool_directory__/customProDB_annotation.R'
        --outputFile '${out_file}'

        #if str($transcriptome_annotation.source) == 'refseq':
          --dbkey '${transcriptome_annotation.dbkey}'
          --dbkey_description '${ transcriptome_annotation.dbkey.get_display_text().strip("\"'") }'
        #else:
          --ensembl_dataset '${transcriptome_annotation.ensembl_dataset.fields.dataset}'
          --ensembl_host '${transcriptome_annotation.ensembl_dataset.fields.host}'
          --dbkey_description '${transcriptome_annotation.ensembl_dataset.fields.name}'
        #end if

        --dbsnp '${dbsnp}'
        $cosmic
    2>&1
]]>
  </command>
  <!-- User-facing parameters: annotation source (with source-specific genome selection),
       an optional-looking free-text dbSNP identifier, and a COSMIC on/off switch. -->
  <inputs>
    <!-- The command template branches on "source" to decide which genome parameter to read. -->
    <conditional name="transcriptome_annotation">
      <param name="source" type="select" label="What source do you want to use for mapping between genes, transcripts, and proteins?" help="RefSeq transcripts are like NM_xxxx, Ensembl transcripts are like ENSTxxxx">
        <option value="refseq">Annotate transcriptome with RefSeq (from NCBI/UCSC)</option>
        <option value="ensembl">Annotate transcriptome with Ensembl (from Biomart)</option>
      </param>
      <when value="refseq">
        <!-- The help examples are UCSC database names; the dog assembly CanFam3.1 has the dbkey "canFam3". -->
        <param type="genomebuild" name="dbkey" value="" label="UCSC dbKey for reference genome" help="e.g. hg38, hg19, mm10, canFam3" />
      </when>
      <when value="ensembl">
        <!-- Options are read from ensembl_datasets.loc: column 0 = dataset, column 1 = host,
             column 2 = value. No "name" column is declared here.
             NOTE(review): the command template reads fields.name for this parameter, which
             relies on Galaxy defaulting the name column to the value column; confirm. -->
        <param type="select" name="ensembl_dataset" value="" label="Ensembl reference genome identifier">
          <options from_file="ensembl_datasets.loc">
            <column name="value" index="2" />
            <column name="dataset" index="0" />
            <column name="host" index="1" />
            <!-- Fails validation with the message below when the .loc file yields no options. -->
            <validator type="no_options" message="Ensembl dataset list not loaded"/>
          </options>
        </param>
      </when>
    </conditional>
    <param type="text" name="dbsnp" value="" label="dbSNP identifier (select organisms only, e.g. human, mouse, cow)" help="e.g. snp142" />
    <!-- When checked, the truevalue string is inserted verbatim into the command line; otherwise nothing is added. -->
    <param type="boolean" name="cosmic" truevalue="--cosmic true" falsevalue="" label="Annotate somatic SNPs from COSMIC (human only)" />
  </inputs>
  <!-- Single output: the data_manager_json dataset whose path the command passes to the R script as outputFile. -->
  <outputs>
    <data name="out_file" format="data_manager_json"/>
  </outputs>
  <!-- User-facing help text in reStructuredText; its lines start at column 0, so keep that
       layout when editing because the markup is whitespace-sensitive. -->
  <help>

.. class:: infomark

This data manager creates the transcriptome annotation in the RData format needed by customProDB.
Two annotation sources are supported: UCSC and Ensembl.
Note that because UCSC's table browser only provides current gene annotations for a given genome assembly,
only the Ensembl annotation is entirely reproducible, i.e. running again with the same settings next month will create the same annotation.

Ensembl chromosome names (1,2, ...) are converted to UCSC format (chr1,chr2, ...) to ease integration with other Galaxy tools.
  </help>
  <!-- Citations shown with the tool: one DOI entry and one BibTeX entry for the
       Galaxy Proteomics Tools GitHub repository. -->
  <citations>
  <citation type="doi">10.1093/bioinformatics/btt543</citation>
  <citation type="bibtex">@misc{toolsGalaxyP, author = {Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},
                                year = {2017}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = {$sha1$}" -->
  </citations>
</tool>