view customProDB.xml @ 12:2656b09d2046 draft

Uploaded
author galaxyp
date Mon, 30 Jan 2017 09:43:57 -0500
parents 982fb2cde6c5
children 37cd89a40cea
line wrap: on
line source

<tool id="custom_pro_db" name="CustomProDB" version="1.14.0">
  <description>Generate protein FASTAs from exosome or transcriptome data</description>
  <requirements>
    <requirement type="package" version="1.14.0">bioconductor-customprodb</requirement>
  </requirements>
  <stdio>
    <exit_code range="1:" level="fatal" description="Job Failed" />
  </stdio>
  <command><![CDATA[
    Rscript --vanilla '$__tool_directory__/customProDB.R'
       --bam='$genome_annotation.bamInput'
       --bai='${genome_annotation.bamInput.metadata.bam_index}'
       --vcf='$genome_annotation.vcfInput'
       --rpkmCutoff=$rpkmCutoff
       --outputFile='${output_rpkm}'

       #if str($genome_annotation.source) == 'history':
            --exon_anno='$genome_annotation.exonAnno'
            --proteinseq='$genome_annotation.proteinSeq'
            --procodingseq='$genome_annotation.proCodingSeq'
            --ids='$genome_annotation.ids'
            #if str($genome_annotation.dbsnpInCoding) != 'None':
                --dbsnpinCoding='$genome_annotation.dbsnpInCoding'
            #end if
            #if str($genome_annotation.cosmic) != 'None':
                --cosmic='$genome_annotation.cosmic"
            #end if
       #else:
            #set index_path = $genome_annotation.builtin.fields.path
            --exon_anno='$index_path/exon_anno.RData'
            --proteinseq='$index_path/proseq.RData'
            --procodingseq='$index_path/procodingseq.RData'
            --ids='$index_path/ids.RData'
            #if $genome_annotation.dbsnpInCoding:
                --dbsnpinCoding='$index_path/dbsnpinCoding.RData'
            #end if
            #if $genome_annotation.cosmic:
                --cosmic='$index_path/cosmic.RData'
            #end if
       #end if
       2>1
]]>
  </command>
  <inputs>
    <conditional name="genome_annotation">
      <param name="source" type="select" label="Will you select a genome annotation from your history or use a built-in annotation?" help="See `Annotations` section of help below">
        <option value="builtin">Use a built-in genome annotation</option>
        <option value="history">Use annotation from your history</option>
      </param>
      <when value="builtin">
        <param name="builtin" type="select" label="Select genome annotation" help="If your genome of interest is not listed, contact the Galaxy team">
          <options from_data_table="customProDB">
            <filter type="sort_by" column="2"/>
            <validator type="no_options" message="No annotations are available for the selected input dataset"/>
          </options>
        </param>
        <param name="bamInput" type="data" format="bam" label="BAM file">
            <validator type="unspecified_build" />
            <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="customProDB" type="dataset_metadata_in_data_table" />
        </param>
        <!--<param name="baiInput" type="data" format="bam_index" label="BAM Index (BAI) file">
            <validator type="unspecified_build" />
            <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="customProDB" type="dataset_metadata_in_data_table" />
        </param>-->
        <param name="vcfInput" type="data" format="vcf" label="VCF file">
            <validator type="unspecified_build" />
            <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="customProDB" type="dataset_metadata_in_data_table" />
        </param>
        <param name="dbsnpInCoding" type="boolean" value="" label="Annotate SNPs with rsid from dbSNP (select organisms only)" />
        <param name="cosmic" type="boolean" value="" label="Annotate somatic SNPs from COSMIC (human only)" />
      </when>
      <when value="history">
        <param name="exonAnno" type="data" format="RData" help="A dataframe of exon annotations in an RData file" label="Exon Annotations" />
        <param name="proteinSeq" type="data" format="RData" help="A dataframe containing protein ids and protein sequences in an RData file" label="Protein Sequences" />
        <param name="proCodingSeq" type="data" format="RData" help="A dataframe cotaining coding sequences for each protein in an RData file" label="Protein Coding Sequences" />
        <param name="ids" type="data" format="RData" help="A dataframe cotaining IDs for each protein in an RData file" label="Protein IDs" />
        <param name="bamInput" type="data" format="bam" label="BAM file">
            <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
        </param>
        <!--<param name="baiInput" type="data" format="bam_index" label="BAM Index file">
            <validator check="dbkey" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
        </param>-->
        <param name="vcfInput" type="data" format="vcf" label="VCF file" />
        <param name="dbsnpInCoding" type="data" format="RData" label="A dataframe containing dbSNP rsids" optional="true" />
        <param name="cosmic" type="data" format="RData" label="A dataframe containing somatic SNPs from COSMIC (human only)" optional="true" />
      </when>
    </conditional>
    <param name="rpkmCutoff" type="float" value="1" min="0" label="Transcript Expression Cutoff (RPKM)" help="If non-zero, if a transcript does not meet this expression cutoff (based on RPKM) then it will not be included in the output database." />
  </inputs>
  <outputs>
    <data format="fasta" name="output_rpkm" from_work_dir="output_rpkm.fasta" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_rpkm.fasta"/>
    <data format="fasta" name="output_snv" from_work_dir="output_snv.fasta" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_snv.fasta"/>
    <data format="fasta" name="output_indel" from_work_dir="output_indel.fasta" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_indel.fasta"/>
  </outputs>
  <tests>
    <test>
      <param name="bamInput" value="test1_sort.bam" dbkey="hg19" />
      <param name="vcfInput" value="test1.vcf" dbkey="hg19" />
      <param name="source" value="history" />
      <param name="exonAnno" value="exon_anno.RData" />
      <param name="proteinSeq" value="proseq.RData" />
      <param name="proCodingSeq" value="procodingseq.RData" />
      <param name="ids" value="ids.RData" />
      <output name="output_rpkm" file="test_rpkm.fasta" />
      <output name="output_snv" file="test_snv.fasta" />
      <output name="output_indel" file="test_indel.fasta" />
    </test>
  </tests>
  <help><![CDATA[
**Description**

Generate protein FASTAs from exosome or transcriptome data (in the form of BAM files).

  ]]></help>
  <citations>
    <citation type="doi">10.1093/bioinformatics/btt543</citation>
    <citation type="bibtex">@misc{toolsGalaxyP, author = {Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub 
repository},
                                  year = {2017}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = 
{$sha1$}" -->
  </citations>
</tool>