Mercurial > repos > galaxyp > custom_pro_db

<tool id="custom_pro_db" name="CustomProDB" version="1.10.0">
  <!-- Short summary of what the tool produces. -->
  <description>Generate protein FASTAs from exosome or transcriptome data</description>
  <!-- Any exit status of 1 or greater is reported as a fatal "Job Failed" error. -->
  <stdio>
    <exit_code description="Job Failed" level="fatal" range="1:"/>
  </stdio>
  <!-- Cheetah template that builds the customProDB.R command line. The notes inside
       the template (lines starting with two hash signs) are Cheetah comments and are
       not part of the generated command. The script name must stay the first token. -->
  <command interpreter="Rscript --vanilla"><![CDATA[customProDB.R

       ## Arguments shared by both annotation sources. The BAM index comes from the
       ## bam_index metadata of the BAM dataset, not from a separate input.
       --bam="$genome_annotation.bamInput"
       --bai="${genome_annotation.bamInput.metadata.bam_index}"
       --vcf="$genome_annotation.vcfInput"
       --rpkmCutoff=$rpkmCutoff
       ## NOTE(review): the script is handed the path of the output_rpkm dataset, while
       ## all three declared outputs are collected from output_rpkm.fasta,
       ## output_snv.fasta and output_indel.fasta in the working directory. Confirm
       ## against customProDB.R that these refer to the same files.
       --outputFile="${output_rpkm}"

       #if str($genome_annotation.source) == "history":
            ## Annotation supplied as RData datasets from the history.
            --exon_anno="$genome_annotation.exonAnno"
            --proteinseq="$genome_annotation.proteinSeq"
            --procodingseq="$genome_annotation.proCodingSeq"
            --ids="$genome_annotation.ids"
            ## Optional datasets render as the string "None" when left unset.
            #if str($genome_annotation.dbsnpInCoding) != "None":
                --dbsnpinCoding="$genome_annotation.dbsnpInCoding"
            #end if
            #if str($genome_annotation.cosmic) != "None":
                --cosmic="$genome_annotation.cosmic"
            #end if
       #else:
            ## Built-in annotation: every file lives under the data table entry's path.
            #set index_path = $genome_annotation.builtin.fields.path
            --exon_anno="$index_path/exon_anno.RData"
            --proteinseq="$index_path/proseq.RData"
            --procodingseq="$index_path/procodingseq.RData"
            --ids="$index_path/ids.RData"
            ## In the built-in branch the dbSNP switch is the boolean param "labelrsid";
            ## there is no "dbsnpInCoding" param in that branch.
            #if $genome_annotation.labelrsid:
                --dbsnpinCoding="$index_path/dbsnpinCoding.RData"
            #end if
            #if $genome_annotation.cosmic:
                --cosmic="$index_path/cosmic.RData"
            #end if
       #end if

2>&1]]></command>
  <inputs>
    <!-- The annotation source decides which set of inputs is shown: a built-in
         annotation from the customProDB data table, or RData datasets from the history. -->
    <conditional name="genome_annotation">
      <param name="source" type="select" label="Will you select a genome annotation from your history or use a built-in annotation?" help="See `Annotations` section of help below">
        <option value="builtin">Use a built-in genome annotation</option>
        <option value="history">Use annotation from your history</option>
      </param>
      <when value="builtin">
        <param name="builtin" type="select" label="Select genome annotation" help="If your genome of interest is not listed, contact the Galaxy team">
          <options from_data_table="customProDB">
            <filter type="sort_by" column="2"/>
            <validator type="no_options" message="No annotations are available for the selected input dataset"/>
          </options>
        </param>
        <!-- BAM and VCF inputs must carry a build (dbkey) that is present in the customProDB data table. -->
        <param name="bamInput" type="data" format="bam" label="BAM file">
            <validator type="unspecified_build" />
            <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="customProDB" type="dataset_metadata_in_data_table" />
        </param>
        <!--<param name="baiInput" type="data" format="bam_index" label="BAM Index (BAI) file">
            <validator type="unspecified_build" />
            <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="customProDB" type="dataset_metadata_in_data_table" />
        </param>-->
        <param name="vcfInput" type="data" format="vcf" label="VCF file">
            <validator type="unspecified_build" />
            <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="customProDB" type="dataset_metadata_in_data_table" />
        </param>
        <!-- Optional annotation switches; both are off unless the user ticks them. -->
        <param name="labelrsid" type="boolean" checked="false" label="Annotate SNPs with rsid from dbSNP (select organisms only)" />
        <param name="cosmic" type="boolean" checked="false" label="Annotate somatic SNPs from COSMIC (human only)" />
      </when>
      <when value="history">
        <!-- The four required annotation dataframes, each stored in its own RData dataset. -->
        <param name="exonAnno" type="data" format="RData" metadata_name="dbkey" help="A dataframe of exon annotations in an RData file" label="Exon Annotations" />
        <param name="proteinSeq" type="data" format="RData" metadata_name="dbkey" help="A dataframe containing protein ids and protein sequences in an RData file" label="Protein Sequences" />
        <param name="proCodingSeq" type="data" format="RData" metadata_name="dbkey" help="A dataframe containing coding sequences for each protein in an RData file" label="Protein Coding Sequences" />
        <param name="ids" type="data" format="RData" metadata_name="dbkey" help="A dataframe containing IDs for each protein in an RData file" label="Protein IDs" />
        <!-- No separate BAM index input: the command line takes the index from the
             bam_index metadata of the BAM dataset, which the validator below requires. -->
        <param name="bamInput" type="data" format="bam" label="BAM file">
            <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
        </param>
        <param name="vcfInput" type="data" format="vcf" label="VCF file" />
        <!-- Optional extras; when left empty the matching command-line option is omitted. -->
        <param name="dbsnpInCoding" type="data" format="RData" label="A dataframe containing dbSNP rsids" optional="true" />
        <param name="cosmic" type="data" format="RData" label="A dataframe containing somatic SNPs from COSMIC (human only)" optional="true" />
      </when>
    </conditional>
    <param name="rpkmCutoff" type="float" value="1" min="0" label="Transcript Expression Cutoff (RPKM)" help="If non-zero, if a transcript does not meet this expression cutoff (based on RPKM) then it will not be included in the output database." />
  </inputs>
  <outputs>
    <!-- Three FASTA datasets, each collected from a fixed file name in the job working
         directory. Every label is the BAM dataset name with its last dot-suffix
         removed, followed by a suffix naming the kind of database. -->
    <data name="output_rpkm"
          format="fasta"
          from_work_dir="output_rpkm.fasta"
          label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_rpkm.fasta"/>
    <data name="output_snv"
          format="fasta"
          from_work_dir="output_snv.fasta"
          label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_snv.fasta"/>
    <data name="output_indel"
          format="fasta"
          from_work_dir="output_indel.fasta"
          label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_indel.fasta"/>
  </outputs>
  <tests>
    <!-- Single run using annotation datasets from the history on an hg19 BAM/VCF pair;
         each of the three FASTA outputs is compared with a stored reference file. -->
    <test>
      <!-- Annotation source and its RData datasets -->
      <param name="source" value="history"/>
      <param name="exonAnno" value="exon_anno.RData"/>
      <param name="proteinSeq" value="proseq.RData"/>
      <param name="proCodingSeq" value="procodingseq.RData"/>
      <param name="ids" value="ids.RData"/>
      <!-- Alignment and variant inputs -->
      <param name="bamInput" value="test1_sort.bam" dbkey="hg19"/>
      <param name="vcfInput" value="test1.vcf" dbkey="hg19"/>
      <!-- Expected results -->
      <output name="output_rpkm" file="test_rpkm.fasta"/>
      <output name="output_snv" file="test_snv.fasta"/>
      <output name="output_indel" file="test_indel.fasta"/>
    </test>
  </tests>
  <help>
**Description**

Generate protein FASTAs from exosome or transcriptome data (in the form of BAM files).

**Annotations**

The genome annotation can come from one of two sources:

- *Built-in*: an annotation listed in this Galaxy server's ``customProDB`` data table. The BAM and VCF datasets must have a database/build assigned, and that build must be present in the table.
- *History*: RData datasets from your history. Exon annotations, protein sequences, protein coding sequences and protein IDs are required; the dbSNP rsid and COSMIC dataframes are optional.
</help>
</tool>
author	galaxyp
date	Wed, 08 Jun 2016 14:04:54 -0400
parents	b83a4002aab1
children	ed65d110c1b5