view QDNAseq.xml @ 23:5f8b99ae75ef draft

Uploaded
author stef
date Wed, 18 Jun 2014 04:46:56 -0400
parents beb7abe277b3
children 9f4e0192de10
line wrap: on
line source

<tool id="QDNAseq" name="QDNAseq" version="0.0.1">
  
  <!-- Dependencies Galaxy has to resolve for this tool: one environment setting and two
       version-pinned packages.
       NOTE(review): QDNASEQ_SCRIPT_PATH is not referenced by the command in this file;
       presumably the R script reads it itself. Confirm before removing. -->
  <requirements>
    <requirement type="set_environment">QDNASEQ_SCRIPT_PATH</requirement>
    <requirement type="package" version="3.0.3">R</requirement>
    <requirement type="package" version="1.0.4">bioc_qdnaseq</requirement>
  </requirements>

  <!-- Short tool description -->
  <description>Quantitative copy number aberration detection</description>
  
  <!-- The rendered config file (configfile "qdnaseq_cfg") is the only argument: a temporary
       file that hands every parameter to R via source(). This note is kept outside the
       command body so the command text remains one uninterrupted text node. -->
  <command interpreter="Rscript">
    QDNAseq.R
    $qdnaseq_cfg
  </command>

  <stdio>
    <!-- Any exit code of 1 or higher means the R script did not finish. -->
    <!-- R packages report errors and warnings in different ways, so this cannot be made
         watertight; only the exit code is checked here. -->
    <exit_code range="1:" level="fatal" description="R script did not finish correctly, check log" />
  </stdio>
  
  <inputs>
    <!-- ==================== -->
    <!-- General inputs -->
    <!-- ==================== -->
    <!-- Free-text analysis name: used in the output labels and passed to R as outputName.
         Must contain at least one non-whitespace character. -->
    <param name="jobName"
           type="text"
           optional="false"
           value="TEST"
           label="Analysis/output name"
           help="Supply a name for the outputs to remind you what they contain">
      <validator type="empty_field" />
      <validator type="regex" message="This field should contain some non-whitespace character">.*\S</validator>
    </param>
    <!-- Bin size choice; the option value is the size in kb and is cast with as.integer()
         in the config file. The first option (1Mb) is listed first in the select. -->
    <param name="binSize"
           type="select"
           label="Select bin-size to use (kb)"
           help="Larger bin sizes provide faster analysis but lower resolution">
      <option value="1000">1Mb</option>
      <option value="100">100kb</option>
      <option value="30">30kb</option>
      <option value="15">15kb</option>
      <option value="5">5kb</option>
      <option value="1">1kb</option>
    </param>
    <!-- Switch for the segmentation/calling step; the value is cast with as.logical()
         in the config file. -->
    <param name="doCall"
           type="select"
           label="Also segment and call with CGHcall"
           help="This setting will be set to yes if called output is requested (see additional history outputs)">
      <option value="TRUE">yes</option>
      <option value="FALSE">no</option>
    </param>
    <!-- Sequencing setup; only one choice is offered, the paired-end option is disabled. -->
    <param name="experimentType"
           type="select"
           label="Type of sequencing data"
           help="Currently only single end reads of length 50 are supported within Galaxy">
      <option value="SR50">Single Read 50bp</option>
      <!-- <option value="PE1000">PairedEnd1000</option> -->
    </param>
    
    <!--
    <param name="excludeChrs" type="select" multiple="true" label="Exclude certain Chromosomes">
      <option value="X">X</option>
      <option value="Y">Y</option>
    </param>
    -->

    <!-- ==================== -->
    <!-- Input BAMs -->
    <!-- ==================== -->
    <!-- One or more BAM datasets; the config file loops over them to collect paths and names. -->
    <param name="bams"
           type="data"
           format="bam"
           multiple="true"
           optional="false"
           label="Input BAMs"
           help="Select all BAM files to include in the analysis" />

    <!-- ==================== -->
    <!-- This section contains galaxy history output settings -->
    <!-- ==================== -->
    <!-- Chooses which optional RDS datasets are added to the history next to the report.
         Both branches define the same three names because the config file reads all of
         them regardless of the selected branch. -->
    <conditional name="extra_history_outputs">
      <param name="show" type="select" label="Show additional history outputs">
        <option value="NO">Only output Report to history</option>
        <option value="YES">Select additional history outputs</option>
      </param>
      <!-- Each extra output is switched on individually -->
      <when value="YES">
        <param name="readcounts_rds"
               type="select"
               label="Also output readcounts RDS (R object) to history">
          <option value="FALSE">no</option>
          <option value="TRUE">yes</option>
        </param>
        <param name="copynumbers_rds"
               type="select"
               label="Also output copynumbers RDS (R object) to history">
          <option value="FALSE">no</option>
          <option value="TRUE">yes</option>
        </param>
        <param name="calls_rds"
               type="select"
               label="Also output called segments RDS (R object) to history">
          <option value="FALSE">no</option>
          <option value="TRUE">yes</option>
        </param>
      </when>
      <!-- Nothing extra requested: all three flags are fixed to FALSE -->
      <when value="NO">
        <param name="readcounts_rds" type="hidden" value="FALSE" />
        <param name="copynumbers_rds" type="hidden" value="FALSE" />
        <param name="calls_rds" type="hidden" value="FALSE" />
      </when>
    </conditional>

    <!-- ==================== -->
    <!-- Option to use your own bin annotations -->
    <!-- ==================== -->
    <!-- Lets the user supply a bin-annotation dataset from the history. When not used,
         binannotation_file is defined as an empty hidden value so the config file can
         still reference it. -->
    <conditional name="use_own_binannotation">
      <param name="show" type="select" label="Use your own bin annotations from history">
        <option value="no">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="yes">
        <param name="binannotation_file"
               type="data"
               format="rds"
               multiple="false"
               label="R data structure file with bin-annotations"
               help="If you made your own bin-annotations with the QDNAseq bioconductor package you can upload them to your history and select here" />
      </when>
      <when value="no">
        <param name="binannotation_file" type="hidden" value="" />
      </when>
    </conditional>
    <!-- ==================== -->
    <!-- Optional advanced options -->
    <!-- ==================== -->
    <!-- Advanced segmentation and filtering settings. The "no" branch repeats every
         parameter as a hidden default, because the config file reads advanced.* whichever
         branch is selected. -->
    <conditional name="advanced">
      <param name="show" type="select" label="Show advanced options">
        <option value="no">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="yes">

        <param name="undo_splits" type="select" label="undoSplits" help="If set to sdundo, see undoSD below">
          <option value="sdundo">sdundo</option>
          <option value="prune">prune</option>
          <option value="none">none</option>
        </param>

        <param name="undoSD" size="10" type="float" value="1" label="undoSD" help='The number of SDs between means to keep a split if undo.splits="sdundo".' />

        <param name="blacklist" type="select" label="Filter blacklisted bins (blacklist)" help="Will exclude all blacklisted bins in the binannotation if set">
          <option value="TRUE">yes</option>
          <option value="FALSE">no</option>
        </param>

        <param name="mappability" type="integer" value="0" min="0" max="100" label="Filter bins with lower mappability" help="Will exclude all bins with lower mappability than this number (0-100)" />

        <param name="debug" type="select" label="DEBUG" help="Uses the inbuilt LGG data instead of input BAMs">
          <option value="FALSE">no</option>
          <option value="TRUE">yes</option>
        </param>
      </when>
      <!-- need to set defaults because params are passed to R anyway -->
      <when value="no">
        <param name="undoSD" type="hidden" value="1" />
        <param name="undo_splits" type="hidden" value="sdundo" />
        <param name="blacklist" type="hidden" value="TRUE" />
        <param name="mappability" type="hidden" value="0" />
        <param name="debug" type="hidden" value="FALSE" />
      </when>
    </conditional>

    <!-- ==================== -->
    <!-- Optional graphical/plotting options -->
    <!-- ==================== -->
    <!-- Plot size and the chromosomes hidden in plots. The "no" branch supplies the same
         values as the visible defaults: 960 x 480 with X and Y hidden. -->
    <conditional name="plot_options">
      <param name="show" type="select" label="Show graphical options">
        <option value="no">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="yes">
        <param name="plot_width" size="3" type="integer" value="960" label="Plot width" />
        <param name="plot_height" size="3" type="integer" value="480" label="Plot height" />
        <param name="exclude_chrs"
               type="select"
               multiple="true"
               label="Hide these chromosomes in plots"
               help="Currently only standard human chromosomes supported. NOTE: other filters might also exclude chromosomes">
          <!-- autosomes -->
          <option value="1">1</option>
          <option value="2">2</option>
          <option value="3">3</option>
          <option value="4">4</option>
          <option value="5">5</option>
          <option value="6">6</option>
          <option value="7">7</option>
          <option value="8">8</option>
          <option value="9">9</option>
          <option value="10">10</option>
          <option value="11">11</option>
          <option value="12">12</option>
          <option value="13">13</option>
          <option value="14">14</option>
          <option value="15">15</option>
          <option value="16">16</option>
          <option value="17">17</option>
          <option value="18">18</option>
          <option value="19">19</option>
          <option value="20">20</option>
          <option value="21">21</option>
          <option value="22">22</option>
          <!-- sex chromosomes, pre-selected -->
          <option value="X" selected="true">X</option>
          <option value="Y" selected="true">Y</option>
        </param>
      </when>
      <!-- hidden defaults so the config file can always read plot_options values -->
      <when value="no">
        <param name="plot_width" type="hidden" value="960" />
        <param name="plot_height" type="hidden" value="480" />
        <param name="exclude_chrs" type="hidden" value="X,Y" />
      </when>
    </conditional>
  </inputs>
  <!-- ==================== -->
  <!-- This config is sourced in R code -->
  <!-- ==================== -->
  <configfiles>
    <configfile name="qdnaseq_cfg">
## This file is sourced by the QDNAseq R wrapper script;
## it is how all Galaxy parameters are passed to R.

## required params
as.integer( "${binSize}" ) -> binSize
"${experimentType}" -> experimentType
"${jobName}" -> outputName

## extra params
as.logical( "${doCall}" ) -> doCall
"${htmlFile}" -> outputHtml
"${htmlFile.files_path}" -> outputPath
"${use_own_binannotation.binannotation_file}" -> binAnnotations

## advanced options
as.double( "${advanced.undoSD}" ) -> undoSD
as.logical( "${advanced.debug}" ) -> debug
as.logical( "${advanced.blacklist}" ) -> filterBlacklistedBins
as.integer( "${advanced.mappability}" ) -> mappabilityCutoff
"${advanced.undo_splits}" -> undoSplits

## history output params
as.logical( "${extra_history_outputs.readcounts_rds}" ) -> doOutputReadcountsRds
as.logical( "${extra_history_outputs.copynumbers_rds}" ) -> doOutputCopynumbersRds
as.logical( "${extra_history_outputs.calls_rds}" ) -> doOutputCallsRds
"${rdsReadCounts}" -> readCountsDatasetFile
"${rdsCopyNumbers}" -> copyNumbersDatasetFile
"${rdsCalls}" -> calledSegmentsDatasetFile

## plotting params
as.integer( "${plot_options.plot_width}" ) -> PLOT_WIDTH
as.integer( "${plot_options.plot_height}" ) -> PLOT_HEIGHT
"${plot_options.exclude_chrs}" -> excludeChrsString

## input BAMs init
c() -> bamsPaths
c() -> bamsNames

## Dataset names are user-editable, so backslashes and double quotes are escaped
## (backslashes first) to keep the name inside the R string literal it is written into.
#for bam in $bams# 
#set $bamName = str($bam.name).replace('\\', '\\\\').replace('"', '\\"')
c( bamsPaths, "${bam}" ) -> bamsPaths
c( bamsNames, "${bamName}" ) -> bamsNames
#end for

    </configfile>
  </configfiles>

  <!-- ==================== -->
  <!-- Main output is an html based report, additional on request -->
  <!-- ==================== -->
  <outputs>

    <!-- Always created: the HTML report. The config file passes both this dataset and its
         files_path to R (outputHtml / outputPath). -->
    <data format="html" name="htmlFile" label="QDNAseq Report ${binSize}kb (${jobName})" />
    
    <!-- Optional RDS datasets: each one has a filter comparing the matching
         extra_history_outputs value to the string "TRUE". -->
    <data format="rds" name="rdsReadCounts" label="${jobName}_readCounts_${binSize}kb.rds">
      <filter> extra_history_outputs['readcounts_rds'] == "TRUE" </filter><!-- <filter>("readcounts_rds" in outputs)</filter> -->
    </data>

    <data format="rds" name="rdsCopyNumbers" label="${jobName}_copyNumbers_${binSize}kb.rds">
      <filter> extra_history_outputs['copynumbers_rds'] == "TRUE" </filter>
    </data>

    <data format="rds" name="rdsCalls" label="${jobName}_calls_${binSize}kb.rds">
      <filter> extra_history_outputs['calls_rds'] == "TRUE" </filter>
    </data>

  </outputs>

  <!-- ==================== -->
  <!-- Tests still to be done -->
  <!-- ==================== -->

  <!-- 
  <tests>
    <test>
      <param name="input1" value="5.bed" />
      <param name="distance" value="1" />
      <param name="minregions" value="2" />
      <param name="returntype" value="1" />
      <output name="output" file="gops-cluster-1.bed" />     
    </test>
  </tests>
  -->
  <!-- <requirements>
      <requirement type="package">ucsc_tools</requirement>
  </requirements>
-->

  <help>
.. class:: infomark

**Introduction**

This tool is a wrapper for the R Bioconductor package QDNAseq_

.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html

It determines the copy number state of human chromosomes 1 - 22 for (shallow coverage) whole genome sequencing data.

-----

.. class:: warningmark

The input BAMs are expected to be **single end reads of 50bp length** mapped to the GRCh37/hg19 genome build. Other experiment setups are currently not supported within Galaxy. See the documentation of QDNAseq on Bioconductor on how to deal with different setups.

.. class:: warningmark

Requires **internet access** for downloading bin-annotations from Bitbucket and to show the CSS styling of the final report.

.. class:: warningmark

If the data is noisy, a **larger binsize** should be chosen

-----

**Example**

To be done

-----

**Citation**

For the underlying tool please cite: Ilari Scheinin, Daoud Sie et al. DNA copy number analysis of fresh and formalin-fixed specimens by whole-genome sequencing: improved correction of systematic biases and exclusion of problematic regions, (submitted). See also the Bioconductor package_ documentation.

.. _package: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
  </help>

</tool>