Mercurial > repos > cjav > cummerbund
changeset 0:6e2abd76a853
Uploaded
author | cjav |
---|---|
date | Tue, 14 Feb 2012 11:58:00 -0500 |
parents | |
children | 65940895e1c6 |
files | cummerbund_wrapper.xml |
diffstat | 1 files changed, 286 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cummerbund_wrapper.xml Tue Feb 14 11:58:00 2012 -0500 @@ -0,0 +1,286 @@ +<tool id="cummerbund" name="cummeRbund" version="0.0.1"> + <description>is an R package that is designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output</description> + <command interpreter="python"> + cummerbund_wrapper.py + --r-script ${script_file} + --html-report-from-directory "${output_html}" "${output_html.files_path}" + </command> + <inputs> + <conditional name="backend_database_source"> + <param name="backend_database_selector" type="select" label="Will you select a backend database file from the history or do you want to build a new one using cuffdiff output?"> + <option value="history" selected="true">Use backend database from the history</option> + <option value="cuffdiff_output">Build backend database using cuffdiff output</option> + </param> + <when value="cuffdiff_output"> + <param format="tabular" name="isoforms_fpkm_tracking" type="data" label="Transcript FPKM tracking"/> + <param format="tabular" name="isoforms_exp" type="data" label="Transcript differential expression testing"/> + <param format="tabular" name="genes_fpkm_tracking" type="data" label="Gene FPKM tracking"/> + <param format="tabular" name="genes_exp" type="data" label="Gene differential expression testing"/> + <param format="tabular" name="tss_groups_fpkm_tracking" type="data" label="TSS groups FPKM tracking"/> + <param format="tabular" name="tss_groups_exp" type="data" label="TSS groups differential expression testing"/> + <param format="tabular" name="cds_fpkm_tracking" type="data" label="CDS FPKM tracking"/> + <param format="tabular" name="cds_exp_diff" type="data" label="CDS FPKM differential expression testing"/> + <param format="tabular" name="cds_diff" type="data" label="CDS overloading diffential expression testing"/> + <param format="tabular" name="promoters_diff" type="data" label="Promoters differential expression testing"/> + <param format="tabular" name="splicing_diff" type="data" label="Splicing differential expression testing"/> + <param name="rebuild" type="hidden" value="TRUE"/> + </when> + <when value="history"> + <param name="input_database" type="data" format="data" label="Select backend database (sqlite)"/> + </when> + </conditional> + <repeat name="plots" title="Plots"> + <param name="width" type="text" value="1280" label="The width of the image"/> + <param name="height" type="text" value="960" label="The height of the image"/> + <conditional name="plot"> + <param name="type" type="select" label="Plot type"> + <option value="density" selected="true">Density</option> + <option value="boxplot">Boxplot</option> + <option value="scatter">Scatter</option> + <option value="volcano">Volcano</option> + <option value="heatmap">Heatmap</option> + <option value="cluster">Cluster</option> + <option value="expressionplot">Expression Plot</option> + <option value="expressionbarplot">Expression Bar Plot</option> + </param> + <when value="density"> + </when> + <when value="boxplot"> + </when> + <when value="scatter"> + <param name="x" type="text" label="Sample name for x axis"/> + <param name="y" type="text" label="Sample name for y axis"/> + <param name="smooth" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Add a smooth-fit regression line"/> + <conditional name="multiple_genes"> + <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/> + <when value="T"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + </when> + </conditional> + </when> + <when value="volcano"> + <param name="x" type="text" label="First sample name for comparison"/> + <param name="y" type="text" label="Second sample name for comparison"/> + <conditional name="multiple_genes"> + <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/> + <when value="T"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + </when> + </conditional> + </when> + <when value="heatmap"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <param name="clustering" type="select" label="Cluster by"> + <option value="row">Row</option> + <option value="column">Column</option> + <option value="both" selected="true">Both</option> + <option value="none">None</option> + </param> + <param name="labcol" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/> + <param name="labrow" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/> + <param name="border" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw border around plot?"/> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + </when> + <when value="cluster"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <param name="k" type="text" label="Number of pre-defined clusters to attempt to find."/> + <param name="iter_max" type="text" value="100" label="Max iterations"/> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + </when> + <when value="expressionplot"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <param name="draw_summary" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw a 'summary' line with mean FPKM + values for each condition?"/> + <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/> + <param name="gene_id" type="text" label="Gene ID"/> + </when> + <when value="expressionbarplot"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/> + <param name="gene_id" type="text" label="Gene ID"/> + </when> + </conditional> + </repeat> + </inputs> + <outputs> + <data format="data" name="output_database" label="${tool.name} on ${on_string}: Database File (sqlite)"> + <filter>backend_database_source['backend_database_selector'] == "cuffdiff_output"</filter> + </data> + <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)"/> + </outputs> + +<!--> + <tests> + <test> + <param name="" value=""/> + <output name="" file=""/> + </test> + </tests> +--> + <configfiles> + <configfile name="script_file"> + +## Feature Selection ## +get_features <- function(myGenes, f="gene") { + if (f == "isoforms") + return(isoforms(myGenes)) + else if (f == "tss") + return(TSS(myGenes)) + else if (f == "cds") + return(CDS(myGenes)) + else + return(myGenes) +} + +## Main Function ## + +## Load cummeRbund library +library("cummeRbund") + +## Initialize cuff object +cuff <- readCufflinks(dir = "", +#if $backend_database_source.backend_database_selector == "cuffdiff_output": + dbFile = "${output_database}", + geneFPKM = "${genes_fpkm_tracking}", + geneDiff = "${genes_exp}", + isoformFPKM = "${isoforms_fpkm_tracking}", + isoformDiff = "${isoforms_exp}", + TSSFPKM = "${tss_groups_fpkm_tracking}", + TSSDiff = "${tss_groups_exp}", + CDSFPKM = "${cds_fpkm_tracking}", + CDSExpDiff = "${cds_exp_diff}", + CDSDiff = "${cds_diff}", + promoterFile = "${promoters_diff}", + splicingFile = "${splicing_diff}", + rebuild = T) +#else: + dbFile = "${backend_database_source.input_database}", + rebuild = F) +#end if + +#for $i, $p in enumerate($plots, start=1): + #set $filename = "plot%02d-%s.png" % ($i, $p.plot['type']) +png(filename = "${filename}", width = ${p.width}, height = ${p.height}) + + ## Density plot ## + #if $p.plot['type'] == "density": +csDensity(genes(cuff)) +devname = dev.off() + + ## Boxplot ## + #elif $p.plot['type'] == "boxplot": +csBoxplot(genes(cuff)) +devname = dev.off() + + ## Scatter ## + #elif $p.plot['type'] == "scatter": + #if $p.plot.multiple_genes['multiple_genes_selector']: +myGeneIds <- c() + #for $g in $p.plot.multiple_genes['genes']: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csScatter(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) + #else +csScatter(genes(cuff), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) + #end if +devname = dev.off() + + ## Volcano ## + #elif $p.plot['type'] == "volcano": + #if $p.plot.multiple_genes['multiple_genes_selector']: +myGeneIds <- c() + #for $g in $p.plot.multiple_genes['genes']: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csVolcano(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}") + #else +csVolcano(genes(cuff), "${p.plot.x}", "${p.plot.y}") + #end if +devname = dev.off() + + ## Heatmap ## + #elif $p.plot['type'] == "heatmap": +myGeneIds <- c() + #for $g in $p.plot.genes: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csHeatmap(get_features(myGenes, "${p.plot.features}"), clustering="${p.plot.clustering}", labCol="${p.plot.labcol}", labRow="${p.plot.labrow}", border="${p.plot.border}") +devname = dev.off() + + ## Cluster ## + #elif $p.plot['type'] == "cluster": +myGeneIds <- c() + #for $g in $p.plot.genes: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csCluster(get_features(myGenes, "${p.plot.features}"), k=${p.plot.k}, iter.max="${p.plot.iter_max}") +devname = dev.off() + + ## Expression Plot ## + #elif $p.plot['type'] == "expressionplot": +myGeneId <- "$p.plot.gene_id" +myGenes <- getGenes(cuff, myGeneId) +expressionPlot(get_features(myGenes, "${p.plot.features}"), drawSummary=${p.plot.draw_summary}, iter.max="${p.plot.show_error_bars}") +devname = dev.off() + + ## Expression Bar Plot ## + #elif $p.plot['type'] == "expressionbarplot": +myGeneId <- "$p.plot.gene_id" +myGenes <- getGenes(cuff, myGeneId) +expressionBarplot(get_features(myGenes, "${p.plot.features}"), iter.max="${p.plot.show_error_bars}") +devname = dev.off() + #end if + +#end for + </configfile> + </configfiles> + + <help> +This tool allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data. In addition, provides numerous plotting functions for commonly used visualizations. + </help> + +</tool>