Mercurial > repos > cjav > cummerbund
changeset 12:69cf88d9513e
Uploaded
author | cjav |
---|---|
date | Wed, 15 Feb 2012 14:09:10 -0500 |
parents | 8e13da66d9b8 |
children | 5aa7ab6227eb |
files | cummerbund_wrapper.py cummerbund_wrapper.xml datatypes_conf.xml |
diffstat | 3 files changed, 385 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# Changeset diff header: --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cummerbund_wrapper.py Wed Feb 15 14:09:10 2012 -0500 @@ -0,0 +1,77 @@

### Runs "r_script" and generates a HTML report
### Inspired on cuffdiff_wrapper.py and gatk_wrapper.py
### Carlos Borroto <carlos.borroto@gmail.com>

import optparse
import os
import shutil
import subprocess
import sys
import tempfile
from xml.sax.saxutils import escape


def stop_err( msg ):
    """Write *msg* to stderr and terminate the process.

    The exit status is non-zero so that callers inspecting the return code
    (and not only stderr) also see the failure.
    """
    sys.stderr.write( "%s\n" % msg )
    sys.exit( 1 )


def html_report_from_directory( html_out, dir ):
    """Write an HTML index of the entries of *dir* to *html_out*.

    html_out -- writable text file object receiving the page.
    dir      -- directory whose entries (sorted by name) are listed; each
                entry becomes a relative link.

    Entry names are HTML-escaped so that characters such as '&', '<' or '"'
    cannot break the generated markup.
    """
    html_out.write( '<html>\n<head>\n<title>Galaxy - cummeRbund Output</title>\n</head>\n<body>\n<p/>\n<ul>\n' )
    for fname in sorted( os.listdir( dir ) ):
        safe_name = escape( fname, { '"': '&quot;' } )
        html_out.write( '<li><a href="%s">%s</a></li>\n' % ( safe_name, safe_name ) )
    html_out.write( '</ul>\n</body>\n</html>\n' )


def _run_command( cmd, cwd ):
    """Run *cmd* (an argument list) in *cwd*; return (returncode, stderr text).

    stderr is spooled to an anonymous temporary file instead of a pipe, so a
    very chatty child process cannot fill the pipe buffer and dead-lock.
    """
    with tempfile.TemporaryFile() as err_file:
        proc = subprocess.Popen( args=cmd, cwd=cwd, stderr=err_file )
        returncode = proc.wait()
        err_file.seek( 0 )
        stderr = err_file.read()
    # Python 3 returns bytes here; Python 2 already returns str.
    if not isinstance( stderr, str ):
        stderr = stderr.decode( 'utf-8', 'replace' )
    return returncode, stderr


def __main__():
    """Command line entry point: run the R script, then index its outputs.

    The R script is executed with the report directory as working directory,
    so every file it creates ends up next to (and is linked from) the HTML
    report.
    """
    #Parse Command Line
    parser = optparse.OptionParser()

    # wrapper options
    parser.add_option('', '--r-script', dest='r_script', help='R script')
    parser.add_option('', '--html-report-from-directory', dest='html_report_from_directory', type="string", nargs=2, help='"Target HTML File" "Directory"')

    (options, args) = parser.parse_args()

    # Fail with a usage message instead of a TypeError on missing options.
    if not options.r_script:
        parser.error( '--r-script is required' )
    if not options.html_report_from_directory:
        parser.error( '--html-report-from-directory requires "Target HTML File" "Directory"' )

    (html_filename, html_dir) = options.html_report_from_directory

    # Make html report directory for output.
    if not os.path.isdir( html_dir ):
        os.makedirs( html_dir )

    # Build command as an argument list: no shell is involved, so paths
    # containing spaces or shell metacharacters are passed through verbatim.
    cmd = [ 'Rscript', '--vanilla', options.r_script ]

    # Debugging.
    sys.stdout.write( ' '.join( cmd ) + '\n' )
    sys.stdout.flush()

    # Run command.
    try:
        returncode, stderr = _run_command( cmd, html_dir )

        # Error checking.
        if returncode != 0:
            raise Exception( stderr )
    except Exception as e:
        stop_err( 'Error running R script. ' + str( e ) )

    # write the html report
    with open( html_filename, 'w' ) as html_out:
        html_report_from_directory( html_out, html_dir )


if __name__=="__main__": __main__()
<!-- Changeset diff header: /dev/null (Thu Jan 01 00:00:00 1970 +0000) to b/cummerbund_wrapper.xml (Wed Feb 15 14:09:10 2012 -0500), hunk @@ -0,0 +1,302 @@ -->
<!--
  Galaxy tool definition for cummeRbund.

  The <inputs> section collects either an existing cummeRbund SQLite backend
  or the cuffdiff output tables needed to build one, plus a repeatable list
  of plots. The <configfile> section is a Cheetah template that expands those
  inputs into an R script; cummerbund_wrapper.py runs that script with
  Rscript inside the HTML output's files directory and then writes an index
  page linking every generated file.
-->
<tool id="cummerbund" name="cummeRbund" version="0.0.3">

    <description>R package designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output</description>

    <command interpreter="python">
        cummerbund_wrapper.py
            --r-script ${script_file}
            --html-report-from-directory "${output_html}" "${output_html.files_path}"
    </command>

    <inputs>
        <!-- Backend source: reuse a database from the history or build one from cuffdiff tables. -->
        <conditional name="backend_database_source">
            <param name="backend_database_selector" type="select" label="Will you select a backend database file from the history or do you want to build a new one using cuffdiff output?">
                <option value="history" selected="true">Use backend database from the history</option>
                <option value="cuffdiff_output">Build backend database using cuffdiff output</option>
            </param>
            <when value="cuffdiff_output">
                <param format="tabular" name="isoforms_fpkm_tracking" type="data" label="Transcript FPKM tracking"/>
                <param format="tabular" name="isoforms_exp" type="data" label="Transcript differential expression testing"/>
                <param format="tabular" name="genes_fpkm_tracking" type="data" label="Gene FPKM tracking"/>
                <param format="tabular" name="genes_exp" type="data" label="Gene differential expression testing"/>
                <param format="tabular" name="tss_groups_fpkm_tracking" type="data" label="TSS groups FPKM tracking"/>
                <param format="tabular" name="tss_groups_exp" type="data" label="TSS groups differential expression testing"/>
                <param format="tabular" name="cds_fpkm_tracking" type="data" label="CDS FPKM tracking"/>
                <param format="tabular" name="cds_exp_diff" type="data" label="CDS FPKM differential expression testing"/>
                <param format="tabular" name="cds_diff" type="data" label="CDS overloading differential expression testing"/>
                <param format="tabular" name="promoters_diff" type="data" label="Promoters differential expression testing"/>
                <param format="tabular" name="splicing_diff" type="data" label="Splicing differential expression testing"/>
                <param name="rebuild" type="hidden" value="TRUE"/>
            </when>
            <when value="history">
                <param name="input_database" type="data" format="data" label="Select backend database (sqlite)"/>
            </when>
        </conditional>

        <!-- One PNG is produced per entry of this repeat. -->
        <repeat name="plots" title="Plots">
            <param name="width" type="text" value="1280" label="The width of the image"/>
            <param name="height" type="text" value="960" label="The height of the image"/>
            <conditional name="plot">
                <param name="type" type="select" label="Plot type">
                    <option value="density" selected="true">Density</option>
                    <option value="boxplot">Boxplot</option>
                    <option value="scatter">Scatter</option>
                    <option value="volcano">Volcano</option>
                    <option value="heatmap">Heatmap</option>
                    <option value="cluster">Cluster</option>
                    <option value="expressionplot">Expression Plot</option>
                    <option value="expressionbarplot">Expression Bar Plot</option>
                </param>
                <when value="density">
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                </when>
                <when value="boxplot">
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                </when>
                <when value="scatter">
                    <param name="x" type="text" label="Sample name for x axis"/>
                    <param name="y" type="text" label="Sample name for y axis"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="smooth" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Add a smooth-fit regression line"/>
                    <conditional name="multiple_genes">
                        <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/>
                        <when value="T">
                            <param name="features" type="select" label="Expression levels to plot?">
                                <option value="gene" selected="true">Genes</option>
                                <option value="isoforms">Isoforms</option>
                                <option value="tss">TSS</option>
                                <option value="cds">CDS</option>
                            </param>
                            <repeat name="genes" title="Genes">
                                <param name="gene_id" type="text" label="Gene ID"/>
                            </repeat>
                        </when>
                        <when value="F"/>
                    </conditional>
                </when>
                <when value="volcano">
                    <param name="x" type="text" label="First sample name for comparison"/>
                    <param name="y" type="text" label="Second sample name for comparison"/>
                    <conditional name="multiple_genes">
                        <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/>
                        <when value="T">
                            <param name="features" type="select" label="Expression levels to plot?">
                                <option value="gene" selected="true">Genes</option>
                                <option value="isoforms">Isoforms</option>
                                <option value="tss">TSS</option>
                                <option value="cds">CDS</option>
                            </param>
                            <repeat name="genes" title="Genes">
                                <param name="gene_id" type="text" label="Gene ID"/>
                            </repeat>
                        </when>
                        <when value="F"/>
                    </conditional>
                </when>
                <when value="heatmap">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <repeat name="genes" title="Genes">
                        <param name="gene_id" type="text" label="Gene ID"/>
                    </repeat>
                    <param name="clustering" type="select" label="Cluster by">
                        <option value="row">Row</option>
                        <option value="column">Column</option>
                        <option value="both" selected="true">Both</option>
                        <option value="none">None</option>
                    </param>
                    <param name="labcol" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/>
                    <param name="labrow" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display row labels?"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="border" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw border around plot?"/>
                </when>
                <when value="cluster">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <repeat name="genes" title="Genes">
                        <param name="gene_id" type="text" label="Gene ID"/>
                    </repeat>
                    <param name="k" type="text" label="Number of pre-defined clusters to attempt to find."/>
                    <param name="iter_max" type="text" value="100" label="Max iterations"/>
                </when>
                <when value="expressionplot">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <param name="gene_id" type="text" label="Gene ID"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="draw_summary" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw a 'summary' line with mean FPKM values for each condition?"/>
                    <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/>
                </when>
                <when value="expressionbarplot">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <param name="gene_id" type="text" label="Gene ID"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/>
                </when>
            </conditional>
        </repeat>
    </inputs>

    <outputs>
        <!-- The database output only exists when it is built from cuffdiff tables. -->
        <data format="data" name="output_database" label="${tool.name} on ${on_string}: Database File (sqlite)">
            <filter>backend_database_source['backend_database_selector'] == "cuffdiff_output"</filter>
        </data>
        <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)"/>
    </outputs>

    <requirements>
        <requirement type="binary">R</requirement>
    </requirements>

<!--
    <tests>
        <test>
            <param name="" value=""/>
            <output name="" file=""/>
        </test>
    </tests>
-->
    <configfiles>
        <!-- CDATA keeps the R assignment arrows and comparison operators literal inside XML. -->
        <configfile name="script_file"><![CDATA[

## Feature Selection ##
get_features <- function(myGenes, f="gene") {
    if (f == "isoforms")
        return(isoforms(myGenes))
    else if (f == "tss")
        return(TSS(myGenes))
    else if (f == "cds")
        return(CDS(myGenes))
    else
        return(myGenes)
}

## Main Function ##

## Load cummeRbund library
library("cummeRbund")

## Initialize cuff object
cuff <- readCufflinks(dir = "",
#if $backend_database_source.backend_database_selector == "cuffdiff_output":
                      dbFile = "${output_database}",
                      geneFPKM = "${genes_fpkm_tracking}",
                      geneDiff = "${genes_exp}",
                      isoformFPKM = "${isoforms_fpkm_tracking}",
                      isoformDiff = "${isoforms_exp}",
                      TSSFPKM = "${tss_groups_fpkm_tracking}",
                      TSSDiff = "${tss_groups_exp}",
                      CDSFPKM = "${cds_fpkm_tracking}",
                      CDSExpDiff = "${cds_exp_diff}",
                      CDSDiff = "${cds_diff}",
                      promoterFile = "${promoters_diff}",
                      splicingFile = "${splicing_diff}",
                      rebuild = T)
#else:
                      dbFile = "${backend_database_source.input_database}",
                      rebuild = F)
#end if

#for $i, $p in enumerate($plots, start=1):
    #set $filename = "plot%02d-%s.png" % ($i, $p.plot['type'])
png(filename = "${filename}", width = ${p.width}, height = ${p.height})

    ## Density plot ##
    #if $p.plot['type'] == "density":
csDensity(genes(cuff), logMode=${p.plot.log_mode})
devname = dev.off()

    ## Boxplot ##
    #elif $p.plot['type'] == "boxplot":
csBoxplot(genes(cuff), logMode=${p.plot.log_mode})
devname = dev.off()

    ## Scatter ##
    #elif $p.plot['type'] == "scatter":
        #if $p.plot.multiple_genes['multiple_genes_selector']:
myGeneIds <- c()
            #for $g in $p.plot.multiple_genes['genes']:
myGeneIds <- c(myGeneIds, "$g['gene_id']")
            #end for
myGenes <- getGenes(cuff, myGeneIds)
csScatter(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}", logMode=${p.plot.log_mode}, smooth=${p.plot.smooth})
        #else
csScatter(genes(cuff), "${p.plot.x}", "${p.plot.y}", logMode=${p.plot.log_mode}, smooth=${p.plot.smooth})
        #end if
devname = dev.off()

    ## Volcano ##
    #elif $p.plot['type'] == "volcano":
        #if $p.plot.multiple_genes['multiple_genes_selector']:
myGeneIds <- c()
            #for $g in $p.plot.multiple_genes['genes']:
myGeneIds <- c(myGeneIds, "$g['gene_id']")
            #end for
myGenes <- getGenes(cuff, myGeneIds)
csVolcano(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}")
        #else
csVolcano(genes(cuff), "${p.plot.x}", "${p.plot.y}")
        #end if
devname = dev.off()

    ## Heatmap ##
    #elif $p.plot['type'] == "heatmap":
myGeneIds <- c()
        #for $g in $p.plot.genes:
myGeneIds <- c(myGeneIds, "$g['gene_id']")
        #end for
myGenes <- getGenes(cuff, myGeneIds)
csHeatmap(get_features(myGenes, "${p.plot.features}"), clustering="${p.plot.clustering}", labCol="${p.plot.labcol}", labRow="${p.plot.labrow}", logMode=${p.plot.log_mode}, border="${p.plot.border}")
devname = dev.off()

    ## Cluster ##
    #elif $p.plot['type'] == "cluster":
myGeneIds <- c()
        #for $g in $p.plot.genes:
myGeneIds <- c(myGeneIds, "$g['gene_id']")
        #end for
myGenes <- getGenes(cuff, myGeneIds)
csCluster(get_features(myGenes, "${p.plot.features}"), k=${p.plot.k}, iter.max="${p.plot.iter_max}")
devname = dev.off()

    ## Expression Plot ##
    #elif $p.plot['type'] == "expressionplot":
myGeneId <- "$p.plot.gene_id"
myGenes <- getGenes(cuff, myGeneId)
expressionPlot(get_features(myGenes, "${p.plot.features}"), logMode=${p.plot.log_mode}, drawSummary=${p.plot.draw_summary}, showErrorbars=${p.plot.show_error_bars})
devname = dev.off()

    ## Expression Bar Plot ##
    #elif $p.plot['type'] == "expressionbarplot":
myGeneId <- "$p.plot.gene_id"
myGenes <- getGenes(cuff, myGeneId)
expressionBarplot(get_features(myGenes, "${p.plot.features}"), logMode=${p.plot.log_mode}, showErrorbars=${p.plot.show_error_bars})
devname = dev.off()
    #end if

#end for
        ]]></configfile>
    </configfiles>

    <help>
This tool allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data. In addition, provides numerous plotting functions for commonly used visualizations.
    </help>

</tool>
<?xml version="1.0"?>
<!-- Changeset diff header: /dev/null (Thu Jan 01 00:00:00 1970 +0000) to b/datatypes_conf.xml (Wed Feb 15 14:09:10 2012 -0500), hunk @@ -0,0 +1,6 @@ -->
<datatypes>
    <registration>
        <!-- Registers the "sqlite" extension as a generic binary datatype that can be chosen at upload time. -->
        <datatype
            extension="sqlite"
            type="galaxy.datatypes.binary:Binary"
            mimetype="application/octet-stream"
            display_in_upload="true"/>
    </registration>
</datatypes>