Mercurial > repos > cjav > cummerbund
changeset 6:d13d6542ea7e
Deleted selected files
author | cjav |
---|---|
date | Tue, 14 Feb 2012 16:37:48 -0500 |
parents | bcd2194a2de1 |
children | fa50a3392110 |
files | cummerbund_wrapper.py cummerbund_wrapper.xml |
diffstat | 2 files changed, 0 insertions(+), 379 deletions(-) [+] |
line wrap: on
line diff
--- a/cummerbund_wrapper.py Tue Feb 14 16:19:47 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -### Runs "r_script" and generates a HTML report -### Inspired on cuffdiff_wrapper.py and gatk_wrapper.py -### Carlos Borroto <carlos.borroto@gmail.com> - -import optparse, os, shutil, subprocess, sys, tempfile - -def stop_err( msg ): - sys.stderr.write( "%s\n" % msg ) - sys.exit() - -def html_report_from_directory( html_out, dir ): - html_out.write( '<html>\n<head>\n<title>Galaxy - cummeRbund Output</title>\n</head>\n<body>\n<p/>\n<ul>\n' ) - for fname in sorted( os.listdir( dir ) ): - html_out.write( '<li><a href="%s">%s</a></li>\n' % ( fname, fname ) ) - html_out.write( '</ul>\n</body>\n</html>\n' ) - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - - # wrapper options - parser.add_option('', '--r-script', dest='r_script', help='R script') - parser.add_option('', '--html-report-from-directory', dest='html_report_from_directory', type="string", nargs=2, help='"Target HTML File" "Directory"') - - (options, args) = parser.parse_args() - - (html_filename, html_dir) = options.html_report_from_directory - - # Make html report directory for output. - os.mkdir( html_dir ) - - # Make a tmp dir - tmp_dir = tempfile.mkdtemp( prefix='tmp-cummeRbund-' ) - - # Build command. - cmd = ( "Rscript --vanilla %s" % options.r_script ) - - # Debugging. - print cmd - - # Run command. - try: - tmp_name = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp_name, 'wb' ) - proc = subprocess.Popen( args=cmd, shell=True, cwd=html_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - - # Get stderr, allowing for case where it's very large. - tmp_stderr = open( tmp_name, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - - # Error checking. - if returncode != 0: - raise Exception, stderr - except Exception, e: - stop_err( 'Error running R script. ' + str( e ) ) - - # write the html report - html_report_from_directory( open( html_filename, 'wb' ), html_dir ) - - # Clean up temp dirs - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - -if __name__=="__main__": __main__()
--- a/cummerbund_wrapper.xml Tue Feb 14 16:19:47 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,302 +0,0 @@ -<tool id="cummerbund" name="cummeRbund" version="0.0.3"> - - <description>R package designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output</description> - - <command interpreter="python"> - cummerbund_wrapper.py - --r-script ${script_file} - --html-report-from-directory "${output_html}" "${output_html.files_path}" - </command> - - <inputs> - <conditional name="backend_database_source"> - <param name="backend_database_selector" type="select" label="Will you select a backend database file from the history or do you want to build a new one using cuffdiff output?"> - <option value="history" selected="true">Use backend database from the history</option> - <option value="cuffdiff_output">Build backend database using cuffdiff output</option> - </param> - <when value="cuffdiff_output"> - <param format="tabular" name="isoforms_fpkm_tracking" type="data" label="Transcript FPKM tracking"/> - <param format="tabular" name="isoforms_exp" type="data" label="Transcript differential expression testing"/> - <param format="tabular" name="genes_fpkm_tracking" type="data" label="Gene FPKM tracking"/> - <param format="tabular" name="genes_exp" type="data" label="Gene differential expression testing"/> - <param format="tabular" name="tss_groups_fpkm_tracking" type="data" label="TSS groups FPKM tracking"/> - <param format="tabular" name="tss_groups_exp" type="data" label="TSS groups differential expression testing"/> - <param format="tabular" name="cds_fpkm_tracking" type="data" label="CDS FPKM tracking"/> - <param format="tabular" name="cds_exp_diff" type="data" label="CDS FPKM differential expression testing"/> - <param format="tabular" name="cds_diff" type="data" label="CDS overloading diffential expression testing"/> - <param format="tabular" name="promoters_diff" type="data" label="Promoters differential expression testing"/> - <param format="tabular" name="splicing_diff" type="data" label="Splicing differential expression testing"/> - <param name="rebuild" type="hidden" value="TRUE"/> - </when> - <when value="history"> - <param name="input_database" type="data" format="data" label="Select backend database (sqlite)"/> - </when> - </conditional> - <repeat name="plots" title="Plots"> - <param name="width" type="text" value="1280" label="The width of the image"/> - <param name="height" type="text" value="960" label="The height of the image"/> - <conditional name="plot"> - <param name="type" type="select" label="Plot type"> - <option value="density" selected="true">Density</option> - <option value="boxplot">Boxplot</option> - <option value="scatter">Scatter</option> - <option value="volcano">Volcano</option> - <option value="heatmap">Heatmap</option> - <option value="cluster">Cluster</option> - <option value="expressionplot">Expression Plot</option> - <option value="expressionbarplot">Expression Bar Plot</option> - </param> - <when value="density"> - <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> - </when> - <when value="boxplot"> - <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> - </when> - <when value="scatter"> - <param name="x" type="text" label="Sample name for x axis"/> - <param name="y" type="text" label="Sample name for y axis"/> - <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> - <param name="smooth" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Add a smooth-fit regression line"/> - <conditional name="multiple_genes"> - <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/> - <when value="T"> - <param name="features" type="select" label="Expression levels to plot?"> - <option value="gene" selected="true">Genes</option> - <option value="isoforms">Isoforms</option> - <option value="tss">TSS</option> - <option value="cds">CDS</option> - </param> - <repeat name="genes" title="Genes"> - <param name="gene_id" type="text" label="Gene ID"/> - </repeat> - </when> - <when value="F"/> - </conditional> - </when> - <when value="volcano"> - <param name="x" type="text" label="First sample name for comparison"/> - <param name="y" type="text" label="Second sample name for comparison"/> - <conditional name="multiple_genes"> - <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/> - <when value="T"> - <param name="features" type="select" label="Expression levels to plot?"> - <option value="gene" selected="true">Genes</option> - <option value="isoforms">Isoforms</option> - <option value="tss">TSS</option> - <option value="cds">CDS</option> - </param> - <repeat name="genes" title="Genes"> - <param name="gene_id" type="text" label="Gene ID"/> - </repeat> - </when> - <when value="F"/> - </conditional> - </when> - <when value="heatmap"> - <param name="features" type="select" label="Expression levels to plot?"> - <option value="gene" selected="true">Genes</option> - <option value="isoforms">Isoforms</option> - <option value="tss">TSS</option> - <option value="cds">CDS</option> - </param> - <repeat name="genes" title="Genes"> - <param name="gene_id" type="text" label="Gene ID"/> - </repeat> - <param name="clustering" type="select" label="Cluster by"> - <option value="row">Row</option> - <option value="column">Column</option> - <option value="both" selected="true">Both</option> - <option value="none">None</option> - </param> - <param name="labcol" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/> - <param name="labrow" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/> - <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> - <param name="border" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw border around plot?"/> - </when> - <when value="cluster"> - <param name="features" type="select" label="Expression levels to plot?"> - <option value="gene" selected="true">Genes</option> - <option value="isoforms">Isoforms</option> - <option value="tss">TSS</option> - <option value="cds">CDS</option> - </param> - <repeat name="genes" title="Genes"> - <param name="gene_id" type="text" label="Gene ID"/> - </repeat> - <param name="k" type="text" label="Number of pre-defined clusters to attempt to find."/> - <param name="iter_max" type="text" value="100" label="Max iterations"/> - </when> - <when value="expressionplot"> - <param name="features" type="select" label="Expression levels to plot?"> - <option value="gene" selected="true">Genes</option> - <option value="isoforms">Isoforms</option> - <option value="tss">TSS</option> - <option value="cds">CDS</option> - </param> - <param name="gene_id" type="text" label="Gene ID"/> - <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> - <param name="draw_summary" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw a 'summary' line with mean FPKM - values for each condition?"/> - <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/> - </when> - <when value="expressionbarplot"> - <param name="features" type="select" label="Expression levels to plot?"> - <option value="gene" selected="true">Genes</option> - <option value="isoforms">Isoforms</option> - <option value="tss">TSS</option> - <option value="cds">CDS</option> - </param> - <param name="gene_id" type="text" label="Gene ID"/> - <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> - <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/> - </when> - </conditional> - </repeat> - </inputs> - - <outputs> - <data format="data" name="output_database" label="${tool.name} on ${on_string}: Database File (sqlite)"> - <filter>backend_database_source['backend_database_selector'] == "cuffdiff_output"</filter> - </data> - <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)"/> - </outputs> - - <requirements> - <requirement type="binary">R</requirement> - </requirements> - -<!--> - <tests> - <test> - <param name="" value=""/> - <output name="" file=""/> - </test> - </tests> ---> - <configfiles> - <configfile name="script_file"> - -## Feature Selection ## -get_features <- function(myGenes, f="gene") { - if (f == "isoforms") - return(isoforms(myGenes)) - else if (f == "tss") - return(TSS(myGenes)) - else if (f == "cds") - return(CDS(myGenes)) - else - return(myGenes) -} - -## Main Function ## - -## Load cummeRbund library -library("cummeRbund") - -## Initialize cuff object -cuff <- readCufflinks(dir = "", -#if $backend_database_source.backend_database_selector == "cuffdiff_output": - dbFile = "${output_database}", - geneFPKM = "${genes_fpkm_tracking}", - geneDiff = "${genes_exp}", - isoformFPKM = "${isoforms_fpkm_tracking}", - isoformDiff = "${isoforms_exp}", - TSSFPKM = "${tss_groups_fpkm_tracking}", - TSSDiff = "${tss_groups_exp}", - CDSFPKM = "${cds_fpkm_tracking}", - CDSExpDiff = "${cds_exp_diff}", - CDSDiff = "${cds_diff}", - promoterFile = "${promoters_diff}", - splicingFile = "${splicing_diff}", - rebuild = T) -#else: - dbFile = "${backend_database_source.input_database}", - rebuild = F) -#end if - -#for $i, $p in enumerate($plots, start=1): - #set $filename = "plot%02d-%s.png" % ($i, $p.plot['type']) -png(filename = "${filename}", width = ${p.width}, height = ${p.height}) - - ## Density plot ## - #if $p.plot['type'] == "density": -csDensity(genes(cuff)) -devname = dev.off() - - ## Boxplot ## - #elif $p.plot['type'] == "boxplot": -csBoxplot(genes(cuff)) -devname = dev.off() - - ## Scatter ## - #elif $p.plot['type'] == "scatter": - #if $p.plot.multiple_genes['multiple_genes_selector']: -myGeneIds <- c() - #for $g in $p.plot.multiple_genes['genes']: -myGeneIds <- c(myGeneIds, "$g['gene_id']") - #end for -myGenes <- getGenes(cuff, myGeneIds) -csScatter(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) - #else -csScatter(genes(cuff), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) - #end if -devname = dev.off() - - ## Volcano ## - #elif $p.plot['type'] == "volcano": - #if $p.plot.multiple_genes['multiple_genes_selector']: -myGeneIds <- c() - #for $g in $p.plot.multiple_genes['genes']: -myGeneIds <- c(myGeneIds, "$g['gene_id']") - #end for -myGenes <- getGenes(cuff, myGeneIds) -csVolcano(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}") - #else -csVolcano(genes(cuff), "${p.plot.x}", "${p.plot.y}") - #end if -devname = dev.off() - - ## Heatmap ## - #elif $p.plot['type'] == "heatmap": -myGeneIds <- c() - #for $g in $p.plot.genes: -myGeneIds <- c(myGeneIds, "$g['gene_id']") - #end for -myGenes <- getGenes(cuff, myGeneIds) -csHeatmap(get_features(myGenes, "${p.plot.features}"), clustering="${p.plot.clustering}", labCol="${p.plot.labcol}", labRow="${p.plot.labrow}", border="${p.plot.border}") -devname = dev.off() - - ## Cluster ## - #elif $p.plot['type'] == "cluster": -myGeneIds <- c() - #for $g in $p.plot.genes: -myGeneIds <- c(myGeneIds, "$g['gene_id']") - #end for -myGenes <- getGenes(cuff, myGeneIds) -csCluster(get_features(myGenes, "${p.plot.features}"), k=${p.plot.k}, iter.max="${p.plot.iter_max}") -devname = dev.off() - - ## Expression Plot ## - #elif $p.plot['type'] == "expressionplot": -myGeneId <- "$p.plot.gene_id" -myGenes <- getGenes(cuff, myGeneId) -expressionPlot(get_features(myGenes, "${p.plot.features}"), drawSummary=${p.plot.draw_summary}, iter.max="${p.plot.show_error_bars}") -devname = dev.off() - - ## Expression Bar Plot ## - #elif $p.plot['type'] == "expressionbarplot": -myGeneId <- "$p.plot.gene_id" -myGenes <- getGenes(cuff, myGeneId) -expressionBarplot(get_features(myGenes, "${p.plot.features}"), iter.max="${p.plot.show_error_bars}") -devname = dev.off() - #end if - -#end for - </configfile> - </configfiles> - - <help> -This tool allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data. In addition, provides numerous plotting functions for commonly used visualizations. - </help> - -</tool>