# HG changeset patch # User cjav # Date 1330533207 18000 # Node ID 1773e7dc45fe25d1640cb17c21285bc12fd6c83a # Parent e8a000b406f2e805ccca5154f9b30610d1366d51 Uploaded diff -r e8a000b406f2 -r 1773e7dc45fe cummerbund_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cummerbund_wrapper.py Wed Feb 29 11:33:27 2012 -0500 @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +### Runs "r_script" and generates an HTML report +### Inspired by cuffdiff_wrapper.py and gatk_wrapper.py +### Carlos Borroto + +import optparse, os, shutil, subprocess, sys, tempfile + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def html_report_from_directory( html_out, dir ): + html_out.write( '\n\nGalaxy - cummeRbund Output\n\n\n

\n

\n\n\n' ) + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + + # wrapper options + parser.add_option('', '--r-script', dest='r_script', help='R script') + parser.add_option('', '--html-report-from-directory', dest='html_report_from_directory', type="string", nargs=2, help='"Target HTML File" "Directory"') + + (options, args) = parser.parse_args() + + (html_filename, html_dir) = options.html_report_from_directory + + # Make html report directory for output. + os.mkdir( html_dir ) + + # Make a tmp dir + tmp_dir = tempfile.mkdtemp( prefix='tmp-cummeRbund-' ) + + # Build command. + cmd = ( "Rscript --vanilla %s" % options.r_script ) + + # Debugging. + print cmd + + # Run command. + try: + tmp_name = tempfile.NamedTemporaryFile( dir=tmp_dir ).name + tmp_stderr = open( tmp_name, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=html_dir, stderr=tmp_stderr.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + + # Get stderr, allowing for case where it's very large. + tmp_stderr = open( tmp_name, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + + # Error checking. + if returncode != 0: + raise Exception, stderr + except Exception, e: + stop_err( 'Error running R script. 
' + str( e ) ) + + # write the html report + html_report_from_directory( open( html_filename, 'wb' ), html_dir ) + + # Clean up temp dirs + if os.path.exists( tmp_dir ): + shutil.rmtree( tmp_dir ) + +if __name__=="__main__": __main__() diff -r e8a000b406f2 -r 1773e7dc45fe cummerbund_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cummerbund_wrapper.xml Wed Feb 29 11:33:27 2012 -0500 @@ -0,0 +1,302 @@ + + + R package designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output + + + cummerbund_wrapper.py + --r-script ${script_file} + --html-report-from-directory "${output_html}" "${output_html.files_path}" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + backend_database_source['backend_database_selector'] == "cuffdiff_output" + + + + + + R + + + + + + + + + +--> + + + +## Feature Selection ## +get_features <- function(myGenes, f="gene") { + if (f == "isoforms") + return(isoforms(myGenes)) + else if (f == "tss") + return(TSS(myGenes)) + else if (f == "cds") + return(CDS(myGenes)) + else + return(myGenes) +} + +## Main Function ## + +## Load cummeRbund library +library("cummeRbund") + +## Initialize cuff object +cuff <- readCufflinks(dir = "", +#if $backend_database_source.backend_database_selector == "cuffdiff_output": + dbFile = "${output_database}", + geneFPKM = "${genes_fpkm_tracking}", + geneDiff = "${genes_exp}", + isoformFPKM = "${isoforms_fpkm_tracking}", + isoformDiff = "${isoforms_exp}", + TSSFPKM = "${tss_groups_fpkm_tracking}", + TSSDiff = "${tss_groups_exp}", + CDSFPKM = "${cds_fpkm_tracking}", + CDSExpDiff = "${cds_exp_diff}", + CDSDiff = "${cds_diff}", + promoterFile = "${promoters_diff}", + splicingFile = "${splicing_diff}", + rebuild = T) +#else: + dbFile = 
"${backend_database_source.input_database}", + rebuild = F) +#end if + +#for $i, $p in enumerate($plots, start=1): + #set $filename = "plot%02d-%s.png" % ($i, $p.plot['type']) +png(filename = "${filename}", width = ${p.width}, height = ${p.height}) + + ## Density plot ## + #if $p.plot['type'] == "density": +csDensity(genes(cuff)) +devname = dev.off() + + ## Boxplot ## + #elif $p.plot['type'] == "boxplot": +csBoxplot(genes(cuff)) +devname = dev.off() + + ## Scatter ## + #elif $p.plot['type'] == "scatter": + #if $p.plot.multiple_genes['multiple_genes_selector']: +myGeneIds <- c() + #for $g in $p.plot.multiple_genes['genes']: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csScatter(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) + #else +csScatter(genes(cuff), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) + #end if +devname = dev.off() + + ## Volcano ## + #elif $p.plot['type'] == "volcano": + #if $p.plot.multiple_genes['multiple_genes_selector']: +myGeneIds <- c() + #for $g in $p.plot.multiple_genes['genes']: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csVolcano(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}") + #else +csVolcano(genes(cuff), "${p.plot.x}", "${p.plot.y}") + #end if +devname = dev.off() + + ## Heatmap ## + #elif $p.plot['type'] == "heatmap": +myGeneIds <- c() + #for $g in $p.plot.genes: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csHeatmap(get_features(myGenes, "${p.plot.features}"), clustering="${p.plot.clustering}", labCol="${p.plot.labcol}", labRow="${p.plot.labrow}", border="${p.plot.border}") +devname = dev.off() + + ## Cluster ## + #elif $p.plot['type'] == "cluster": +myGeneIds <- c() + #for $g in $p.plot.genes: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- 
getGenes(cuff, myGeneIds) +csCluster(get_features(myGenes, "${p.plot.features}"), k=${p.plot.k}, iter.max="${p.plot.iter_max}") +devname = dev.off() + + ## Expression Plot ## + #elif $p.plot['type'] == "expressionplot": +myGeneId <- "$p.plot.gene_id" +myGenes <- getGenes(cuff, myGeneId) +expressionPlot(get_features(myGenes, "${p.plot.features}"), drawSummary=${p.plot.draw_summary}, iter.max="${p.plot.show_error_bars}") +devname = dev.off() + + ## Expression Bar Plot ## + #elif $p.plot['type'] == "expressionbarplot": +myGeneId <- "$p.plot.gene_id" +myGenes <- getGenes(cuff, myGeneId) +expressionBarplot(get_features(myGenes, "${p.plot.features}"), iter.max="${p.plot.show_error_bars}") +devname = dev.off() + #end if + +#end for + + + + +This tool allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data. In addition, it provides numerous plotting functions for commonly used visualizations. + + + diff -r e8a000b406f2 -r 1773e7dc45fe datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Wed Feb 29 11:33:27 2012 -0500 @@ -0,0 +1,6 @@ + + + + + +