changeset 0:8ba5f0460b1e

Uploaded
author jjohnson
date Mon, 01 Oct 2012 14:08:13 -0400
parents
children 6a9bc26ab8d9
files cummerbund_wrapper.py cummerbund_wrapper.xml
diffstat 2 files changed, 383 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cummerbund_wrapper.py	Mon Oct 01 14:08:13 2012 -0400
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+### Runs "r_script" and generates a HTML report
+### Inspired on cuffdiff_wrapper.py and gatk_wrapper.py
+### Carlos Borroto <carlos.borroto@gmail.com>
+
+import optparse, os, shutil, subprocess, sys, tempfile
+
+def stop_err( msg ):
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit()
+
+def html_report_from_directory( html_out, dir ):
+    html_out.write( '<html>\n<head>\n<title>Galaxy - cummeRbund Output</title>\n</head>\n<body>\n<p/>\n<ul>\n' )
+    for fname in sorted( os.listdir( dir ) ):
+        html_out.write(  '<li><a href="%s">%s</a></li>\n' % ( fname, fname ) )
+    html_out.write( '</ul>\n</body>\n</html>\n' )
+
+def __main__():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+
+    # wrapper options
+    parser.add_option('', '--r-script', dest='r_script', help='R script')
+    parser.add_option('', '--html-report-from-directory', dest='html_report_from_directory', type="string", nargs=2, help='"Target HTML File" "Directory"')
+        
+    (options, args) = parser.parse_args()
+    
+    (html_filename, html_dir) = options.html_report_from_directory
+    
+    # Make html report directory for output.
+    os.mkdir( html_dir )
+    
+    # Make a tmp dir
+    tmp_dir = tempfile.mkdtemp( prefix='tmp-cummeRbund-' )
+    
+    # Build command.
+    cmd = ( "Rscript --vanilla %s" % options.r_script )
+    
+    # Debugging.
+    print cmd
+
+#liubo added, for test, look at the generated R script
+#    shutil.copy2(options.r_script, '/nfs/software/galaxy/r_script')
+
+    
+    # Run command.
+    try:
+        tmp_name = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
+        tmp_stderr = open( tmp_name, 'wb' )
+        proc = subprocess.Popen( args=cmd, shell=True, cwd=html_dir, stderr=tmp_stderr.fileno() )
+        returncode = proc.wait()
+        tmp_stderr.close()
+        
+        # Get stderr, allowing for case where it's very large.
+        tmp_stderr = open( tmp_name, 'rb' )
+        stderr = ''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += tmp_stderr.read( buffsize )
+                if not stderr or len( stderr ) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        tmp_stderr.close()
+        
+        # Error checking.
+        if returncode != 0:
+            raise Exception, stderr
+    except Exception, e:
+        stop_err( 'Error running R script. ' + str( e ) )
+    
+    # write the html report
+    html_report_from_directory( open( html_filename, 'wb' ), html_dir )
+    
+    # Clean up temp dirs
+    if os.path.exists( tmp_dir ):
+        shutil.rmtree( tmp_dir )
+
+if __name__=="__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cummerbund_wrapper.xml	Mon Oct 01 14:08:13 2012 -0400
@@ -0,0 +1,302 @@
+<tool id="cummerbund" name="cummeRbund" version="0.0.4">
+    
+    <description>R package designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output</description>
+    
+    <command interpreter="python">
+        cummerbund_wrapper.py
+            --r-script ${script_file}
+            --html-report-from-directory "${output_html}" "${output_html.files_path}"
+    </command>
+    
+    <inputs>
+        <conditional name="backend_database_source">
+            <param name="backend_database_selector" type="select" label="Will you select a backend database file from the history or do you want to build a new one using cuffdiff output?">
+                <option value="history" selected="true">Use backend database from the history</option>
+                <option value="cuffdiff_output">Build backend database using cuffdiff output</option>
+            </param>
+            <when value="cuffdiff_output">
+                <param format="tabular" name="isoforms_fpkm_tracking" type="data" label="Transcript FPKM tracking"/>
+                <param format="tabular" name="isoforms_exp" type="data" label="Transcript differential expression testing"/>
+                <param format="tabular" name="genes_fpkm_tracking" type="data" label="Gene FPKM tracking"/>
+                <param format="tabular" name="genes_exp" type="data" label="Gene differential expression testing"/>
+                <param format="tabular" name="tss_groups_fpkm_tracking" type="data" label="TSS groups FPKM tracking"/>
+                <param format="tabular" name="tss_groups_exp" type="data" label="TSS groups differential expression testing"/>
+                <param format="tabular" name="cds_fpkm_tracking" type="data" label="CDS FPKM tracking"/>
+                <param format="tabular" name="cds_exp_diff" type="data" label="CDS FPKM differential expression testing"/>
+                <param format="tabular" name="cds_diff" type="data" label="CDS overloading diffential expression testing"/>
+                <param format="tabular" name="promoters_diff" type="data" label="Promoters differential expression testing"/>
+                <param format="tabular" name="splicing_diff" type="data" label="Splicing differential expression testing"/>
+                <param name="rebuild" type="hidden" value="TRUE"/>
+            </when>
+            <when value="history">
+                <param name="input_database" type="data" format="data" label="Select backend database (sqlite)"/>
+            </when>
+        </conditional>
+        <repeat name="plots" title="Plots">
+			<param name="width" type="text" value="1280" label="The width of the image"/>
+			<param name="height" type="text" value="960" label="The height of the image"/>
+			<conditional name="plot">
+				<param name="type" type="select" label="Plot type">
+					<option value="density" selected="true">Density</option>
+					<option value="boxplot">Boxplot</option>
+					<option value="scatter">Scatter</option>
+					<option value="volcano">Volcano</option>
+					<option value="heatmap">Heatmap</option>
+					<option value="cluster">Cluster</option>
+					<option value="expressionplot">Expression Plot</option>
+					<option value="expressionbarplot">Expression Bar Plot</option>
+				</param>
+				<when value="density">
+					<param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
+				</when>
+				<when value="boxplot">
+					<param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
+				</when>
+				<when value="scatter">
+					<param name="x" type="text" label="Sample name for x axis"/>
+					<param name="y" type="text" label="Sample name for y axis"/>
+					<param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
+					<param name="smooth" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Add a smooth-fit regression line"/>
+					<conditional name="multiple_genes">
+						<param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/>
+						<when value="T">
+                            <param name="features" type="select" label="Expression levels to plot?">
+                                <option value="gene" selected="true">Genes</option>
+                                <option value="isoforms">Isoforms</option>
+                                <option value="tss">TSS</option>
+                                <option value="cds">CDS</option>
+                            </param>
+							<repeat name="genes" title="Genes">
+								<param name="gene_id" type="text" label="Gene ID"/>
+							</repeat> 
+						</when>
+                        <when value="F"/>
+					</conditional>
+				</when>
+				<when value="volcano">
+					<param name="x" type="text" label="First sample name for comparison"/>
+					<param name="y" type="text" label="Second sample name for comparison"/>
+					<conditional name="multiple_genes">
+						<param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/>
+						<when value="T">
+                            <param name="features" type="select" label="Expression levels to plot?">
+                                <option value="gene" selected="true">Genes</option>
+                                <option value="isoforms">Isoforms</option>
+                                <option value="tss">TSS</option>
+                                <option value="cds">CDS</option>
+                            </param>
+							<repeat name="genes" title="Genes">
+								<param name="gene_id" type="text" label="Gene ID"/>
+							</repeat> 
+						</when>
+                        <when value="F"/>
+					</conditional>
+				</when>
+				<when value="heatmap">
+					<param name="features" type="select" label="Expression levels to plot?">
+						<option value="gene" selected="true">Genes</option>
+						<option value="isoforms">Isoforms</option>
+						<option value="tss">TSS</option>
+						<option value="cds">CDS</option>
+					</param>
+					<repeat name="genes" title="Genes">
+						<param name="gene_id" type="text" label="Gene ID"/>
+					</repeat>
+					<param name="clustering" type="select" label="Cluster by">
+						<option value="row">Row</option>
+						<option value="column">Column</option>
+						<option value="both" selected="true">Both</option>
+						<option value="none">None</option>
+					</param>
+					<param name="labcol" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/>
+					<param name="labrow" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/>
+					<param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
+					<param name="border" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw border around plot?"/>
+				</when>
+				<when value="cluster">
+					<param name="features" type="select" label="Expression levels to plot?">
+						<option value="gene" selected="true">Genes</option>
+						<option value="isoforms">Isoforms</option>
+						<option value="tss">TSS</option>
+						<option value="cds">CDS</option>
+					</param>
+					<repeat name="genes" title="Genes">
+						<param name="gene_id" type="text" label="Gene ID"/>
+					</repeat>
+                    <param name="k" type="text" label="Number of pre-defined clusters to attempt to find."/>
+                    <param name="iter_max" type="text" value="100" label="Max iterations"/>
+				</when>
+				<when value="expressionplot">
+					<param name="features" type="select" label="Expression levels to plot?">
+						<option value="gene" selected="true">Genes</option>
+						<option value="isoforms">Isoforms</option>
+						<option value="tss">TSS</option>
+						<option value="cds">CDS</option>
+					</param>
+					<param name="gene_id" type="text" label="Gene ID"/>
+					<param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
+                    <param name="draw_summary" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw a 'summary' line with mean FPKM
+          values for each condition?"/>
+                    <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/>
+				</when>
+				<when value="expressionbarplot">
+					<param name="features" type="select" label="Expression levels to plot?">
+						<option value="gene" selected="true">Genes</option>
+						<option value="isoforms">Isoforms</option>
+						<option value="tss">TSS</option>
+						<option value="cds">CDS</option>
+					</param>
+					<param name="gene_id" type="text" label="Gene ID"/>
+					<param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
+                    <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/>
+				</when>
+			</conditional>
+		</repeat>
+    </inputs>
+    
+    <outputs>
+        <data format="data" name="output_database" label="${tool.name} on ${on_string}: Database File (sqlite)">
+            <filter>backend_database_source['backend_database_selector'] == "cuffdiff_output"</filter>
+        </data>
+    	<data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)"/>
+    </outputs>
+
+    <requirements>
+        <requirement type="binary">R</requirement>
+    </requirements>
+
+<!-->
+  <tests>
+    <test>
+      <param name="" value=""/>
+      <output name="" file=""/>
+    </test>
+  </tests>
+-->
+    <configfiles>
+        <configfile name="script_file">
+
+## Feature Selection ##
+get_features &lt;- function(myGenes, f="gene") {
+	if (f == "isoforms")
+		return(isoforms(myGenes))
+	else if (f == "tss")
+		return(TSS(myGenes))
+	else if (f == "cds")
+		return(CDS(myGenes))
+	else
+		return(myGenes)
+}
+
+## Main Function ##
+
+## Load cummeRbund library
+library("cummeRbund")
+
+## Initialize cuff object
+cuff &lt;- readCufflinks(dir = "",
+#if $backend_database_source.backend_database_selector == "cuffdiff_output":
+                         dbFile = "${output_database}",
+                         geneFPKM = "${genes_fpkm_tracking}",
+                         geneDiff = "${genes_exp}",
+                         isoformFPKM = "${isoforms_fpkm_tracking}",
+                         isoformDiff = "${isoforms_exp}",
+                         TSSFPKM = "${tss_groups_fpkm_tracking}",
+                         TSSDiff = "${tss_groups_exp}",
+                         CDSFPKM = "${cds_fpkm_tracking}",
+                         CDSExpDiff = "${cds_exp_diff}",
+                         CDSDiff = "${cds_diff}",
+                         promoterFile = "${promoters_diff}",
+                         splicingFile = "${splicing_diff}",
+                         rebuild = T)
+#else:
+                         dbFile = "${backend_database_source.input_database}",
+                         rebuild = F)
+#end if
+
+#for $i, $p in enumerate($plots, start=1):
+	#set $filename = "plot%02d-%s.png" % ($i, $p.plot['type'])
+png(filename = "${filename}", width = ${p.width}, height = ${p.height})
+
+    ## Density plot ##
+	#if $p.plot['type'] == "density":
+csDensity(genes(cuff))
+devname = dev.off()
+
+    ## Boxplot ##
+	#elif $p.plot['type'] == "boxplot":
+csBoxplot(genes(cuff))
+devname = dev.off()
+
+    ## Scatter ##
+	#elif $p.plot['type'] == "scatter":
+        #if $p.plot.multiple_genes['multiple_genes_selector']:
+myGeneIds &lt;- c()
+            #for $g in $p.plot.multiple_genes['genes']:
+myGeneIds &lt;- c(myGeneIds, "$g['gene_id']")
+            #end for
+myGenes &lt;- getGenes(cuff, myGeneIds)
+csScatter(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth})
+        #else
+csScatter(genes(cuff), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth})
+        #end if
+devname = dev.off()
+
+    ## Volcano ##
+	#elif $p.plot['type'] == "volcano":
+        #if $p.plot.multiple_genes['multiple_genes_selector']:
+myGeneIds &lt;- c()
+            #for $g in $p.plot.multiple_genes['genes']:
+myGeneIds &lt;- c(myGeneIds, "$g['gene_id']")
+            #end for
+myGenes &lt;- getGenes(cuff, myGeneIds)
+csVolcano(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}")
+        #else
+csVolcano(genes(cuff), "${p.plot.x}", "${p.plot.y}")
+        #end if
+devname = dev.off()
+
+    ## Heatmap ##
+	#elif $p.plot['type'] == "heatmap":
+myGeneIds &lt;- c()
+		#for $g in $p.plot.genes:
+myGeneIds &lt;- c(myGeneIds, "$g['gene_id']")
+		#end for
+myGenes &lt;- getGenes(cuff, myGeneIds)
+csHeatmap(get_features(myGenes, "${p.plot.features}"), clustering="${p.plot.clustering}", labCol="${p.plot.labcol}", labRow="${p.plot.labrow}", border="${p.plot.border}")
+devname = dev.off()
+
+    ## Cluster ##
+	#elif $p.plot['type'] == "cluster":
+myGeneIds &lt;- c()
+    	#for $g in $p.plot.genes:
+myGeneIds &lt;- c(myGeneIds, "$g['gene_id']")
+		#end for
+myGenes &lt;- getGenes(cuff, myGeneIds)
+csCluster(get_features(myGenes, "${p.plot.features}"), k=${p.plot.k}, iter.max="${p.plot.iter_max}")
+devname = dev.off()
+
+    ## Expression Plot ##
+	#elif $p.plot['type'] == "expressionplot":
+myGeneId &lt;- "$p.plot.gene_id"
+myGenes &lt;- getGenes(cuff, myGeneId)
+expressionPlot(get_features(myGenes, "${p.plot.features}"), drawSummary=${p.plot.draw_summary}, iter.max="${p.plot.show_error_bars}")
+devname = dev.off()
+
+    ## Expression Bar Plot ##
+	#elif $p.plot['type'] == "expressionbarplot":
+myGeneId &lt;- "$p.plot.gene_id"
+myGenes &lt;- getGenes(cuff, myGeneId)
+expressionBarplot(get_features(myGenes, "${p.plot.features}"), iter.max="${p.plot.show_error_bars}")
+devname = dev.off()
+	#end if
+
+#end for
+        </configfile>
+    </configfiles>
+
+  <help>
+This tool allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data. In addition, provides numerous plotting functions for commonly used visualizations.
+  </help>
+
+</tool>