changeset 0:b23bb96ea991 draft

Uploaded
author kellrott
date Thu, 22 Nov 2012 01:29:57 -0500
parents
children 593c09e9c660
files ontologizer/._ontologizer.xml ontologizer/._ontologizer_wrap.py ontologizer/Ontologizer.jar ontologizer/ontologizer.xml ontologizer/ontologizer_wrap.py
diffstat 5 files changed, 158 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file ontologizer/._ontologizer.xml has changed
Binary file ontologizer/._ontologizer_wrap.py has changed
Binary file ontologizer/Ontologizer.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ontologizer/ontologizer.xml	Thu Nov 22 01:29:57 2012 -0500
@@ -0,0 +1,66 @@
+<tool id="ontologizer" name="Ontologizer" version="1.0.0">
+	<description>Ontologizer</description>
+	<command interpreter="python">ontologizer_wrap.py 
+#if str($mtc) != 'None': 
+--mtc $mtc 
+#end if
+--calculation $calculation
+#if str($filter) != 'None'
+--filter $filter
+#end if
+--go $go
+#if $ignore
+--ignore
+#end if
+--out $out
+--association $association
+--annotation $annotation
+--dot-out $dot --dot $dot_cutoff
+--studyset $studyset
+--population $population --resamplingsteps $resamplingsteps
+	</command>
+	<inputs>
+		<param name="go" type="data" label="Gene Ontology File" help="Gene Ontology OBO file"/>
+		<param name="association" type="data" label="Gene Association File" help="File containing associations from genes to GO terms"/>
+		<param name="studyset" type="data" label="Study Set" help="File of the study set or a directory containing study set files"/>
+		<param name="population" type="data" label="Background Set file" help="File containing genes within the population"/>
+		<!-- The selected mtc/calculation values are placed on the wrapper command line unchanged (see the command template above). -->
+		<param name="mtc" type="select" label="Multiple Test Correction" help="Specifies the Multiple Test Correction method to use.">
+			<option value="None"/>
+			<option value="Benjamini-Hochberg"/>
+			<option value="Benjamini-Yekutieli"/>
+			<option value="Bonferroni"/>
+			<option value="Bonferroni-Holm"/>
+			<option value="Westfall-Young-Single-Step"/>
+		</param>
+		<param name="calculation" type="select" label="Set Method" help="Specifies the set calculation method to use.">
+			<option value="Parent-Child-Union"/>
+			<option value="Term-For-Term"/>
+			<option value="MGSA"/>
+			<option value="Parent-Child-Intersection"/>
+			<option value="Topology-Elim"/>
+			<option value="Topology-Weighted"/>
+		</param>
+		<param name="filter" type="data" optional="true" label="Filter File" help="Filter the gene names by applying rules in a given file (currently only mapping supported)"/>
+		<param name="ignore" type="boolean" label="Ignore unassociated genes" help="Ignore genes to which no association exist within the calculation"/>
+		<param name="resamplingsteps" type="integer" value="1000" label="Resampling steps" help="Specifies the number of steps used in resampling based MTCs"/>
+		<param name="dot_cutoff" type="float" value="0.1" label="DOT significance cutoff" help="For dot-file cutoff range between 0 and 0.5 specifies the maximum level on which a term is considered as significantly enriched (0.0 - 0.5)"/>
+	</inputs>
+	<outputs>
+		<data name="out" format="tabular" label="Study GO Stats" help="Result Data info"/>
+		<data name="annotation" format="tabular" label="Annotation" help="An additional file per study set which contains the annotations"/>
+		<data name="dot" format="dot" label="DOT file" help="For every studyset analysis write out an additional .dot file (GraphViz) containing the GOTerm graph with significant nodes. The optional argument in range between 0 and 0.5 specifies the maximum level on which a term is considered as significantly enriched. By appending a GO Term identifier (separated by a comma) the output is restricted to the subgraph originating at this GO Term."/>
+	</outputs>
+	<help>
+Ontologizer is a Java application that can be used to analyze 
+microarray and similar data for overrepresentation of GO terms. 
+You can find more information about Ontologizer at http://compbio.charite.de/contao/index.php/ontologizer2.html
+
+The Gene Ontology File can be found at http://www.geneontology.org/GO.downloads.ontology.shtml
+
+The Gene Association Files can be found at http://www.geneontology.org/GO.downloads.annotations.shtml	
+
+Both the study set and population files are lists of genes that should map to the association file.
+	
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ontologizer/ontologizer_wrap.py	Thu Nov 22 01:29:57 2012 -0500
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+"""Galaxy wrapper for Ontologizer: run the jar in a scratch directory, then move its result files into place."""
+import sys
+import os
+import urllib
+import tempfile
+import shutil
+from glob import glob
+import subprocess
+from optparse import OptionParser
+
+basedir = os.path.dirname(os.path.realpath( __file__))
+jarURL="http://compbio.charite.de/contao/index.php/cmdlineOntologizer.html?file=tl_files/ontologizer/cmdline/Ontologizer.jar"
+goURL = "http://www.geneontology.org/ontology/obo_format_1_2/gene_ontology_ext.obo"
+humanGoaURL = "http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/gene-associations/gene_association.goa_human.gz?rev=HEAD"
+
+
+def is_set(value):
+    """Return True if an option carries a real value (the tool template may pass the literal string 'None')."""
+    return value is not None and value != 'None'
+
+
+def move_result(tmpdir, pattern, dest):
+    """Move the first file in tmpdir matching the glob pattern to dest; exit with a message if none exists."""
+    found = sorted(glob(os.path.join(tmpdir, pattern)))
+    if not found:
+        sys.exit("Ontologizer produced no file matching %s" % pattern)
+    shutil.move(found[0], dest)
+
+
+if __name__ == "__main__":
+    parser = OptionParser()
+    parser.add_option("-m", "--mtc", dest="mtc", help="Specifies the MTC method to use. Possible values are: 'Bonferroni' (default), 'None', 'Westfall-Young-Single-Step'", default=None)
+    parser.add_option("-c", "--calculation", dest="calculation", help="Specifies the calculation method to use. Possible values are: 'Parent-Child-Union', 'Parent-Child-Intersection', 'Term-For-Term' (default). For a full list, consult the output of the -h option.", default=None)
+    parser.add_option("-a", "--association", dest="association", help="File containing associations from genes to GO terms. Required", default=None)
+    parser.add_option("-d", "--dot", dest="dot", help="For every studyset analysis write out an additional .dot file (GraphViz) containing the GOTerm graph with significant nodes. The optional argument in range between 0 and 0.5 specifies the maximum level on which a term is considered as significantly enriched. By appending a GO Term identifier (separated by a comma) the output is restriced to the subgraph originating at this GO Term.", default="0.1")
+    parser.add_option("-t", "--dot-out", dest="dot_output", help="Name of the dot output file", default=None)
+    parser.add_option("-f", "--filter", dest="filter", help="Filter the gene names by applying rules in the given file (currently only mapping supported).", default=None)
+    parser.add_option("-g", "--go", dest="go", help="Path to gene_ontology_edit.obo file (Required)", default=None)
+    parser.add_option("-i", "--ignore", action="store_true", dest="ignore", help="Ignore genes to which no association exist within the calculation.", default=False)
+    parser.add_option("-n", "--annotation", dest="annotation", help="Create an additional file per study set which contains the annotations.", default=None)
+    parser.add_option("-o", "--outfile", dest="out", help="Path the result table is moved to. Required", default=None)
+    parser.add_option("-p", "--population", dest="population", help="File containing genes within the population. Required", default=None)
+    parser.add_option("-r", "--resamplingsteps", dest="resamplingsteps", help="Specifies the number of steps used in resampling based MTCs", default=None)
+    parser.add_option("-s", "--studyset", dest="studyset", help="File of the study set or a directory containing study set files. Required", default=None)
+    parser.add_option("-b", "--basedir", dest="basedir", help="Where to store Ontologizer.jar. If not defined, assumed to be ./", default=None)
+
+    options, args = parser.parse_args()
+    for required in ("go", "association", "studyset", "population", "out"):
+        if not is_set(getattr(options, required)):
+            parser.error("missing required option: %s" % required)
+
+    if options.basedir is not None:
+        basedir = options.basedir
+    jarPath = os.path.join( basedir, "Ontologizer.jar" )
+    if not os.path.exists(jarPath):
+        sys.stdout.write("Downloading Ontologizer.jar\n")
+        urllib.urlretrieve( jarURL, jarPath)
+
+    cmdline = ["java", "-Xmx1024M", "-jar", jarPath]
+    cmdline.extend( ["--go", options.go] )
+    cmdline.extend( ["--association", options.association] )
+    cmdline.extend( ["--studyset", options.studyset] )
+    cmdline.extend( ["--population", options.population] )
+    if options.mtc is not None:  # 'None' is listed as a valid MTC name in the --mtc help, so it is forwarded verbatim
+        cmdline.extend(["--mtc", options.mtc])
+    for name in ("calculation", "filter", "resamplingsteps"):  # forwarded only when really set
+        if is_set(getattr(options, name)):
+            cmdline.extend(["--" + name, getattr(options, name)])
+    if options.ignore:
+        cmdline.append("--ignore")
+    if options.annotation is not None:
+        cmdline.append("-n")
+    if options.dot_output is not None and options.dot is not None:
+        cmdline.extend( ["--dot", options.dot] )
+
+    tmpdir = tempfile.mkdtemp( prefix="ontologizer")
+    try:  # the scratch directory is removed on success, on failure and on sys.exit alike
+        cmdline.extend( ["-o", tmpdir] )
+        sys.stdout.write("Running %s\n" % str(cmdline))
+        proc = subprocess.Popen(cmdline, stderr=subprocess.PIPE, universal_newlines=True)
+        stdout, stderr = proc.communicate()
+        if proc.returncode != 0:  # stderr is surfaced only on failure, and the failure is propagated to the caller
+            sys.stderr.write(stderr)
+            sys.exit(proc.returncode)
+        move_result(tmpdir, "table-*.txt", options.out)
+        if options.annotation is not None:
+            move_result(tmpdir, "anno-*.txt", options.annotation)
+        if options.dot_output is not None:
+            move_result(tmpdir, "view-*.dot", options.dot_output)
+    finally:
+        shutil.rmtree(tmpdir)