# HG changeset patch # User kellrott # Date 1353565797 18000 # Node ID b23bb96ea9919cbd6ecc28e6263576ef4cde2c6a Uploaded diff -r 000000000000 -r b23bb96ea991 ontologizer/._ontologizer.xml Binary file ontologizer/._ontologizer.xml has changed diff -r 000000000000 -r b23bb96ea991 ontologizer/._ontologizer_wrap.py Binary file ontologizer/._ontologizer_wrap.py has changed diff -r 000000000000 -r b23bb96ea991 ontologizer/Ontologizer.jar Binary file ontologizer/Ontologizer.jar has changed diff -r 000000000000 -r b23bb96ea991 ontologizer/ontologizer.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ontologizer/ontologizer.xml Thu Nov 22 01:29:57 2012 -0500 @@ -0,0 +1,66 @@ + + Ontologizer + ontologizer_wrap.py +#if str($mtc) != 'None': +--mtc $mtc +#end if +--calculation $calculation +#if str($filter) != '' +--filter $filter +#end if +--go $go +#if $ignore +--ignore +#end if +--out $out +--association $association +--annotation $annotation +--dot-out $dot +--studyset $studyset +--population $population + + + + + + + + + + + + + + + +Ontologizer is a Java application that can be used to analyze +microarray and similar data for overrepresentation of GO terms. +You can find info about Ontologizer at http://compbio.charite.de/contao/index.php/ontologizer2.html + +The Gene Ontology File can be found at http://www.geneontology.org/GO.downloads.ontology.shtml + +The Gene Association Files can be found at http://www.geneontology.org/GO.downloads.annotations.shtml + +Both the study set and population files are lists of genes that should map to the association file. 
+
+
+
diff -r 000000000000 -r b23bb96ea991 ontologizer/ontologizer_wrap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ontologizer/ontologizer_wrap.py Thu Nov 22 01:29:57 2012 -0500
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+"""Galaxy wrapper around the Ontologizer command-line jar.
+
+Fetches Ontologizer.jar if it is not already in the base directory, runs it
+with the requested options into a scratch directory, and moves the result
+files to the output paths given on the command line.
+"""
+
+import sys
+import os
+import urllib
+import tempfile
+import shutil
+from glob import glob
+import subprocess
+
+from optparse import OptionParser
+
+try:
+    # Python 3 keeps urlretrieve in urllib.request.
+    from urllib.request import urlretrieve
+except ImportError:
+    # Python 2 exposes it directly on the urllib module.
+    urlretrieve = urllib.urlretrieve
+
+basedir = os.path.dirname(os.path.realpath( __file__))
+
+jarURL="http://compbio.charite.de/contao/index.php/cmdlineOntologizer.html?file=tl_files/ontologizer/cmdline/Ontologizer.jar"
+
+goURL = "http://www.geneontology.org/ontology/obo_format_1_2/gene_ontology_ext.obo"
+humanGoaURL = "http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/gene-associations/gene_association.goa_human.gz?rev=HEAD"
+
+
+def _given(value):
+    """Return True if an option was supplied and is not the literal 'None'."""
+    return value is not None and value != 'None'
+
+
+def _move_result(tmpdir, pattern, dest):
+    """Move the first file in tmpdir matching pattern to dest.
+
+    Exits with status 1 and a message on stderr when nothing matches.
+    """
+    found = glob(os.path.join(tmpdir, pattern))
+    if not found:
+        sys.stderr.write("Ontologizer produced no file matching %s\n" % pattern)
+        sys.exit(1)
+    shutil.move(found[0], dest)
+
+
+if __name__ == "__main__":
+    parser = OptionParser()
+    parser.add_option("-m", "--mtc", dest="mtc", help="Specifies the MTC method to use. Possible values are: 'Bonferroni' (default), 'None', 'Westfall-Young-Single-Step'", default=None)
+    parser.add_option("-c", "--calculation", dest="calculation", help="Specifies the calculation method to use. Possible values are: 'Parent-Child-Union', 'Parent-Child-Intersection', 'Term-For-Term' (default). For a full list, consult the output of the -h option.", default=None)
+    parser.add_option("-a", "--association", dest="association", help="File containing associations from genes to GO terms. Required", default=None)
+    parser.add_option("-d", "--dot", dest="dot", help="For every studyset analysis write out an additional .dot file (GraphViz) containing the GOTerm graph with significant nodes. The optional argument in range between 0 and 0.5 specifies the maximum level on which a term is considered as significantly enriched. By appending a GO Term identifier (separated by a comma) the output is restriced to the subgraph originating at this GO Term.", default="0.1")
+    parser.add_option("-t", "--dot-out", dest="dot_output", help="Name of the dot output file", default=None)
+    # The tool XML invokes this as "--filter $filter", so the option takes
+    # the rules file as its value rather than acting as a bare flag.
+    parser.add_option("-f", "--filter", dest="filter", help="Filter the gene names by appling rules in a given file (currently only mapping supported).", default=None)
+    parser.add_option("-g", "--go", dest="go", help="Path to gene_ontology_edit.obo file (Required)", default=None)
+    parser.add_option("-i", "--ignore", action="store_true", dest="ignore", help="Ignore genes to which no association exist within the calculation.", default=False)
+    parser.add_option("-n", "--annotation", dest="annotation", help="Create an additional file per study set which contains the annotations.", default=None)
+    parser.add_option("-o", "--outfile", dest="out", help="Specfies the directory in which the results will be placed.", default=None)
+    parser.add_option("-p", "--population", dest="population", help="File containing genes within the population. Required", default=None)
+    parser.add_option("-r", "--resamplingsteps", dest="resamplingsteps", help="Specifies the number of steps used in resampling based MTCs", default=None)
+    parser.add_option("-s", "--studyset", dest="studyset", help="File of the study set or a directory containing study set files. Required", default=None)
+    parser.add_option("-b", "--basedir", dest="basedir", help="Where to store Ontologizer.jar. If not defined, assumed to be ./", default=None)
+
+    options, args = parser.parse_args()
+
+    # Every one of these ends up in the java argv or as a move target; fail
+    # with a usage error instead of passing None further down.
+    required = (
+        ("--go", options.go),
+        ("--association", options.association),
+        ("--studyset", options.studyset),
+        ("--population", options.population),
+        ("--outfile", options.out),
+    )
+    for flag, value in required:
+        if value is None:
+            parser.error("%s is required" % flag)
+
+    if options.basedir is not None:
+        basedir = options.basedir
+
+    jarPath = os.path.join( basedir, "Ontologizer.jar" )
+
+    if not os.path.exists(jarPath):
+        sys.stdout.write("Downloading Ontologizer.jar\n")
+        urlretrieve( jarURL, jarPath)
+
+    cmdline = ["java", "-Xmx1024M", "-jar", jarPath]
+
+    cmdline.extend( ["--go", options.go] )
+    cmdline.extend( ["--association", options.association] )
+    cmdline.extend( ["--studyset", options.studyset] )
+    cmdline.extend( ["--population", options.population] )
+
+    # 'None' is itself a listed MTC method, so only an absent option is skipped.
+    if options.mtc is not None:
+        cmdline.extend( ["--mtc", options.mtc] )
+
+    if _given(options.calculation):
+        cmdline.extend( ["--calculation", options.calculation] )
+
+    if _given(options.filter):
+        cmdline.extend( ["--filter", options.filter] )
+
+    if options.ignore:
+        cmdline.append("--ignore")
+
+    if options.resamplingsteps is not None:
+        cmdline.extend( ["--resamplingsteps", options.resamplingsteps] )
+
+    if options.annotation is not None:
+        cmdline.append("-n")
+
+    if options.dot_output is not None and options.dot is not None:
+        cmdline.extend( ["--dot", options.dot] )
+
+    tmpdir = tempfile.mkdtemp( prefix="ontologizer")
+    try:
+        cmdline.extend( ["-o", tmpdir] )
+
+        sys.stdout.write("Running %s\n" % str(cmdline))
+        # Flush so the banner precedes whatever java prints to the shared stdout.
+        sys.stdout.flush()
+
+        proc = subprocess.Popen(cmdline, stderr=subprocess.PIPE)
+        stderr = proc.communicate()[1]
+
+        if proc.returncode != 0:
+            if not isinstance(stderr, str):
+                # Python 3 hands back bytes from the pipe.
+                stderr = stderr.decode("utf-8", "replace")
+            sys.stderr.write(stderr)
+            sys.exit(proc.returncode)
+
+        _move_result(tmpdir, "table-*.txt", options.out)
+
+        if options.annotation is not None:
+            _move_result(tmpdir, "anno-*.txt", options.annotation)
+
+        if options.dot_output is not None:
+            _move_result(tmpdir, "view-*.dot", options.dot_output)
+    finally:
+        # Remove the scratch directory on success and on every failure path.
+        shutil.rmtree(tmpdir)