Mercurial > repos > kellrott > ontologizer
changeset 0:b23bb96ea991 draft
Uploaded
| author | kellrott |
|---|---|
| date | Thu, 22 Nov 2012 01:29:57 -0500 |
| parents | |
| children | 593c09e9c660 |
| files | ontologizer/._ontologizer.xml ontologizer/._ontologizer_wrap.py ontologizer/Ontologizer.jar ontologizer/ontologizer.xml ontologizer/ontologizer_wrap.py |
| diffstat | 5 files changed, 158 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!--
  ontologizer/ontologizer.xml

  Galaxy tool definition for Ontologizer. The <command> template calls
  ontologizer_wrap.py, which in turn runs Ontologizer.jar.

  Notes on the command template:
    * An unset optional dataset renders as the string 'None', so the filter
      guard compares against 'None' (an empty-string comparison never matches
      and would emit "filter None").
    * The selected multiple-test-correction method is always passed, including
      the literal method name "None", so that choosing "None" really requests
      no correction instead of silently falling back to the tool default.
    * resamplingsteps and dot_cutoff are forwarded; the wrapper accepts them
      as "resamplingsteps" and "dot" long options respectively.
-->
<tool id="ontologizer" name="Ontologizer" version="1.0.0">
  <description>Ontologizer</description>
  <command interpreter="python">ontologizer_wrap.py
--mtc $mtc
--calculation $calculation
#if str($filter) != 'None':
--filter $filter
#end if
--go $go
#if $ignore:
--ignore
#end if
--resamplingsteps $resamplingsteps
--out $out
--association $association
--annotation $annotation
--dot $dot_cutoff
--dot-out $dot
--studyset $studyset
--population $population
  </command>
  <inputs>
    <param name="go" type="data" label="Gene Ontology File" help="Gene Ontology OBO file"/>
    <param name="association" type="data" label="Gene Association File" help="File containing associations from genes to GO terms"/>
    <param name="studyset" type="data" label="Study Set" help="File of the study set or a directory containing study set files"/>
    <param name="population" type="data" label="Background Set file" help="File containing genes within the population"/>

    <param name="mtc" type="select" label="Multiple Test Correction" help="Specifies the Multiple Test Correction method to use.">
      <option value="None"/>
      <option value="Benjamini-Hochberg"/>
      <option value="Benjamini-Yekutieli"/>
      <option value="Bonferroni"/>
      <option value="Bonferroni-Holm"/>
      <option value="Westfall-Young-Single-Step"/>
    </param>
    <param name="calculation" type="select" label="Set Method" help="Specifies the set calculation method to use.">
      <option value="Parent-Child-Union"/>
      <option value="Term-For-Term"/>
      <option value="MGSA"/>
      <option value="Parent-Child-Intersection"/>
      <option value="Topology-Elim"/>
      <option value="Topology-Weighted"/>
    </param>
    <param name="filter" type="data" optional="true" label="Filter File" help="Filter the gene names by applying rules in a given file (currently only mapping supported)"/>
    <param name="ignore" type="boolean" label="Ignore unassociated genes" help="Ignore genes to which no association exist within the calculation"/>
    <param name="resamplingsteps" type="integer" value="1000" label="Resampling steps" help="Specifies the number of steps used in resampling based MTCs"/>
    <param name="dot_cutoff" type="float" value="0.1" label="DOT file cutoff" help="For dot-file cutoff range between 0 and 0.5 specifies the maximum level on which a term is considered as significantly enriched (0.0 - 0.5)"/>
  </inputs>
  <outputs>
    <data name="out" format="tabular" label="Study GO Stats" help="Result Data info"/>
    <data name="annotation" format="tabular" label="Annotation" help="An additional file per study set which contains the annotations"/>
    <data name="dot" format="dot" label="DOT file" help="For every studyset analysis write out an additional .dot file (GraphViz) containing the GOTerm graph with significant nodes. The optional argument in range between 0 and 0.5 specifies the maximum level on which a term is considered as significantly enriched. By appending a GO Term identifier (separated by a comma) the output is restricted to the subgraph originating at this GO Term."/>
  </outputs>
  <help>
Ontologizer is a Java application that can be used to analyze
microarray and similar data for overrepresentation of GO terms.
You can find more information about Ontologizer at http://compbio.charite.de/contao/index.php/ontologizer2.html

The Gene Ontology File can be found at http://www.geneontology.org/GO.downloads.ontology.shtml

The Gene Association Files can be found at http://www.geneontology.org/GO.downloads.annotations.shtml

Both the study set and population files are lists of genes that should map to the association file.

  </help>
</tool>
#!/usr/bin/env python
"""Galaxy wrapper around the Ontologizer command-line JAR (ontologizer_wrap.py).

The script translates its own options into an Ontologizer invocation, runs it
with a scratch output directory, and then moves the generated table,
annotation and .dot files to the paths requested by the caller.

If Ontologizer.jar is not present in the base directory it is downloaded from
``jarURL`` first.
"""

import sys
import os
import urllib
import tempfile
import shutil
from glob import glob
import subprocess

from optparse import OptionParser

# urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

basedir = os.path.dirname(os.path.realpath(__file__))

jarURL = "http://compbio.charite.de/contao/index.php/cmdlineOntologizer.html?file=tl_files/ontologizer/cmdline/Ontologizer.jar"

goURL = "http://www.geneontology.org/ontology/obo_format_1_2/gene_ontology_ext.obo"
humanGoaURL = "http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/gene-associations/gene_association.goa_human.gz?rev=HEAD"

# (option destination, flag shown in the error message) for every option the
# wrapper cannot run without.
REQUIRED_OPTIONS = (
    ("go", "--go"),
    ("association", "--association"),
    ("studyset", "--studyset"),
    ("population", "--population"),
    ("out", "--outfile"),
)


def build_parser():
    """Return the OptionParser describing the wrapper's command line.

    ``--filter`` takes a file path (it used to be a bare flag, which made the
    path supplied by the tool definition a stray positional argument).
    Callers may write ``--out``: optparse resolves it as an unambiguous
    abbreviation of ``--outfile``.
    """
    parser = OptionParser()
    parser.add_option("-m", "--mtc", dest="mtc", help="Specifies the MTC method to use. Possible values are: 'Bonferroni' (default), 'None', 'Westfall-Young-Single-Step'", default=None)
    parser.add_option("-c", "--calculation", dest="calculation", help="Specifies the calculation method to use. Possible values are: 'Parent-Child-Union', 'Parent-Child-Intersection', 'Term-For-Term' (default). For a full list, consult the output of the -h option.", default=None)
    parser.add_option("-a", "--association", dest="association", help="File containing associations from genes to GO terms. Required", default=None)
    parser.add_option("-d", "--dot", dest="dot", help="For every studyset analysis write out an additional .dot file (GraphViz) containing the GOTerm graph with significant nodes. The optional argument in range between 0 and 0.5 specifies the maximum level on which a term is considered as significantly enriched. By appending a GO Term identifier (separated by a comma) the output is restriced to the subgraph originating at this GO Term.", default="0.1")
    parser.add_option("-t", "--dot-out", dest="dot_output", help="Name of the dot output file", default=None)
    parser.add_option("-f", "--filter", dest="filter", help="File with rules used to filter the gene names (currently only mapping supported).", default=None)
    parser.add_option("-g", "--go", dest="go", help="Path to gene_ontology_edit.obo file (Required)", default=None)
    parser.add_option("-i", "--ignore", action="store_true", dest="ignore", help="Ignore genes to which no association exist within the calculation.", default=False)
    parser.add_option("-n", "--annotation", dest="annotation", help="Create an additional file per study set which contains the annotations.", default=None)
    parser.add_option("-o", "--outfile", dest="out", help="Path to which the result table is moved. Required", default=None)
    parser.add_option("-p", "--population", dest="population", help="File containing genes within the population. Required", default=None)
    parser.add_option("-r", "--resamplingsteps", dest="resamplingsteps", help="Specifies the number of steps used in resampling based MTCs", default=None)
    parser.add_option("-s", "--studyset", dest="studyset", help="File of the study set or a directory containing study set files. Required", default=None)
    parser.add_option("-b", "--basedir", dest="basedir", help="Where to store Ontologizer.jar. If not defined, assumed to be ./", default=None)
    return parser


def build_cmdline(options, jar_path, out_dir):
    """Return the argv list that runs Ontologizer for the parsed *options*.

    *jar_path* is the Ontologizer.jar to execute and *out_dir* the directory
    handed to Ontologizer's ``-o`` option. Besides the options that were
    always forwarded, this also passes on --mtc, --filter, --ignore and
    --resamplingsteps, which were previously parsed and then dropped.
    """
    cmdline = ["java", "-Xmx1024M", "-jar", jar_path]

    cmdline.extend(["--go", options.go])
    cmdline.extend(["--association", options.association])
    cmdline.extend(["--studyset", options.studyset])
    cmdline.extend(["--population", options.population])
    cmdline.extend(["-o", out_dir])

    if options.annotation is not None:
        cmdline.append("-n")

    # The cutoff is only meaningful when a .dot file was actually requested.
    if options.dot_output is not None and options.dot is not None:
        cmdline.extend(["--dot", options.dot])

    if options.calculation is not None and options.calculation != 'None':
        cmdline.extend(["--calculation", options.calculation])

    # 'None' is one of the MTC method names listed in the --mtc help, so it
    # is forwarded literally; only an absent option is skipped.
    if options.mtc is not None:
        cmdline.extend(["--mtc", options.mtc])

    # An unset optional dataset may arrive as the string 'None' or ''.
    if options.filter not in (None, '', 'None'):
        cmdline.extend(["--filter", options.filter])

    if options.ignore:
        cmdline.append("--ignore")

    if options.resamplingsteps is not None:
        cmdline.extend(["--resamplingsteps", str(options.resamplingsteps)])

    return cmdline


def move_first_match(pattern, destination):
    """Move the first file matching glob *pattern* to *destination*.

    Raises IOError when nothing matches, instead of failing with an
    IndexError on an empty glob result.
    """
    matches = sorted(glob(pattern))
    if not matches:
        raise IOError("Ontologizer produced no file matching %s" % pattern)
    shutil.move(matches[0], destination)


def main(argv=None):
    """Run Ontologizer as described by *argv* and return a process exit code.

    *argv* defaults to ``sys.argv[1:]``. Missing required options end the
    program through ``parser.error`` (exit status 2) before any download or
    subprocess is started. A failing Ontologizer run returns its exit status
    after relaying its stderr; the scratch directory is removed in all cases.
    """
    parser = build_parser()
    options, _args = parser.parse_args(argv)

    for dest, flag in REQUIRED_OPTIONS:
        if getattr(options, dest) is None:
            parser.error("missing required option %s" % flag)

    jar_dir = basedir if options.basedir is None else options.basedir
    jar_path = os.path.join(jar_dir, "Ontologizer.jar")

    # NOTE(review): the JAR is fetched over plain HTTP and then executed;
    # consider pinning a checksum or shipping the JAR with the tool.
    if not os.path.exists(jar_path):
        sys.stdout.write("Downloading Ontologizer.jar\n")
        urlretrieve(jarURL, jar_path)

    work_dir = tempfile.mkdtemp(prefix="ontologizer")
    try:
        cmdline = build_cmdline(options, jar_path, work_dir)
        sys.stdout.write("Running %s\n" % str(cmdline))
        # Flush so the banner precedes anything the child prints to stdout.
        sys.stdout.flush()

        # universal_newlines gives text (not bytes) on Python 3 so the
        # captured stderr can be written straight to sys.stderr.
        proc = subprocess.Popen(cmdline, stderr=subprocess.PIPE,
                                universal_newlines=True)
        _unused_stdout, stderr_text = proc.communicate()

        if proc.returncode != 0:
            sys.stderr.write(stderr_text)
            return proc.returncode

        move_first_match(os.path.join(work_dir, "table-*.txt"), options.out)

        if options.annotation is not None:
            move_first_match(os.path.join(work_dir, "anno-*.txt"),
                             options.annotation)

        if options.dot_output is not None:
            move_first_match(os.path.join(work_dir, "view-*.dot"),
                             options.dot_output)
    except (IOError, OSError) as err:
        # Covers a missing java executable as well as missing result files.
        sys.stderr.write("%s\n" % err)
        return 1
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)

    return 0


if __name__ == "__main__":
    sys.exit(main())
