# HG changeset patch # User kellrott # Date 1356125243 18000 # Node ID 593c09e9c6603a225da1c63f89ad8bac4ad00cd2 # Parent b23bb96ea9919cbd6ecc28e6263576ef4cde2c6a Uploaded diff -r b23bb96ea991 -r 593c09e9c660 ontologizer/._ontologizer.xml Binary file ontologizer/._ontologizer.xml has changed diff -r b23bb96ea991 -r 593c09e9c660 ontologizer/._ontologizer_wrap.py Binary file ontologizer/._ontologizer_wrap.py has changed diff -r b23bb96ea991 -r 593c09e9c660 ontologizer/ontologizer.xml --- a/ontologizer/ontologizer.xml Thu Nov 22 01:29:57 2012 -0500 +++ b/ontologizer/ontologizer.xml Fri Dec 21 16:27:23 2012 -0500 @@ -1,6 +1,9 @@ Ontologizer ontologizer_wrap.py +--association $association +--go $go +-w ./ #if str($mtc) != 'None': --mtc $mtc #end if @@ -8,23 +11,26 @@ #if str($filter) != '' --filter $filter #end if ---go $go #if $ignore --ignore #end if +#if $infiles.mode == "list": +--studyset ${infiles.studyset} +--population ${infiles.population} --out $out ---association $association --annotation $annotation --dot-out $dot ---studyset $studyset ---population $population +#end if +#if $infiles.mode == "matrix": +--matrix ${infiles.input_matrix} +--matrix-eval "${infiles.eval_text}" +--matrix-type ${infiles.matrix_type} +--out $outmatrix +#end if - - - - - - + + infiles['mode'] == "list" + + + infiles['mode'] == "list" + + + infiles['mode'] == "list" + + + + infiles['mode'] == "matrix" + Ontologizer is a Java application called the Ontologizer that can be used to analyze diff -r b23bb96ea991 -r 593c09e9c660 ontologizer/ontologizer_wrap.py --- a/ontologizer/ontologizer_wrap.py Thu Nov 22 01:29:57 2012 -0500 +++ b/ontologizer/ontologizer_wrap.py Fri Dec 21 16:27:23 2012 -0500 @@ -2,12 +2,14 @@ import sys import os +import re import urllib import tempfile +import csv import shutil from glob import glob import subprocess - +import array from optparse import OptionParser basedir = os.path.dirname(os.path.realpath( __file__)) @@ -18,7 +20,6 @@ humanGoaURL = "http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/gene-associations/gene_association.goa_human.gz?rev=HEAD" - if __name__ == "__main__": parser = OptionParser() parser.add_option("-m", "--mtc", dest="mtc", help="Specifies the MTC method to use. Possible values are: 'Bonferroni' (default), 'None', 'Westfall-Young-Single-Step'", default=None) @@ -35,6 +36,10 @@ parser.add_option("-r", "--resamplingsteps", dest="resamplingsteps", help="Specifies the number of steps used in resampling based MTCs", default=None) parser.add_option("-s", "--studyset", dest="studyset", help="File of the study set or a directory containing study set files. Required", default=None) parser.add_option("-b", "--basedir", dest="basedir", help="Where to store Ontologizer.jar. If not defined, assumed to be ./", default=None) + parser.add_option("-e", "--matrix", dest="matrix", help="Input Matrix (alternate to providing gene sets", default=None) + parser.add_option("-k", "--matrix-eval", dest="matrix_eval", help="Method to determine positive genes in an input matrix", default="value > 0.0") + parser.add_option("-w", "--workdir", dest="workdir", help="Working directory", default=None) + parser.add_option("--matrix-type", dest="matrix_type", help="Matrix mode output type(p, p.adjusted, p.min)", default="p") options, args = parser.parse_args() @@ -47,46 +52,143 @@ if not os.path.exists(jarPath): sys.stdout.write("Downloading Ontologizer.jar\n") urllib.urlretrieve( jarURL, jarPath) - - cmdline = ["java", "-Xmx1024M", "-jar", jarPath] - - cmdline.extend( ["--go", options.go] ) - cmdline.extend( ["--association", options.association] ) - cmdline.extend( ["--studyset", options.studyset] ) - cmdline.extend( ["--population", options.population] ) - - tmpdir = tempfile.mkdtemp( prefix="ontologizer") - cmdline.extend( ["-o", tmpdir] ) - - if options.annotation is not None: - cmdline.extend( ["-n"] ) - if options.dot_output is not None and options.dot is not None: - cmdline.extend( ["--dot", options.dot] ) - - if options.calculation is not None and options.calculation != 'None': - cmdline.extend(["--calculation", options.calculation]) + tmpdir = tempfile.mkdtemp( prefix="ontologizer", dir=options.workdir) - #sys.stdout.write("Running %s\n" % (" ".join(cmdline))) - sys.stdout.write("Running %s\n" % str(cmdline)) - - proc = subprocess.Popen(cmdline, stderr=subprocess.PIPE) - stdout, stderr = proc.communicate() - - if proc.poll() != 0: - sys.stderr.write(stderr) + if options.studyset is not None and options.population is not None: + cmdline = ["java", "-Xmx1024M", "-jar", jarPath] + + cmdline.extend( ["--go", options.go] ) + cmdline.extend( ["--association", options.association] ) + cmdline.extend( ["--studyset", options.studyset] ) + cmdline.extend( ["--population", options.population] ) + + cmdline.extend( ["-o", tmpdir] ) + + if options.annotation is not None: + cmdline.extend( ["-n"] ) + if options.dot_output is not None and options.dot is not None: + cmdline.extend( ["--dot", options.dot] ) + + if options.calculation is not None and options.calculation != 'None': + cmdline.extend(["--calculation", options.calculation]) + + #sys.stdout.write("Running %s\n" % (" ".join(cmdline))) + sys.stdout.write("Running %s\n" % str(cmdline)) + + proc = subprocess.Popen(cmdline, stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() + + if proc.poll() != 0: + sys.stderr.write(stderr) + + tmp = glob(os.path.join(tmpdir, "table-*.txt")) + shutil.move(tmp[0], options.out) + + if options.annotation is not None: + tmp=glob(os.path.join(tmpdir, "anno-*.txt")) + shutil.move(tmp[0], options.annotation) + + if options.dot_output is not None: + tmp=glob(os.path.join(tmpdir, "view-*.dot")) + shutil.move(tmp[0], options.dot_output) + elif options.matrix is not None: + col_set = None + col_rev = None + row_set = None + handle = open(options.matrix) + reader = csv.reader(handle, delimiter="\t") + for row in reader: + if col_set is None: + col_set = {} + col_rev = {} + for i, a in enumerate(row): + if i != 0: + col_set[a] = i + col_rev[i] = a + row_set = [] + else: + row_set.append(row[0]) + handle.close() + + population_file = os.path.join(tmpdir, "pop_set") + study_dir = os.path.join(tmpdir, "study_set") + + handle = open(population_file, "w") + for r in row_set: + handle.write("%s\n" % (r)) + handle.close() - - tmp = glob(os.path.join(tmpdir, "table-*.txt")) - shutil.move(tmp[0], options.out) - - if options.annotation is not None: - tmp=glob(os.path.join(tmpdir, "anno-*.txt")) - shutil.move(tmp[0], options.annotation) + cmdline = ["java", "-Xmx1024M", "-jar", jarPath] + + cmdline.extend( ["--go", options.go] ) + cmdline.extend( ["--association", options.association] ) + cmdline.extend( ["--population", population_file] ) + + cmdline.extend( ["--studyset", study_dir] ) + + cmdline.extend( ["-o", tmpdir] ) + + os.mkdir(study_dir) + for col in col_set: + ihandle = open(options.matrix) + ohandle = open( os.path.join(study_dir, str(col_set[col])), "w") + head = True + reader = csv.reader(ihandle, delimiter="\t") + for row in reader: + if head: + head = False + else: + value = row[col_set[col]] + try: + value = float(value) + except ValueError: + pass + + if eval(options.matrix_eval, {"__builtins__":None}, { 'float' : float, 'int' : int, 'value' : value } ): + ohandle.write("%s\n" % (row[0])) + ihandle.close() + ohandle.close() - if options.dot_output is not None: - tmp=glob(os.path.join(tmpdir, "view-*.dot")) - shutil.move(tmp[0], options.dot_output) - + print cmdline + proc = subprocess.Popen(cmdline, stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() + + if proc.poll() != 0: + sys.stderr.write(stderr) + + go_values = {} + + col_select_map = { + "p" : 9, + "p.adjusted" : 10, + "p.min" : 11 + } + + column_select = col_select_map[ options.matrix_type] + for a in glob(os.path.join(tmpdir,"table-*.txt")): + res = re.search("table-(\d+)-", a) + if res: + cur_col = int(res.group(1)) + head = True + ihandle = open(a) + reader = csv.reader(ihandle, delimiter="\t") + for row in reader: + if head: + head = False + else: + if row[0] not in go_values: + go_values[row[0]] = array.array("f", [float('nan')] * (len(col_set) )) + go_values[row[0]][cur_col-1] = float(row[column_select]) + ihandle.close() + ohandle = open(options.out, "w") + row = ["#go"] + [""] * (len(col_set)) + for c in col_set: + row[col_set[c]] = c + ohandle.write( "%s\n" % ("\t".join(row))) + for go in go_values: + ohandle.write("%s\t%s\n" % (go, "\t".join((str(f) for f in go_values[go]))) ) + ohandle.close() + shutil.rmtree(tmpdir)