# HG changeset patch # User adam-novak # Date 1371156433 14400 # Node ID ec5ade08ac8a9b5d67ed7a7c0d12e78af77d2e82 # Parent 95ff566506f4d01d85bd45a5c5da4bf257011df9 Hopefully fixed dependency XML. diff -r 95ff566506f4 -r ec5ade08ac8a hexagram/hexagram.py --- a/hexagram/hexagram.py Tue Jun 11 18:26:25 2013 -0400 +++ b/hexagram/hexagram.py Thu Jun 13 16:47:13 2013 -0400 @@ -59,9 +59,14 @@ help="Galaxy-escaped name of the query signature") parser.add_argument("--window_size", type=int, default=20, help="size of the window to use when looking for clusters") + parser.add_argument("--truncation_edges", type=int, default=10, + help="number of edges for DrL truncate to pass per node") parser.add_argument("--no-stats", dest="stats", action="store_false", default=True, help="disable cluster-finding statistics") + parser.add_argument("--include-singletons", dest="singletons", + action="store_true", default=False, + help="add self-edges to retain unconnected points") return parser.parse_args(args) @@ -677,8 +682,23 @@ print "Regularizing similarity matrix..." sys.stdout.flush() + # This holds a list of all unique signature names in the similarity matrix. + # We can use it to add edges to keep singletons. + signatures = set() + for parts in sim_reader: + # Keep the signature names used + signatures.add(parts[0]) + signatures.add(parts[1]) + + # Save the line to the regularized file sim_writer.list_line(parts) + + if options.singletons: + # Now add a self-edge on every node, so we don't drop nodes with no + # other strictly positive edges + for signature in signatures: + sim_writer.line(signature, signature, 1) sim_reader.close() sim_writer.close() @@ -689,7 +709,8 @@ # TODO: pass a truncation level print "DrL: Truncating..." sys.stdout.flush() - subprocess.check_call(["truncate", drl_basename]) + subprocess.check_call(["truncate", "-t", str(options.truncation_edges), + drl_basename]) # Run the DrL layout engine. print "DrL: Doing layout..." @@ -708,7 +729,9 @@ # This holds a reader for the DrL output coord_reader = tsv.TsvReader(open(drl_basename + ".coord", "r")) - # This holds a dict from signature name string to (x, y) float tuple + # This holds a dict from signature name string to (x, y) float tuple. It is + # also our official collection of node names that made it through DrL, and + # therefore need their score data sent to the client. nodes = {} print "Reading DrL output..." @@ -840,6 +863,13 @@ # This is the signature that this line is about signature_name = parts[0] + if signature_name not in nodes: + # This signature wasn't in our DrL output. Don't bother + # putting its layer data in our visualization. This saves + # space and makes the client-side layer counts accurate for + # the data actually displayable. + continue + # These are the scores for all the layers for this signature layer_scores = parts[1:] diff -r 95ff566506f4 -r ec5ade08ac8a hexagram/hexagram.xml --- a/hexagram/hexagram.xml Tue Jun 11 18:26:25 2013 -0400 +++ b/hexagram/hexagram.xml Thu Jun 13 16:47:13 2013 -0400 @@ -1,7 +1,17 @@ + Interactive hex grid clustering visualization + drl-graph-layout + + numpy + scipy + + + + + hg clone https://bitbucket.org/adam_novak/drl-graph-layout + + hg up -r drl-graph-layout-1.1 + make + + bin + $INSTALL_DIR/bin + + + $INSTALL_DIR/bin + + + + + + This installs the latest DrL Graph Layout tool from Adam Novak's Bitbucket, because Shawn Martin has stopped maintaining it. + + + diff -r 95ff566506f4 -r ec5ade08ac8a hexagram/tool_dependency.xml --- a/hexagram/tool_dependency.xml Tue Jun 11 18:26:25 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ - - - - - - - hg clone https://bitbucket.org/adam_novak/drl-graph-layout - - hg up -r drl-graph-layout-1.1 - make - - bin - $INSTALL_DIR/bin - - - $INSTALL_DIR/bin - - - - - - This installs the latest DrL Graph Layout tool from Adam Novak's Bitbucket, because Shawn Martin has stopped maintaining it. - - - diff -r 95ff566506f4 -r ec5ade08ac8a hexagram/tsv.pyc Binary file hexagram/tsv.pyc has changed