annotate convert_graph.py @ 3:b7cdfd11614a draft default tip

Uploaded
author bernhardlutz
date Tue, 04 Mar 2014 16:50:32 -0500
parents 89b606a49225
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
1 #!/usr/bin/env python
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
2 # Usage: convert_graph.py --infile FILE --informat TYPE --outfile OUTPUT_FILE --outformat OUTPUT_TYPE
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
3
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
4 import sys, os
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
5 import networkx as nx
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
6 import argparse
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
7 import json
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
8
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
9 from xgmml_networkx import XGMMLParserHelper, XGMMLWriter
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
10 from networkx.readwrite import json_graph
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
11
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
# Every format name accepted for --informat / --outformat.
graph_types = [
    "gml",
    "yaml",
    "gspan",
    "xgmml",
    "gexf",
    "graphml",
    "json",
    "pajek",
]

# Reader functions for the formats handled directly by networkx.
func_dic_read = {
    'gml': nx.read_gml,
    'yaml': nx.read_yaml,
    'gexf': nx.read_gexf,
    'graphml': nx.read_graphml,
    'pajek': nx.read_pajek,
}

# Writer functions for the formats handled directly by networkx.
func_dic_write = {
    'gml': nx.write_gml,
    'yaml': nx.write_yaml,
    'gexf': nx.write_gexf,
    'graphml': nx.write_graphml,
    'pajek': nx.write_pajek,
}

# Formats for which both dictionaries above provide a function, so no
# custom reader/writer from this module is needed.
completely_supported_types = [
    "gml",
    "gexf",
    "yaml",
    "graphml",
    "pajek",
]
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
23
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
def read_gspan(infile):
    """Parse gSpan-formatted lines into one directed graph.

    The record type is taken from the first character of each line:

    * ``t`` starts a new subgraph,
    * ``v <id> [<label>]`` declares a vertex,
    * ``e <source> <target> [<label>]`` declares an edge.

    All subgraphs are merged into a single graph.  Vertices are numbered
    consecutively in the order they appear (the id written in the file is
    not used), and the endpoints of every edge are shifted by the number of
    vertices that had been read when the enclosing ``t`` line was seen.
    Lines of any other type are skipped.

    Args:
        infile: Iterable of text lines, e.g. an open file.

    Returns:
        An ``nx.DiGraph`` whose nodes and edges carry a ``label`` attribute
        (the empty string when the record has no label).

    Raises:
        ValueError: If an edge line has fewer than three fields, a vertex
            line has fewer than two, or an edge endpoint is not an integer.
    """
    G = nx.DiGraph()
    next_node_id = 0
    # Number of vertices read before the current subgraph began; added to
    # the subgraph-local endpoints of each edge.
    subgraph_offset = 0
    for line in infile:
        record_type = line[:1]
        # Split on any whitespace so repeated blanks and the trailing
        # newline never end up inside a field.
        fields = line.split()
        if record_type == "v":
            if len(fields) < 2:
                raise ValueError("Invalid gSpan vertex line: %r" % line)
            label = fields[2] if len(fields) > 2 else ""
            G.add_node(next_node_id, label=label)
            next_node_id += 1
        elif record_type == "e":
            if len(fields) < 3:
                raise ValueError("Invalid gSpan edge line: %r" % line)
            label = fields[3] if len(fields) > 3 else ""
            G.add_edge(subgraph_offset + int(fields[1]),
                       subgraph_offset + int(fields[2]),
                       label=label)
        elif record_type == "t":
            # A new subgraph: its vertex ids restart at 0 in the file.
            subgraph_offset = next_node_id
    return G
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
47
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
48
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
def write_gspan(graph, outfile):
    """Write *graph* to *outfile* in gSpan format.

    Each connected component becomes its own ``t # id <n>`` section (ids
    start at 1).  Inside a section the nodes are renumbered from 0 in
    sorted order and written as ``v <id> <label> ``; the edges leaving
    those nodes follow as ``e <source> <target> [<label>]``.  Every record
    is terminated by a newline, including edges without a label.

    Args:
        graph: networkx graph whose nodes carry a ``label`` attribute.
        outfile: Writable text stream.

    Raises:
        KeyError: If a node has no ``label`` attribute.
    """
    # Connected components are only defined for undirected graphs, so the
    # grouping is computed on an undirected copy.
    components = nx.connected_components(graph.to_undirected())
    node_labels = nx.get_node_attributes(graph, 'label')
    for graph_id, component in enumerate(components, 1):
        outfile.write("t # id " + str(graph_id) + "\n")
        # Node ids restart from 0 for every subgraph.
        local_ids = {}
        for local_id, node in enumerate(sorted(component)):
            local_ids[node] = local_id
            outfile.write(
                "v " + str(local_id) + " " + str(node_labels[node]) + " \n")

        # All edges adjacent to a node of this component.
        for edge in sorted(nx.edges(graph, component)):
            source, target = edge[0], edge[1]
            record = "e " + str(local_ids[source]) + " " + str(local_ids[target])
            try:
                record += " " + str(graph[source][target]['label'])
            except KeyError:
                # Unlabeled edge: write only the two endpoints.
                pass
            outfile.write(record + "\n")
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
def read_json(file):
    """Read a graph stored as node-link JSON.

    Args:
        file: Readable text stream holding the JSON document.

    Returns:
        The graph built by ``json_graph.node_link_graph`` from the decoded
        document.

    Raises:
        ValueError: If the stream does not contain valid JSON.
    """
    # Decode with the standard library and convert explicitly, instead of
    # relying on a json_graph.loads helper that newer networkx releases do
    # not appear to provide.
    node_link = json.loads(file.read())
    return json_graph.node_link_graph(node_link)
b7cdfd11614a Uploaded
bernhardlutz
parents: 2
diff changeset
85
2
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
def write_json(graph, outfile):
    """Serialize *graph* as node-link JSON and write it to *outfile*.

    Args:
        graph: networkx graph to serialize.
        outfile: Writable text stream.
    """
    node_link = json_graph.node_link_data(graph)
    # node_link is already a plain dict, so the standard library encoder is
    # used rather than a json_graph.dumps helper that newer networkx
    # releases do not appear to provide.
    outfile.write(json.dumps(node_link))
2
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
91
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
def main(args):
    """Read a graph in one format and write it out in another.

    Args:
        args: Object with the attributes ``infile`` and ``outfile`` (open
            file objects) and ``informat`` and ``outformat`` (format names
            from ``graph_types``).

    Raises:
        ValueError: If ``args.informat`` or ``args.outformat`` is not one
            of ``graph_types``.
    """
    # Reject unknown formats before touching any file, so a bad request
    # never fails later with an unbound ``graph``.
    if args.informat not in graph_types:
        raise ValueError("Unsupported input format: %s" % args.informat)
    if args.outformat not in graph_types:
        raise ValueError("Unsupported output format: %s" % args.outformat)

    # Everything networkx can read by itself.
    if args.informat in completely_supported_types:
        reader = func_dic_read[args.informat]
        graph = reader(args.infile)
    elif args.informat == "gspan":
        graph = read_gspan(args.infile)
    elif args.informat == "json":
        graph = read_json(args.infile)
    elif args.informat == "xgmml":
        xgmml = XGMMLParserHelper()
        xgmml.parseFile(args.infile)
        graph = xgmml.graph()

    # Everything networkx can write by itself.
    if args.outformat in completely_supported_types:
        writer = func_dic_write[args.outformat]
        writer(graph, args.outfile)
    elif args.outformat == "gspan":
        write_gspan(graph, args.outfile)
    elif args.outformat == "json":
        write_json(graph, args.outfile)
    elif args.outformat == "xgmml":
        # NOTE(review): presumably constructing the writer emits the
        # document to outfile -- confirm against xgmml_networkx.
        XGMMLWriter(args.outfile, graph, "MyGraph")
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
122
89b606a49225 Uploaded
bernhardlutz
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point.  All four options are mandatory; argparse
    # enforces that itself, so both "--opt value" and "--opt=value" are
    # accepted and a missing option produces the usual usage message.
    parser = argparse.ArgumentParser()
    parser.add_argument('--infile', type=argparse.FileType('r'),
                        required=True,
                        help="Specify the input file representing a graph")
    parser.add_argument('--outfile', type=argparse.FileType('w'),
                        required=True,
                        help="Specify one output file")
    parser.add_argument('--informat', type=str, required=True,
                        help="Specify the format of the input graph",
                        choices=graph_types)
    parser.add_argument('--outformat', type=str, required=True,
                        help="Specify the format of the output graph",
                        choices=graph_types)
    args = parser.parse_args()
    main(args)