comparison neo4j_loader.js @ 0:aa7a5cc0f59b default tip

commit
author ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
date Tue, 30 Dec 2014 18:27:26 +0900
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:aa7a5cc0f59b
1
2 // REQUIRE
3 var esc = require('querystring').escape,
4 neo4j = require('./neo4j_driver.js'),
5 fs = require('fs'),
6 readline = require('readline'),
7 mysql = require('mysql');
8
// ARGUMENTS
// Positional command-line arguments, in order:
//   nodes file, relations file, namespace, host, port, debug flag.
var file_nodes = process.argv[2];
var file_relations = process.argv[3];
var namespace = process.argv[4];
var host = process.argv[5];
var port = process.argv[6];
// Numeric so that "0" (and a missing argument, which becomes NaN) is falsy.
var debug = Number(process.argv[7]);
16
// VARIABLES
// Client used for every Neo4j request made by this script.
var neo4jClient = neo4j.createClient({'host': host, 'port': port});
// Prefix used to build node URIs placed in request bodies.
var uri_base = 'http://' + host + ':' + port + '/db/data/';
// Reference point for elapsed_time(); reset after every report.
var start = process.hrtime();
21
// MAIN
// Pipeline: find the base ids, create empty nodes, fill in their properties,
// create the relationships, then record what was loaded.
getBaseId(function(nid_base, rid_base){
  var node_count;

  // Print the time spent in the step that just finished (debug mode only).
  function reportLap(){
    if(debug) elapsed_time('elapsed');
  }

  function onRelationsAdded(rnum){
    reportLap();
    saveMetadata(file_nodes, file_relations, nid_base, rid_base, node_count, rnum);
  }

  function onPropertiesAdded(){
    reportLap();
    addRelations(file_relations, nid_base, onRelationsAdded);
  }

  function onNodesAdded(nnum){
    node_count = nnum;
    reportLap();
    addNodeProperties(file_nodes, nid_base, onPropertiesAdded);
  }

  console.log('BASE NODE ID:', nid_base);
  console.log('BASE RELATIONSHIP ID:', rid_base);
  // ADD EMPTY NODES FIRST, THEN ADD PROPERTIES TO THE NODES WITH APPROPRIATE ID.
  // OTHERWISE, CANNOT GET CORRECT ID BECAUSE ADD NODE REQUESTS FINISH IN NO ORDER.
  addNodes(file_nodes, nid_base, onNodesAdded);
});
40
// MYSQL CONNECTION
// Creates a MySQL connection object for the 'sem4j' database on the same host
// as the Neo4j server and passes it to callback(conn).
function connect(callback){
  var settings = {
    'host': host,
    'database': 'sem4j',
    'user': 'sem4j',
    'password': 'sem4j'
  };
  callback(mysql.createConnection(settings));
}
// Records one row in the load_info table describing this load: the current
// time, both input file names, the base node/relationship ids and the counts.
// The outcome is only logged; the connection is closed afterwards either way.
function saveMetadata(file_nodes, file_relations, nid_base, rid_base, nnum, rnum){
  var sql = "INSERT INTO load_info VALUES(null,?,?,?,?,?,?,?);";

  connect(function(conn){
    var row = [getTime(), file_nodes, file_relations, nid_base, rid_base, nnum, rnum];

    function onClosed(){
      console.log('connection end');
    }

    function onInserted(err, results){
      if (err) {
        console.log(err);
      }
      console.log('--- results ---');
      console.log(results);
      conn.end(onClosed);
    }

    conn.query(sql, row, onInserted);
  });
}
// Returns the current local time as "H:MM:SS " (note the trailing space).
// Minutes and seconds are zero-padded to two digits; hours are not padded.
function getTime() {
  // Prefix single-digit values with "0"; larger values pass through unchanged.
  function twoDigits(value) {
    return value < 10 ? "0" + value : value;
  }

  var now = new Date();
  var fields = [
    now.getHours(),
    twoDigits(now.getMinutes()),
    twoDigits(now.getSeconds())
  ];
  return fields.join(":") + " ";
}
86
// Logs the time elapsed since the module-level `start` mark as
// "<seconds> s, <milliseconds> ms - <note>", then resets the mark.
//   note: label appended to the log line.
function elapsed_time(note){
  var precision = 3; // 3 decimal places
  // Read the clock once: taking seconds and nanoseconds from two separate
  // hrtime() calls can pair values from different sides of a second boundary.
  var diff = process.hrtime(start);
  var elapsed_ms = diff[1] / 1000000; // nanosecond remainder -> milliseconds
  console.log(diff[0] + " s, " + elapsed_ms.toFixed(precision) + " ms - " + note);
  start = process.hrtime(); // reset the timer
}
// Finds the ids the next node and relationship will receive by creating a
// throw-away node with a self-relationship, reading their ids, and deleting
// both again. Calls callback(nid_base, rid_base) once the clean-up is done.
function getBaseId(callback){
  // The id is taken from the 7th '/'-separated piece of the entity's "self" URI.
  function idFromSelf(entity){
    return Number(entity.self.split('/')[6]);
  }

  // USE BATCH BECAUSE DELETE METHOD IS NOT DEFINED IN THE DRIVER
  function batchDelete(target, done){
    var ops = [{'method':'DELETE', 'to':target, 'body':'', 'id':0}];
    neo4jClient.post('batch/', ops, done);
  }

  // CREATE TEST NODE
  neo4jClient.post('node', '', function(node){
    // NODE ID IS NOT REUSED, SO THIS IS THE BASE ID
    var nid_base = idFromSelf(node);
    // CREATE TEST RELATIONSHIP (from the test node to itself)
    var probe = { 'to' : uri_base + 'node/' + nid_base, 'type':'test', 'data':{} };
    neo4jClient.post('node/' + nid_base + '/relationships/', probe, function(rel){
      var rid_base = idFromSelf(rel);
      // DELETE TEST RELATIONSHIP FIRST, THEN THE TEST NODE
      batchDelete('relationship/' + rid_base, function(){
        batchDelete('node/' + nid_base, function(){
          callback(nid_base, rid_base);
        });
      });
    });
  });
}
// Creates one empty node for every line of `file`.
//   file:     path of the node file (only the number of lines matters here).
//   nid_base: accepted for symmetry with the other loaders; unused.
//   callback: called once with the number of lines, after the whole file has
//             been read AND every create request has completed.
function addNodes(file, nid_base, callback){
  var rs = fs.ReadStream(file);
  var rl = readline.createInterface({'input': rs, 'output': {}});
  var cnt_added = 0;      // create requests that have completed
  var cnt_line = 0;       // lines read so far
  var input_done = false; // set once readline has consumed the whole file
  var finished = false;   // guards against reporting completion twice

  // Completion is checked both when a request returns and when the input
  // closes, so it fires even for an empty file or when every request has
  // already returned before the end of the file is reached.
  function finishIfDone(){
    if(finished || !input_done || cnt_added != cnt_line) return;
    finished = true;
    console.log('ADDING EMPTY NODES FINISHED:', cnt_added);
    callback(cnt_line);
  }

  rl.on('line', function(){
    cnt_line += 1;
    addNode(function(){
      cnt_added += 1;
      if(debug && cnt_added % 100 == 0){
        console.log('ADDING EMPTY NODES STATUS:', cnt_added);
      }
      finishIfDone();
    });
  }).on('close', function(){
    input_done = true;
    console.log('ADDING EMPTY NODES START:', cnt_line);
    finishIfDone();
  });
  rl.resume();
}
// Creates a single empty node and then calls callback() with no arguments;
// the server's response is deliberately not forwarded.
function addNode(callback){
  function onCreated(){
    callback();
  }
  neo4jClient.post('node', '', onCreated);
}
// Sets the properties of the previously created nodes, one node per line of `file`.
// Each line is tab-separated: local id, type, name, JSON properties.
//   file:     path of the node file.
//   nid_base: added to each line's local id to obtain the node id.
//   callback: called once with the number of lines, after the whole file has
//             been read AND every property request has completed.
function addNodeProperties(file, nid_base, callback){
  var rs = fs.ReadStream(file);
  var rl = readline.createInterface({'input': rs, 'output': {}});
  var cnt_added = 0;      // property requests that have completed
  var cnt_line = 0;       // lines read so far
  var input_done = false; // set once readline has consumed the whole file
  var finished = false;   // guards against reporting completion twice

  // Completion is checked both when a request returns and when the input
  // closes, so it fires even for an empty file or when every request has
  // already returned before the end of the file is reached.
  function finishIfDone(){
    if(finished || !input_done || cnt_added != cnt_line) return;
    finished = true;
    console.log('ADDING NODE PROPERTIES FINISHED:', cnt_added);
    callback(cnt_line);
  }

  rl.on('line', function(line){
    cnt_line += 1;
    var cols = line.split("\t");
    addNodeProperty(Number(cols[0]) + nid_base, cols[1], cols[2], cols[3], namespace, file, function(){
      cnt_added += 1;
      if(debug && cnt_added % 100 == 0){
        console.log('ADDING NODE PROPERTIES STATUS:', cnt_added);
      }
      finishIfDone();
    });
  }).on('close', function(){
    input_done = true;
    console.log('ADDING NODE PROPERTIES START:', cnt_line);
    finishIfDone();
  });
  rl.resume();
}
// Writes the properties of one node and adds the node to the name index,
// both in a single batch request; calls callback() with no arguments afterwards.
//   node_id:    id of the node to update (also stored as the 'rowid' property).
//   type, name: stored as properties; name is also the indexed value.
//   properties: JSON text of the node's own properties (JSON.parse throws if invalid).
//   namespace:  stored as a property and used as the index name.
//   filename:   stored as a property.
function addNodeProperty(node_id, type, name, properties, namespace, filename, callback){
  // Start from the row's own properties, then overlay the loader's fields.
  var props = JSON.parse(properties);
  props['name'] = name;
  props['type'] = type;
  props['rowid'] = node_id;
  props['namespace'] = namespace;
  props['filename'] = filename;

  // USE BATCH BECAUSE PUT METHOD IS NOT DEFINED IN THE DRIVER
  var set_properties = {
    'method':'PUT',
    'to':'node/' + node_id + '/properties',
    'body':props,
    'id':0
  };
  var index_by_name = {
    'method':'POST',
    'to':'index/node/' + esc(namespace),
    'body':{uri:uri_base + 'node/' + node_id, key:'name', value:name},
    'id':1
  };

  neo4jClient.post('batch/', [set_properties, index_by_name], function(){
    callback();
  });
}
// Creates one relationship for every line of `file`.
// Each line is tab-separated: local source id, local target id, type, JSON properties.
//   file:     path of the relations file.
//   nid_base: added to both local ids to obtain the node ids.
//   callback: called once with the number of relationships added, after the
//             whole file has been read AND every request has completed.
function addRelations(file, nid_base, callback){
  var rs = fs.ReadStream(file);
  var rl = readline.createInterface({'input': rs, 'output': {}});
  var cnt_added = 0;      // relationship requests that have completed
  var cnt_line = 0;       // lines read so far
  var input_done = false; // set once readline has consumed the whole file
  var finished = false;   // guards against reporting completion twice

  // Completion is checked both when a request returns and when the input
  // closes, so it fires even for an empty file or when every request has
  // already returned before the end of the file is reached.
  function finishIfDone(){
    if(finished || !input_done || cnt_added != cnt_line) return;
    finished = true;
    console.log('ADDING RELATIONS FINISHED:', cnt_added);
    callback(cnt_added);
  }

  rl.on('line', function(line){
    cnt_line += 1;
    var cols = line.split("\t");
    addRelation(Number(cols[0]) + nid_base, Number(cols[1]) + nid_base, cols[2], cols[3], 'sem4j.org', file, function(){
      cnt_added += 1;
      if(debug && cnt_added % 100 == 0){
        console.log('ADDING RELATIONS STATUS:', cnt_added);
      }
      finishIfDone();
    });
  }).on('close', function(){
    input_done = true;
    console.log('ADDING RELATIONS START:', cnt_line);
    finishIfDone();
  });
  rl.resume();
}
// Creates one relationship from node `source` to node `target` and then calls
// callback() with no arguments.
//   type:       relationship type.
//   properties: JSON text of the relationship's properties (JSON.parse throws if invalid).
//   namespace, filename: accepted but not used by the current implementation.
function addRelation(source, target, type, properties, namespace, filename, callback){
  var payload = {
    'to' : uri_base + 'node/' + target,
    'type': type,
    'data' : JSON.parse(properties)
  };
  neo4jClient.post('node/' + source + '/relationships/', payload, function(){
    callback();
  });
}
249
250 /*
251
252 function getNodeIDBase(callback){
253 var data = {
254 "query":"start n = node(*) return max(ID(n))",
255 "params":{}
256 };
257 neo4jClient.post('cypher', data, function(obj){
258 if(obj.data[0][0]){
259 var nid_base = obj.data[0][0];
260 }else{
261 var nid_base = 0;
262 }
263 callback(nid_base);
264 });
265 }
266
267 function addNodeOld(node_id, type, name, properties, callback){
268
269 index = 'idx_name';
270
271 //name = 'Neo';
272 //type = 'person';
273 var input = JSON.parse(properties);
274 //console.log(input);
275
276 //input = {'name':name, 'type':type, 'node_id':node_id};
277 input['name'] = name;
278 input['type'] = type;
279 input['node_id'] = node_id;
280 console.log(input);
281
282 neo4jClient.post('node', input, function(obj){
283 var data = { uri:obj.self, key:'name', value:name };
284 neo4jClient.post(['index/node', esc(index)], data, function(){
285 console.log('Added:', obj.self, node_id, name);
286 callback();
287 });
288 });
289 };
290
291 */