Mercurial > repos > ryotas > cypher_tools
comparison neo4j_loader.js @ 0:aa7a5cc0f59b default tip
commit
| author | ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp> |
|---|---|
| date | Tue, 30 Dec 2014 18:27:26 +0900 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:aa7a5cc0f59b |
|---|---|
| 1 | |
| 2 // REQUIRE | |
| 3 var esc = require('querystring').escape, | |
| 4 neo4j = require('./neo4j_driver.js'), | |
| 5 fs = require('fs'), | |
| 6 readline = require('readline'), | |
| 7 mysql = require('mysql'); | |
| 8 | |
// ARGUMENTS
// Command line: <file_nodes> <file_relations> <namespace> <host> <port> [debug]
// The first five arguments are required by the loader below; `debug` is
// optional and enables progress/timing output when it is a non-zero number.
if (process.argv.length < 7) {
  console.error('Usage: node neo4j_loader.js <file_nodes> <file_relations> <namespace> <host> <port> [debug]');
  process.exit(1);
}
var file_nodes = process.argv[2],
    file_relations = process.argv[3],
    namespace = process.argv[4],
    host = process.argv[5],
    port = process.argv[6],
    // Number(undefined) is NaN; normalise a missing/non-numeric flag to 0 (off).
    debug = Number(process.argv[7]) || 0;

// VARIABLES
// neo4jClient: REST client used for every Neo4j request in this script.
// uri_base:    absolute prefix used when a request body must reference a node by URI.
// start:       timestamp of the last elapsed_time() report (reset on each report).
var neo4jClient = neo4j.createClient({'host':host, 'port':port}),
    uri_base = 'http://' + host + ':' + port + '/db/data/',
    start = process.hrtime();
| 21 | |
// MAIN
// Pipeline: determine base ids -> create empty nodes -> set node properties
// -> create relationships -> record the load in MySQL.
// Empty nodes are created first and properties are attached afterwards by id,
// because node-creation requests complete in no particular order and the
// resulting ids could not otherwise be matched to input rows.
getBaseId(function (nid_base, rid_base) {
  console.log('BASE NODE ID:', nid_base);
  console.log('BASE RELATIONSHIP ID:', rid_base);

  // Prints the time spent in the step that just finished (debug mode only).
  function reportElapsed(label) {
    if (debug) elapsed_time(label);
  }

  // Step 2 and onwards; `nnum` is the node count reported by addNodes.
  function afterNodesCreated(nnum) {
    reportElapsed('elapsed');
    addNodeProperties(file_nodes, nid_base, function () {
      reportElapsed('elapsed');
      addRelations(file_relations, nid_base, function (rnum) {
        reportElapsed("elapsed");
        saveMetadata(file_nodes, file_relations, nid_base, rid_base, nnum, rnum);
      });
    });
  }

  addNodes(file_nodes, nid_base, afterNodesCreated);
});
| 40 | |
// MYSQL CONNECTION
/**
 * Creates a MySQL connection object for the 'sem4j' database and hands it to
 * `callback`.
 *
 * The connection targets the same `host` that was given on the command line
 * for Neo4j; database name, user and password are fixed in this function.
 *
 * @param {function(Object)} callback - receives the connection object.
 */
function connect(callback) {
  var settings = {
    'host': host,
    'database': 'sem4j',
    'user': 'sem4j',
    'password': 'sem4j'
  };
  callback(mysql.createConnection(settings));
}
/**
 * Records one load run as a row in the MySQL table `load_info`.
 *
 * The row holds, in order: an auto value (null), the current time string from
 * getTime(), both input file names, both base ids and both item counts.
 * A query error is logged but does not stop the result dump or the
 * connection shutdown.
 *
 * @param {string} file_nodes - node input file name.
 * @param {string} file_relations - relationship input file name.
 * @param {number} nid_base - base node id used for this load.
 * @param {number} rid_base - base relationship id used for this load.
 * @param {number} nnum - number of nodes added.
 * @param {number} rnum - number of relationships added.
 */
function saveMetadata(file_nodes, file_relations, nid_base, rid_base, nnum, rnum) {
  var sql = "INSERT INTO load_info VALUES(null,?,?,?,?,?,?,?);";

  connect(function (conn) {
    var values = [getTime(), file_nodes, file_relations, nid_base, rid_base, nnum, rnum];

    conn.query(sql, values, function onInserted(err, results) {
      if (err) {
        console.log(err);
      }
      console.log('--- results ---');
      console.log(results);
      // Always close the connection, whether or not the insert succeeded.
      conn.end(function onClosed() {
        console.log('connection end');
      });
    });
  });
}
/**
 * Formats a time of day as "H:MM:SS " (local time).
 *
 * Minutes and seconds are zero-padded to two digits; hours are not padded and
 * the result ends with a single trailing space. Both quirks are kept on
 * purpose so that values written by existing callers do not change.
 *
 * @param {Date} [date] - moment to format; defaults to the current time.
 * @returns {string} e.g. "9:05:03 " or "18:27:26 ".
 */
function getTime(date) {
  var currentTime = date instanceof Date ? date : new Date();

  // Left-pads a value below 10 with a single "0".
  function pad2(value) {
    return (value < 10 ? "0" : "") + value;
  }

  var hours = currentTime.getHours();
  var minutes = pad2(currentTime.getMinutes());
  var seconds = pad2(currentTime.getSeconds());

  return hours + ":" + minutes + ":" + seconds + " ";
}
| 86 | |
/**
 * Logs the time elapsed since the module-level `start` timestamp, then resets
 * `start` to now.
 *
 * Output format: "<seconds> s, <milliseconds> ms - <note>", where the
 * millisecond part is the sub-second remainder with three decimal places.
 *
 * @param {string} note - label appended to the log line.
 */
function elapsed_time(note) {
  var precision = 3; // 3 decimal places
  // Take a single measurement so that the seconds and the sub-second part
  // belong to the same instant (two separate calls could straddle a second).
  var diff = process.hrtime(start); // [seconds, nanoseconds]
  var elapsed = diff[1] / 1000000; // divide by a million to get nano to milli
  console.log(diff[0] + " s, " + elapsed.toFixed(precision) + " ms - " + note); // print message + time
  start = process.hrtime(); // reset the timer
}
/**
 * Determines the ids Neo4j will assign next by creating a throw-away node and
 * a throw-away self-relationship, reading their ids, and deleting both again.
 *
 * The script relies on ids not being reused, so the ids of the probes are
 * taken as the base ids for the upcoming load.
 *
 * @param {function(number, number)} callback - called with
 *        (nid_base, rid_base) after both probes have been deleted.
 */
function getBaseId(callback) {
  // Reads the numeric id from an entity's `self` URI
  // (path segment 6 when split on '/', e.g. http://host:port/db/data/node/<id>).
  function idOf(entity) {
    return Number(entity.self.split('/')[6]);
  }

  // Deletes one entity through the batch endpoint, because the driver does
  // not define a DELETE method of its own.
  function deleteViaBatch(target, next) {
    var job = [{'method':'DELETE', 'to':target, 'body':'', 'id':0}];
    neo4jClient.post('batch/', job, function () {
      next();
    });
  }

  // Probe node.
  neo4jClient.post('node', '', function (probeNode) {
    var nid_base = idOf(probeNode);

    // Probe relationship from the probe node to itself.
    var probeRelation = { 'to' : uri_base + 'node/' + nid_base, 'type':'test', 'data':{} };
    neo4jClient.post('node/' + nid_base + '/relationships/', probeRelation, function (createdRelation) {
      var rid_base = idOf(createdRelation);

      // The relationship has to go first, then the node it is attached to.
      deleteViaBatch('relationship/' + rid_base, function () {
        deleteViaBatch('node/' + nid_base, function () {
          callback(nid_base, rid_base);
        });
      });
    });
  });
}
/**
 * Creates one empty node per line of `file`.
 *
 * Requests are issued as lines are read and complete in any order.
 * `callback` is invoked exactly once, after the whole file has been read AND
 * every request has completed. This also holds for an empty file (count 0)
 * and when requests complete before the end of the file is reached.
 *
 * @param {string} file - path of the node file (one node per line).
 * @param {number} nid_base - base node id (not used here; kept for a signature
 *        parallel to addNodeProperties).
 * @param {function(number)} callback - receives the number of lines/nodes.
 */
function addNodes(file, nid_base, callback){
  var rs = fs.createReadStream(file);
  // The empty `output` object is only there to satisfy readline; nothing is written.
  var rl = readline.createInterface({'input': rs, 'output': {}});
  var cnt_added = 0;     // requests completed
  var cnt_line = 0;      // lines read (= requests issued)
  var input_done = false;
  var finished = false;

  // Fires the completion callback once both conditions are met.
  function finishIfDone(){
    if(finished || !input_done || cnt_added !== cnt_line){
      return;
    }
    finished = true;
    console.log('ADDING EMPTY NODES FINISHED:', cnt_added);
    callback(cnt_line);
  }

  rl.on('line', function(){
    cnt_line += 1;
    addNode(function(){
      cnt_added += 1;
      if(cnt_added % 100 === 0){
        if(debug) console.log('ADDING EMPTY NODES STATUS:', cnt_added);
      }
      finishIfDone();
    });
  }).on('close', function(){
    // 'close' is emitted once the input has been fully consumed, so the
    // total line count is final from here on.
    input_done = true;
    console.log('ADDING EMPTY NODES START:', cnt_line);
    finishIfDone();
  });
  rl.resume();
}
/**
 * Creates a single empty node and signals completion.
 *
 * @param {function()} callback - invoked without arguments once the create
 *        request has come back; the response object is not forwarded.
 */
function addNode(callback) {
  var onCreated = function () {
    callback();
  };
  neo4jClient.post('node', '', onCreated);
}
/**
 * Attaches properties to the previously created nodes, one node per line of
 * `file`.
 *
 * Each line is tab-separated: column 0 is a row number that is added to
 * `nid_base` to obtain the node id; columns 1-3 are passed on to
 * addNodeProperty as type, name and properties.
 *
 * `callback` is invoked exactly once, after the whole file has been read AND
 * every update has completed. This also holds for an empty file (count 0)
 * and when updates complete before the end of the file is reached.
 *
 * @param {string} file - path of the node file.
 * @param {number} nid_base - offset added to each row number to get the node id.
 * @param {function(number)} callback - receives the number of lines processed.
 */
function addNodeProperties(file, nid_base, callback){
  var rs = fs.createReadStream(file);
  // The empty `output` object is only there to satisfy readline; nothing is written.
  var rl = readline.createInterface({'input': rs, 'output': {}});
  var cnt_added = 0;     // updates completed
  var cnt_line = 0;      // lines read (= updates issued)
  var input_done = false;
  var finished = false;

  // Fires the completion callback once both conditions are met.
  function finishIfDone(){
    if(finished || !input_done || cnt_added !== cnt_line){
      return;
    }
    finished = true;
    console.log('ADDING NODE PROPERTIES FINISHED:', cnt_added);
    callback(cnt_line);
  }

  rl.on('line', function(line){
    cnt_line += 1;
    var fields = line.split("\t");
    addNodeProperty(Number(fields[0]) + nid_base, fields[1], fields[2], fields[3], namespace, file, function(){
      cnt_added += 1;
      if(cnt_added % 100 === 0){
        if(debug) console.log('ADDING NODE PROPERTIES STATUS:', cnt_added);
      }
      finishIfDone();
    });
  }).on('close', function(){
    // 'close' is emitted once the input has been fully consumed, so the
    // total line count is final from here on.
    input_done = true;
    console.log('ADDING NODE PROPERTIES START:', cnt_line);
    finishIfDone();
  });
  rl.resume();
}
/**
 * Sets the properties of one existing node and adds the node to the index
 * named after `namespace` under key 'name'.
 *
 * Both operations are sent as a single batch request, because the driver does
 * not define a PUT method of its own.
 *
 * @param {number} node_id - id of the node to update.
 * @param {string} type - stored as property 'type'.
 * @param {string} name - stored as property 'name' and used as the index value.
 * @param {string} [properties] - JSON object text with additional properties;
 *        a missing or empty value is treated as "no additional properties".
 * @param {string} namespace - stored as property 'namespace'; also the index name.
 * @param {string} filename - stored as property 'filename'.
 * @param {function()} callback - invoked without arguments when the batch
 *        request has come back.
 * @throws {SyntaxError} if `properties` is non-empty but not valid JSON.
 */
function addNodeProperty(node_id, type, name, properties, namespace, filename, callback){
  // A line without a properties column must not abort the whole load.
  var body = (properties ? JSON.parse(properties) : null) || {};
  // The fixed fields below override same-named keys from the input.
  body['name'] = name;
  body['type'] = type;
  body['rowid'] = node_id;
  body['namespace'] = namespace;
  body['filename'] = filename;
  var uri_node = uri_base + 'node/' + node_id;

  // USE BATCH BECAUSE PUT METHOD IS NOT DEFINED IN THE DRIVER
  var data = [
    {'method':'PUT', 'to':'node/' + node_id + '/properties', 'body':body, 'id':0},
    {'method':'POST', 'to':'index/node/' + esc(namespace), 'body':{uri:uri_node, key:'name', value:name}, 'id':1}
  ];

  neo4jClient.post('batch/', data, function(){
    callback();
  });
}
/**
 * Creates one relationship per line of `file`.
 *
 * Each line is tab-separated: columns 0 and 1 are the row numbers of the
 * source and target node (each is offset by `nid_base` to obtain the node
 * id); columns 2 and 3 are passed on to addRelation as type and properties.
 *
 * `callback` is invoked exactly once, after the whole file has been read AND
 * every request has completed. This also holds for an empty file (count 0)
 * and when requests complete before the end of the file is reached.
 *
 * @param {string} file - path of the relationship file.
 * @param {number} nid_base - offset added to each row number to get the node id.
 * @param {function(number)} callback - receives the number of relationships added.
 */
function addRelations(file, nid_base, callback){
  var rs = fs.createReadStream(file);
  // The empty `output` object is only there to satisfy readline; nothing is written.
  var rl = readline.createInterface({'input': rs, 'output': {}});
  var cnt_added = 0;     // requests completed
  var cnt_line = 0;      // lines read (= requests issued)
  var input_done = false;
  var finished = false;

  // Fires the completion callback once both conditions are met.
  function finishIfDone(){
    if(finished || !input_done || cnt_added !== cnt_line){
      return;
    }
    finished = true;
    console.log('ADDING RELATIONS FINISHED:', cnt_added);
    callback(cnt_added);
  }

  rl.on('line', function(line){
    cnt_line += 1;
    var fields = line.split("\t");
    // NOTE(review): the namespace is the literal 'sem4j.org' here, not the
    // command-line namespace used for nodes - confirm this is intended.
    addRelation(Number(fields[0]) + nid_base, Number(fields[1]) + nid_base, fields[2], fields[3], 'sem4j.org', file, function(){
      cnt_added += 1;
      if(cnt_added % 100 === 0){
        if(debug) console.log('ADDING RELATIONS STATUS:', cnt_added);
      }
      finishIfDone();
    });
  }).on('close', function(){
    // 'close' is emitted once the input has been fully consumed, so the
    // total line count is final from here on.
    input_done = true;
    console.log('ADDING RELATIONS START:', cnt_line);
    finishIfDone();
  });
  rl.resume();
}
/**
 * Creates one relationship from node `source` to node `target`.
 *
 * @param {number} source - id of the start node.
 * @param {number} target - id of the end node.
 * @param {string} type - relationship type.
 * @param {string} [properties] - JSON object text with the relationship's
 *        properties; a missing or empty value is treated as "no properties".
 * @param {string} namespace - currently not used by this function.
 * @param {string} filename - currently not used by this function.
 * @param {function()} callback - invoked without arguments when the create
 *        request has come back.
 * @throws {SyntaxError} if `properties` is non-empty but not valid JSON.
 */
function addRelation(source, target, type, properties, namespace, filename, callback){
  // A line without a properties column must not abort the whole load.
  var body = (properties ? JSON.parse(properties) : null) || {};
  var relation = { 'to' : uri_base + 'node/' + target, 'type': type, 'data' : body };
  neo4jClient.post('node/' + source + '/relationships/', relation, function() {
    callback();
  });
}
| 249 | |
| 250 /* | |
| 251 | |
| 252 function getNodeIDBase(callback){ | |
| 253 var data = { | |
| 254 "query":"start n = node(*) return max(ID(n))", | |
| 255 "params":{} | |
| 256 }; | |
| 257 neo4jClient.post('cypher', data, function(obj){ | |
| 258 if(obj.data[0][0]){ | |
| 259 var nid_base = obj.data[0][0]; | |
| 260 }else{ | |
| 261 var nid_base = 0; | |
| 262 } | |
| 263 callback(nid_base); | |
| 264 }); | |
| 265 } | |
| 266 | |
| 267 function addNodeOld(node_id, type, name, properties, callback){ | |
| 268 | |
| 269 index = 'idx_name'; | |
| 270 | |
| 271 //name = 'Neo'; | |
| 272 //type = 'person'; | |
| 273 var input = JSON.parse(properties); | |
| 274 //console.log(input); | |
| 275 | |
| 276 //input = {'name':name, 'type':type, 'node_id':node_id}; | |
| 277 input['name'] = name; | |
| 278 input['type'] = type; | |
| 279 input['node_id'] = node_id; | |
| 280 console.log(input); | |
| 281 | |
| 282 neo4jClient.post('node', input, function(obj){ | |
| 283 var data = { uri:obj.self, key:'name', value:name }; | |
| 284 neo4jClient.post(['index/node', esc(index)], data, function(){ | |
| 285 console.log('Added:', obj.self, node_id, name); | |
| 286 callback(); | |
| 287 }); | |
| 288 }); | |
| 289 }; | |
| 290 | |
| 291 */ |
