Mercurial > repos > ryotas > cypher_tools
diff neo4j_loader.js @ 0:aa7a5cc0f59b default tip
commit
| author | ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp> |
|---|---|
| date | Tue, 30 Dec 2014 18:27:26 +0900 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/neo4j_loader.js Tue Dec 30 18:27:26 2014 +0900 @@ -0,0 +1,291 @@ + +// REQUIRE +var esc = require('querystring').escape, + neo4j = require('./neo4j_driver.js'), + fs = require('fs'), + readline = require('readline'), + mysql = require('mysql'); + +// ARGUMENTS +var file_nodes = process.argv[2], + file_relations = process.argv[3], + namespace = process.argv[4], + host = process.argv[5], + port = process.argv[6], + debug = Number(process.argv[7]); + +// VARIABLES +var neo4jClient = neo4j.createClient({'host':host, 'port':port}), + uri_base = 'http://' + host + ':' + port + '/db/data/', + start = process.hrtime(); + +// MAIN +getBaseId(function(nid_base, rid_base){ + console.log('BASE NODE ID:', nid_base); + console.log('BASE RELATIONSHIP ID:', rid_base); + // ADD EMPTY NODES FIRST, THEN ADD PROPERTIES TO THE NODES WITH APPROPRIATE ID. + // OTHERWISE, CANNOT GET CORRECT ID BECAUSE ADD NODE REQUESTS FINISHES IN NO ORDER. + addNodes(file_nodes, nid_base, function(nnum){ + if(debug) elapsed_time('elapsed'); + addNodeProperties(file_nodes, nid_base, function(nnum2){ + if(debug) elapsed_time('elapsed'); + addRelations(file_relations, nid_base, function(rnum){ + if(debug) elapsed_time("elapsed"); + saveMetadata(file_nodes, file_relations, nid_base, rid_base, nnum, rnum); + + }); + }); + }); +}); + +// MYSQL CONNECTION +function connect(callback){ + var conn = mysql.createConnection({ + 'host': host, + 'database':'sem4j', + 'user':'sem4j', + 'password':'sem4j' + }); + callback(conn); +} +function saveMetadata(file_nodes, file_relations, nid_base, rid_base, nnum, rnum){ + connect(function(conn){ + //console.log(conn); + conn.query( + "INSERT INTO load_info VALUES(null,?,?,?,?,?,?,?);", + [getTime(), file_nodes, file_relations, nid_base, rid_base, nnum, rnum], + function (err, results) { + if (err) { + console.log(err); + } + console.log('--- results ---'); + console.log(results); + conn.end(function(){ + console.log('connection end'); + }) + }); + }); +} +function getTime() { + var str = ""; + + var currentTime = new Date() + var hours = currentTime.getHours() + var minutes = currentTime.getMinutes() + var seconds = currentTime.getSeconds() + + if (minutes < 10) { + minutes = "0" + minutes + } + if (seconds < 10) { + seconds = "0" + seconds + } + str += hours + ":" + minutes + ":" + seconds + " "; + return str; +} + +function elapsed_time(note){ +//var elapsed_time = function(note){ + var precision = 3; // 3 decimal places + var elapsed = process.hrtime(start)[1] / 1000000; // divide by a million to get nano to milli + console.log(process.hrtime(start)[0] + " s, " + elapsed.toFixed(precision) + " ms - " + note); // print message + time + start = process.hrtime(); // reset the timer +} +function getBaseId(callback){ + // CREATE TEST NODE + neo4jClient.post('node', '', function(obj){ + // NODE ID IS NOT REUSED, SO THIS IS THE BASE ID + var nid_base = Number(obj.self.split('/')[6]); + // CREATE TEST RELATIONSHIP + var data = { 'to' : uri_base + 'node/' + nid_base, 'type':'test', 'data':{} }; + neo4jClient.post('node/' + nid_base + '/relationships/', data, function(obj) { + var rid_base = Number(obj.self.split('/')[6]); + // DELETE TEST RELATIONSHIP FIRST + // USE BATCH BECAUSE DELETE METHOD IS NOT DEFINED IN THE DRIVER + var data = [{'method':'DELETE', 'to':'relationship/' + rid_base, 'body':'', 'id':0}]; + neo4jClient.post('batch/', data, function(){ + // DELETE TEST NODE + var data = [{'method':'DELETE', 'to':'node/' + nid_base, 'body':'', 'id':0}]; + neo4jClient.post('batch/', data, function(){ + callback(nid_base, rid_base); + }) + }); + }); + }); +} +function addNodes(file, nid_base, callback){ + var rs = fs.ReadStream(file); + var rl = readline.createInterface({'input': rs, 'output': {}}); + var cnt_added = 0; + var cnt_line = 0; + var num_line = 0; + rl.on('line', function(line){ + cnt_line += 1; + line = line.split("\t"); + addNode(function(){ + cnt_added += 1; + if(cnt_added % 100 == 0){ + if(debug) console.log('ADDING EMPTY NODES STATUS:', cnt_added); + } + if(cnt_added == num_line){ + console.log('ADDING EMPTY NODES FINISHED:', cnt_added); + callback(num_line); + } + }); + }).on('pause', function(){ + num_line = cnt_line; + console.log('ADDING EMPTY NODES START:', num_line); + }); + rl.resume(); +} +function addNode(callback){ + neo4jClient.post('node', '', function(obj){ + callback(); + }); +}; +function addNodeProperties(file, nid_base, callback){ + var rs = fs.ReadStream(file); + var rl = readline.createInterface({'input': rs, 'output': {}}); + var cnt_added = 0; + var cnt_line = 0; + var num_line = 0; + rl.on('line', function(line){ + cnt_line += 1; + line = line.split("\t"); + addNodeProperty(Number(line[0]) + nid_base, line[1], line[2], line[3], namespace, file, function(){ + cnt_added += 1; + if(cnt_added % 100 == 0){ + if(debug) console.log('ADDING NODE PROPERTIES STATUS:', cnt_added); + } + if(cnt_added == num_line){ + console.log('ADDING NODE PROPERTIES FINISHED:', cnt_added); + callback(num_line); + } + }); + }).on('pause', function(){ + num_line = cnt_line; + console.log('ADDING NODE PROPERTIES START:', num_line); + }); + rl.resume(); +} +function addNodeProperty(node_id, type, name, properties, namespace, filename, callback){ + + var body = JSON.parse(properties); + body['name'] = name; + body['type'] = type; + body['rowid'] = node_id; + body['namespace'] = namespace; + body['filename'] = filename; + var uri_node = uri_base + 'node/' + node_id; + + // USE BATCH BECAUSE PUT METHOD IS NOT DEFINED IN THE DRIVER + var data = [ + {'method':'PUT', 'to':'node/' + node_id + '/properties', 'body':body, 'id':0}, + {'method':'POST', 'to':'index/node/' + esc(namespace), 'body':{uri:uri_node, key:'name', value:name}, 'id':1}]; + //{'method':'POST', 'to':'index/node/' + esc(namespace), 'body':{uri:uri_node, key:'namespace', value:namespace}, 'id':2}, + //{'method':'POST', 'to':'index/node/' + esc(namespace), 'body':{uri:uri_node, key:'filename', value:esc(filename)}, 'id':3}]; + + neo4jClient.post('batch/', data, function(obj){ + //console.log('DEBUG: Added:', obj, node_id, name); + callback(); + }); + + /* + // USE BATCH BECAUSE PUT METHOD IS NOT DEFINED IN THE DRIVER + data = [{'method':'PUT', 'to':'node/' + node_id + '/properties', 'body':body, 'id':0}]; + neo4jClient.post('batch/', data, function(){ + uri_node = uri_base + 'node/' + node_id; + var data = [{'method':'POST', 'to':'index/node/' + esc(index), 'body':{uri:uri_node, key:'name', value:name}, 'id':0}, + {'method':'POST', 'to':'index/node/' + esc(index), 'body':{uri:uri_node, key:'namespace', value:namespace}, 'id':1}, + {'method':'POST', 'to':'index/node/' + esc(index), 'body':{uri:uri_node, key:'filename', value:esc(filename)}, 'id':2}]; + neo4jClient.post('batch/', data, function(obj){ + console.log('DEBUG: Added:', obj, node_id, name); + callback(); + }); + }); + */ +}; +function addRelations(file, nid_base, callback){ + var rs = fs.ReadStream(file); + var rl = readline.createInterface({'input': rs, 'output': {}}); + var cnt_added = 0, cnt_line = 0, num_line = 0; + rl.on('line', function(line){ + cnt_line += 1; + line = line.split("\t"); + addRelation(Number(line[0]) + nid_base, Number(line[1]) + nid_base, line[2], line[3], 'sem4j.org', file, function(){ + cnt_added += 1; + if(cnt_added % 100 == 0){ + if(debug) console.log('ADDING RELATIONS STATUS:', cnt_added); + } + if(cnt_added == num_line){ + console.log('ADDING RELATIONS FINISHED:', cnt_added); + callback(cnt_added); + } + }); + }).on('pause', function(){ + num_line = cnt_line; + console.log('ADDING RELATIONS START:', num_line); + }); + rl.resume(); +} +function addRelation(source, target, type, properties, namespace, filename, callback){ + var index = 'users'; + var body = JSON.parse(properties); + var relation = { 'to' : uri_base + 'node/' + target, 'type': type, 'data' : body }; + neo4jClient.post('node/' + source + '/relationships/', relation, function() { + callback(); + }); + /* + var data = [ + {'method':'POST', 'to':'node/' + source + '/relationships/', 'body':relation, 'id':0}, + {'method':'POST', 'to':'index/relationship/' + esc(index), 'body':{'uri':'{0}', key:'namespace', value:namespace}, 'id':1}, + {'method':'POST', 'to':'index/relationship/' + esc(index), 'body':{'uri':'{0}', key:'filename', value:esc(filename)}, 'id':2}]; + neo4jClient.post('batch/', data, function(obj){ + //console.log(obj); + callback(); + }); + */ +}; + +/* + +function getNodeIDBase(callback){ + var data = { + "query":"start n = node(*) return max(ID(n))", + "params":{} + }; + neo4jClient.post('cypher', data, function(obj){ + if(obj.data[0][0]){ + var nid_base = obj.data[0][0]; + }else{ + var nid_base = 0; + } + callback(nid_base); + }); +} + +function addNodeOld(node_id, type, name, properties, callback){ + + index = 'idx_name'; + + //name = 'Neo'; + //type = 'person'; + var input = JSON.parse(properties); + //console.log(input); + + //input = {'name':name, 'type':type, 'node_id':node_id}; + input['name'] = name; + input['type'] = type; + input['node_id'] = node_id; + console.log(input); + + neo4jClient.post('node', input, function(obj){ + var data = { uri:obj.self, key:'name', value:name }; + neo4jClient.post(['index/node', esc(index)], data, function(){ + console.log('Added:', obj.self, node_id, name); + callback(); + }); + }); +}; + +*/ \ No newline at end of file
