2016-06-15 60 views
0

我有400,000行输入,我需要将其分解。不幸的是,我不能让这个脚本退出,直到它完成了所有的事情。当然,它总是耗尽内存。我以为,在设置。对()的值“结束”功能(将是有益的,但我看不到这个值,一旦数据。对已经完成。如何在mongo db写入后退出nodejs脚本

'use strict'; 
var mongoose = require('mongoose'); 
var fs = require('fs'); 
var parse = require('csv-parse'); 
var Schema = mongoose.Schema; 
var done; 

mongoose.connect('mongodb://127.0.0.1:27017/auth'); 

var userSchema = new mongoose.Schema({ 
    username: { 
    type: String, 
    unique: true 
    }, 
    password: String, 
    email: { 
    type: String, 
    unique: true 
    }, 
    isActive: String, 
    roles: { 
    account: { 
     type: mongoose.Schema.Types.ObjectId, 
     ref: 'Account' 
    } 
    }, 
    timeCreated: { 
    type: Date, 
    default: Date.now 
    }, 
    search: [String] 
}); 

var accountSchema = new mongoose.Schema({ 
    user: { 
    id: { 
     type: mongoose.Schema.Types.ObjectId, 
     ref: 'User' 
    }, 
    name: { 
     type: String, 
     default: '' 
    } 
    }, 
    isVerified: { 
    type: String, 
    default: '' 
    }, 
    verificationToken: { 
    type: String, 
    default: '' 
    }, 
    name: { 
    first: { 
     type: String, 
     default: '' 
    }, 
    middle: { 
     type: String, 
     default: '' 
    }, 
    last: { 
     type: String, 
     default: '' 
    }, 
    full: { 
     type: String, 
     default: '' 
    } 
    }, 
    company: { 
    type: String, 
    default: '' 
    }, 
    phone: { 
    type: String, 
    default: '' 
    }, 
    zip: { 
    type: String, 
    default: '' 
    }, 
    memberid: { 
    type: String, 
    default: '' 
    }, 
    status: { 
    id: { 
     type: String, 
     ref: 'Status' 
    }, 
    name: { 
     type: String, 
     default: '' 
    }, 
    userCreated: { 
     id: { 
     type: mongoose.Schema.Types.ObjectId, 
     ref: 'User' 
     }, 
     name: { 
     type: String, 
     default: '' 
     }, 
     time: { 
     type: Date, 
     default: Date.now 
     } 
    } 
    }, 
    userCreated: { 
    id: { 
     type: mongoose.Schema.Types.ObjectId, 
     ref: 'User' 
    }, 
    name: { 
     type: String, 
     default: '' 
    }, 
    time: { 
     type: Date, 
     default: Date.now 
    } 
    }, 
    search: [String] 
}); 

var User = mongoose.model('User', userSchema); 
var Account = mongoose.model('Account', accountSchema); 

fs.createReadStream('./ipart') 
    .pipe(parse({ 
    delimiter: ',' 
    })) 
    .on("data-invalid", function(data) {}) 
    .on('data', function(csvrow) { 
    var u = { 
     isActive: 'yes', 
     username: csvrow[0], 
     email: csvrow[0], 
     search: [ 
     csvrow[1] + ' ' + csvrow[2], 
     csvrow[0], 
     ] 
    }; 

    User.create(u, function(err, createdUser) { 
     if (err) { 
     console.log(err); 
     return; 
     } 
     var user = createdUser; 
     var displayName = csvrow[1] + ' ' + csvrow[2] || ''; 
     var nameParts = displayName.split(' '); 
     var acct = { 
     isVerified: 'no', 
     'name.first': nameParts[0], 
     'name.last': nameParts[1] || '', 
     'name.full': displayName, 
     user: { 
      id: user._id, 
      name: user.username 
     }, 
     search: [ 
      nameParts[0], 
      nameParts[1] || '' 
     ] 
     }; 

     Account.create(acct, function(err, account) { 
     if (err) { 
      return workflow.emit('exception', err); 
     } 
     var fieldstoset = { 
      roles: { 
      account: account._id 
      } 
     }; 
     User.findByIdAndUpdate(account.user.id, fieldstoset, function(err, user) { 
      if (err) throw err; 
     }); 
     }); 
    }); 
    }) 
    .on('end', function() { 
    console.log('complete'); 
    }); 

回答

0

你真的需要使用批量插入,我发现这个代码的地方,并粘贴它为您

  var Potato = mongoose.model('Potato', PotatoSchema); 
     var potatoBag = [/* a humongous amount of potato objects */]; 
     Potato.collection.insert(potatoBag, onInsert); 
     function onInsert(err, docs) { 
      if (err) { 
      // TODO: handle error 
      } else { 
      console.info('%d potatoes were successfully stored.', docs.length); 
      } 
     } 
0

我建议你打破汇入CSV数据到这些步骤之后您的整个逻辑: 1.写其中进口的CSV到一个简单的脚本文件像这样的临时集合:

YourImportScript

#!/bin/bash 
mongoimport -d YourDBName -c YourTempCollectionName --drop --type csv --file pathToYourCSVFile.csv --headerline 

2.运行创建用户之前的脚本:

var exec = require('child_process').exec; 
function importCSV(callback) { 
    exec("./pathToYourImportScript/YourImportScript", function (error, stdout, stderr) { 
     console.log(stdout); 
     if (error !== null) 
      console.log('exec error: ' + error); 
     }); 

     callback() 
    } 

MongoImport将导入CSV很快。

  1. 从临时集合中获取文档并将它们插入到您的用户集合中。 您还可以使用异步模块来控制你的代码模式整齐地流:
async.series([ 
        function (callback) { 
         //CSV Import function 
        }, 
        function (callback) { 
     //User Manupulation function 
     }]); 

而且最好是把标题到您的CSV栏,你可以创建一个模型导入时来自临时集合的文档,并且通过列标题(如username:myCSVModel.username而不是username: csvrow[0])获取用户的属性会更容易。