
I have 9577 unique records in a csv file. Why does my code using insertMany() skip some of the records and insert the same records multiple times?

This code inserts about 9800 documents, and instead of inserting all of the records it only inserts some of them. Any idea why it does not insert the 9577 unique records, and why some of them end up duplicated? Below I also paste the rest of the code so you get the full picture.

function bulkImportToMongo(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  let batchCount = Math.ceil(arrayToImport.length / 100);
  console.log(arrayToImport.length);
  let ops = [];

  for (let i = 0; i < batchCount; i++) {
    // console.log(i);
    let batch = arrayToImport.slice(i, i + 100);
    console.log(batch.length);
    ops.push(Model.insertMany(batch));
  }
  return ops;

  return Promise.all(ops).then(results => {
    // results is an array of results for each batch
    console.log("results: ", results);
  });
}

This is how I parse the csv file:

const Promise = require("bluebird");
const csv = require("fast-csv");
const path = require("path");
const fs = Promise.promisifyAll(require("fs"));

const promiseCSV = Promise.method((filePath, options) => {
  return new Promise((resolve, reject) => {
    var records = [];
    csv
      .fromPath(filePath, options)
      .on("data", record => {
        records.push(record);
      })
      .on("end", () => {
        // console.log(records);
        resolve(records);
      });
  });
});

And here is the script that ties it all together:

const path = require("path");
const promiseCSV = require("./helpers/ImportCSVFiles");
const {
  connectToMongo,
  bulkImportToMongo
} = require("./helpers/mongoOperations");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
  delimiter: ";",
  noheader: true,
  headers: [
    "facility",
    "partNumber",
    "partName",
    "partDescription",
    "netWeight",
    "customsTariff"
  ]
};

connectToMongo("autoMDM");
promiseCSV(filePath, options).then(records => {
  bulkImportToMongo(records, "parts.js");
});

Answers


It looks like your problem is simply the i++. Perhaps you meant i += 100?

for (let i = 0; i < batchCount; i+=100 /* NOT i++ */) { 
    //... 
} 

No, that did not solve the problem... batchCount is the number of batches the full array is divided into. I reduced the number of items in the csv file to 30 and lowered the batch limit to 5, which gives a batchCount of 6. I also did a console.log of arrayToImport: its length was 30 and all of its objects were unique. But when I checked MongoDB, about 9 different items had been duplicated several times, while the total document count was still 30... –
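
For reference, here is a minimal standalone sketch (hypothetical stand-in data, not the asker's csv) of what the original loop actually does: because the slice starts at i rather than at i * batchSize, consecutive batches overlap, which matches the duplicates described in the comment above.

// Minimal sketch of the overlapping-slice behaviour (hypothetical data).
const records = Array.from({ length: 30 }, (_, n) => n); // 30 fake "csv rows"
const batchSize = 5;
const batchCount = Math.ceil(records.length / batchSize); // 6

const batches = [];
for (let i = 0; i < batchCount; i++) {
  // the window starts at i, not i * batchSize, so batches overlap
  batches.push(records.slice(i, i + batchSize));
}

console.log(batches);
// [ [ 0, 1, 2, 3, 4 ], [ 1, 2, 3, 4, 5 ], [ 2, 3, 4, 5, 6 ],
//   [ 3, 4, 5, 6, 7 ], [ 4, 5, 6, 7, 8 ], [ 5, 6, 7, 8, 9 ] ]
// 6 batches x 5 = 30 inserts, but only records 0-9 are ever touched and
// records 1-8 are inserted more than once.

With the full file the same pattern explains both symptoms: the 96 overlapping windows of 100 only ever cover the first couple of hundred rows, repeated many times.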


I solved it.

I hope this helps others... :-)

I had two errors: the first in the function promiseCSV (renamed to parseCSV), and the second was bad logic in bulkImportToMongo.

The complete solution:

I parsed and imported 602,198 objects, and here is how long it took using node --max_old_space_size=8000 on a MacBook Pro with 8 GB of RAM.

Console

➜ database git:(master) ✗ node --max_old_space_size=8000 partImport.js 
Connected to db! 
Time to parse file: : 5209.325ms 
Disconnected from db! 
Time to import parsed objects to db: : 153606.545ms 
➜ database git:(master) ✗ 

parseCSV.js

const csv = require("fast-csv");

function promiseCSV(filePath, options) {
  return new Promise((resolve, reject) => {
    console.time("Time to parse file");
    var records = [];
    csv
      .fromPath(filePath, options)
      .on("data", record => {
        records.push(record);
      })
      .on("end", () => {
        console.timeEnd("Time to parse file");
        resolve(records);
      });
  });
}

module.exports = promiseCSV;
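
One possible refinement, not part of the posted fix and only an assumption on my part: promiseCSV never calls reject, so a read or parse failure would leave the promise pending forever. Assuming the fast-csv stream surfaces failures as a standard Node.js "error" event, it could be forwarded to reject, for example:

// Sketch: same parser, with stream errors forwarded to reject (assumption:
// failures are emitted as a standard "error" event).
const csv = require("fast-csv");

function promiseCSV(filePath, options) {
  return new Promise((resolve, reject) => {
    const records = [];
    csv
      .fromPath(filePath, options)
      .on("error", reject) // assumed error event
      .on("data", record => records.push(record))
      .on("end", () => resolve(records));
  });
}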

mongodb.js

const mongoose = require("mongoose");
mongoose.Promise = global.Promise;

function connectToMongo(databaseName) {
  mongoose.connect(`mongodb://localhost:27017/${databaseName}`, {
    keepAlive: true,
    reconnectTries: Number.MAX_VALUE,
    useMongoClient: true
  });
  console.log("Connected to db!");
}

function disconnectFromMongo() {
  mongoose.disconnect();
  console.log("Disconnected from db!");
}

function bulkImportToMongo(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  const batchSize = 100;
  let batchCount = Math.ceil(arrayToImport.length / batchSize);
  let recordsLeft = arrayToImport.length;
  let ops = [];
  let counter = 0;
  for (let i = 0; i < batchCount; i++) {
    let batch = arrayToImport.slice(counter, counter + batchSize);
    counter += batchSize;
    ops.push(Model.insertMany(batch));
  }
  return Promise.all(ops);
}

module.exports.bulkImportToMongo = bulkImportToMongo;
module.exports.connectToMongo = connectToMongo;
module.exports.disconnectFromMongo = disconnectFromMongo;
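
A note on the design choice in bulkImportToMongo: Promise.all starts every insertMany call before any of them has resolved, so for the 602,198-record run all batches are in flight at once. If you want fewer operations in flight at a time, a sequential variant is possible; this is my own sketch under the same assumptions as the code above, not part of the posted solution:

// Sketch: insert one batch at a time instead of firing them all concurrently
// (assumes Model.insertMany returns a promise, as in the code above).
async function bulkImportSequentially(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  const batchSize = 100;
  const results = [];
  for (let i = 0; i < arrayToImport.length; i += batchSize) {
    const batch = arrayToImport.slice(i, i + batchSize);
    // wait for the current batch before starting the next one
    results.push(await Model.insertMany(batch));
  }
  return results;
}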

partImport.js

const path = require("path");
const parseCSV = require("./helpers/parseCSV");
const {
  connectToMongo,
  disconnectFromMongo,
  bulkImportToMongo
} = require("./helpers/mongodb");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
  delimiter: ";",
  noheader: true,
  headers: [
    "facility",
    "partNumber",
    "partName",
    "partDescription",
    "netWeight",
    "customsTariff"
  ]
};

connectToMongo("autoMDM");
parseCSV(filePath, options)
  .then(records => {
    console.time("Time to import parsed objects to db");
    return bulkImportToMongo(records, "parts.js");
  })
  /* .then(result =>
    console.log("Total batches inserted: ", result, result.length)
  ) */
  .then(() => {
    disconnectFromMongo();
    console.timeEnd("Time to import parsed objects to db");
  })
  .catch(error => console.log(error));