2016-12-05 61 views
0

我正在用 JavaScript(Express)为个人用途构建一个网页抓取器(scraper)。问题:如何在循环中把偶数个对象两两成对地组成数组?

该脚本的目标是从外部来源抓取一些简单的文本数据,并最终将抓取到的数据输出为 JSON 对象。但是,由于对象的数量总是偶数,我想把它们两两成对地添加到数组中——这正是我需要各位指点的地方。

当前的输出只是一系列普通的 JSON 对象,对象的数量取决于抓取到的条目数(通常在 8 到 16 个之间):

{
    name: "John Doe",
    email: "[email protected]",
    status: "active"
},

{
    name: "Jane Doe",
    email: "[email protected]",
    status: "inactive"
},


{
    name: "Johnny Walker",
    email: "[email protected]",
    status: "active"
},

{
    name: "Jimmy Glenfiddich",
    email: "[email protected]",
    status: "active"
}

而预期的输出应该是这个样子:

{

"pair-number": 1,

"pair": [

    {
    name: "John Doe",
    email: "[email protected]",
    status: "active"
    },

    {
    name: "Jane Doe",
    email: "[email protected]",
    status: "inactive"
    },
]
},

{

"pair-number": 2,

"pair": [
    {
    name: "Johnny Walker",
    email: "[email protected]",
    status: "active"
    },

    {
    name: "Jimmy Glenfiddich",
    email: "[email protected]",
    status: "active"
    }
]
}

下面是我目前使用的 server.js:

var express = require('express');
var request = require('request');
var cheerio = require('cheerio');
var app  = express();

// Page to scrape.
var url = 'http://testurl.com';
// Scratch variables that hold the fields of the element currently being read.
var name, email, status;

/**
 * GET /scrape
 * Downloads `url`, visits every `.scrape-class` element and logs the
 * name / email / status scraped from each one.
 */
app.get('/scrape', function(req, res) {

    request(url, function (error, response, html) {

     if (!error && response.statusCode === 200) {

      var $ = cheerio.load(html);

      // Declared with `var` so it does not leak as an implicit global.
      var data = {"name": name, "email": email, "status": status };

      // .filter() is used here only to visit every matched element;
      // its return value is ignored.
      $('.scrape-class').filter(function() {

       var that = $(this);

       name = that.find('h5').text();
       email = that.find('.email').text();
       status = that.find('dl').children().first().text();

       data.name = name;
       data.email = email;
       data.status = status;

       console.log(data);
      });
     }
    });
});

app.listen(80, function() {
    console.log('Example app listening on port 80!');
});

回答

0

尝试使用取模运算符(%):它返回除法运算的余数,可以用来判断当前迭代是奇数次还是偶数次。

这是一个基本的概念:

var arr = [1,2,3,4,5,6];
var results = [];

// Walk the values once; even positions open a pair, odd positions close it.
arr.forEach(function (value, position) {
    var startsNewPair = position % 2 === 0; // true at positions 0, 2, 4, ...

    if (startsNewPair) {
        // Open a fresh pair that holds the current value
        results.push([value]);
        return;
    }

    // Positions 1, 3, 5, ...: complete the pair opened on the previous step
    results[results.length - 1].push(value);
});

// After the loop, results is [[1,2],[3,4],[5,6]]

在你的情况下,你需要在 filter 的匿名函数外部维护一个迭代计数器。

我修改了你的代码,并添加了一个新的 results 数组:

var express = require('express');
var request = require('request');
var cheerio = require('cheerio');
var app  = express();

// Page to scrape.
var url = 'http://testurl.com';

/**
 * GET /scrape
 * Downloads `url`, reads name / email / status from every `.scrape-class`
 * element and groups the records two by two:
 *   [{ "pair-number": 1, "pair": [first, second] }, ...]
 * The grouped result is logged and sent back to the client as JSON.
 * If the page cannot be fetched, the client receives a 502 with an error body.
 */
app.get('/scrape', function(req, res) {

    request(url, function (error, response, html) {

        if (error || response.statusCode !== 200) {
            // Answer the client instead of leaving the request hanging.
            res.status(502).json({ error: 'Failed to fetch ' + url });
            return;
        }

        var $ = cheerio.load(html);

        var results = []; //store final results here

        var ii = 0; //iteration counter, kept outside the callback

        // .filter() is used here only to visit every matched element;
        // its return value is ignored.
        $('.scrape-class').filter(function() {

            var that = $(this);

            // Build a NEW object for every element. Reusing one shared object
            // would make every slot of every pair reference the same record,
            // so all entries would show the values of the last element.
            var data = {
                "name": that.find('h5').text(),
                "email": that.find('.email').text(),
                "status": that.find('dl').children().first().text()
            };

            console.log(data);

            if (ii % 2 === 0) { //0 % 2 = 0; 1 % 2 = 1; 2 % 2 = 0; 3 % 2 = 1;
                //runs on iterations: 0, 2, 4, etc -> open a new pair
                results.push({"pair-number": results.length + 1, "pair": [data]});
            } else {
                //runs on iterations: 1, 3, 5, etc -> complete the last pair
                results[results.length - 1]["pair"].push(data);
            }

            ii++;
        });

        console.log(results); //print collected data after crawl

        res.json(results);
    });
});

app.listen(80, function() {
    console.log('Example app listening on port 80!');
});