2017-06-16 86 views
6

我正在运行一个nightmare.js脚本,我试图抓取页面上多个元素的截图。Nightmare.js截图缓冲区长度0

第一个元素被捕获得很好,但是在折叠下面的每个其他元素都是以零长度捕获的。我正在努力调试这个问题。任何帮助将不胜感激。

基本上,此脚本遍历页面并选择全部页面上与选择器匹配的元素。然后,使用async它收集响应并返回对象的缓冲区。问题是折叠下面的元素没有被截图(缓冲区长度最终为零)。我试图wait()并滚动到元素,但我还没有取得任何成功。

import * as Nightmare from 'nightmare' 
import * as vo from 'vo' 
import * as async from 'async' 
import * as fs from 'fs' 

const urls:String[] = [ 
    'https://yahoo.com/' 
] 


Nightmare.action('snap', function(selector:String, done:Function) { 
    const self = this; 

    this.evaluate_now(function (selector) { 
    return Array.from(document.querySelectorAll(selector)) 
    .map((ele:Element) => { 
     if (ele) { 
     const rect = ele.getBoundingClientRect() 
     const r:Function = Math.round 
     return { 
      x: r(rect.left), 
      y: r(rect.top), 
      width: r(rect.width), 
      height: r(rect.height) 
     } 
     } 
    }) 
    }, function(err, clips) { 
    if (err) return done(err) 
    if (!clips) return done(new Error(`Selector not found`)) 
    let snaps = [] 
    const snap = (clip, cb) => { 
     self 
     .scrollTo(clip.y - clip.height, clip.x) 
     .screenshot(clip, cb) 
     .run() 
    } 
    async.mapSeries(clips.reverse(), snap, (err, res) => { 
     done(err, res) 
    }) 
    }, selector) 
}) 

const scrape = (url) => { 
    const nightmare = Nightmare({ 
    show: true 
    }); 
    nightmare 
    .goto(url) 
    .snap('.navbar') 
    .end() 
    .then((buffers:Buffer[]) => { 
     buffers.forEach((data, index) => { 
     fs.writeFileSync(`images/navbar-${index}.png`, data) 
     }) 
    }) 
} 

urls.forEach(scrape) 
+0

我想重现这个问题,但我发现yahoo.com网页上没有这样的元素“.navbar”。你能澄清一下吗?当然,Evgeny, –

+0

。试用一个带有bootstrap的页面,例如:https://getbootstrap.com/ – auser

+0

您是否可以复制它,@EvgenySorokin? – auser

回答

1

从不同的流动尝试它,给了更好的结果: 在方法上的区别是:第一滚动到元素然后取其界限,然后进行截图。

const Nightmare = require('nightmare'); 
const fs = require('fs'); 
const nightmare = Nightmare({ 
    show: true, 
    openDevTools: false, 
    gotoTimeout: 45000 
}); 

nightmare.goto('https://www.google.co.in/?#safe=off&q=nightmare') 
    .wait(1000) 
    .evaluate(getElements, 'div.g') 
    .then(() => { 
    console.log("Calling screenshots: "); 
    getAllScreenshots(0); 
    }) 
    .catch(function(err) { 
    console.log(err); 
    }); 

function getAllScreenshots(index) { 
    console.log("Called with index: ", index) 
    nightmare.evaluate(function(index) { 
     const r = Math.round; 
     if(index >= window.__nightmare.output.length) { 
     return false; 
     } 
     var element = window.__nightmare.output[index]; 
     console.log(index, element.innerHTML); 
     element.scrollIntoView(false); 
     var bound = element.getBoundingClientRect(); 
     return { 
     x: r(bound.left)-10, 
     y: r(bound.top)-10, 
     width: r(element.clientWidth)+40, 
     height: r(element.clientHeight)+10 
     } 
    }, index) 
    .then(function(bound) { 
     if(!bound) { 
     return; 
     } 
     console.log("Taking screenshot: ", bound); 
     nightmare.wait(500).screenshot(__dirname + '/images/navbar' + index + '.png', bound) 
     .then(function() { 
      console.log("Calling Next of: ", index); 
      getAllScreenshots(index + 1); 
     }).catch(function(err) { 
      console.log(err); 
     }) 
    }) 
    .catch(function(err) { 
     console.log(err); 
    }); 
} 

function getElements(selector) { 
    var elements = document.querySelectorAll(selector); 
    window.__nightmare.output = elements; 
    console.log(elements.length); 
} 
+0

这太好了。谢谢! – auser

4

实际上,screenshot()函数从可见屏幕中获取坐标。
例如,如果任何元素的(x,y)是(10,1000)并且窗口大小是(800,600),则可以滚动(900:element.y,0),然后在(element.y- scroll.y = 100,element.x)

我终于得到了代码工作:

const Nightmare = require('nightmare'); 
const fs = require('fs'); 
const nightmare = Nightmare({ 
    show: true, 
    openDevTools: true, 
}); 

nightmare.goto('https://in.news.yahoo.com/') 
    .wait(1000) 
    .evaluate(getBounds, '.Cf') 
    .then(function(rects) { 
    console.log(rects); 

    function getScreenshot(rects, index) { 
     if (index == rects.length) return; 
     nightmare.scrollTo(rects[index].y, 0) 
     .screenshot(__dirname + '/images/navbar' + index + '.png', { 
      //60 is height of the top element which remains 
      x: rects[index].x-10, 
      y: 60, 
      width: rects[index].width+30, 
      height: rects[index].height +60 
     }) 
     .then(function() { 
      console.log("Calling next. " + index); 
      getScreenshot(rects, index + 1); 
     }).catch(function(err) { 
      console.log(err); 
     }) 
    }; 

    getScreenshot(rects, 0); 
    }) 
    .catch(function(err) { 
    console.log(err); 
    }); 

function getBounds(selector) { 
    var elements = document.querySelectorAll(selector); 
    if (elements && elements.length > 0) { 
    var arr = []; 
    const r = Math.round; 
    for (var ii = 0; ii < elements.length; ii++) { 
     var rect = elements[ii].getBoundingClientRect(); 
     arr.push({ 
     x: r(rect.left), 
     y: r(rect.top), 
     width: r(rect.width), 
     height: r(rect.height) 
     }) 
    } 
    console.log("Elements found: ", arr.length); 
    return arr; 
    } 
    return null; 
}