2014-02-18 43 views
1

我正在按照下面列出的教程,使用 PhantomJS 和 Node.js 进行网页抓取:

http://code.tutsplus.com/tutorials/screen-scraping-with-nodejs--net-25560

当我运行代码:

// Script from the question: opens `host` through the `phantom` module, calls
// page.injectJs with a jQuery URL, waits 5 seconds, then evaluates a function
// inside the page and logs whatever comes back before exiting PhantomJS.
var host = 'http://www.shoutcast.com/?action=sub&cat=Hindi#134'; 
    var phantom = require('phantom'); 
phantom.create(function(ph) { 
return ph.createPage(function(page) { 
return page.open(host, function(status) { 
    // `status` is only logged; it is not checked before continuing.
    console.log("opened site? ", status);   

     // NOTE(review): injectJs is passed a remote URL here. PhantomJS documents
     // injectJs for local script files and includeJs for URLs — confirm which
     // one is intended.
     page.injectJs('http://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js', function() { 
      // Callback for the jQuery injection request.
      // Give the page's AJAX content time to load: wait 5 seconds before evaluating.
      setTimeout(function() { 
       return page.evaluate(function() { 

        // This function is handed to page.evaluate; it reads the first element
        // with class 'transition' using the plain DOM API (jQuery is not used here).
        console.log(document.getElementsByClassName('transition')[0]); 

        // NOTE(review): this returns a DOM element; presumably the value has to be
        // serializable to travel from the page back to Node — confirm.
        return document.getElementsByClassName('transition')[0]; 



       }, function(result) { 
        // Receives the value returned by the evaluated function, logs it, then
        // shuts PhantomJS down.
        console.log(result); 
        ph.exit(); 
       }); 
      }, 5000); 

     }); 
}); 
}); 
}); 

我得到以下错误:

phantom stdout: ReferenceError: Can't find variable: $ 


phantom stdout: phantomjs://webpage.evaluate():7 
phantomjs://webpage.evaluate():10 
phantomjs://webpage.evaluate():10 

我不知道这是什么意思,错误信息也没有说明该如何解决……这个问题应该怎么解决?

基本上,我想从正在抓取的网站中获取所有带有 transition 类的 'a' 标签。这些标签都是在网站上异步加载的。

+0

变量'h2Arr'从哪里来?在你展示的代码片段中,它不会在任何地方初始化。 – Stilltorik

+0

我刚更新了代码和错误。以前的代码来自旧的提交。 –

回答

3

`$` 的错误是由 jQuery 以及可能存在的冲突引起的。要抓取带有 transition 类的 'a' 标签,你几乎不需要注入 jQuery。你始终可以使用 document.querySelector 和 document.querySelectorAll:

var host = 'http://www.shoutcast.com/?action=sub&cat=Hindi#134';
var phantom = require('phantom');

// How long to wait after the page opens so its AJAX content can finish loading.
var AJAX_WAIT_MS = 5000;

/**
 * Opens `host` in PhantomJS, waits for the asynchronously loaded content,
 * then logs the href and text of every <a class="transition"> on the page.
 * PhantomJS is shut down when scraping finishes or when the page fails to open.
 */
phantom.create(function(ph) {
    ph.createPage(function(page) {

        page.open(host, function(status) {

            console.log("opened site? ", status);

            // Nothing to scrape if the page did not load; exit instead of
            // waiting and evaluating against an empty document.
            if (status !== 'success') {
                ph.exit();
                return;
            }

            // Wait for a bit for AJAX content to load on the page.
            setTimeout(function() {

                page.evaluate(function() {
                    // This function runs inside the page, not in Node, so it
                    // cannot see outer variables. DOM nodes cannot be passed
                    // back across that boundary, so copy the fields we need
                    // into plain objects.
                    var anchors = document.querySelectorAll('a.transition');
                    return Array.prototype.map.call(anchors, function(anchor) {
                        return {
                            href: anchor.href,
                            text: anchor.textContent
                        };
                    });
                },

                // Receives the array of {href, text} objects returned above.
                function(result) {
                    console.log(result);
                    ph.exit();
                });

            }, AJAX_WAIT_MS);

        });
    });
});

不过,我不太理解 function (result) { console.log(result); ...} 这种写法。我不确定 page.evaluate 是否接受回调函数作为第二个参数,请查阅文档确认。