2014-11-09 90 views
0

这是我的代码:Scrapy请求回调不工作

class AAA(scrapy.Spider): 
    name = 'aaa' 
    start_urls = [ 
     'https://forum.lowyat.net/topic/377400/all' 
    ] 
    COOKIES_ENABLED = False 
    count = 0 
    check = 0 
    item = AAAItem() 
    toDownload = [] 


    def parse(self, response): 
     for sel in response.xpath('//*[@id="contentmiddle"]/div[3]/ol/li'): 
      self.item['name'] = sel.xpath('div/div/div[1]/p[1]/a/text()').extract() 
      self.item['date'] = sel.xpath('div/div/div[2]/p[4]/text()').extract() 
      lastUpdateDate = self.getLastUpdateDate() 
      date_object1 = self.convertToDate(self.item['date'][0]+"") 
      date_object2 = self.convertToDate(lastUpdateDate) 
      if date_object1 <= date_object2: 
       self.haha2(response) 
       self.stopSpider() 
      self.item['link'] = sel.xpath('div/div/div[4]/p[3]/a/@href').extract() 
      self.arrangeDownloadUrl() 
      yield self.item     

    def arrangeDownloadUrl(self): 
     try: 
      downloadUrl = "http://AAA.com"+self.item['link'][0]+"" 
      self.toDownload.append(downloadUrl) 
     except IndexError: 
      print 'file not downloaded, link dead' 

    def haha2(self, response): 
     for i in range (len(self.toDownload)): 
      Request(self.toDownload[i], self.haha3) 

    def haha3(self, response): 
     print 'haha3.................................................................' 


    def stopSpider(self): 
     raise scrapy.exceptions.CloseSpider('done') 


    def getLastUpdateDate(self): 
      date = "Nov 5, 2001 - 1:06 PM" 
      return date 

    def convertToDate(self, value): 
     result = datetime.strptime(value, '%b %d, %Y - %I:%S %p') 
     return result 

    def convertToString(self, value): 
     result = value.strftime("%b %w, %Y - %I:%S %p") 
     return result 

出于保护隐私的目的,我不得不改变页面的URL。 无论如何,问题是请求haha2函数未能请求回调,haha3 ... 它不会进入haha3函数,除非我用这样的东西调用它,像这样self.haha3(response) ...但这会打败目的因为我想打开链接并将响应作为我想要打开的链接...任何想法,我哪里出错了?

+0

您是否尝试过'产量请求(...)'(屈服吧)在haha2? – soooooot 2014-11-11 09:39:54

回答

0

尝试

def haha2(self, response): 
    for i in range (len(self.toDownload)): 
     yield Request(self.toDownload[i], callback=self.haha3)