2015-02-11 132 views
2

在我的scrapy代码中,FormRequest请求获取下一页我将重定向到主页,但在浏览器中页面将转到下一页。我想我已经给出了所有的形式数据。scrapy下一页重定向到主页

from scrapy.selector import Selector 
from scrapy.http import Request, FormRequest 
from scrapy.contrib.spiders import CrawlSpider 

from scrapy.shell import inspect_response 
class SampleSpider(CrawlSpider): 

    name = 'samplespider' 
    start_urls = ['http://jobs.hiltonworldwide.com/en/jobs/job-search-results'] 

    def parse(self, response): 

     sel = Selector(response) 
     inspect_response(response) 
     eventTarget = 'phmain_0$phmaincontent_0$phjobsearchresults_0$next_page' 
     VIEWSTATE = sel.xpath("//input[@name='__VIEWSTATE']/@value").extract()[0] 
     EVENTVALIDATION = sel.xpath("//input[@name='__EVENTVALIDATION']/@value").extract()[0] 
     hdnIPAddress = sel.xpath("//input[@name='phheader_0$hdnIPAddress']/@value").extract()[0] 
     hdnPageCount = sel.xpath("//input[@name='phmain_0$phmaincontent_0$phjobsearchresults_0$hdnPageCount']/@value").extract()[0] 
     hdnPageIndex = sel.xpath("//input[@name='phmain_0$phmaincontent_0$phjobsearchresults_0$hdnPageIndex']/@value").extract()[0] 

     form_data = { 
      '__EVENTTARGET': eventTarget, 
      '__EVENTARGUMENT': "", 
      '__LASTFOCUS': "", 
      '__VIEWSTATE': VIEWSTATE, 
      '__EVENTVALIDATION': EVENTVALIDATION, 
      'phheader_0$hdnIPAddress': hdnIPAddress, 
      'phmain_0$phbannerinfo_0$phcountryinfo_0$ddlCountry':"Worldwide", 
      'phmain_0$phmaincontent_0$phjobsearchresults_0$albLanguage': "91351", 
      'phmain_0$phmaincontent_0$phjobsearchresults_0$LoginEmail': "", 
      'phmain_0$phmaincontent_0$phjobsearchresults_0$hdnPageCount': hdnPageCount, 
      'phmain_0$phmaincontent_0$phjobsearchresults_0$hdnPageIndex': hdnPageIndex, 
      'phmain_0$phmaincontent_0$phjobsearch_0$ddlCity': "-1", 
      'phmain_0$phmaincontent_0$phjobsearch_0$albBrands': "-1", 
      'phmain_0$phmaincontent_0$phjobsearch_0$albTalentAreas': "-1", 
      } 

     yield FormRequest(
      'http://jobs.hiltonworldwide.com/en/jobs/job-search-results', 
      formdata=form_data, 
      callback=self.parse 
      ) 

有什么我失踪或做错了吗?这将是正确分页

回答

2

什么帮助了我,当我试图重现该问题的解决,是指定User-Agent头:

yield FormRequest(
    'http://jobs.hiltonworldwide.com/en/jobs/job-search-results', 
    formdata=form_data, 
    callback=self.parse, 
    headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36'} 
)