5
我试图通过点击浏览器中的链接来下载通过javascript动作请求的页面的html。我可以下载的第一页,因为它有一个通用网址:在python中下载html?
http://www.locationary.com/stats/hotzone.jsp?hz=1
但页面底部还有一组数字(1〜10)链接。如果点击其中一个,就会跳转到相应的页面,例如第2页:
http://www.locationary.com/stats/hotzone.jsp?ACTION_TOKEN=hotzone_jsp$JspView$NumericAction&inPageNumber=2
当我把这个URL放进我的程序并尝试下载HTML时,得到的却是网站上另一个页面的HTML,我认为那是主页。
这些页面是通过JavaScript请求的,没有固定的网址,我怎样才能获取它们的HTML?
谢谢。
代码:
import urllib
import urllib2
import cookielib
import re
# Address handed to load() by the call at the end of the script.
# It is an empty string in this listing.
URL = ''
_LOGIN_URL = "https://www.locationary.com/index.jsp?ACTION_TOKEN=tile_loginBar_jsp$JspView$LoginAction"
_BASE_COOKIE = 'site_version=REGULAR'
_BROWSER_HEADERS = (
    ('User-agent', 'Mozilla/5.0 (Windows NT 6.1; rv:13.0) Gecko/20100101 Firefox/13.0.1'),
    ('Referer', 'http://www.locationary.com/'),
)


def _make_opener(cookie_header):
    """Build an opener with a cookie jar, browser-like headers and a fixed Cookie header."""
    # The jar is only used in memory: nothing in this script calls save() on it.
    jar = cookielib.FileCookieJar("cookies")
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    opener.addheaders.extend(_BROWSER_HEADERS)
    opener.addheaders.append(('Cookie', cookie_header))
    return opener


def _read_and_close(response):
    """Return the full body of *response* and always release the connection."""
    try:
        return response.read()
    finally:
        response.close()


def _cookie_pairs(response):
    """Return the ``name=value`` part of every Set-Cookie header on *response*."""
    pairs = []
    # Look the headers up by name instead of by position, so a change in
    # header order on the server cannot select the wrong line.
    for value in response.info().getheaders('Set-Cookie'):
        # Attributes such as Path/Expires follow the first ';' and must not
        # be echoed back in a Cookie request header.
        pair = value.split(';', 1)[0].strip()
        if pair:
            pairs.append(pair)
    return pairs


def load(url):
    """Log in to locationary.com and print the HTML found at *url*.

    The login form is posted twice: the first pass collects the cookies the
    server hands out, the second pass repeats the login while sending those
    cookies explicitly, and the requested page is fetched with that second
    opener. Written for Python 2 (urllib2 / cookielib).

    Args:
        url: Address of the page to download.

    Raises:
        ValueError: If *url* is empty (raised before any network traffic).
        urllib2.URLError: If any of the HTTP requests fails.
    """
    if not url:
        # urllib2 would raise the same ValueError, but only after three
        # round trips to the server.
        raise ValueError("unknown url type: %s" % (url,))

    credentials = urllib.urlencode({"inUserName": "email", "inUserPass": "password"})

    # Pass 1: log in with only the static cookie and record what the server sets.
    opener = _make_opener(_BASE_COOKIE)
    login_response = opener.open(urllib2.Request(_LOGIN_URL, credentials))
    try:
        session_pairs = _cookie_pairs(login_response)
    finally:
        login_response.close()
    # The login URL is also requested once without form data; the body is
    # read and discarded.
    _read_and_close(opener.open(_LOGIN_URL))

    # Pass 2: log in again, this time sending the collected cookies explicitly.
    # If the server set no cookies, only the static cookie is sent here
    # (the opener's own jar may still pick up the session -- unverified).
    opener = _make_opener('; '.join([_BASE_COOKIE] + session_pairs))
    opener.open(urllib2.Request(_LOGIN_URL, credentials)).close()

    page = _read_and_close(opener.open(url))
    print(page)
# Script entry point: log in and print the HTML of the page named by URL.
load(URL)