2016-02-29 115 views
1

我一直在使用IE从Excel中抓取这个网站,但最近使用IE已经不一致和缓慢。我的列表通常在500到1000左右,所以我必须在一夜之间运行宏。最近宏开始挂断。这就是为什么我决定第一次使用MSXML2进行资源管理器的搜索。XML网络动态密钥刮网站

该网站不需要认证,但它具有动态更改的隐藏输入。

我做了什么..我使用GET来拉动网站并提取动态密钥,然后尝试使用POST将输入数据发送到网站。我一直在获取服务器错误/运行时错误。我曾尝试使用不同的标题请求选项,但我仍然没有得到结果页。我也尝试使用MSXML2.ServerXMLHTTP。我在正确的轨道上吗?

感谢

Sub test_66() 
    Dim oXML_get 
    'Dim oXML_post 
    Dim sendText As String, s2 As String, n1 As Integer, postUrl As String,  sHTML As String, s1 As String 

    ' Instantiate MSXML2 
    Set oXML_get = New MSXML2.XMLHTTP 

    oXML_get.Open "GET", "http://www.phila.gov/revenue/realestatetax/default.aspx", False 
    oXML_get.setRequestHeader "Accept", "text/html;charset=UTF-8" 
    oXML_get.setRequestHeader "Accept-Encoding", "identity" 
    oXML_get.setRequestHeader "Accept-Charset", "UTF-8" 'Connection keep -alive 
    oXML_get.setRequestHeader "Connection", "keep -alive" 

    oXML_get.send 

    sHTML = oXML_get.responseText 
    'Debug.Print sHTML 
    Dim hDOC As MSHTML.HTMLDocument 
    Set hDOC = New MSHTML.HTMLDocument 
      hDOC.body.innerHTML = sHTML 
    s1 = Replace(hDOC.getElementsByTagName("input").Item(2).Value, "/", "%2F") 
    s2 = Replace(hDOC.getElementsByTagName("input").Item(3).Value, "/", "%2F") 

    sendText = "__VIEWSTATE=" & s1 & "&__EVENTVALIDATION=" & s2 & "&ctl00%24BodyContentPlaceHolder%24SearchByBRTControl%24txtTaxInfo=043185500&ctl00%24BodyContentPlaceHolder%24SearchByBRTControl%24btnTaxByBRT=%20>>" 
    Debug.Print sendText '"__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=" & s1 & "__EVENTVALIDATION=" & s2 & 

    oXML_get.Open "POST", "http://www.phila.gov/revenue/realestatetax/default.aspx", False 
    oXML_get.setRequestHeader "Content-Type", "application/x-www-form-urlencoded" 
    oXML_get.setRequestHeader "Accept", "text/html;charset=UTF-8" 
    oXML_get.setRequestHeader "Accept-Encoding", "identity" 
    oXML_get.setRequestHeader "Accept-Charset", "UTF-8" 'Connection keep -alive 
    'oXML_get.setRequestHeader "Connection", "keep -alive" 
    oXML_get.send (sendText) 

    Dim objIE As Object: Set objIE = CreateObject("InternetExplorer.Application") 
    objIE.navigate "about:blank" 
    objIE.Visible = True 
    objIE.document.Write oXML_get.responseText 
End Sub 

这是我收到运行时错误消息....

Server Error in '/revenue/RealEstateTax' Application. 
<!-- Web.Config Configuration File --> 

<configuration> 
    <system.web> 
     <customErrors mode="Off"/> 
    </system.web> 
</configuration> 
+0

检查,如果你只是缺少&'sendText = “__VIEWSTATE =” &S1&“&__ EVENTVALIDATION = “...' – omegastripes

+0

你说得对,我会更新并检查代码。感谢您的快速响应。 – user3121922

+0

我插入了&符号,但仍然收到来自服务器的相同错误。我已经更新了上面的代码。 – user3121922

回答

1

我从Web表单在Firefox提交网页上相同的搜索请求。从那以后,我打开开发者工具F12,网络选项卡,单击最后一个POST请求,打开参数部分,这里是已提交的参数的截图:

form data

原始表单数据:

__EVENTTARGET = & __EVENTARGUMENT = & __VIEWSTATE =%2FwEPDwULLTEyNDQ4MDU4OTkPZBYCZg9kFgICAw9kFgICDQ9kFgYCAQ9kFgICAw9kFgICAQ8QZBAVARUxNzAwIFNQUklORyBHQVJERU4gU1QVARUxNzAwIFNQUklORyBHQVJERU4gU1QUKwMBZxYBZmQCBQ8PFgIeBFRleHQFHFBsZWF​​zZ SBhZGQgYWRkcmVzcyB0byBsb29rdXBkZAINDw8WAh4HVmlzaWJsZWhkFgoCAQ88KwAKAQAPFgQeC18hRGF0YUJvdW5kZx4LXyFJdGVtQ291bnRmZGQCAw9kFgICBQ8PFgIeF0VuYWJsZUFqYXhTa2luUmVuZGVyaW5naGRkAgUPFCsAAg8WAh8EaGQQFgJmAgEWAg8WBB4LTmF2aWdhdGVVcmwFJC4uL0ZlZWRiYWNrRm9ybS5hc3B4P0JydE5vPTc3MjUzNDcwMB8EaGQPFgQfBQUdfi9QREZzL1BheW1lbnRfQWdyZWVtZW50cy5wZGYfBGhkDxYCZmYWAQVxVGVsZXJpay5XZWIuVUkuUmFkV2luZG93LCBUZWxlcmlrLldlYi5VSSwgVmVyc2lvbj0yMDEwLjEuNTE5LjQwLCBDdWx0dXJlPW5ldXRyYWwsIFB1YmxpY0tleVRva2VuPTEyMWZhZTc4MTY1YmEzZDQWBGYPDxYEHwUFJC4uL0ZlZWRiYWNrRm9ybS5hc3B4P0JydE5vPTc3MjUzNDcwMB8EaGRkAgEPDxYEHwUFHX4vUERGcy9QYXltZW50X0FncmVlbWVudHMucGRmHwRoZGQCBw88KwARAgAPFgQfAmcfA2ZkARAWABYAFgBkAgkPFgIeBXZhbHVlBQk3NzI1MzQ3MDBkGAIFQWN0bDAwJEJvZHlDb250ZW50UGxhY2VIb2xkZXIkR2V0VGF4SW5mb0NvbnRyb2wkZ3JkUGF5bWVudHNIaXN0b3J5DzwrAAwBCGZkBTJjdGwwMCRCb2R5Q29udGVudFBsYWNlSG9sZGVyJEdldFRheEluZm9Db250cm9sJGZybQ9nZD9K5t7genscvOsiNrdPkxL0VHWCYSsS%2FK3EZTRu3h3w & __EVENTVALIDATION =%2FwEWBQKkrNCPCgLRzsWTBwLlpIbACAKV6q2KD QKIvdHyCawQaHbBYSHV%2B%2FVvyLUTUY%2BhSsmbpTvj0W4ycfOa1RCO & ctl00%24BodyContentPlaceHolder%24SearchByAddressControl%24txtLookup由+属性= +地址& ctl00%24BodyContentPlaceHolder%24SearchByBRTControl%24txtTaxInfo = 043185500 & ctl00%24BodyContentPlaceHolder%24SearchByBRTControl%24btnTaxByBRT = +%3E%3E

请注意,有7个参数。所有这些应该是URL编码的。我稍微修改了代码并修改了代码,还添加了一些请求标头。下面的代码工作正确对我来说:

Option Explicit 

Sub test_66() 

    Dim s1 As String 
    Dim s2 As String 
    Dim sResp As String 
    Dim aTmp As Variant 
    Dim sBRTNumber As String 
    Dim sFormData As String 

    With CreateObject("MSXML2.XMLHTTP") 
     .Open "GET", "http://www.phila.gov/revenue/realestatetax/default.aspx", False 
     .setRequestHeader "Accept", "text/html;charset=UTF-8" 
     .setRequestHeader "Accept-Encoding", "identity" 
     .setRequestHeader "Accept-Charset", "UTF-8" 
     .setRequestHeader "Connection", "keep-alive" 
     .send 
     sResp = .responseText 
    End With 
    aTmp = Split(sResp, "id=""__VIEWSTATE"" value=""", 2) 
    s1 = aTmp(1) 
    aTmp = Split(s1, """", 2) 
    s1 = aTmp(0) 
    aTmp = Split(sResp, "id=""__EVENTVALIDATION"" value=""", 2) 
    s2 = aTmp(1) 
    aTmp = Split(s2, """", 2) 
    s2 = aTmp(0) 
    s1 = EncodeUriComponent(s1) 
    s2 = EncodeUriComponent(s2) 

    sBRTNumber = "043185500" 
    sFormData = Join(Array(_ 
     "__EVENTTARGET=", _ 
     "__EVENTARGUMENT=", _ 
     "__VIEWSTATE=" & s1, _ 
     "__EVENTVALIDATION=" & s2, _ 
     "ctl00%24BodyContentPlaceHolder%24SearchByAddressControl%24txtLookup=by+Property+Address", _ 
     "ctl00%24BodyContentPlaceHolder%24SearchByBRTControl%24txtTaxInfo=" & sBRTNumber, _ 
     "ctl00%24BodyContentPlaceHolder%24SearchByBRTControl%24btnTaxByBRT=+%3E%3E" _ 
     ), "&") 

    With CreateObject("MSXML2.XMLHTTP") 
     .Open "POST", "http://www.phila.gov/revenue/realestatetax/default.aspx", False 
     .setRequestHeader "Content-Type", "application/x-www-form-urlencoded" 
     .setRequestHeader "Accept", "text/html;charset=UTF-8" 
     .setRequestHeader "Accept-Encoding", "identity" 
     .setRequestHeader "Accept-Charset", "UTF-8" 
     .setRequestHeader "Connection", "keep-alive" 
     .setRequestHeader "Host", "www.phila.gov" 
     .setRequestHeader "Origin", "http://www.phila.gov" 
     .setRequestHeader "Referer", "http://www.phila.gov/revenue/realestatetax/default.aspx" 
     .send (sFormData) 
     sResp = .responseText 
    End With 

    With CreateObject("InternetExplorer.Application") 
     .navigate "about:blank" 
     .Visible = True 
     .document.write sResp 
    End With 

End Sub 

Function EncodeUriComponent(strText As String) As String 
    Static objHtmlfile As Object 
    If objHtmlfile Is Nothing Then 
     Set objHtmlfile = CreateObject("htmlfile") 
     objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript" 
    End If 
    EncodeUriComponent = objHtmlfile.parentWindow.encode(strText) 
End Function 

这里是IE窗口输出:

output

+0

谢谢omegastripes, 你是绝对正确的。我的代码上的问题是URL编码s1和s2。我想我可以手动编码这两个键,但我错了。我接受你的解决方案,但这是我如何解决这个问题。因为我在Excel 2013中,我使用内置的WorksheetFunction.EncodeURL来编码这两个键,它工作。 – user3121922

+0

我每次都从代码中学习新东西。我不能够感谢你。我只是使用你的数组拆分方法来提取数据!它太酷了。 – user3121922