下面是我适合你的情况很常见的例子,它显示了XHR和正则表达式的使用检索网页的HTML内容,从中提取的所有链接,并下载各个环节的目标文件:
Option Explicit
Sub Test()
' declare vars
Dim sUrl As String
Dim sReqProt As String
Dim sReqAddr As String
Dim sReqPath As String
Dim sContent As String
Dim oLinks As Object
Dim oMatch As Object
Dim sHref As String
Dim sHrefProt As String
Dim sHrefAddr As String
Dim sHrefPath As String
Dim sHrefFull As String
Dim n As Long
Dim aContent() As Byte
' set source URL
sUrl = "https:\\......\links.html"
' process source URL
SplitUrl sUrl, sReqProt, sReqAddr, sReqPath
If sReqProt = "" Then sReqProt = "http:"
sUrl = sReqProt & "//" & sReqAddr & "/" & sReqPath
' retrieve source page HTML content
With CreateObject("Microsoft.XMLHTTP")
.Open "GET", sUrl, False
.Send
sContent = .ResponseText
End With
' parse source page HTML content to extract all links
Set oLinks = CreateObject("Scripting.Dictionary")
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = "<a.*?href *= *(?:'|"")(.*?)(?:'|"").*?>"
For Each oMatch In .Execute(sContent)
sHref = oMatch.subMatches(0)
SplitUrl sHref, sHrefProt, sHrefAddr, sHrefPath
If sHrefProt = "" Then sHrefProt = sReqProt
If sHrefAddr = "" Then sHrefAddr = sReqAddr
sHrefFull = sHrefProt & "//" & sHrefAddr & "/" & sHrefPath
oLinks(oLinks.Count) = sHrefFull
Next
End With
' save each link target into file
For Each n In oLinks
sHref = oLinks(n)
With CreateObject("Microsoft.XMLHTTP")
.Open "GET", sHref, False
.Send
aContent = .ResponseBody
End With
With CreateObject("ADODB.Stream")
.Type = 1 ' adTypeBinary
.Open
.Write aContent
.SaveToFile "C:\Test\" & n & ".xml", 2 ' adSaveCreateOverWrite
.Close
End With
Next
End Sub
Sub SplitUrl(sUrl, sProt, sAddr, sPath)
' extract protocol, address and path from URL
Dim aSplit
aSplit = Split(sUrl, "//")
If UBound(aSplit) = 0 Then
sProt = ""
sAddr = sUrl
Else
sProt = aSplit(0)
sAddr = aSplit(1)
End If
aSplit = Split(sAddr, "/")
If UBound(aSplit) = 0 Then
sPath = sAddr
sAddr = ""
Else
sPath = Mid(sAddr, Len(aSplit(0)) + 2)
sAddr = aSplit(0)
End If
End Sub
此方法不采用IE自动化。通常情况下,IE的Cookie的过程足以引用当前会话,因此如果您的网站不使用附加程序进行身份验证并生成链接列表,那么该方法应该适用于您。
这是一个兔子洞,我已经倒了很多次。简短的回答是停止尝试让IE充当代理来下载文件。使用xmlHttp对象通过GetResponseHeader登录并收集/返回认证,然后使用ADO流保存文件。 – Jeeped
[This](http://stackoverflow.com/a/32429348/2165759)可能会有所帮助。 – omegastripes