选择包含文件路径的单元格区域(这些文件对应的文章标题需要更新),然后运行此过程。它会检查每个文件是否存在;如果存在,则创建一个文件系统对象来打开并读取该文件。它会把第一个 Span 标签之后第二组 H2 标签之间的文本作为文章标题,写入路径右侧的单元格。注意:它不会检查是否已到达第一个 Span 标签的结束位置。希望这对你有帮助。
' Fills the cell to the right of each selected cell with the article title
' read from the HTML file whose path is stored in the selected cell.
'
' The title is the text between the second <h2>...</h2> pair found on the
' lines that follow the line holding the first <span> tag. Matching is
' case-insensitive and looks for the literal tags "<span>" and "<h2>", so
' tags that carry attributes are not recognised.
'
' If a cell is blank, holds an error value, or does not name an existing
' file, the cell to its right is cleared instead.
Sub UpdateArticleTitle()
    Dim rngPath As Range
    Dim fsoObj As Object
    Dim strPath As String

    ' A single FileSystemObject is enough for the whole run
    Set fsoObj = CreateObject("Scripting.FileSystemObject")

    ' Go through the range of selected cells
    For Each rngPath In ActiveWindow.RangeSelection
        strPath = ""
        ' Error values (#N/A, #REF!, ...) cannot be converted to a string
        If Not IsError(rngPath.Value) Then strPath = Trim$(CStr(rngPath.Value))

        ' FileExists returns False for blank paths and wildcard patterns,
        ' so only real, openable files get past this check
        If fsoObj.FileExists(strPath) Then
            ' Update the article title in the sheet
            rngPath.Offset(0, 1).Value = ReadArticleTitle(fsoObj, strPath)
        Else
            ' Clear the article title if the file isn't found
            rngPath.Offset(0, 1).ClearContents
        End If
    Next rngPath

    Set fsoObj = Nothing
End Sub

' Returns the text between the second <h2>...</h2> pair that follows the first
' <span> line of the file at strPath, or "" when no such pair exists.
' fsoObj is the Scripting.FileSystemObject used to open the file.
' Any run-time error raised while reading is re-raised after the file is closed.
Private Function ReadArticleTitle(ByVal fsoObj As Object, ByVal strPath As String) As String
    Dim tsFile As Object
    Dim strLine As String, strLower As String
    Dim blnSpanFound As Boolean, blnFirstH2Found As Boolean
    Dim lngStart As Long, lngClose As Long
    Dim lngErr As Long, strErr As String

    ' Open the HTML file
    Set tsFile = fsoObj.OpenTextFile(strPath)
    On Error GoTo CleanUp

    Do Until tsFile.AtEndOfStream
        strLine = tsFile.ReadLine
        strLower = LCase$(strLine)

        If Not blnSpanFound Then
            ' Search for the first occurrence of <span>
            If InStr(1, strLower, "<span>") > 0 Then blnSpanFound = True
        ElseIf InStr(1, strLower, "<h2>") > 0 Then
            If Not blnFirstH2Found Then
                ' First <h2> after the <span>: skip it
                blnFirstH2Found = True
            Else
                ' Second <h2>: the title starts right after the opening tag
                lngStart = InStr(1, strLower, "<h2>") + Len("<h2>")
                ' Look for the closing tag only AFTER the opening tag, so a
                ' stray earlier </h2> cannot produce a negative length
                lngClose = InStr(lngStart, strLower, "</h2>")
                ' Append lines until </h2> is found, but never read past
                ' the end of the file
                Do Until lngClose > 0 Or tsFile.AtEndOfStream
                    strLine = strLine & tsFile.ReadLine
                    lngClose = InStr(lngStart, LCase$(strLine), "</h2>")
                Loop
                If lngClose > 0 Then
                    ReadArticleTitle = Mid$(strLine, lngStart, lngClose - lngStart)
                End If
                Exit Do
            End If
        End If
    Loop

CleanUp:
    ' Remember a pending error, always close the file, then re-raise
    lngErr = Err.Number
    strErr = Err.Description
    tsFile.Close
    Set tsFile = Nothing
    If lngErr <> 0 Then Err.Raise lngErr, , strErr
End Function
如果你有一个HTML文件列表,可以使用 Node.js 和 cheerio(https://github.com/cheeriojs/cheerio),以 jQuery 语法解析每个HTML文件,然后生成一个可以导入 Excel 的 CSV 文件 – 2014-12-03 17:43:54
将每个文件加载到IE中,然后就可以使用类似 `IE.Document.getElementsByTagName("h2")(2).getElementsByTagName("span")(1).innerText` 的代码。可以在 Google 上搜索“VBA automate IE”查找示例 – 2014-12-03 18:14:33
@TimWilliams - 谢谢!我一定会尝试这个。 – lith0pedion 2014-12-04 18:29:24