2016-11-27 97 views
2

我有一个向服务器发送XMLHTTP请求的宏,它将得到的响应为纯文本字符串,而不是JSON格式字符串或其他标准格式(至少对于我所知道的) 。在Excel中解析字符串Vba

我想解析输出字符串,以访问数据以结构化的方式相同的方式为parseJson子程序在此link

我的问题是我不擅长使用正则表达式和我无法根据需要修改例行程序。

我需要解析的字符串具有以下结构:

  1. 该字符串是
  2. 每个单个参数是由它的参数定义的名称等于辛博尔,它的值,结束时用一个单一的线; “NID = 3;”“SID =测试”;
  3. 参数可以在“结构”中收集,以符号|并以他们的名字后跟;如| STEST; NID = 3; SID = Test; |
  4. 的结构还可以含有其它的结构

一个输出串的一个例子是下面

|KC;|AD;PE=5;PF=3;|CD;PE=5;HP=test;|CD;PE=3;HP=abc;| 

在这种情况下,存在一个宏结构KC含有结构AD一个。结构AD由参数PE,PF和2结构CD组成。而finaly结构CD有参数PEHP

所以我想解析字符串,以获得对象/字典反映这种结构,你能帮帮我吗?

将第一答案

大家好后,感谢你的帮助,但我想我应该让更多的清楚,我想获得的输出。 对于我有,我想有以下结构的物体的例子字符串:

<KC> 
    <AD> 
     <PE>5</PE> 
     <PF>3</PF> 
     <CD> 
      <PE>5</PE> 
      <HP>test</HP> 
     </CD> 
     <CD> 
      <PE>3</PE> 
      <HP>abc</HP> 
     </CD> 
    </AD> 
</KC> 

于是我开始写了一个可能的工作代码基础上,从@Nvj回答一些提示,并在此答案link

Option Explicit 
Option Base 1 

Sub Test() 

    Dim strContent As String 
    Dim strState As String 
    Dim varOutput As Variant 

    strContent = "|KC;|AD;PE=5;PF=3;|CD;PE=5;HP=test;|CD;PE=3;HP=abc;|" 
    Call ParseString(strContent, varOutput, strState) 

End Sub 

Sub ParseString(ByVal strContent As String, varOutput As Variant, strState As String) 
' strContent - source string 
' varOutput - created object or array to be returned as result 
' strState - Object|Array|Error depending on processing to be returned as state 
Dim objTokens As Object 
Dim lngTokenId As Long 
Dim objRegEx As Object 
Dim bMatched As Boolean 

Set objTokens = CreateObject("Scripting.Dictionary") 
lngTokenId = 0 
Set objRegEx = CreateObject("VBScript.RegExp") 
With objRegEx 
    .Global = True 
    .MultiLine = True 
    .IgnoreCase = True 
    .Pattern = "\|[A-Z]{2};" 'Pattern for the name of structures 
    Tokenize objTokens, objRegEx, strContent, lngTokenId, bMatched, "str" 
    .Pattern = "[A-Z]{2}=[^\|=;]+;" 'Pattern for parameters name and values 
    Tokenize objTokens, objRegEx, strContent, lngTokenId, bMatched, "par" 
End With 

End Sub 

Sub Tokenize(objTokens, objRegEx, strContent, lngTokenId, bMatched, strType) 
Dim strKey  As String 
Dim strKeyPar  As String 
Dim strKeyVal  As String 

Dim strWork  As String 
Dim strPar  As String 
Dim strVal  As String 
Dim strLevel  As String 

Dim strRes  As String 

Dim lngCopyIndex As Long 
Dim objMatch  As Object 

strRes = "" 
lngCopyIndex = 1 
With objRegEx 
    For Each objMatch In .Execute(strContent) 
     If strType = "str" Then 
      bMatched = True 
      With objMatch 
       strWork = Replace(.Value, "|", "") 
       strWork = Replace(strWork, ";", "") 
       strLevel = get_Level(strWork) 
       strKey = "<" & lngTokenId & strLevel & strType & ">" 
       objTokens(strKey) = strWork 
       strRes = strRes & Mid(strContent, lngCopyIndex, .FirstIndex - lngCopyIndex + 1) & strKey 
       lngCopyIndex = .FirstIndex + .Length + 1 
      End With 
      lngTokenId = lngTokenId + 1 
     ElseIf strType = "par" Then 

      strKeyPar = "<" & lngTokenId & "par>" 
      strKeyVal = "<" & lngTokenId & "val>" 
      strKey = strKeyPar & strKeyVal 
      bMatched = True 
      With objMatch 
       strWork = Replace(.Value, ";", "") 
       strPar = Split(strWork, "=")(0) 
       strVal = Split(strWork, "=")(1) 
       objTokens(strKeyPar) = strPar 
       objTokens(strKeyVal) = strVal 
       strRes = strRes & Mid(strContent, lngCopyIndex, .FirstIndex - lngCopyIndex + 1) & strKey 
       lngCopyIndex = .FirstIndex + .Length + 1 
      End With 
      lngTokenId = lngTokenId + 2 

     End If 
    Next 
    strContent = strRes & Mid(strContent, lngCopyIndex, Len(strContent) - lngCopyIndex + 1) 
End With 
End Sub 

Function get_Level(strInput As String) As String 

Select Case strInput 
    Case "KC" 
    get_Level = "L1" 
    Case "AD" 
    get_Level = "L2" 
    Case "CD" 
    get_Level = "L3" 
    Case Else 
    MsgBox ("Error") 
    End 
End Select 

End Function 

该函数创建与每个结构名称,参数名称和参数值的项目的字典作为该图所示 enter image description here 得益于功能get_Level关联到结构小时项提供了一个应该有助于保留原始数据层次结构的级别。

所以我缺少的是一个函数来创建一个具有输入字符串的原始结构的对象。这是Retrieve功能在这个答案link做什么,但我不知道如何适应我的情况

+0

你能证明你的代码?你有多进步? – NavkarJ

回答

1

我已经开始在VBA中为您指定的字符串结构编写一个解析器,它不是完整的,但我会发布它。也许你可以从中汲取一些想法。

Sub ParseString() 

    Dim str As String 
    str = "|KC;|AD;PE=5;PF=3;|CD;PE=5;HP=test;|CD;PE=3;HP=abc;|" 

    ' Declare an object dictionary 
    ' Make a reference to Microsoft Scripting Runtime in order for this to work 
    Dim dict As New Dictionary 

    ' If the bars are present in the first and last character of the string, replace them 
    str = Replace(str, "|", "", 1, 1) 
    If (Mid(str, Len(str), 1) = "|") Then 
     str = Mid(str, 1, Len(str) - 1) 
    End If 

    ' Split the string by bars 
    Dim substring_array() As String 
    substring_array = Split(str, "|") 

    ' Declare a regex object 
    ' Check the reference to Microsoft VBScript Regular Expressions 5.5 in order for this to work 
    Dim regex As New RegExp 
    With regex 
     .Global = True 
     .IgnoreCase = True 
     .MultiLine = True 
    End With 

    ' Object to store the regex matches 
    Dim matches As MatchCollection 
    Dim param_name_matches As MatchCollection 
    Dim parameter_value_matches As MatchCollection 

    ' Define some regex patterns 
    pattern_for_structure_name = "^[^=;]+;" 
    pattern_for_parameters = "[^=;]+=[^=;]+;" 
    pattern_for_parameter_name = "[^=;]=" 
    pattern_for_parameter_val = "[^=;];" 

    ' Loop through the elements of the array 
    Dim i As Integer 
    For i = 0 To UBound(substring_array) - LBound(substring_array) 

     ' Get the array element in a string 
     str1 = substring_array(i) 

     ' Check if it contains a structure name 
     regex.Pattern = pattern_for_structure_name 
     Set matches = regex.Execute(str1) 

     If matches.Count = 0 Then 

      ' This substring does not contain a structure name 
      ' Check if it contains parameters 
      regex.Pattern = pattern_for_parameter 
      Set matches = regex.Execute(matches(0).Value) 
      If matches.Count = 0 Then 

       ' There are no parameters as well as no structure name 
       ' This means the string had || - invalid string 
       MsgBox ("Invalid string") 

      Else 

       ' The string contains parameter names 
       ' Add each parameter name to the dictionary 
       Dim my_match As match 
       For Each my_match In matches 

        ' Get the name of the parameter 
        regex.Pattern = pattern_for_parameter_name 
        Set parameter_name_matches = regex.Execute(my_match.Value) 

        ' Check if the above returned any matches 
        If parameter_name_matches.Count = 1 Then 

         ' Remove = sign from the parameter name 
         parameter_name = Replace(parameter_name_matches(0).Value, "=", "") 

         ' Get the value of the parameter 
         regex.Pattern = pattern_for_parameter_value 
         Set parameter_value_matches = regex.Execute(my_match.Value) 

         ' Check if the above returned any matches 
         If parameter_value_matches.Count = 1 Then 

          ' Get the value 
          parameter_value = Replace(parameter_value_matches(0).Value, ";", "") 

          ' Add the parameter name and value as a key pair to the Dictionary object 
          dict.Item(parameter_name) = parameter_value 

         Else 

          ' Number of matches is either 0 or greater than 1 - in both cases the string is invalid 
          MsgBox ("Invalid string") 

         End If 

        Else 

         ' Parameter name did not match - invalid string 
         MsgBox ("Invalid string") 

        End If 

       Next 

      End If 

     ElseIf matches.Count = 1 Then 

      ' This substring contains a single structure name 
      ' Check if it has parameter names 

     Else 

      ' This substring contains more than one structure name - the original string is invalid 
      MsgBox ("Invalid string") 

     End If 

    Next i 

End Sub 
+0

嗨@Nvj,最后我决定用你的方法解决问题,谢谢 – MeSS83

+0

@ MeSS83你的欢迎 – NavkarJ

1

这看起来像一个简单的嵌套分隔字符串。一对夫妇的Split()功能将这样的伎俩:

Option Explicit 

Function parseString(str As String) As Collection 

    Dim a1() As String, i1 As Long, c1 As Collection 
    Dim a2() As String, i2 As Long, c2 As Collection 
    Dim a3() As String 

    a1 = Split(str, "|") 
    Set c1 = New Collection 
    For i1 = LBound(a1) To UBound(a1) 
     If a1(i1) <> "" Then 
      Set c2 = New Collection 
      a2 = Split(a1(i1), ";") 
      For i2 = LBound(a2) To UBound(a2) 
       If a2(i2) <> "" Then 
        a3 = Split(a2(i2), "=") 
        If UBound(a3) > 0 Then 
         c2.Add a3(1), a3(0) 
        ElseIf UBound(a3) = 0 Then 
         c2.Add a3(0) 
        End If 
       End If 
      Next i2 
      c1.Add c2 
     End If 
    Next i1 

    Set parseString = c1 

End Function 


Sub testParseString() 

    Dim c As Collection 

    Set c = parseString("|KC;|AD;PE=5;PF=3;|CD;PE=5;HP=test;|CD;PE=3;HP=abc;|") 

    Debug.Assert c(1)(1) = "KC" 
    Debug.Assert c(2)("PE") = "5" 
    Debug.Assert c(3)(1) = "CD" 
    Debug.Assert c(4)("HP") = "abc" 
    Debug.Assert c(4)(3) = "abc" 

End Sub 

注意,您可以通过两种,索引和密钥(如果键输入存在)地址值。如果没有提供密钥,则只能通过索引访问该值。您还可以递归地迭代集合以获取树结构中的所有值。

食物的思考:因为你的结构可能有重复的名称(在你的情况下,“CD”结构发生两次)集合/字典会发现它优雅地存储(由于关键冲突)有问题。解决这个问题的另一个好方法是使用DOMDocument创建XML结构并使用XPath访问其元素。请参阅Program with DOM in Visual Basic

更新:我也在下面添加了XML示例。看一看。

1

这是另一个关于使用DOMDocument XML分析器的字符串解析问题。您需要在VBA参考中包含Microsoft XML v.6.0。

Function parseStringToDom(str As String) As DOMDocument60 

    Dim a1() As String, i1 As Long 
    Dim a2() As String, i2 As Long 
    Dim a3() As String 

    Dim dom As DOMDocument60 
    Dim rt As IXMLDOMNode 
    Dim nd As IXMLDOMNode 

    Set dom = New DOMDocument60 
    dom.async = False 
    dom.validateOnParse = False 
    dom.resolveExternals = False 
    dom.preserveWhiteSpace = True 

    Set rt = dom.createElement("root") 
    dom.appendChild rt 

    a1 = Split(str, "|") 
    For i1 = LBound(a1) To UBound(a1) 
     If a1(i1) <> "" Then 
      a2 = Split(a1(i1), ";") 
      Set nd = dom.createElement(a2(0)) 
      For i2 = LBound(a2) To UBound(a2) 
       If a2(i2) <> "" Then 
        a3 = Split(a2(i2), "=") 
        If UBound(a3) > 0 Then 
         nd.appendChild dom.createElement(a3(0)) 
         nd.LastChild.Text = a3(1) 
        End If 
       End If 
      Next i2 
      rt.appendChild nd 
     End If 
    Next i1 

    Set parseStringToDom = dom 

End Function 


Sub testParseStringToDom() 

    Dim dom As DOMDocument60 

    Set dom = parseStringToDom("|KC;|AD;PE=5;PF=3;|CD;PE=5;HP=test;|CD;PE=3;HP=abc;|") 

    Debug.Assert Not dom.SelectSingleNode("/root/KC") Is Nothing 
    Debug.Assert dom.SelectSingleNode("/root/AD/PE").Text = "5" 
    Debug.Assert dom.SelectSingleNode("/root/CD[1]/HP").Text = "test" 
    Debug.Assert dom.SelectSingleNode("/root/CD[2]/HP").Text = "abc" 

    Debug.Print dom.XML 

End Sub 

正如您所见,这会将您的文本转换为保留所有结构并允许命名中的重复项的XML DOM文档。然后可以使用XPath访问任何节点或值。这也可以扩展到更多的嵌套层次和更多的结构。

这是它创造的幕后XML文档:

<root> 
    <KC/> 
    <AD> 
     <PE>5</PE> 
     <PF>3</PF> 
    </AD> 
    <CD> 
     <PE>5</PE> 
     <HP>test</HP> 
    </CD> 
    <CD> 
     <PE>3</PE> 
     <HP>abc</HP> 
    </CD> 
</root>