2015-09-25 90 views
0

我有一个如下例所示的XML,它最多可以包含5000行,但为了便于说明,这里只保留了20行。如何优化这种嵌套分组?

<PMT NM="rnt-model">
  <!-- Each PV carries V = "level1-level2-level3-level4_model" and a count C. -->
  <PV V="L11-L23-L3448-L42375_MODEL1" C="1"/>
  <PV V="L11-L23-L3448-L448_MODEL2" C="1"/>
  <PV V="L11-L23-L3448-L448_MODEL3" C="1"/>
  <PV V="L11-L23-L3448-L448_MODEL4" C="1"/>
  <PV V="L11-L23-L3448-L448_MODEL5" C="2"/>
  <PV V="L11-L24-L319-L493_MODEL6" C="1"/>
  <PV V="L11-L25-L3288-L41931_MODEL7" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL8" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL9" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL10" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL11" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL12" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL13" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL14" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL15" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL16" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL17" C="1"/>
  <PV V="L110-L254-L3217-L41303_MODEL18" C="1"/>
  <PV V="L110-L254-L3218-L41307_MODEL19" C="1"/>
  <PV V="L110-L254-L3218-L41307_MODEL20" C="1"/>
</PMT>

必须将XML从这种伪平面格式转换为基于@V的树结构。需要先按下划线把@V拆分成两部分,然后再按连字符对第一部分进行分词。

或者为了使其更清晰可见,以下是预期结果。

<root> 
<n id="L11"> 
    <n id="L23"> 
     <n id="L3448"> 
      <n id="L42375"> 
       <n m="MODEL1" c="1"></n> 
      </n> 
      <n id="L448"> 
       <n m="MODEL2" c="1"></n> 
       <n m="MODEL3" c="1"></n> 
       <n m="MODEL4" c="1"></n> 
       <n m="MODEL5" c="2"></n> 
      </n> 
     </n> 
    </n> 
    <!-- rest of rows below --> 

我用下面的XSLT实现了这个转换,当行数很少时它工作得很好。但是,处理真实的线上XML时,生成树需要花费很长时间,所以我想知道如何提高效率。在XSLT 2.0中做这件事非常简单,但我正在开发的项目只能使用1.0。

目前使用的(可以工作,但效率不高的)XSLT代码

<?xml version="1.0" encoding="UTF-8"?> 
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> 
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/> 
<xsl:key name="level1" use="@L1" match="row"/> 
<xsl:key name="level2" use="@L2" match="row"/> 
<xsl:key name="level3" use="@L3" match="row"/> 
<xsl:key name="level4" use="@L4" match="row"/> 
<!--
  Turns the flat PV list of the 'rnt-model' PMT element into a four-level tree.

  Step 1 builds a temporary table of <row> elements, one per PV, whose
  attributes L1..L4 hold the hyphen-separated tokens found before the
  underscore in @V, M holds the text after the underscore and C copies @C.
  Step 2 groups those rows level by level (Muenchian grouping) using the
  level1..level4 keys declared at the top of the stylesheet.

  Every nested level fetches its rows through key() instead of rescanning all
  rows with a [@Lk = ...] filter; the selected nodes and their order are the
  same, but the lookup no longer costs one full scan per group.

  NOTE: $theTree is a result tree fragment. Strict XSLT 1.0 processors do not
  allow path steps on it without a node-set() extension function; this
  template keeps relying on the lenient behaviour the stylesheet already
  depended on.
  NOTE: the level keys use a single attribute each, so an id that occurs under
  two different parents would be merged into one group.

  The leading // was dropped from the match pattern: it matches the same nodes
  with the same default priority.
-->
<xsl:template match="PMT[@NM='rnt-model']">
    <root>
     <!-- Step 1 : generate a table with all the tokenized attributes -->
     <xsl:variable name="theTree">
      <xsl:for-each select="PV">
       <row>
        <!-- Switch the context to @V so that "." is the attribute value; a PV without @V gets no L*/M attributes. -->
        <xsl:for-each select="@V">
         <xsl:call-template name="tokenize_tree">
          <xsl:with-param name="list" select="substring-before(.,'_')"/>
          <xsl:with-param name="delimiter" select="'-'"/>
         </xsl:call-template>
         <xsl:attribute name="M"><xsl:value-of select="substring-after(.,'_')"/></xsl:attribute>
        </xsl:for-each>
        <xsl:attribute name="C"><xsl:value-of select="@C"/></xsl:attribute>
       </row>
      </xsl:for-each>
     </xsl:variable>

     <!-- Step 2 : Group all -->

     <!-- The rows are direct children of the fragment root, so a child step is enough here. -->
     <xsl:for-each select="$theTree/row[generate-id()=generate-id(key('level1',@L1)[1])]">
      <xsl:variable name="theType" select="@L1"/>
      <n id="{$theType}">
       <!-- key() searches the document of the context node, i.e. the temporary row table. -->
       <xsl:for-each select="key('level1',$theType)[generate-id()=generate-id(key('level2',@L2)[1])]">
        <xsl:variable name="theCat" select="@L2"/>
        <n id="{$theCat}">
         <xsl:for-each select="key('level2',$theCat)[generate-id()=generate-id(key('level3',@L3)[1])]">
          <xsl:variable name="theSubCat" select="@L3"/>
          <n id="{$theSubCat}">
           <xsl:for-each select="key('level3',$theSubCat)[generate-id()=generate-id(key('level4',@L4)[1])]">
            <xsl:variable name="theSerie" select="@L4"/>
            <n id="{$theSerie}">
             <!-- Leaves: one <n> per row of the level-4 group. -->
             <xsl:for-each select="key('level4',$theSerie)">
              <n m="{@M}" c="{@C}"/>
             </xsl:for-each>
            </n>
           </xsl:for-each>
          </n>
         </xsl:for-each>
        </n>
       </xsl:for-each>
      </n>
     </xsl:for-each>
    </root>
</xsl:template>


<!--
  tokenize_tree: recursively splits $list on $delimiter and, for every
  non-final token and for a non-empty final token, adds an attribute to the
  element currently being built. The attribute name is the first two
  characters of the token and the attribute value is the whole token.

  Parameters:
    list      - the string that still has to be split
    delimiter - the separator string
-->
<xsl:template name="tokenize_tree">
    <xsl:param name="list"/>
    <xsl:param name="delimiter"/>
    <xsl:choose>
     <!-- At least one delimiter left: emit the head token, then recurse on the tail. -->
     <xsl:when test="contains($list, $delimiter)">
      <xsl:variable name="head" select="substring-before($list,$delimiter)"/>
      <xsl:attribute name="{substring($head,1,2)}">
       <xsl:value-of select="$head"/>
      </xsl:attribute>
      <xsl:call-template name="tokenize_tree">
       <xsl:with-param name="list" select="substring-after($list,$delimiter)"/>
       <xsl:with-param name="delimiter" select="$delimiter"/>
      </xsl:call-template>
     </xsl:when>
     <!-- Last token: emitted only when it is not empty; an empty remainder produces nothing. -->
     <xsl:when test="not($list = '')">
      <xsl:attribute name="{substring($list,1,2)}">
       <xsl:value-of select="$list"/>
      </xsl:attribute>
     </xsl:when>
    </xsl:choose>
</xsl:template>
</xsl:stylesheet> 

关于如何提高效率、让它在处理更大的文件时运行得更快,有什么想法吗?

回答

1

效率在很大程度上取决于所使用的处理器,但也许你应该尝试单遍(single-pass)的方法:

XSLT 1.0

<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
  Single-pass, four-level Muenchian grouping of the PV children of /PMT.

  @V is expected to look like "t1-t2-t3-t4_model". The output nests one
  <n id="..."> per distinct token at each level and ends with one
  <n m="model" c="@C"/> leaf per PV.

  Every key holds the whole path down to its level (t1, t1-t2, t1-t2-t3,
  t1-t2-t3-t4) rather than the token of that level alone. With single-token
  keys an id repeated under a different parent would be dropped by the
  "first in group" test and its rows would be pulled into the wrong branch.
  For input whose ids are unique per level the output is unchanged.
-->
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
<xsl:strip-space elements="*"/>

<xsl:key name="level1" match="PV" use="substring-before(@V, '-')" />
<xsl:key name="level2" match="PV" use="concat(substring-before(@V, '-'), '-', substring-before(substring-after(@V, '-'), '-'))" />
<xsl:key name="level3" match="PV" use="concat(substring-before(@V, '-'), '-', substring-before(substring-after(@V, '-'), '-'), '-', substring-before(substring-after(substring-after(@V, '-'), '-'), '-'))" />
<!-- The text before the underscore is the complete four-token path. -->
<xsl:key name="level4" match="PV" use="substring-before(@V, '_')" />

<xsl:template match="/PMT">
    <root>
     <!-- count(. | set[1]) = 1 is true only when the current PV is the first member of its group. -->
     <xsl:for-each select="PV[count(. | key('level1', substring-before(@V, '-'))[1]) = 1]">
      <xsl:variable name="L1" select="substring-before(@V, '-')" />
      <n id="{$L1}">
       <!-- All PVs examined here already share $L1, so the level-2 path is $L1 plus the second token. -->
       <xsl:for-each select="key('level1', $L1)[count(. | key('level2', concat($L1, '-', substring-before(substring-after(@V, '-'), '-')))[1]) = 1]">
        <xsl:variable name="L2" select="substring-before(substring-after(@V, '-'), '-')" />
        <xsl:variable name="path2" select="concat($L1, '-', $L2)" />
        <n id="{$L2}">
         <xsl:for-each select="key('level2', $path2)[count(. | key('level3', concat($path2, '-', substring-before(substring-after(substring-after(@V, '-'), '-'), '-')))[1]) = 1]">
          <xsl:variable name="L3" select="substring-before(substring-after(substring-after(@V, '-'), '-'), '-')" />
          <xsl:variable name="path3" select="concat($path2, '-', $L3)" />
          <n id="{$L3}">
           <xsl:for-each select="key('level3', $path3)[count(. | key('level4', substring-before(@V, '_'))[1]) = 1]">
            <xsl:variable name="L4" select="substring-before(substring-after(substring-after(substring-after(@V, '-'), '-'), '-'), '_')" />
            <xsl:variable name="path4" select="substring-before(@V, '_')" />
            <n id="{$L4}">
             <!-- Leaves: every PV that shares the complete path. -->
             <xsl:for-each select="key('level4', $path4)">
              <n m="{substring-after(@V, '_')}" c="{@C}"/>
             </xsl:for-each>
            </n>
           </xsl:for-each>
          </n>
         </xsl:for-each>
        </n>
       </xsl:for-each>
      </n>
     </xsl:for-each>
    </root>
</xsl:template>

</xsl:stylesheet>

还要注意,明确的路径通常比 // 更高效。

+0

谢谢,它确实快了一些,但没有达到期望的提速幅度。坦率地说,我不确定是否还可能有大幅度的改进,但无论如何非常感谢! – Wokoman