2010-05-04 196 views
1

我有两个文档需要合并,但合并的方式在我找到的其他示例中似乎都没有出现过。也就是说,不仅要匹配某一层节点上的属性,还要匹配该节点下一层节点的属性,并以该下层节点的值作为匹配依据。(问题标题:使用 XSL 合并相似的 XML 文件的问题)

我想以下面这个样本(第一个文档)作为合并的基础:

<?xml version="1.0" encoding="UTF-8" ?> 
<marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim" 
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> 
    <marc:record> 
    <marc:datafield tag="035" ind1=" " ind2=" "> 
     <marc:subfield code="a">12345</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="041" ind1=" " ind2=" "> 
     <marc:subfield code="a">eng</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="650" ind1=" " ind2="4"> 
     <marc:subfield code="a">Art</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="949" ind1=" " ind2=" "> 
     <marc:subfield code="i">Review of conference proceedings</marc:subfield> 
    </marc:datafield> 
    </marc:record> 
    <marc:record> 
    <marc:datafield tag="035" ind1=" " ind2=" "> 
     <marc:subfield code="a">54321</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="041" ind1=" " ind2=" "> 
     <marc:subfield code="a">eng</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="650" ind1=" " ind2="4"> 
     <marc:subfield code="a">Byzantine</marc:subfield> 
    </marc:datafield> 
    </marc:record> 
</marc:collection> 

当两个文档中 tag 为 "035" 的 datafield 下、code 为 "a" 的 subfield 的值相同(例如 "12345")时,把它与下面第二个文档中对应的记录合并:

<marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim" 
xmlns:fn="http://www.w3.org/2005/xpath-functions" xmlns:xs="http://www.w3.org/2001/XMLSchema" 
xmlns:fo="http://www.w3.org/1999/XSL/Format"> 
    <marc:record> 
    <marc:datafield ind2=" " ind1=" " tag="035"> 
     <marc:subfield code="a">12345</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2="4" ind1=" " tag="650"> 
     <marc:subfield code="a">General works</marc:subfield> 
     <marc:subfield code="x">Historians and critics</marc:subfield> 
     <marc:subfield code="x">Smith, John, 1834-1917</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2="4" ind1=" " tag="650"> 
     <marc:subfield code="a">Généralités</marc:subfield> 
     <marc:subfield code="x">Historiens et critiques d'art</marc:subfield> 
     <marc:subfield code="x">Dietrichson, Lorentz, 1834-1917</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2=" " ind1=" " tag="654"> 
     <marc:subfield code="a">General works</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2=" " ind1=" " tag="654"> 
     <marc:subfield code="a">Généralités</marc:subfield> 
     <marc:subfield code="b">Historiens et critiques d'art</marc:subfield> 
     <marc:subfield code="b">Smith, John, 1834-1917</marc:subfield> 
    </marc:datafield> 
    </marc:record>  
    <marc:record> 
    <marc:datafield ind2=" " ind1=" " tag="035"> 
     <marc:subfield code="a">54321</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2="4" ind1=" " tag="650"> 
     <marc:subfield code="a">General works</marc:subfield> 
     <marc:subfield code="x">Historians and critics</marc:subfield> 
     <marc:subfield code="x">Lange, Julius Henrik, 1838-1896</marc:subfield> 
    </marc:datafield> 
    </marc:record> 
</marc:collection> 

结果应该是:

<?xml version="1.0" encoding="UTF-8" ?> 
<marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim" 
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> 
    <marc:record> 
    <marc:datafield tag="035" ind1=" " ind2=" "> 
     <marc:subfield code="a">12345</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="041" ind1=" " ind2=" "> 
     <marc:subfield code="a">eng</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="650" ind1=" " ind2="4"> 
     <marc:subfield code="a">Art</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2="4" ind1=" " tag="650"> 
     <marc:subfield code="a">General works</marc:subfield> 
     <marc:subfield code="x">Historians and critics</marc:subfield> 
     <marc:subfield code="x">Smith, John, 1834-1917</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2="4" ind1=" " tag="650"> 
     <marc:subfield code="a">Généralités</marc:subfield> 
     <marc:subfield code="x">Historiens et critiques d'art</marc:subfield> 
     <marc:subfield code="x">Dietrichson, Lorentz, 1834-1917</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2=" " ind1=" " tag="654"> 
     <marc:subfield code="a">General works</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2=" " ind1=" " tag="654"> 
     <marc:subfield code="a">Généralités</marc:subfield> 
     <marc:subfield code="b">Historiens et critiques d'art</marc:subfield> 
     <marc:subfield code="b">Smith, John, 1834-1917</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="949" ind1=" " ind2=" "> 
     <marc:subfield code="i">Review of conference proceedings</marc:subfield> 
    </marc:datafield> 
    </marc:record> 
    <marc:record> 
    <marc:datafield tag="035" ind1=" " ind2=" "> 
     <marc:subfield code="a">54321</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="041" ind1=" " ind2=" "> 
     <marc:subfield code="a">eng</marc:subfield> 
    </marc:datafield> 
    <marc:datafield tag="650" ind1=" " ind2="4"> 
     <marc:subfield code="a">Byzantine</marc:subfield> 
    </marc:datafield> 
    <marc:datafield ind2="4" ind1=" " tag="650"> 
     <marc:subfield code="a">General works</marc:subfield> 
     <marc:subfield code="x">Historians and critics</marc:subfield> 
     <marc:subfield code="x">Lange, Julius Henrik, 1838-1896</marc:subfield> 
    </marc:datafield> 
    </marc:record> 
</marc:collection> 

我试过自己找到的各种示例,无论是否使用查找(lookup),但似乎都不起作用。我没有贴出我的 XSL,因为我得到的结果都很糟糕。我一直觉得这应该很简单,却始终得不到像样的结果。任何帮助或提示都将不胜感激。

谢谢!

回答

0

我想我可以给你一个答案。它不算最优雅,但确实可行。基本思路是:对要合并的其中一个 XML 文件运行样式表,然后使用 document() 函数访问另一个 XML 文件。遍历第一个 XML 文件中的每条记录并取得用于匹配的值,然后遍历第二个文档,找到匹配的记录,并取出相应的节点。

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Merges subject datafields from a second MARCXML document into the records
  of the source document.

  For every marc:record of the source document the leader, controlfield and
  datafield children are copied unchanged. Then every record of the merge
  document whose datafield 035 / subfield a equals the source record's value
  contributes its 650 and 654 datafields, which are appended after the
  source record's own fields.

  Parameter:
    mergeFile  URL of the document to merge in. A relative URL is resolved
               against the stylesheet. Defaults to 'FourBabyMarcs.xml', the
               file name that used to be hard-coded.
-->
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:marc="http://www.loc.gov/MARC21/slim">

    <xsl:param name="mergeFile" select="'FourBabyMarcs.xml'"/>

    <!-- Root node of the document whose records are merged in. -->
    <xsl:variable name="doc2" select="document($mergeFile)"/>

    <xsl:template match="/">
        <marc:collection>
            <xsl:for-each select="marc:collection/marc:record">
                <!-- Identifier used to pair this record with records of the
                     merge document: the string value of the first
                     035 / subfield a. -->
                <xsl:variable name="ID"
                    select="string(marc:datafield[@tag='035']/marc:subfield[@code='a'])"/>

                <marc:record>
                    <!-- The source record's own content, grouped by kind. -->
                    <xsl:copy-of select="marc:leader"/>
                    <xsl:copy-of select="marc:controlfield"/>
                    <xsl:copy-of select="marc:datafield"/>

                    <!-- The 650 and 654 datafields of every merge-document
                         record carrying the same identifier, in document
                         order. -->
                    <xsl:copy-of
                        select="$doc2/*/marc:record
                                    [marc:datafield[@tag='035']/marc:subfield[@code='a'] = $ID]
                                /marc:datafield[@tag='650' or @tag='654']"/>
                </marc:record>
            </xsl:for-each>
        </marc:collection>
    </xsl:template>

</xsl:stylesheet>

0

以下解决方案使用键(xsl:key),以便在待合并的文档中进行高效查找。这里假设:除匹配的 datafield 本身之外,其余所有 datafield 元素都应被复制,并且每个 record 最多只有一个匹配的 datafield。待合并文档的 URL 通过参数传入。

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Copies the source MARCXML document and, for each marc:record, appends the
  datafields of the matching record found in a second document.

  Two records match when they both contain a datafield with the identifying
  tag (parameter matchTag) whose first subfield a has the same value. All
  other datafields of the matching record are appended after the existing
  children of the source record; the identifying datafield itself is not
  copied again.

  Parameters:
    mergeFile  URL of the document to merge in.
    matchTag   Tag of the datafield that identifies a record.
               Defaults to '035'.
-->
<xsl:stylesheet version="1.0"
       xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
       xmlns:marc="http://www.loc.gov/MARC21/slim">
    <xsl:output method="xml" indent="yes"/>
    <xsl:param name="mergeFile"/>
    <xsl:param name="matchTag" select="'035'"/>
    <xsl:variable name="mergeDoc" select="document($mergeFile)"/>

    <!-- Indexes every datafield by "tag|value of first subfield a". A key
         declaration may not reference variables in XSLT 1.0, so the
         restriction to matchTag is applied where the lookup is started. -->
    <xsl:key name="datafield" match="marc:datafield"
     use="concat(@tag, '|', marc:subfield[@code='a'])"/>

    <xsl:template match="/">
     <xsl:apply-templates select="node()|@*"/>
    </xsl:template>

    <!-- Identity transform: everything not handled below is copied as is. -->
    <xsl:template match="node()|@*">
     <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
    </xsl:template>

    <xsl:template match="marc:record">
     <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
      <!-- Only the identifying datafield may start a merge. Looking up every
           datafield would let a chance match on an ordinary field (for
           example a shared language code) pull in fields of an unrelated
           record. -->
      <xsl:apply-templates select="marc:datafield[@tag = $matchTag]"
                           mode="merge"/>
     </xsl:copy>
    </xsl:template>

    <xsl:template match="marc:datafield" mode="merge">
     <xsl:variable name="datafieldKey"
         select="concat(@tag, '|', marc:subfield[@code='a'])"/>
     <!-- Make the other document the context node with for-each, so that
          key lookups will consult that document instead of the source
          document. -->
     <xsl:for-each select="$mergeDoc">
      <xsl:for-each select="key('datafield', $datafieldKey)">
       <!-- Copy the sibling datafields only, so that a leader or
            controlfield of the merged record is not duplicated in the
            output record. -->
       <xsl:copy-of select="preceding-sibling::marc:datafield"/>
       <xsl:copy-of select="following-sibling::marc:datafield"/>
      </xsl:for-each>
     </xsl:for-each>
    </xsl:template>

</xsl:stylesheet>