2017-06-22 101 views
0

我有以下代码:如何访问xml中下一个标记的文本内容?

/**
 * Walks the dependency-graph XML obtained from {@code Features.dependencyGraph()} and
 * reads, for each child of every {@code dependencies} element, its {@code type}
 * attribute and the text of its {@code dependent} and {@code governor} children.
 *
 * NOTE(review): the snippet is cut off before the loops close, so how {@code result}
 * is built and what is returned cannot be seen here.
 */
public String depRel() throws SAXException, IOException, 
     ParserConfigurationException, ClassNotFoundException, 
     ClassCastException { 
    String xmlString = Features.dependencyGraph(); 
    ; 

    String result = ""; 
    String dependent = ""; 
    String governor = ""; 
    String type = ""; 

    // System.out.println("A value is :" + xmlString); 
    // here I convert it so that I can read it as XML
    // (presumably convertStringToDocument parses the string into a DOM Document - confirm)
    Document document = convertStringToDocument(xmlString); 
    document.getDocumentElement().normalize(); 
    Element root = document.getDocumentElement(); 
    // Outer loop: every <dependencies> element in the document.
    NodeList nList = document.getElementsByTagName("dependencies"); 
    for (int temp = 0; temp < nList.getLength(); temp++) { 
     Node node = nList.item(temp); 
     if (node.getNodeType() == Node.ELEMENT_NODE) { 
      // eElement1 is not used anywhere in the visible part of the method.
      Element eElement1 = (Element) node; 

     } 
     // Middle loop: direct children of <dependencies>; non-element children
     // (e.g. whitespace text nodes) are skipped by the ELEMENT_NODE check.
     NodeList nodesDocPart = node.getChildNodes(); 
     for (int temp2 = 0; temp2 < nodesDocPart.getLength(); temp2++) { 

      Node n = nodesDocPart.item(temp2); 

      if (n.getNodeType() == Node.ELEMENT_NODE) { 
       Element el1 = (Element) n; 
       // Relation label of this child, read from its "type" attribute.
       type = el1.getAttribute("type"); 
      } 

      // /////////////////////////////////////////////////sentence///////////////////////////////////////////// 
      // Inner loop: children of the current child; the text of <dependent> and
      // <governor> elements is stored, overwriting the value from the previous one.
      NodeList nodesSentencePart = n.getChildNodes(); 
      for (int temp3 = 0; temp3 < nodesSentencePart.getLength(); temp3++) { 
       Node sentence = nodesSentencePart.item(temp3); 
       if (sentence.getNodeType() == Node.ELEMENT_NODE) { 

        Element eElement4 = (Element) sentence; 
        if (eElement4.getTagName().equals("dependent")) { 
         dependent = eElement4.getTextContent(); 
        } 
        if (eElement4.getTagName().equals("governor")) { 
         governor = eElement4.getTextContent(); 


enter code here 

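下面是 XML 格式的内容,它描述了一个句子的依存关系图。这个句子是:The production of human immunodeficiency virus type 1 (HIV-1) progeny was followed in the U937 promonocytic cell line after stimulation either with retinoic acid or PMA, and in purified human monocytes and macrophages.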

<dependencies style="typed"> 
    <dep type="det"> 
    <governor idx="2">production</governor> 
    <dependent idx="1">The</dependent> 
    </dep> 
    <dep type="nsubjpass"> 
    <governor idx="14">followed</governor> 
    <dependent idx="2">production</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="7">type</governor> 
    <dependent idx="3">of</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="7">type</governor> 
    <dependent idx="4">human</dependent> 
    </dep> 
    <dep type="compound"> 
    <governor idx="7">type</governor> 
    <dependent idx="5">immunodeficiency</dependent> 
    </dep> 
    <dep type="compound"> 
    <governor idx="7">type</governor> 
    <dependent idx="6">virus</dependent> 
    </dep> 
    <dep type="nmod:of"> 
    <governor idx="2">production</governor> 
    <dependent idx="7">type</dependent> 
    </dep> 
    <dep type="nummod"> 
    <governor idx="7">type</governor> 
    <dependent idx="8">1</dependent> 
    </dep> 
    <dep type="punct"> 
    <governor idx="10">HIV-1</governor> 
    <dependent idx="9">-LRB-</dependent> 
    </dep> 
    <dep type="appos"> 
    <governor idx="7">type</governor> 
    <dependent idx="10">HIV-1</dependent> 
    </dep> 
    <dep type="punct"> 
    <governor idx="10">HIV-1</governor> 
    <dependent idx="11">-RRB-</dependent> 
    </dep> 
    <dep type="dep"> 
    <governor idx="7">type</governor> 
    <dependent idx="12">progeny</dependent> 
    </dep> 
    <dep type="auxpass"> 
    <governor idx="14">followed</governor> 
    <dependent idx="13">was</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="20">line</governor> 
    <dependent idx="15">in</dependent> 
    </dep> 
    <dep type="det"> 
    <governor idx="20">line</governor> 
    <dependent idx="16">the</dependent> 
    </dep> 
    <dep type="compound"> 
    <governor idx="20">line</governor> 
    <dependent idx="17">U937</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="20">line</governor> 
    <dependent idx="18">promonocytic</dependent> 
    </dep> 
    <dep type="compound"> 
    <governor idx="20">line</governor> 
    <dependent idx="19">cell</dependent> 
    </dep> 
    <dep type="nmod:in"> 
    <governor idx="14">followed</governor> 
    <dependent idx="20">line</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="22">stimulation</governor> 
    <dependent idx="21">after</dependent> 
    </dep> 
    <dep type="nmod:after"> 
    <governor idx="14">followed</governor> 
    <dependent idx="22">stimulation</dependent> 
    </dep> 
    <dep type="dep"> 
    <governor idx="26">acid</governor> 
    <dependent idx="23">either</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="26">acid</governor> 
    <dependent idx="24">with</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="26">acid</governor> 
    <dependent idx="25">retinoic</dependent> 
    </dep> 
    <dep type="nmod:with"> 
    <governor idx="22">stimulation</governor> 
    <dependent idx="26">acid</dependent> 
    </dep> 
    <dep type="cc"> 
    <governor idx="26">acid</governor> 
    <dependent idx="27">or</dependent> 
    </dep> 
    <dep type="nmod:with"> 
    <governor idx="22">stimulation</governor> 
    <dependent idx="28">PMA</dependent> 
    </dep> 
    <dep type="conj:or"> 
    <governor idx="26">acid</governor> 
    <dependent idx="28">PMA</dependent> 
    </dep> 
    <dep type="punct"> 
    <governor idx="14">followed</governor> 
    <dependent idx="29">,</dependent> 
    </dep> 
    <dep type="cc"> 
    <governor idx="14">followed</governor> 
    <dependent idx="30">and</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="31">in</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="32">purified</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="33">human</dependent> 
    </dep> 
    <dep type="conj:and"> 
    <governor idx="14">followed</governor> 
    <dependent idx="34">monocytes</dependent> 
    </dep> 
    <dep type="cc"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="35">and</dependent> 
    </dep> 
    <dep type="conj:and"> 
    <governor idx="14">followed</governor> 
    <dependent idx="36">macrophages</dependent> 
    </dep> 
    <dep type="conj:and"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="36">macrophages</dependent> 
    </dep> 
    <dep type="punct"> 
    <governor idx="14">followed</governor> 
    <dependent idx="37">.</dependent> 
     </dep> 

如果我当前位于 `governor` 标签,怎样才能访问对应的 `dependent` 标签?我想针对某个单词(word),获取它所有的 governor 和所有的 dependent,应该怎么做?

+0

“我想针对某个单词获取所有的 governor 和所有的 dependent”——这里的“单词”指的是什么?是 `governor` 节点的文本吗? – SomeDude

+0

这个单词来自我要解析的句子。我还必须保留句子本身,并为句子中的每个单词找出它的 governor 和 dependent。 – Nadd

回答

0

看来你想收集 governor / dependent / word 的组合。你可以使用下面的代码得到由这样一个类的对象组成的集合——我把这个类命名为 GovernorDependentNode。

/**
 * Plain holder that groups a searched word with one governor node and one
 * dependent node. All fields are package-private and start out as {@code null}.
 */
class GovernorDependentNode
{
    /** The word this entry was created for. */
    String word;

    /** The governor node associated with {@link #word}. */
    Node governor;

    /** The dependent node paired with {@link #governor}; may be left unset. */
    Node dependent;
}

/**
 * Finds every {@code governor} element under a {@code dep} element whose text equals
 * {@code word} and pairs it with the first {@code dependent} child of the same parent.
 *
 * <p>The word is bound to the XPath variable {@code $word} instead of being pasted
 * into the expression text. With concatenation, a word containing an apostrophe
 * (for example {@code 's}) produced an invalid or altered expression.
 *
 * <p>Failures are best-effort: any exception is printed and the entries collected so
 * far (possibly none) are returned.
 *
 * @param word text a governor must have; {@code null} is matched as the text
 *             {@code "null"}, which is what string concatenation produced before
 * @param is   XML input to evaluate the XPath expression against
 * @return one entry per matching governor; an entry's {@code dependent} stays
 *         {@code null} when the governor's parent has no {@code dependent} child
 */
List<GovernorDependentNode> getNodes(String word, InputSource is)
{
    List<GovernorDependentNode> gdNodes = new ArrayList<GovernorDependentNode>();
    // Concatenation rendered a null word as "null"; keep that for the bound value.
    final String wordValue = String.valueOf(word);
    try
    {
        javax.xml.xpath.XPath xpath = XPathFactory.newInstance().newXPath();
        // Supply the word as data so quotes inside it can never change the query.
        xpath.setXPathVariableResolver(new javax.xml.xpath.XPathVariableResolver()
        {
            @Override
            public Object resolveVariable(javax.xml.namespace.QName variableName)
            {
                return "word".equals(variableName.getLocalPart()) ? wordValue : null;
            }
        });

        Object govs = xpath.evaluate("//dep/governor[. = $word]", is, XPathConstants.NODESET);
        if (govs instanceof NodeList)
        {
            NodeList gNodes = (NodeList) govs;
            for (int i = 0; i < gNodes.getLength(); i++)
            {
                Node gNode = gNodes.item(i);
                GovernorDependentNode gdNode = new GovernorDependentNode();
                gdNode.governor = gNode;
                gdNode.word = word;

                // The dependent is a sibling of the governor: scan the parent's children
                // and take the first node named "dependent".
                NodeList siblings = gNode.getParentNode().getChildNodes();
                for (int j = 0; j < siblings.getLength(); j++)
                {
                    Node sibling = siblings.item(j);
                    if ("dependent".equals(sibling.getNodeName()))
                    {
                        gdNode.dependent = sibling;
                        break;
                    }
                }
                gdNodes.add(gdNode);
            }
        }
    }
    catch (Exception e)
    {
        // Deliberately best-effort, as before: report the problem and fall through
        // to return whatever was collected.
        e.printStackTrace();
    }

    return gdNodes;
}

调用方式类似下面这样:

// Wrap the XML text in a reader-backed InputSource, then look up the entries for one word.
StringReader xmlReader = new StringReader(xmlString);
InputSource is = new InputSource(xmlReader);
List<GovernorDependentNode> nodes = getNodes("yourWord", is);

方法 getNodes 首先使用 XPath 表达式 `//dep/governor[.='word']` 获取文本等于给定单词的所有 governor 节点。

这样的节点可能有多个,例如单词 followed 就有 9 个 governor 节点。因此需要为其中每一个节点取得对应的 dependent 节点,并用 governor 节点、dependent 节点和给定单词这三项信息构造一个对象。

为了打印节点列表,你可以使用:

// Print every governor/dependent pair found for the word "followed".
List<GovernorDependentNode> nodes = getNodes("followed", inputSource);
for (GovernorDependentNode node : nodes)
{
    System.out.println("Word : " + node.word);
    System.out.println("Governor : " + node.governor.getTextContent());
    // dependent is only assigned when a "dependent" child was found, so guard
    // against null instead of failing with a NullPointerException mid-listing.
    String dependentText = (node.dependent == null) ? "(none)" : node.dependent.getTextContent();
    System.out.println("Dependent : " + dependentText);
}

输出是:

Word : followed 
Governor : followed 
Dependent : production 
Word : followed 
Governor : followed 
Dependent : was 
Word : followed 
Governor : followed 
Dependent : line 
Word : followed 
Governor : followed 
Dependent : stimulation 
Word : followed 
Governor : followed 
Dependent : , 
Word : followed 
Governor : followed 
Dependent : and 
Word : followed 
Governor : followed 
Dependent : monocytes 
Word : followed 
Governor : followed 
Dependent : macrophages 
Word : followed 
Governor : followed 
Dependent : . 
+0

我不是应该先把 xmlString 解析成 XML,再传入句子中的当前单词吗?因为当我像 `InputSource is = new InputSource(new StringReader(xmlString))` 这样调用该方法时,编译器是否知道 xmlString 并不是 XML?它只是一个 XML 格式的 String。 – Nadd

+0

编译器并不知道它是不是 XML。如果字符串不是 XML 格式,`XPathFactory.newInstance().newXPath().evaluate(...)` 这一行就会抛出异常。在把它传给方法之前,您可以先检查该字符串是否为 XML。 – SomeDude

+0

运行这段代码时,异常出现在这一行:`Object govs = XPathFactory.newInstance().newXPath().evaluate("//dep/governor[.='" + word + "']", is, XPathConstants.NODESET);` – Nadd

相关问题