2011-04-08 68 views

回答

1

HTML文件不符合XML标准。

2

如果您可以编辑所提供的文本,只需使用CDATA将内容包裹起来:

<content><![CDATA[Your stuff here with all the <em>HTML</em> tags you can think of.]]></content> 

然后SAX解析器的toString()将返回如下所示的字符串:Your stuff here with all the <em>HTML</em> tags you can think of.

2

您可以使用下面的方法为指定标签的内容添加CDATA(参数 data:实际的XML数据; tag:其内容需要放入CDATA中的XML标签名称):

/**
 * Wraps the content of every {@code <tag>...</tag>} element found in {@code data}
 * in a CDATA section, so that markup inside the element is treated as plain
 * character data by an XML parser.
 *
 * <p>Only the exact opening tag {@code <tag>} (no attributes, no whitespace) and
 * the exact closing tag {@code </tag>} are recognised. Each opening tag is paired
 * with the first closing tag that follows it. Text outside matched pairs,
 * including stray closing tags and an unterminated opening tag, is copied
 * through unchanged.
 *
 * <p>If the element content itself contains the CDATA terminator {@code ]]>},
 * the section is split at that point so the output stays well-formed.
 *
 * @param data the XML text to process
 * @param tag  the element name (without angle brackets) whose content is wrapped
 * @return the rewritten text, or {@code null} when {@code data} or {@code tag}
 *         is {@code null} or empty
 */
public static final String putCDATA(String data, String tag) {
    if (data == null || data.isEmpty() || tag == null || tag.isEmpty()) {
        return null;
    }

    final String openTag = "<" + tag + ">";
    final String closeTag = "</" + tag + ">";
    final String cdataStart = "<![CDATA[";
    final String cdataEnd = "]]>";

    final StringBuilder result = new StringBuilder(data.length() + 16);
    int cursor = 0; // index of the first character not yet copied to result

    while (true) {
        // Test the raw indexOf result: adding the tag length first would hide the -1.
        final int openIndex = data.indexOf(openTag, cursor);
        if (openIndex == -1) {
            break;
        }
        final int contentStart = openIndex + openTag.length();

        // Search only after the opening tag so an earlier stray closing tag
        // cannot produce an inverted (start > end) range.
        final int closeIndex = data.indexOf(closeTag, contentStart);
        if (closeIndex == -1) {
            break;
        }

        // Everything up to and including the opening tag is copied verbatim.
        result.append(data, cursor, contentStart);

        // A literal "]]>" inside the content would end the section early, so
        // close the section after "]]" and reopen it before ">".
        final String content = data.substring(contentStart, closeIndex)
                .replace(cdataEnd, "]]" + cdataEnd + cdataStart + ">");
        result.append(cdataStart).append(content).append(cdataEnd);

        result.append(closeTag);
        cursor = closeIndex + closeTag.length();
    }

    // Remainder after the last matched element (or the whole input if none matched).
    result.append(data, cursor, data.length());

    final String newData = result.toString();
    // Diagnostic output kept from the original implementation.
    System.out.print("FORMATED: " + "\n" + newData);
    return newData;
}