我使用 StAX 的迭代器风格 API 来解析 XML 流。问题：XML 编码
我写了一小段代码，用来把一个大型 XML 文件拆分成多个文件。
读取输入流时一切正常，但在写出时得到了奇怪的字符（编码问题）。
public static void main(String[] args) throws Exception
{
int offre=0;
int i=0,j=0;
String Data="";
String nom="flux0.xml";
XMLEventReader reader = XMLInputFactory.newInstance().createXMLEventReader(new java.io.FileInputStream("CJ.xml"));
FileOutputStream output = new FileOutputStream(nom);
XMLOutputFactory xmlof = XMLOutputFactory.newInstance();
XMLEventWriter writer = xmlof.createXMLEventWriter(output);
XMLEventFactory eventFactory = XMLEventFactory.newInstance();
while (reader.hasNext() /*&& j<3000*/)
{
XMLEvent event = (XMLEvent) reader.next();
if (event.isStartElement())
{
if (event.asStartElement().getName().getLocalPart() == "OFFER")
{
offre++;
}
}
if(offre==5000)
{
i++;
nom="flux"+i+".xml";
output = new FileOutputStream(nom);
writer= xmlof.createXMLEventWriter(output);
if (event.getEventType() == event.CHARACTERS)
{
Characters characters = event.asCharacters();
String texte=characters.getData();
CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder();
Data= new String(encoder.encode(CharBuffer.wrap(texte.toCharArray())).array());
writer.add(eventFactory.createCharacters(Data));
}
else
{
writer.add(event);
}
nom="flux"+i+".xml";
offre=0;
}
else
{
if (event.getEventType() == event.CHARACTERS)
{
Characters characters = event.asCharacters();
String texte=characters.getData();
CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder();
Data= new String(encoder.encode(CharBuffer.wrap(texte.toCharArray())).array());
writer.add(eventFactory.createCharacters(Data));
}
else
{
writer.add(event);
}
}
writer.flush();
}
谢谢，问题已解决。 – timo 2011-03-16 16:07:04