我想在.NET(C#,Framework 3.5 SP1)中跟踪大型XML文件(这些文件并非由我提供)的加载进度:文件大小从1MB到300MB不等,存放在网络文件共享上。跟踪加载巨大XML文件的进度
我使用XmlReader来加载文件,而不是直接调用XmlDocument.Load方法,以加快加载过程。
顺便说一句,我在互联网上没有找到任何关于如何跟踪加载进度的文档:似乎不存在相应的委托/事件。有什么方法可以完成这项任务吗?如果保存XML时也能有类似的功能就更好了。
谢谢
我想在.NET(C#,Framework 3.5 SP1)中跟踪大型XML文件(这些文件并非由我提供)的加载进度:文件大小从1MB到300MB不等,存放在网络文件共享上。跟踪加载巨大XML文件的进度
我使用XmlReader来加载文件,而不是直接调用XmlDocument.Load方法,以加快加载过程。
顺便说一句,我在互联网上没有找到任何关于如何跟踪加载进度的文档:似乎不存在相应的委托/事件。有什么方法可以完成这项任务吗?如果保存XML时也能有类似的功能就更好了。
谢谢
假设你是从一个流中读取,下面是一个(并不完美的)示例,说明如何做到这一点…… 基本上,ProgressStreamWrapper封装了文件流,并在Position更改时引发一个事件。
/// <summary>
/// Console demo: reads a large XML file through a <c>ProgressStreamWrapper</c>
/// and prints the percentage consumed each time the wrapper reports a position change.
/// </summary>
class Program
{
    /// <summary>
    /// Reads c:\temp\bigfile.xml node by node, printing progress to the console,
    /// then waits for the user to press Enter.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.WriteLine("Reading big file...");
        // using blocks guarantee the file handle is released even when the XML
        // is malformed and Read() throws part-way through the document.
        using (FileStream fileStream = File.OpenRead("c:\\temp\\bigfile.xml"))
        using (ProgressStreamWrapper progressStreamWrapper = new ProgressStreamWrapper(fileStream))
        {
            // Position / Length of the underlying file gives the fraction consumed so far.
            progressStreamWrapper.PositionChanged += (o, ea) => Console.WriteLine((double) progressStreamWrapper.Position/progressStreamWrapper.Length * 100 + "% complete");
            using (XmlReader xmlReader = XmlReader.Create(progressStreamWrapper))
            {
                while (xmlReader.Read())
                {
                    //read the xml document
                }
            }
        }
        Console.WriteLine("DONE");
        Console.ReadLine();
    }
}
/// <summary>
/// Stream decorator that forwards every operation to an inner stream and raises
/// <see cref="PositionChanged"/> after each operation that may have moved the
/// inner stream's position (reads, writes, seeks and explicit position sets).
/// Disposing or closing the wrapper also disposes the inner stream.
/// </summary>
public class ProgressStreamWrapper : Stream, IDisposable
{
    /// <summary>Creates a wrapper around <paramref name="innerStream"/>.</summary>
    /// <param name="innerStream">The stream all operations are delegated to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="innerStream"/> is null.</exception>
    public ProgressStreamWrapper(Stream innerStream)
    {
        // Fail fast here instead of with a NullReferenceException on first use.
        if (innerStream == null)
        {
            throw new ArgumentNullException("innerStream");
        }
        InnerStream = innerStream;
    }

    /// <summary>The wrapped stream.</summary>
    public Stream InnerStream { get; private set; }

    /// <summary>
    /// Releases the inner stream. Stream.Close() and Stream.Dispose() both funnel
    /// into this method, so it is the single place where cleanup has to happen;
    /// Close() itself is intentionally not overridden.
    /// </summary>
    /// <param name="disposing">True when called from Dispose()/Close().</param>
    protected override void Dispose(bool disposing)
    {
        try
        {
            if (disposing)
            {
                InnerStream.Dispose();
            }
        }
        finally
        {
            base.Dispose(disposing);
        }
    }

    /// <summary>Flushes the inner stream.</summary>
    public override void Flush()
    {
        InnerStream.Flush();
    }

    /// <summary>Starts an asynchronous read on the inner stream.</summary>
    public override IAsyncResult BeginRead(byte[] buffer, int offset, int count, AsyncCallback callback, object state)
    {
        return InnerStream.BeginRead(buffer, offset, count, callback, state);
    }

    /// <summary>Completes an asynchronous read and raises <see cref="PositionChanged"/>.</summary>
    /// <returns>The value returned by the inner stream's EndRead.</returns>
    public override int EndRead(IAsyncResult asyncResult)
    {
        int endRead = InnerStream.EndRead(asyncResult);
        OnPositionChanged();
        return endRead;
    }

    /// <summary>Starts an asynchronous write on the inner stream.</summary>
    public override IAsyncResult BeginWrite(byte[] buffer, int offset, int count, AsyncCallback callback, object state)
    {
        return InnerStream.BeginWrite(buffer, offset, count, callback, state);
    }

    /// <summary>Completes an asynchronous write and raises <see cref="PositionChanged"/>.</summary>
    public override void EndWrite(IAsyncResult asyncResult)
    {
        InnerStream.EndWrite(asyncResult);
        OnPositionChanged();
    }

    /// <summary>Seeks on the inner stream and raises <see cref="PositionChanged"/>.</summary>
    /// <returns>The value returned by the inner stream's Seek.</returns>
    public override long Seek(long offset, SeekOrigin origin)
    {
        long seek = InnerStream.Seek(offset, origin);
        OnPositionChanged();
        return seek;
    }

    /// <summary>Sets the length of the inner stream; no event is raised.</summary>
    public override void SetLength(long value)
    {
        InnerStream.SetLength(value);
    }

    /// <summary>Reads from the inner stream and raises <see cref="PositionChanged"/>.</summary>
    /// <returns>The value returned by the inner stream's Read.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        int read = InnerStream.Read(buffer, offset, count);
        OnPositionChanged();
        return read;
    }

    /// <summary>Reads one byte from the inner stream and raises <see cref="PositionChanged"/>.</summary>
    /// <returns>The value returned by the inner stream's ReadByte.</returns>
    public override int ReadByte()
    {
        int readByte = InnerStream.ReadByte();
        OnPositionChanged();
        return readByte;
    }

    /// <summary>Writes to the inner stream and raises <see cref="PositionChanged"/>.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        InnerStream.Write(buffer, offset, count);
        OnPositionChanged();
    }

    /// <summary>Writes one byte to the inner stream and raises <see cref="PositionChanged"/>.</summary>
    public override void WriteByte(byte value)
    {
        InnerStream.WriteByte(value);
        OnPositionChanged();
    }

    /// <summary>Whether the inner stream supports reading.</summary>
    public override bool CanRead
    {
        get { return InnerStream.CanRead; }
    }

    /// <summary>Whether the inner stream supports seeking.</summary>
    public override bool CanSeek
    {
        get { return InnerStream.CanSeek; }
    }

    /// <summary>Whether the inner stream can time out.</summary>
    public override bool CanTimeout
    {
        get { return InnerStream.CanTimeout; }
    }

    /// <summary>Whether the inner stream supports writing.</summary>
    public override bool CanWrite
    {
        get { return InnerStream.CanWrite; }
    }

    /// <summary>Length of the inner stream.</summary>
    public override long Length
    {
        get { return InnerStream.Length; }
    }

    /// <summary>
    /// Position of the inner stream. Setting it raises <see cref="PositionChanged"/>.
    /// </summary>
    public override long Position
    {
        get { return InnerStream.Position; }
        set
        {
            InnerStream.Position = value;
            OnPositionChanged();
        }
    }

    /// <summary>
    /// Raised after each operation that may have moved the position. Handlers can
    /// query <see cref="Position"/> and <see cref="Length"/> on the sender.
    /// </summary>
    public event EventHandler PositionChanged;

    /// <summary>Raises <see cref="PositionChanged"/> with empty event arguments.</summary>
    protected virtual void OnPositionChanged()
    {
        // Copy to a local so a handler removed on another thread between the
        // null check and the invocation cannot cause a NullReferenceException.
        EventHandler handler = PositionChanged;
        if (handler != null)
        {
            handler(this, EventArgs.Empty);
        }
    }

    /// <summary>Read timeout of the inner stream.</summary>
    public override int ReadTimeout
    {
        get { return InnerStream.ReadTimeout; }
        set { InnerStream.ReadTimeout = value; }
    }

    /// <summary>Write timeout of the inner stream.</summary>
    public override int WriteTimeout
    {
        get { return InnerStream.WriteTimeout; }
        set { InnerStream.WriteTimeout = value; }
    }
}
是的,我可以把流作为参数传给该API,所以我会采用这种解决方案(PositionChanged事件)。今天会更新。 – camous 2009-12-31 11:09:44
我想我们的想法是一样的,但你的代码先贴出来了,所以毫无疑问给你+1 ;-p – 2009-12-31 11:10:20
使用DataSet.Read()怎么样?
或者:
// Create the document.
XmlDocument doc = new XmlDocument();
doc.Load(file);
// Loop through the element children of the root and build the list of Product objects.
List<Product> products = new List<Product>();
foreach (XmlNode node in doc.DocumentElement.ChildNodes)
{
    // ChildNodes may also hold comments or processing instructions; iterating
    // as XmlElement would throw InvalidCastException on those, so skip them.
    XmlElement element = node as XmlElement;
    if (element == null)
    {
        continue;
    }
    Product newProduct = new Product();
    // The file is machine-readable data, so parse numbers with the invariant
    // culture; otherwise "9.99" is misread on machines whose regional settings
    // use a comma as the decimal separator.
    newProduct.ID = Int32.Parse(element.GetAttribute("ID"), System.Globalization.CultureInfo.InvariantCulture);
    newProduct.Name = element.GetAttribute("Name");
    // If there were more than one child node, you would probably use
    // another foreach loop here and move through the
    // element.ChildNodes collection.
    newProduct.Price = Decimal.Parse(element.ChildNodes[0].InnerText, System.Globalization.CultureInfo.InvariantCulture);
    products.Add(newProduct);
}
基本上,我关注的是加载机制,而不是解析机制:解析过程由外部API完成。 在你的例子中,'doc.Load(file);'会在这一步加载整个XML文件,只有当文件完全加载到内存之后才会继续往下执行。 – camous 2009-12-31 11:02:59
内置的加载器在这方面没有提供太多支持;但是,您可以编写一个拦截流——从这个流加载文档,并通过事件公开Position?
即在Read
方法中(按一定间隔)引发事件?
下面是一个在读取和写入时都支持进度更新的示例:
using System;
using System.IO;
using System.Xml;
/// <summary>
/// Pass-through stream that counts the bytes read and written through it and
/// raises <see cref="ProgressChanged"/> once the count accumulated since the
/// last notification reaches <see cref="UpdateInterval"/>. Closing or disposing
/// this stream also closes/disposes the wrapped stream.
/// </summary>
class ChattyStream : Stream
{
    // Wrapped stream; null once this instance has been disposed.
    private Stream inner;
    // Bytes transferred since the last ProgressChanged notification.
    private int pending;
    // Backing field for UpdateInterval.
    private int interval;

    /// <summary>Wraps <paramref name="baseStream"/> with an update interval of 1000 bytes.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="baseStream"/> is null.</exception>
    public ChattyStream(Stream baseStream)
    {
        if (baseStream == null)
        {
            throw new ArgumentNullException("baseStream");
        }
        inner = baseStream;
        interval = 1000;
    }

    /// <summary>Raised when enough bytes have passed through since the last notification.</summary>
    public event EventHandler ProgressChanged;

    /// <summary>Number of transferred bytes that triggers a notification; must be positive.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The assigned value is zero or negative.</exception>
    public int UpdateInterval
    {
        get { return interval; }
        set
        {
            if (value <= 0)
            {
                throw new ArgumentOutOfRangeException("value");
            }
            interval = value;
        }
    }

    /// <summary>The wrapped stream; throws <see cref="ObjectDisposedException"/> after disposal.</summary>
    protected Stream BaseStream
    {
        get
        {
            CheckDisposed();
            return inner;
        }
    }

    public override bool CanRead
    {
        get { return BaseStream.CanRead; }
    }

    public override bool CanSeek
    {
        get { return BaseStream.CanSeek; }
    }

    public override bool CanWrite
    {
        get { return BaseStream.CanWrite; }
    }

    public override long Length
    {
        get { return BaseStream.Length; }
    }

    public override long Position
    {
        get { return BaseStream.Position; }
        set { BaseStream.Position = value; }
    }

    /// <summary>Raises <see cref="ProgressChanged"/> if anyone is subscribed.</summary>
    protected virtual void OnProgressChanged()
    {
        EventHandler subscribers = ProgressChanged;
        if (subscribers == null)
        {
            return;
        }
        subscribers(this, EventArgs.Empty);
    }

    /// <summary>
    /// Adds <paramref name="value"/> bytes to the running count and notifies
    /// when the count reaches the update interval; the remainder is carried over.
    /// Non-positive values are ignored.
    /// </summary>
    protected void Increment(int value)
    {
        if (value <= 0)
        {
            return;
        }
        pending += value;
        if (pending < interval)
        {
            return;
        }
        // Notify first, then keep only the part that overshot the interval.
        OnProgressChanged();
        pending %= interval;
    }

    /// <summary>Reads from the wrapped stream and counts the bytes actually read.</summary>
    public override int Read(byte[] buffer, int offset, int count)
    {
        int transferred = BaseStream.Read(buffer, offset, count);
        Increment(transferred);
        return transferred;
    }

    /// <summary>Reads one byte; counts it only when a byte was returned (result is not negative).</summary>
    public override int ReadByte()
    {
        int b = BaseStream.ReadByte();
        if (b >= 0)
        {
            Increment(1);
        }
        return b;
    }

    /// <summary>Writes to the wrapped stream and counts <paramref name="count"/> bytes.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        BaseStream.Write(buffer, offset, count);
        Increment(count);
    }

    /// <summary>Writes one byte to the wrapped stream and counts it.</summary>
    public override void WriteByte(byte value)
    {
        BaseStream.WriteByte(value);
        Increment(1);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        return BaseStream.Seek(offset, origin);
    }

    public override void SetLength(long value)
    {
        BaseStream.SetLength(value);
    }

    public override void Flush()
    {
        BaseStream.Flush();
    }

    /// <summary>Closes the wrapped stream (if still held) and then this stream.</summary>
    public override void Close()
    {
        if (inner != null)
        {
            inner.Close();
        }
        base.Close();
    }

    /// <summary>Disposes the wrapped stream when disposing, then drops the reference.</summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            if (inner != null)
            {
                inner.Dispose();
            }
        }
        inner = null;
        base.Dispose(disposing);
    }

    // Throws once the wrapped stream reference has been released.
    private void CheckDisposed()
    {
        if (inner == null)
        {
            throw new ObjectDisposedException(GetType().Name);
        }
    }
}
/// <summary>
/// Console demo for ChattyStream: writes a generated XML file and loads it
/// back, printing the stream position on every progress notification.
/// </summary>
static class Program
{
    static void Main()
    {
        // Step 1: generate a reasonably large XML file.
        const string path = "bigfile";
        if (File.Exists(path))
        {
            File.Delete(path);
        }
        using (ChattyStream output = new ChattyStream(File.Create(path)))
        {
            output.ProgressChanged += (sender, e) => Console.WriteLine("Writing: " + output.Position);
            using (XmlWriter xml = XmlWriter.Create(output))
            {
                xml.WriteStartDocument();
                xml.WriteStartElement("xml");
                int index = 0;
                while (index < 50000)
                {
                    xml.WriteElementString("add", index.ToString());
                    index++;
                }
                xml.WriteEndElement();
                xml.WriteEndDocument();
            }
            output.Close();
        }

        // Step 2: load the file back through a second ChattyStream.
        using (ChattyStream input = new ChattyStream(File.OpenRead("bigfile")))
        {
            input.ProgressChanged += (sender, e) => Console.WriteLine("Reading: " + input.Position);
            // "input" can be consumed by any stream-based API here:
            // XmlReader, XmlDocument, XDocument, etc.
            XmlDocument document = new XmlDocument();
            document.Load(input);
        }
    }
}
你把这些文件加载到哪里:DOM、数据库还是其他地方?你是逐个节点地读取并处理它们,还是把它们整个放入内存? – A9S6 2009-12-31 10:08:59
我想我忘了提供一些信息: 我用来加载这些XML文件的API(我有源代码,但不想修改其中的逻辑/解析部分)负责解析文件(主要使用XPath)。此API的参数接受XML文件的路径(内部使用XmlReader)或一个Stream。 我并不关心解析过程的快慢,只关注加载到内存的过程。 – camous 2009-12-31 11:07:27
XmlReader不支持XPath,只提供顺序访问……如果您需要进行更复杂的处理,您可能需要使用XPathNavigator;或者,如果内存占用很重要,请尝试vtd-xml – 2010-01-01 03:19:18