2010-04-26 135 views
1

要解析一个没有模式(schema)的大型XML文件(2MB-20MB或更大),应该采用哪种方式(由于文件结构很奇怪,我无法用XSD.exe推断出模式,请看下面的片段)?解析第三方XML

选项

1)XML反序列化(但如前所述,我没有模式,而且XSD工具对文件内容报错), 2)LINQ to XML, 3)加载到XmlDocument, 4)用XmlReader手动解析之类的做法。

这是XML文件片段:

<?xml version="1.0" encoding="utf-8"?> 
<xmlData date="29.04.2010 12:09:13"> 
<Table> 
    <ident>079186</ident> 
    <stock>0</stock> 
    <pricewotax>33.94000000</pricewotax> 
    <discountpercent>0.00000000</discountpercent> 
</Table> 
<Table> 
    <ident>079190</ident> 
    <stock>1</stock> 
    <pricewotax>10.50000000</pricewotax> 
    <discountpercent>0.00000000</discountpercent> 
    <pricebyquantity> 
    <Table> 
    <quantity>5</quantity> 
    <pricewotax>10.00000000</pricewotax> 
    <discountpercent>0.00000000</discountpercent> 
    </Table> 
    <Table> 
    <quantity>8</quantity> 
    <pricewotax>9.00000000</pricewotax> 
    <discountpercent>0.00000000</discountpercent> 
    </Table> 
    </pricebyquantity> 
</Table> 
</xmlData> 
+0

http://en.wikipedia.org/wiki/ERP => ERP? – lexu 2010-04-26 12:47:46

回答

0

这里的XSD:

<?xml version="1.0" encoding="utf-8"?> 
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema"> 
    <xs:element name="xmlData"> 
    <xs:complexType> 
     <xs:sequence> 
     <xs:element maxOccurs="unbounded" name="Table"> 
      <xs:complexType> 
      <xs:sequence> 
       <xs:element name="ident" type="xs:int" /> 
       <xs:element name="stock" type="xs:int" /> 
       <xs:element name="pricewotax" type="xs:double" /> 
       <xs:element name="discountpercent" type="xs:double" /> 
       <xs:element minOccurs="0" name="pricebyquantity"> 
       <xs:complexType> 
        <xs:sequence> 
        <xs:element maxOccurs="unbounded" name="Table"> 
         <xs:complexType> 
         <xs:sequence> 
          <xs:element name="quantity" type="xs:int" /> 
          <xs:element name="pricewotax" type="xs:double" /> 
          <xs:element name="discountpercent" type="xs:double" /> 
         </xs:sequence> 
         </xs:complexType> 
        </xs:element> 
        </xs:sequence> 
       </xs:complexType> 
       </xs:element> 
      </xs:sequence> 
      </xs:complexType> 
     </xs:element> 
     </xs:sequence> 
     <xs:attribute name="date" type="xs:string" use="required" /> 
    </xs:complexType> 
    </xs:element> 
</xs:schema> 

这里的序列化的类:

//------------------------------------------------------------------------------ 
// <auto-generated> 
//  This code was generated by a tool. 
//  Runtime Version:2.0.50727.3603 
// 
//  Changes to this file may cause incorrect behavior and will be lost if 
//  the code is regenerated. 
// </auto-generated> 
//------------------------------------------------------------------------------ 

// 
// This source code was auto-generated by xsd, Version=2.0.50727.1432. 
// 
namespace StockInfo { 
    using System.Xml.Serialization; 


    /// <summary>
    /// XML serialization type for the document root. It carries the repeated
    /// <c>Table</c> child elements and the <c>date</c> attribute.
    /// </summary>
    [System.CodeDom.Compiler.GeneratedCode("xsd", "2.0.50727.1432")]
    [System.Serializable]
    [System.Diagnostics.DebuggerStepThrough]
    [System.ComponentModel.DesignerCategory("code")]
    [XmlType(AnonymousType = true)]
    [XmlRoot(Namespace = "", IsNullable = false)]
    public partial class xmlData
    {
        // Backing fields keep their generated names so the [Serializable]
        // field layout of the type stays exactly as the tool emitted it.
        private xmlDataTable[] tableField;
        private string dateField;

        /// <summary>
        /// Gets or sets the rows of the document. Each array item is mapped to
        /// one <c>Table</c> element placed directly under the root (no wrapper
        /// element).
        /// </summary>
        [XmlElement("Table")]
        public xmlDataTable[] Table
        {
            get { return this.tableField; }
            set { this.tableField = value; }
        }

        /// <summary>
        /// Gets or sets the value of the root's <c>date</c> attribute. It is
        /// stored as the raw attribute text; no date parsing is performed here.
        /// </summary>
        [XmlAttribute]
        public string date
        {
            get { return this.dateField; }
            set { this.dateField = value; }
        }
    }

    /// <summary>
    /// XML serialization type for one top-level <c>Table</c> row: identifier,
    /// stock, price without tax, discount percentage and an optional list of
    /// quantity-based price entries.
    /// </summary>
    [System.CodeDom.Compiler.GeneratedCode("xsd", "2.0.50727.1432")]
    [System.Serializable]
    [System.Diagnostics.DebuggerStepThrough]
    [System.ComponentModel.DesignerCategory("code")]
    [XmlType(AnonymousType = true)]
    public partial class xmlDataTable
    {
        // Backing fields keep their generated names so the [Serializable]
        // field layout of the type stays exactly as the tool emitted it.
        private int identField;
        private int stockField;
        private double pricewotaxField;
        private double discountpercentField;
        private xmlDataTableTable[] pricebyquantityField;

        /// <summary>
        /// Gets or sets the <c>ident</c> element value.
        /// </summary>
        /// <remarks>
        /// NOTE(review): mapped as <see cref="int"/>, so an identifier written
        /// with leading zeros (for example "079186") will not round-trip
        /// textually - confirm this is acceptable for the consumer.
        /// </remarks>
        public int ident
        {
            get { return this.identField; }
            set { this.identField = value; }
        }

        /// <summary>
        /// Gets or sets the <c>stock</c> element value.
        /// </summary>
        public int stock
        {
            get { return this.stockField; }
            set { this.stockField = value; }
        }

        /// <summary>
        /// Gets or sets the <c>pricewotax</c> element value.
        /// </summary>
        public double pricewotax
        {
            get { return this.pricewotaxField; }
            set { this.pricewotaxField = value; }
        }

        /// <summary>
        /// Gets or sets the <c>discountpercent</c> element value.
        /// </summary>
        public double discountpercent
        {
            get { return this.discountpercentField; }
            set { this.discountpercentField = value; }
        }

        /// <summary>
        /// Gets or sets the entries of the <c>pricebyquantity</c> element. Each
        /// array item is mapped to a nested <c>Table</c> element inside that
        /// wrapper element.
        /// </summary>
        [XmlArrayItem("Table", IsNullable = false)]
        public xmlDataTableTable[] pricebyquantity
        {
            get { return this.pricebyquantityField; }
            set { this.pricebyquantityField = value; }
        }
    }

    /// <summary>
    /// XML serialization type for one nested <c>Table</c> entry: a quantity
    /// together with a price without tax and a discount percentage.
    /// </summary>
    [System.CodeDom.Compiler.GeneratedCode("xsd", "2.0.50727.1432")]
    [System.Serializable]
    [System.Diagnostics.DebuggerStepThrough]
    [System.ComponentModel.DesignerCategory("code")]
    [XmlType(AnonymousType = true)]
    public partial class xmlDataTableTable
    {
        // Backing fields keep their generated names so the [Serializable]
        // field layout of the type stays exactly as the tool emitted it.
        private int quantityField;
        private double pricewotaxField;
        private double discountpercentField;

        /// <summary>
        /// Gets or sets the <c>quantity</c> element value.
        /// </summary>
        public int quantity
        {
            get { return this.quantityField; }
            set { this.quantityField = value; }
        }

        /// <summary>
        /// Gets or sets the <c>pricewotax</c> element value.
        /// </summary>
        public double pricewotax
        {
            get { return this.pricewotaxField; }
            set { this.pricewotaxField = value; }
        }

        /// <summary>
        /// Gets or sets the <c>discountpercent</c> element value.
        /// </summary>
        public double discountpercent
        {
            get { return this.discountpercentField; }
            set { this.discountpercentField = value; }
        }
    }
} 

警告:反序列化可能不是解析一个20MB的文件最高效的方式。 XmlReader可能是最快的方法,但这意味着要手动完成任务。

+0

顺便说一句,我使用XmlSchemaInference类生成了xsd。 – code4life 2010-05-11 13:42:07

+0

谢谢,不过我决定使用LINQ to XML来解析它,这样就不必依赖序列化了。 – mare 2010-05-13 15:56:51

0

我会将其加载到XmlDocument,然后使用XPath进行相应的处理。在这种情况下,LINQ可能是最好的选择,但我对它不太熟悉,所以无法给出确切的意见。

+0

我在某处读到过,加载到XmlDocument可能会导致较高的内存消耗,但我对此并不确定。 – mare 2010-05-03 14:11:21

+1

是的,它将不得不将整个文件加载到内存中。但在这种情况下,2到20MB不应该成为主要问题。 – 2010-05-03 17:26:24