2011-03-08 93 views
3

如何使用c#将csv数据导入到Oracle中。要导入的数据大小为3GB,行数为7512263.我设法将csv数据导入到Oracle中,但时间大约需要1个小时。如何加快将csv数据导入到oracle的时间。谢谢。 这是我的代码:如何使用c#将csv数据导入到Oracle中#

using System; 
using System.Collections.Generic; 
using System.Linq; 
using System.Text; 
using System.Diagnostics; 
using System.Threading; 
using System.Text.RegularExpressions; 
using System.IO; 
using FileHelpers; 
using System.Data.OracleClient; 


namespace sqlloader 
{ 
    class Program 
    { 

     static void Main(string[] args) 
     { 
      int jum; 
      int i; 
      bool isFirstLine = false; 
      FileHelperEngine engine = new FileHelperEngine(typeof(XL_XDR)); 

      //Connect To Database 
      string constr = "Data Source=(DESCRIPTION=(ADDRESS_LIST=" 
       + "(ADDRESS=(PROTOCOL=TCP)(HOST= pt-9a84825594af)(PORT=1521)))" 
       + "(CONNECT_DATA=(SERVER=DEDICATED)(SERVICE_NAME=o11g)));" 
       + "User Id=xl;Password=rahasia;"; 
      OracleConnection con = new OracleConnection(constr); 
      con.Open(); 



      // To Read Use: 
      XL_XDR[] res = engine.ReadFile("DataOut.csv") as XL_XDR[]; 


      jum = CountLinesInFile("DataOut.csv"); 

      FileInfo f2 = new FileInfo("DataOut.csv"); 
      long s2 = f2.Length; 
      int jmlRecord = jum - 1; 

      for (i = 0; i < jum; i++) 
      { 
       ShowPercentProgress("Processing...", i, jum); 
       Thread.Sleep(100); 

       if (isFirstLine == false) 
       { 
        isFirstLine = true; 
       } 
       else 
       { 
        string sql = "INSERT INTO XL_XDR (XDR_ID, XDR_TYPE, SESSION_START_TIME, SESSION_END_TIME, SESSION_LAST_UPDATE_TIME, " + 
           "SESSION_FLAG, VERSION, CONNECTION_ROW_COUNT, ERROR_CODE, METHOD, HOST_LEN, HOST, URL_LEN, URL, CONNECTION_START_TIME, " + 
           "CONNECTION_LAST_UPDATE_TIME, CONNECTION_FLAG, CONNECTION_ID, TOTAL_EVENT_COUNT, TUNNEL_PAIR_ID, RESPONSIVENESS_TYPE, " + 
           "CLIENT_PORT, PAYLOAD_TYPE, VIRTUAL_TYPE, VID_CLIENT, VID_SERVER, CLIENT_ADDR, SERVER_ADDR, CLIENT_TUNNEL_ADDR, " + 
           "SERVER_TUNNEL_ADDR, ERROR_CODE_2, IPID, C2S_PKTS, C2S_OCTETS, S2C_PKTS, S2C_OCTETS, NUM_SUCC_TRANS, CONNECT_TIME, " + 
           "TOTAL_RESP, TIMEOUTS, RETRIES, RAI, TCP_SYNS, TCP_SYN_ACKS, TCP_SYN_RESETS, TCP_SYN_FINS, EVENT_TYPE, FLAGS, TIME_STAMP, " + 
           "EVENT_ID, EVENT_CODE) VALUES (" + 
           "'" + res[i].XDR_ID + "', '" + res[i].XDR_TYPE + "', '" + res[i].SESSION_START_TIME + "', '" + res[i].SESSION_END_TIME + "', " + 
           "'" + res[i].SESSION_LAST_UPDATE_TIME + "', '" + res[i].SESSION_FLAG + "', '" + res[i].VERSION + "', '" + res[i].CONNECTION_ROW_COUNT + "', " + 
           "'" + res[i].ERROR_CODE + "', '" + res[i].METHOD + "', '" + res[i].HOST_LEN + "', '" + res[i].HOST + "', " + 
           "'" + res[i].URL_LEN + "', '" + res[i].URL + "', '" + res[i].CONNECTION_START_TIME + "', '" + res[i].CONNECTION_LAST_UPDATE_TIME + "', " + 
           "'" + res[i].CONNECTION_FLAG + "', '" + res[i].CONNECTION_ID + "', '" + res[i].TOTAL_EVENT_COUNT + "', '" + res[i].TUNNEL_PAIR_ID + "', " + 
           "'" + res[i].RESPONSIVENESS_TYPE + "', '" + res[i].CLIENT_PORT + "', '" + res[i].PAYLOAD_TYPE + "', '" + res[i].VIRTUAL_TYPE + "', " + 
           "'" + res[i].VID_CLIENT + "', '" + res[i].VID_SERVER + "', '" + res[i].CLIENT_ADDR + "', '" + res[i].SERVER_ADDR + "', " + 
           "'" + res[i].CLIENT_TUNNEL_ADDR + "', '" + res[i].SERVER_TUNNEL_ADDR + "', '" + res[i].ERROR_CODE_2 + "', '" + res[i].IPID + "', " + 
           "'" + res[i].C2S_PKTS + "', '" + res[i].C2S_OCTETS + "', '" + res[i].S2C_PKTS + "', '" + res[i].S2C_OCTETS + "', " + 
           "'" + res[i].NUM_SUCC_TRANS + "', '" + res[i].CONNECT_TIME + "', '" + res[i].TOTAL_RESP + "', '" + res[i].TIMEOUTS + "', " + 
           "'" + res[i].RETRIES + "', '" + res[i].RAI + "', '" + res[i].TCP_SYNS + "', '" + res[i].TCP_SYN_ACKS + "', " + 
           "'" + res[i].TCP_SYN_RESETS + "', '" + res[i].TCP_SYN_FINS + "', '" + res[i].EVENT_TYPE + "', '" + res[i].FLAGS + "', " + 
           "'" + res[i].TIME_STAMP + "', '" + res[i].EVENT_ID + "', '" + res[i].EVENT_CODE + "')"; 

        OracleCommand command = new OracleCommand(sql, con); 
        command.ExecuteNonQuery(); 

       } 




      } 

      Console.WriteLine("Successfully Inserted"); 
      Console.WriteLine(); 
      Console.WriteLine("Number of Row Data: " + jmlRecord.ToString()); 
      Console.WriteLine(); 
      Console.WriteLine("The size of {0} is {1} bytes.", f2.Name, f2.Length); 
      con.Close(); 




     } 

     static void ShowPercentProgress(string message, int currElementIndex, int totalElementCount) 
     { 
      if (currElementIndex < 0 || currElementIndex >= totalElementCount) 
      { 
       throw new InvalidOperationException("currElement out of range"); 
      } 
      int percent = (100 * (currElementIndex + 1))/totalElementCount; 
      Console.Write("\r{0}{1}% complete", message, percent); 
      if (currElementIndex == totalElementCount - 1) 
      { 
       Console.WriteLine(Environment.NewLine); 
      } 
     } 

     static int CountLinesInFile(string f) 
     { 
      int count = 0; 
      using (StreamReader r = new StreamReader(f)) 
      { 
       string line; 
       while ((line = r.ReadLine()) != null) 
       { 
        count++; 
       } 
      } 
      return count; 
     } 

    } 

    [DelimitedRecord(",")] 
    public class XL_XDR 
    { 
     public string XDR_ID; 
     public string XDR_TYPE; 
     public string SESSION_START_TIME; 
     public string SESSION_END_TIME; 
     public string SESSION_LAST_UPDATE_TIME; 
     public string SESSION_FLAG; 
     public string VERSION; 
     public string CONNECTION_ROW_COUNT; 
     public string ERROR_CODE; 
     public string METHOD; 
     public string HOST_LEN; 
     public string HOST; 
     public string URL_LEN; 
     public string URL; 
     public string CONNECTION_START_TIME; 
     public string CONNECTION_LAST_UPDATE_TIME; 
     public string CONNECTION_FLAG; 
     public string CONNECTION_ID; 
     public string TOTAL_EVENT_COUNT; 
     public string TUNNEL_PAIR_ID; 
     public string RESPONSIVENESS_TYPE; 
     public string CLIENT_PORT; 
     public string PAYLOAD_TYPE; 
     public string VIRTUAL_TYPE; 
     public string VID_CLIENT; 
     public string VID_SERVER; 
     public string CLIENT_ADDR; 
     public string SERVER_ADDR; 
     public string CLIENT_TUNNEL_ADDR; 
     public string SERVER_TUNNEL_ADDR; 
     public string ERROR_CODE_2; 
     public string IPID; 
     public string C2S_PKTS; 
     public string C2S_OCTETS; 
     public string S2C_PKTS; 
     public string S2C_OCTETS; 
     public string NUM_SUCC_TRANS; 
     public string CONNECT_TIME; 
     public string TOTAL_RESP; 
     public string TIMEOUTS; 
     public string RETRIES; 
     public string RAI; 
     public string TCP_SYNS; 
     public string TCP_SYN_ACKS; 
     public string TCP_SYN_RESETS; 
     public string TCP_SYN_FINS; 
     public string EVENT_TYPE; 
     public string FLAGS; 
     public string TIME_STAMP; 
     public string EVENT_ID; 
     public string EVENT_CODE; 


    } 
} 
+4

为什么不使用绑定变量? Oracle可能花费大部分时间*解析*查询。解析一次,执行很多。 – 2011-03-08 17:21:48

回答

0

只删除调用CountLinesInFile可能会帮助,因为在该方法中,你正在阅读的行中的所有文件中的行,正如你说该文件是biiig ...

2

我在ODB.NET中使用批量绑定方法运气良好。在此页面上搜索“ArrayBindCount”。

http://dotnetslackers.com/articles/ado_net/BulkOperationsUsingOracleDataProviderForNETODPNET.aspx

这将允许您一次插入,而不是成千上万的小刀片的一切。您目前看到的小时数将变成分钟数。

+0

虽然这可能在理论上回答这个问题,[这将是更可取的](http://meta.stackexchange.com/q/8259)在这里包括答案的基本部分,并提供参考链接。 – 2013-01-08 22:53:56

2

虽然强烈支持文森特的建议,使用绑定变量(这可能会是一个巨大的性能增益,以及防止DBA限制你粉碎共享池)和布罗斯托的建议做批量绑定,我会倾向于质疑为什么你会用C#编写这类东西。使用外部表格这样做会更有效率,因此您的应用程序会将文件放置在数据库服务器的文件系统上,并且通过外部表定义来完成数据的解析和加载(或者甚至让应用程序调用SQL * Loader)。这可以让您利用Oracle已经针对此类处理进行了优化的代码。

此外,由于您要一开始就对文件中的行数进行一次计算,以显示进度条,您可以通过从更容易获取的信息中获取大致的行数来优化该行数。您大致知道文件中每行有多少个字节,并且文件的大小是一个相对容易检索的文件属性。这应该允许您估算文件中相对准确的行数,这对于进度条应该足够了。