2011-05-05 42 views
0

好的,我一直在网上寻找解决方案,但至今没有任何头绪:如何在 SQL Server 中动态加载 CSV?

我有什么是CSV - 这个CSV可以有任意数量的未知列

eg

Col 1, Col 2, Col 3 

我使用 BULK INSERT #temp FROM ... 从 CSV 导入数据,但这要求事先已经有一张可供加载的表——问题就出在这里:在加载 CSV 之前,我并不知道表结构。

有没有办法动态地创建基于CSV的表,以实时加载数据?

感谢 罗布

+0

感谢您的编辑Marc_S--抱歉关于格式化 – Rob 2011-05-05 10:57:08

+0

我在想我可以创建一个列作为ntext并将csv行加载到它中 - 然后使用数据库解析它,但必须有更好的方法吗? – Rob 2011-05-05 11:19:01

回答

1

CSV 解析并不简单(需要考虑文本限定符、包含换行符的值、限定符的转义机制等)。已有若干 .NET 库可以替你完成所有这些工作(例如 http://www.codeproject.com/KB/database/CsvReader.aspx),所以我认为,与其尝试在 T-SQL 中自己编写 CSV 解析器,不如改用 PowerShell 或 SQL CLR 等其他技术,以便利用现有的库……

咦,我刚刚在一个旧的论坛帖子里找到了这个简洁漂亮的解决方案(http://forums.databasejournal.com/showthread.php?t=47966):

-- Reads test1.csv from D:\ through the ODBC text driver via the MSDASQL provider.
SELECT *
FROM OPENROWSET(
    'MSDASQL',
    'Driver={Microsoft Text Driver (*.txt; *.csv)}; DefaultDir=D:\;',
    'select * from test1.csv'
)

不幸的是,它在较新的 Windows 版本上无法工作,因为这些版本默认不安装文本驱动程序……

+0

是否有另一种方法从MSSQL读取CSV - 因为OpenRowset需要授予许可 – Rob 2011-05-05 13:22:17

2

我多次面临同样的任务。我最终做的是为负载编写一个简单的c#脚本。我承认,每次我必须稍微更改脚本,因为每次需求不同时,CSV文件都有特定的特性等等。这意味着我的代码很可能不会立即为您工作,但是我希望它可以帮助你很多。

主要的C#文件是program.cs。下面是它的源:

using System; 
using System.IO; 
using System.Collections.Generic; 
using System.Linq; 
using System.Text; 
using System.Text.RegularExpressions; 
using System.Collections; 

namespace CsvToSql 
{ 
    class Program 
    { 
     // Default SQL Server instance; Main overrides it with the "server" app setting when present.
     static string server = @"localhost"; 
     // Default target database; Main overrides it with the "database" app setting when present.
     static string database = @"test"; 
     // Whether the first CSV line holds column names; Main always reassigns this from the "hasHeaders" app setting.
     static bool hasHeaders = false; 
     // Text substituted for %fieldLength% in fieldPattern; Main overrides it with the "fieldLength" app setting when present.
     static string fieldLength = "max"; 

     // Template for one column definition; %fieldName% and %fieldLength% are substituted in ProduceTableScript.
     static string fieldPattern = "[%fieldName%] [nvarchar](%fieldLength%) NULL,\n";   
     // Template of the generated table script: drops [dbo].[%tableName%] if it exists, then creates it
     // with the column list substituted for %fields%.
     static string createTablePattern = 
@" 
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[%tableName%]') AND type in (N'U')) 
DROP TABLE [dbo].[%tableName%] 
GO 
SET ANSI_NULLS ON 
GO 
SET QUOTED_IDENTIFIER ON 
GO 
CREATE TABLE [dbo].[%tableName%](
%fields% 
) ON [PRIMARY] 
"; 
     // Template of the generated per-file command script: runs the table script through sqlcmd, then
     // loads the headerless file with bcp. The placeholder %headrelsessFileName% is misspelled, but
     // ProduceCommandScript replaces exactly this spelling, so both must be changed together.
     // The bcp field terminator matches the '^' that ParseLine/ParseQutationLine put between fields.
     // NOTE(review): presumably written as ^^ because ^ is the cmd.exe escape character - confirm.
     static string commandScriptPattern = 
@"sqlcmd -S %server% -E -d %database% -i %tableScriptName% 
bcp %database%.dbo.%tableName% in %headrelsessFileName% -c -t^^ -r \n -T -S %server% 
"; 
     // Entry point. Loads the settings from the application configuration file, generates the
     // table script, headerless input file and command script for every *.csv file in the
     // current directory, then writes the _all.cmd and _cleanup.cmd driver scripts.
     // The args parameter is not used.
     private static void Main(string[] args)
     {
      // NOTE(review): ConfigurationSettings.AppSettings is the legacy configuration API; it is
      // kept so the file still builds with the references the existing build script uses.
      var settings = System.Configuration.ConfigurationSettings.AppSettings;

      // A missing key leaves the compiled-in default untouched.
      server = settings["server"] ?? server;
      database = settings["database"] ?? database;
      fieldLength = settings["fieldLength"] ?? fieldLength;

      // Accept "true" in any casing (and with surrounding whitespace); a missing or
      // unparsable value means "no headers", exactly as before.
      bool parsedHasHeaders;
      hasHeaders = bool.TryParse(settings["hasHeaders"], out parsedHasHeaders) && parsedHasHeaders;

      string[] fileNames = Directory.GetFiles(".", "*.csv");
      foreach (string fileName in fileNames)
      {
       Console.WriteLine("Processing {0}", fileName);
       Process(fileName);
      }
      WriteExecuteAllFile(fileNames);
      WriteCleanUpFile(fileNames);
     }

     // Generates every artefact for a single CSV file: reading the headers also writes the
     // headerless input file, then the table script and the command script are produced.
     private static void Process(string fileName)
     {
      ProduceTableScript(fileName, ReadHeaders(fileName));
      ProduceCommandScript(fileName);
     }

     // Writes _all.cmd, which contains one "call <command script>" line per CSV file.
     private static void WriteExecuteAllFile(string[] fileNames)
     {
      StringBuilder script = new StringBuilder();
      foreach (string csvFile in fileNames)
      {
       script.Append("call ").AppendLine(GetCommandScriptName(csvFile));
      }
      SaveStringToFile(script.ToString(), "_all.cmd");
     }

     // Writes _cleanup.cmd, which deletes every generated file: for each CSV its command
     // script, headerless input file and table script, then _all.cmd and _cleanup.cmd itself.
     private static void WriteCleanUpFile(string[] fileNames)
     {
      StringBuilder script = new StringBuilder();
      foreach (string csvFile in fileNames)
      {
       string[] generatedFiles =
       {
        GetCommandScriptName(csvFile),
        GetHeaderlessFileName(csvFile),
        GetTableScriptName(csvFile)
       };
       foreach (string generatedFile in generatedFiles)
       {
        script.Append("del ").AppendLine(generatedFile);
       }
      }
      script.AppendLine("del _all.cmd").AppendLine("del _cleanup.cmd");
      SaveStringToFile(script.ToString(), "_cleanup.cmd");
     }

     // Determines the column names for a CSV file and writes its headerless input file.
     // When hasHeaders is set, the first line supplies the names and is left out of the
     // headerless file. Otherwise the names are F1..Fn (n = number of fields on the first
     // line) and every line, including the first, goes into the headerless file.
     // Throws InvalidDataException when the file contains no lines at all; previously this
     // case crashed with a NullReferenceException inside the CSV parser.
     private static string[] ReadHeaders(string fileName)
     {
      // Disposing the stream is sufficient: the readers below own nothing but this stream.
      using (FileStream fs = File.OpenRead(fileName))
      {
       StreamReader sr = new StreamReader(fs);

       string firstLine = sr.ReadLine();
       if (firstLine == null)
       {
        throw new InvalidDataException("CSV file is empty, cannot determine its columns: " + fileName);
       }

       string[] firstRow = ParseQutationLineToList(firstLine);
       string[] result;
       if (hasHeaders)
       {
        result = firstRow;
       }
       else
       {
        result = new string[firstRow.Length];
        for (int i = 0; i < firstRow.Length; i++)
        {
         result[i] = "F" + (i + 1).ToString();
        }

        // The first line is data, so rewind and read it again. A fresh reader is used
        // because it runs byte-order-mark detection again from the start of the stream.
        // NOTE(review): the old Seek + DiscardBufferedData on the same reader appears to
        // decode a leading BOM into the first field - confirm.
        fs.Seek(0, SeekOrigin.Begin);
        sr = new StreamReader(fs);
       }

       ProduceHeaderlessFile(sr, fs.Name);
       return result;
      }
     }

     // Writes the table script for a CSV file: fieldPattern is expanded once per column name
     // and the result, with the table name, is substituted into createTablePattern.
     // fileName is the CSV path, used to derive the table name and the script file name.
     // fieldNames are the column names in order.
     private static void ProduceTableScript(string fileName, string[] fieldNames)
     {
      string tableName = GetTableName(fileName);

      StringBuilder fields = new StringBuilder();
      foreach (string fieldName in fieldNames)
      {
       // Column names may come straight from the CSV header. Inside a [bracketed]
       // identifier a literal ']' must be doubled, otherwise it ends the identifier.
       string escapedName = fieldName.Replace("]", "]]");
       fields.Append(fieldPattern.Replace("%fieldName%", escapedName).Replace("%fieldLength%", fieldLength));
      }

      // NOTE(review): the table name is inserted unescaped; a CSV file name containing ']'
      // or a single quote would still produce an invalid script.
      string table = createTablePattern.Replace("%fields%", fields.ToString()).Replace("%tableName%", tableName);
      SaveStringToFile(table, GetTableScriptName(fileName));
     }


     // Writes the command script for a CSV file by filling the placeholders of
     // commandScriptPattern with the server, database, table name and generated file names.
     private static void ProduceCommandScript(string fileName)
     {
      string script = commandScriptPattern
       .Replace("%server%", server)
       .Replace("%database%", database)
       .Replace("%tableName%", GetTableName(fileName))
       .Replace("%tableScriptName%", GetTableScriptName(fileName))
       .Replace("%headrelsessFileName%", GetHeaderlessFileName(fileName));

      SaveStringToFile(script, GetCommandScriptName(fileName));
     }

     // Copies the remaining lines of sr into the headerless input file derived from
     // basefileName, converting each line with ParseLine, and reports the line count on the
     // console. Nothing is written or reported when the target file already exists.
     private static void ProduceHeaderlessFile(StreamReader sr, string basefileName)
     {
      string targetPath = GetHeaderlessFileName(basefileName);
      if (!File.Exists(targetPath))
      {
       int written = 0;

       using (FileStream output = File.Open(targetPath, FileMode.Create, FileAccess.Write, FileShare.Read))
       using (StreamWriter writer = new StreamWriter(output))
       {
        // Disposing the writer flushes it and the stream beneath it.
        for (string line = sr.ReadLine(); line != null; line = sr.ReadLine())
        {
         writer.WriteLine(ParseLine(line));
         written++;
        }
       }

       Console.WriteLine("Written {0} records to {1}", written, targetPath);
      }
     }

     // Converts one CSV line into the '^'-separated form the generated bcp command expects.
     // A line containing a double quote anywhere is run through the CSV parser so that commas
     // inside quoted fields survive. The old check only looked at the start of the line, so
     // input such as 1,"a,b" was split in the middle of the quoted field.
     // A line without any quote has nothing to unquote, so its commas are replaced directly.
     // As before, ParseQutationLine throws ApplicationException when a field of a parsed line
     // already contains '^'.
     private static string ParseLine(string s)
     {
      if (s.IndexOf('"') >= 0)
      {
       return ParseQutationLine(s);
      }

      return s.Replace(',', '^');
     }

     // Some tables have the default field terminator (comma) inside their values, which is
     // why the line is split with the CSV parser instead of a plain replace.
     // Returns the fields of s with double quotes trimmed from both ends of each field and
     // joined by '^'. Throws ApplicationException when any field contains '^'.
     private static string ParseQutationLine(string s)
     {
      StringBuilder joined = new StringBuilder();

      foreach (string rawField in ParseQutationLineToList(s))
      {
       if (rawField.Contains("^"))
       {
        throw new ApplicationException("String contains separator character. " + s);
       }

       joined.Append(rawField.Trim('"')).Append('^');
      }

      // Drop the separator appended after the last field.
      joined.Length -= 1;
      return joined.ToString();
     }

     // Splits one CSV line into its fields using the CsvReader library (comma separator).
     private static string[] ParseQutationLineToList(string s)
     {
      ArrayList parsed = new ArrayList();
      new JouniHeikniemi.Tools.Strings.CsvReader().ParseCsvFields(parsed, s);

      string[] fields = new string[parsed.Count];
      parsed.CopyTo(fields);
      return fields;
     }

     // Writes s to fileName, creating the file or overwriting an existing one.
     private static void SaveStringToFile(string s, string fileName)
     {
      using (FileStream target = File.Open(fileName, FileMode.Create, FileAccess.Write, FileShare.Read))
      using (StreamWriter writer = new StreamWriter(target))
      {
       // Disposing the writer flushes it and the stream beneath it.
       writer.Write(s);
      }
     }

     // Derives the table name from a CSV path: an underscore followed by the file name
     // without its extension, with any remaining dots replaced by underscores.
     private static string GetTableName(string fileName)
     {
      string baseName = Path.GetFileNameWithoutExtension(fileName);
      return string.Concat("_", baseName.Replace('.', '_'));
     }

     // Returns the path of the headerless bcp input file: the CSV path with extension "inp".
     private static string GetHeaderlessFileName(string fileName)
     {
      const string headerlessExtension = "inp";
      return Path.ChangeExtension(fileName, headerlessExtension);
     }

     // Returns the path of the table script: the CSV path with extension "tbl".
     private static string GetTableScriptName(string fileName)
     {
      const string tableScriptExtension = "tbl";
      return Path.ChangeExtension(fileName, tableScriptExtension);
     }

     // Returns the path of the per-file command script: the CSV path with extension "cmd".
     private static string GetCommandScriptName(string fileName)
     {
      const string commandScriptExtension = "cmd";
      return Path.ChangeExtension(fileName, commandScriptExtension);
     }
    } 
} 

此文件使用我在网上找到的一个库来解析 CSV 文件。请注意,我见过一些该库无法解析的有效 CSV 文件。CsvReader.cs 文件的全文如下:

using System; 
using System.Collections; 
using System.IO; 
using System.Text; 

namespace JouniHeikniemi.Tools.Strings { 

    /// <summary>
    /// A data-reader style interface for reading Csv (and otherwise-char-separated) files.
    /// </summary>
    /// <remarks>
    /// Parsing is line based: a quoted field cannot span several lines. An instance created
    /// with the parameterless constructor has no underlying stream and can only be used
    /// through <see cref="ParseCsvFields"/>.
    /// </remarks>
    public class CsvReader : IDisposable {

        #region Private variables

        private Stream stream;
        private StreamReader reader;
        private char separator;

        #endregion

        #region Constructors

        /// <summary>
        /// Creates a reader without an underlying stream, using comma as the separator.
        /// </summary>
        public CsvReader() { separator = ','; }

        /// <summary>
        /// Creates a new Csv reader for the given stream.
        /// </summary>
        /// <param name="s">The stream to read the CSV from.</param>
        public CsvReader(Stream s) : this(s, null, ',') { }

        /// <summary>
        /// Creates a new reader for the given stream and separator.
        /// </summary>
        /// <param name="s">The stream to read the data from.</param>
        /// <param name="separator">The field separator character</param>
        public CsvReader(Stream s, char separator) : this(s, null, separator) { }

        /// <summary>
        /// Creates a new Csv reader for the given stream and encoding.
        /// </summary>
        /// <param name="s">The stream to read the CSV from.</param>
        /// <param name="enc">The encoding used.</param>
        public CsvReader(Stream s, Encoding enc) : this(s, enc, ',') { }

        /// <summary>
        /// Creates a new reader for the given stream, encoding and separator character.
        /// </summary>
        /// <param name="s">The stream to read the data from.</param>
        /// <param name="enc">The encoding used, or null for the StreamReader default.</param>
        /// <param name="separator">The separator character between the fields</param>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        /// <exception cref="CsvReaderException">The stream is not readable.</exception>
        public CsvReader(Stream s, Encoding enc, char separator) {

            if (s == null) {
                throw new ArgumentNullException("s");
            }

            this.separator = separator;
            this.stream = s;
            if (!s.CanRead) {
                throw new CsvReaderException("Could not read the given data stream!");
            }
            reader = (enc != null) ? new StreamReader(s, enc) : new StreamReader(s);
        }

        /// <summary>
        /// Creates a new Csv reader for the given text file path.
        /// </summary>
        /// <param name="filename">The name of the file to be read.</param>
        public CsvReader(string filename) : this(filename, null, ',') { }

        /// <summary>
        /// Creates a new reader for the given text file path and separator character.
        /// </summary>
        /// <param name="filename">The name of the file to be read.</param>
        /// <param name="separator">The field separator character</param>
        public CsvReader(string filename, char separator) : this(filename, null, separator) { }

        /// <summary>
        /// Creates a new Csv reader for the given text file path and encoding.
        /// </summary>
        /// <param name="filename">The name of the file to be read.</param>
        /// <param name="enc">The encoding used.</param>
        public CsvReader(string filename, Encoding enc)
            : this(filename, enc, ',') { }

        /// <summary>
        /// Creates a new reader for the given text file path, encoding and field separator.
        /// </summary>
        /// <param name="filename">The name of the file to be read.</param>
        /// <param name="enc">The encoding used.</param>
        /// <param name="separator">The field separator character.</param>
        public CsvReader(string filename, Encoding enc, char separator)
            // Open for reading only: the two-argument FileStream constructor requests
            // read/write access, which fails on read-only files.
            : this(new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read), enc, separator) { }

        #endregion

        #region Properties

        /// <summary>
        /// The separator character for the fields. Comma for normal CSV.
        /// </summary>
        public char Separator {
            get { return separator; }
            set { separator = value; }
        }

        #endregion

        #region Parsing

        /// <summary>
        /// Returns the fields for the next row of data (or null if at eof)
        /// </summary>
        /// <returns>
        /// A string array of fields, an empty array for an empty line, or null if at the
        /// end of file.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The reader was created without a stream or file.
        /// </exception>
        public string[] GetCsvLine() {

            if (reader == null) {
                throw new InvalidOperationException("This CsvReader was created without a stream; use ParseCsvFields instead.");
            }

            string data = reader.ReadLine();
            if (data == null) {
                return null;
            }
            if (data.Length == 0) {
                return new string[0];
            }

            ArrayList result = new ArrayList();

            ParseCsvFields(result, data);

            return (string[])result.ToArray(typeof(string));
        }

        /// <summary>
        /// Parses the fields of one line and appends them, as strings, to the result list.
        /// </summary>
        /// <param name="result">The list that receives the parsed fields.</param>
        /// <param name="data">One line of separated text, without the line terminator.</param>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        public void ParseCsvFields(ArrayList result, string data) {

            if (result == null) {
                throw new ArgumentNullException("result");
            }
            if (data == null) {
                throw new ArgumentNullException("data");
            }

            // pos is the index of the separator that precedes the next field; -1 stands for
            // the virtual separator before the first field.
            int pos = -1;
            while (pos < data.Length) {
                result.Add(ParseCsvField(data, ref pos));
            }
        }

        // Parses the field at the given position of the data, modifies pos to match
        // the first unparsed position and returns the parsed field
        private string ParseCsvField(string data, ref int startSeparatorPosition) {

            if (startSeparatorPosition == data.Length - 1) {
                startSeparatorPosition++;
                // The last field is empty
                return "";
            }

            int fromPos = startSeparatorPosition + 1;

            // Determine if this is a quoted field
            if (data[fromPos] == '"') {
                // If we're at the end of the string, let's consider this a field that
                // only contains the quote
                if (fromPos == data.Length - 1) {
                    // Mark the whole line as consumed. The position has to advance here,
                    // otherwise the caller's loop parses this same field forever.
                    startSeparatorPosition = data.Length;
                    return "\"";
                }

                // Otherwise, return a string of appropriate length with double quotes collapsed
                // Note that FSQ returns data.Length if no single quote was found
                int nextSingleQuote = FindSingleQuote(data, fromPos + 1);
                startSeparatorPosition = nextSingleQuote + 1;
                return data.Substring(fromPos + 1, nextSingleQuote - fromPos - 1).Replace("\"\"", "\"");
            }

            // The field ends in the next separator or EOL
            int nextSeparator = data.IndexOf(separator, fromPos);
            if (nextSeparator == -1) {
                startSeparatorPosition = data.Length;
                return data.Substring(fromPos);
            }

            startSeparatorPosition = nextSeparator;
            return data.Substring(fromPos, nextSeparator - fromPos);
        }

        // Returns the index of the next single quote mark in the string
        // (starting from startFrom), or data.Length when there is none
        private static int FindSingleQuote(string data, int startFrom) {

            int i = startFrom - 1;
            while (++i < data.Length) {
                if (data[i] == '"') {
                    // If this is a double quote, bypass the chars
                    if (i < data.Length - 1 && data[i + 1] == '"') {
                        i++;
                        continue;
                    }
                    return i;
                }
            }
            // If no quote found, return the end value of i (data.Length)
            return i;
        }

        #endregion


        /// <summary>
        /// Disposes the reader. The underlying stream is closed.
        /// </summary>
        public void Dispose() {
            // Closing the reader closes the underlying stream, too
            if (reader != null) {
                reader.Close();
            }
            else if (stream != null) {
                stream.Close(); // In case we failed before the reader was constructed
            }
            GC.SuppressFinalize(this);
        }
    }


    /// <summary>
    /// The exception type raised by CsvReader when it cannot read its input.
    /// </summary>
    [Serializable]
    public class CsvReaderException : ApplicationException {

        // Message used when the caller does not supply one.
        private const string DefaultMessage = "The CSV Reader encountered an error.";

        /// <summary>
        /// Creates the exception with the default message.
        /// </summary>
        public CsvReaderException()
            : this(DefaultMessage) { }

        /// <summary>
        /// Creates the exception with a caller-supplied message.
        /// </summary>
        /// <param name="message">Text describing the error.</param>
        public CsvReaderException(string message)
            : base(message) { }

        /// <summary>
        /// Creates the exception with a caller-supplied message and the exception that caused it.
        /// </summary>
        /// <param name="message">Text describing the error.</param>
        /// <param name="inner">The underlying exception.</param>
        public CsvReaderException(string message, Exception inner)
            : base(message, inner) { }

        /// <summary>
        /// Creates the exception from serialized data.
        /// </summary>
        /// <param name="info">The serialized object data.</param>
        /// <param name="context">The source or destination of the serialized stream.</param>
        protected CsvReaderException(
            System.Runtime.Serialization.SerializationInfo info,
            System.Runtime.Serialization.StreamingContext context)
            : base(info, context) { }

    }

} 

我也有一个配置文件CsvToSql.exe.config:

<?xml version="1.0" encoding="utf-8" ?> 
<configuration> 
    <appSettings> 
    <add key="server" value="localhost"/> 
    <add key="database" value="test"/> 
    <!-- If your csv files have header, set this to true to generate field names from headers--> 
    <!-- Otherwise set it to false to generate names F1, F2, F3, etc.--> 
    <add key="hasHeaders" value="false"/> 
    <!-- This is the length of the nvarchar fields created; it can be a number or 'max' -->
    <add key="fieldLength" value="500"/> 
    </appSettings> 
</configuration> 

所有这些代码都通过 build.cmd 脚本来编译:

%systemroot%\Microsoft.NET\Framework\v3.5\csc.exe /out:CsvToSql.exe Program.cs CsvReader.cs 

这是我如何使用它:

  • 运行 build.cmd 编译 CsvToSql.exe
  • 编辑CsvToSql.exe.config适合你的情况
  • 将 CSV 文件放在可执行文件和配置文件所在的同一文件夹中
  • 运行 CsvToSql.exe
  • 可执行文件未连接到数据库。相反,它会生成许多文件: * .tbl文件是表定义,* .inp文件是bcp命令行实用程序的输入文件,* .cmd文件是运行表创建脚本和bcp命令行实用程序的文件。 _all.cmd为所有表运行* .cmd,_cleanup.cmd删除CsvToSql.exe生成的所有文件
  • 运行_all.cmd文件
  • 转到您的SQL并查看已生成的内容。对脚本和/或配置进行更改,漂洗并重复

这个脚本包含很多隐含的假设,也有很多硬编码的内容。每次需要把一组新的 CSV 加载到 SQL 中时,我通常都会快速修改这些地方。

祝你好运;如果有任何问题,请随时提问。该脚本需要 .NET 3.5。如果要加载的数据没有什么特殊之处,我通常能在 15 分钟内让这个脚本运行起来;如果遇到麻烦,可能需要更长的时间。

+0

不错的代码,我可以借用它的一部分用于我正在开发的项目! – 2011-05-05 12:23:50

+0

非常好的代码,我可以在将来使用它 - 谢谢你,但我正在寻找一个数据库唯一的解决方案 – Rob 2011-05-05 13:20:47