我有一个大约 5MB 的 CSV 数据库,其中包含邮政编码、城市和州,我正尝试把它导入 SQL Server CE 数据库。从多个线程访问 SQL Server CE 是否安全?
使用单线程时,估计整个过程大约需要 3 小时才能完成。虽然这样也能完成工作,但我想尝试把任务拆分到多个线程中,以缩短这 3 小时的总耗时。如果我在每个线程上各创建一个 SqlCeConnection 对象,是否可以安全地在各个线程上同时执行命令?
我有一种感觉,会有并发和死锁的问题。这里是我找到的CSV数据库:http://www.unitedstateszipcodes.org/zip-code-database/
这里是我的相关代码:
// Address records parsed from the CSV by OpenCSV; iterated by ProcessData on the worker thread.
List<AddressSet> addressList;
/// <summary>
/// Parses the CSV file into <c>addressList</c> and starts a worker thread that runs
/// <c>ProcessData</c> over the parsed records.
/// </summary>
/// <param name="file">Path of the CSV file. The first line is treated as a header and skipped.</param>
public void OpenCSV(string file)
{
    // Column positions read from each record.
    const int zipColumn = 0;
    const int cityColumn = 2;
    const int stateColumn = 5;

    addressList = File.ReadAllLines(file)
        .Skip(1)
        // A trailing or stray blank line would otherwise fail on the column indexing below.
        .Where(line => !string.IsNullOrWhiteSpace(line))
        // Quote-aware split: a plain Split(',') breaks quoted fields that contain commas,
        // which shifts every later column and yields the wrong city/state.
        .Select(line => SplitCsvLine(line))
        .Select(columns => new AddressSet
        {
            ZipCode = columns[zipColumn].Trim(),
            City = columns[cityColumn].Trim(),
            State = columns[stateColumn].Trim()
        })
        .ToList();

    Thread worker = new Thread(new ThreadStart(ProcessData));
    worker.Start();
}

/// <summary>
/// Splits one CSV record into its fields. Commas inside double-quoted fields are kept as
/// part of the field, the delimiting quotes are removed, and a doubled quote inside a
/// quoted field is read as one literal quote. Quoted fields spanning several lines are
/// not supported because the input is already split per line.
/// </summary>
/// <param name="line">A single line of CSV text.</param>
/// <returns>The field values, in order.</returns>
private static string[] SplitCsvLine(string line)
{
    List<string> fields = new List<string>();
    System.Text.StringBuilder current = new System.Text.StringBuilder();
    bool inQuotes = false;

    for (int pos = 0; pos < line.Length; pos++)
    {
        char c = line[pos];
        if (inQuotes)
        {
            if (c != '"')
            {
                current.Append(c);
            }
            else if (pos + 1 < line.Length && line[pos + 1] == '"')
            {
                // Escaped quote ("") inside a quoted field.
                current.Append('"');
                pos++;
            }
            else
            {
                inQuotes = false;
            }
        }
        else if (c == '"')
        {
            inQuotes = true;
        }
        else if (c == ',')
        {
            fields.Add(current.ToString());
            current.Length = 0;
        }
        else
        {
            current.Append(c);
        }
    }

    fields.Add(current.ToString());
    return fields.ToArray();
}
/// <summary>
/// Worker-thread body: for every record in <c>addressList</c>, inserts (or looks up) the
/// state, zip code and city, links the zip code to the city, and posts a progress
/// summary to <c>richTextBox1</c>. Shows "Done!" and re-enables <c>btnChooseCSV</c> on
/// success; shows the exception message on failure.
/// </summary>
private void ProcessData()
{
    try
    {
        int total = addressList.Count;
        int processed = 0;
        // UTC avoids a jump in the elapsed time if the local clock shifts (e.g. DST) mid-run.
        DateTime operationStart = DateTime.UtcNow;

        foreach (AddressSet address in addressList)
        {
            int stateId = InsertState(address.State);
            int zipCodeId = InsertZipCode(address.ZipCode, stateId);
            int cityId = InsertCity(address.City, stateId);
            UpdateRelationships(zipCodeId, cityId);

            processed++;
            float pct = processed / (float)total * 100;
            TimeSpan timeSinceStart = DateTime.UtcNow.Subtract(operationStart);
            TimeSpan totalTime = TimeSpan.FromMilliseconds(timeSinceStart.TotalMilliseconds / (pct / 100));
            TimeSpan timeLeft = totalTime - timeSinceStart;

            // Build the text here rather than inside the posted delegate: BeginInvoke runs
            // the delegate later on the UI thread, by which time a captured loop counter
            // may already have advanced and the wrong record number would be shown.
            string status = pct.ToString("N2") + "% (" + processed + " of " + total.ToString() + ") " + address.City + ", " + address.State + " " + address.ZipCode
                + "\nEstimated Total Time: " + totalTime.ToString("h'h 'm'm 's's'") +
                "\nTime Left: " + timeLeft.ToString("h'h 'm'm 's's'") +
                "\nRunning Time: " + timeSinceStart.ToString("h'h 'm'm 's's'");

            // One posted delegate per record instead of three keeps the UI message queue smaller.
            richTextBox1.BeginInvoke((MethodInvoker)(() =>
            {
                richTextBox1.Text = status;
                richTextBox1.SelectionStart = richTextBox1.Text.Length;
                richTextBox1.ScrollToCaret();
            }));
        }

        this.Invoke(new Action(() =>
        {
            MessageBox.Show("Done!");
            btnChooseCSV.Enabled = true;
        }));
    }
    catch (Exception ex)
    {
        // NOTE(review): btnChooseCSV is only re-enabled on success; confirm whether it
        // should also be re-enabled after a failure.
        this.Invoke(new Action(() =>
        {
            MessageBox.Show(ex.Message);
        }));
    }
}
/// <summary>
/// Returns the <c>ZipCodeId</c> of the row in <c>ZipCode</c> matching
/// <paramref name="zipCode"/>, inserting a new row first when none exists.
/// </summary>
/// <param name="zipCode">Zip code value to look up or insert.</param>
/// <param name="stateId">State id stored on a newly inserted row.</param>
/// <returns>The id of the existing or newly inserted zip code row.</returns>
private int InsertZipCode(string zipCode, int stateId)
{
    string connstr = System.Configuration.ConfigurationManager.ConnectionStrings["AddressInformation"].ConnectionString;

    // using blocks guarantee the connection and commands are released even when a query throws.
    using (SqlCeConnection connection = new SqlCeConnection(connstr))
    {
        connection.Open();

        // Fetch the id directly instead of counting first: one round trip fewer, and a
        // table that already holds duplicates can no longer return the row count as an id.
        using (SqlCeCommand lookup = new SqlCeCommand("SELECT ZipCodeId FROM ZipCode WHERE ZipCode = @ZipCode", connection))
        {
            lookup.Parameters.AddWithValue("ZipCode", zipCode);
            object existingId = lookup.ExecuteScalar();
            if (existingId != null && existingId != DBNull.Value)
            {
                return Convert.ToInt32(existingId);
            }
        }

        using (SqlCeCommand insert = new SqlCeCommand("INSERT INTO ZipCode(ZipCode, StateId) VALUES(@ZipCode, @StateId)", connection))
        {
            insert.Parameters.AddWithValue("ZipCode", zipCode);
            insert.Parameters.AddWithValue("StateId", stateId);
            insert.ExecuteNonQuery();
        }

        // Must run on the same connection as the INSERT above.
        using (SqlCeCommand identity = new SqlCeCommand("SELECT @@IDENTITY", connection))
        {
            return Convert.ToInt32(identity.ExecuteScalar());
        }
    }
}
/// <summary>
/// Returns the <c>CityId</c> of the row in <c>City</c> matching
/// <paramref name="city"/> within <paramref name="stateId"/>, inserting a new row first
/// when none exists.
/// </summary>
/// <param name="city">City name to look up or insert.</param>
/// <param name="stateId">Id of the state the city belongs to.</param>
/// <returns>The id of the existing or newly inserted city row.</returns>
private int InsertCity(string city, int stateId)
{
    string connstr = System.Configuration.ConfigurationManager.ConnectionStrings["AddressInformation"].ConnectionString;

    // using blocks guarantee the connection and commands are released even when a query throws.
    using (SqlCeConnection connection = new SqlCeConnection(connstr))
    {
        connection.Open();

        // Match on name AND state: matching on the name alone folds same-named cities from
        // different states into whichever one was inserted first. Fetching the id directly
        // also avoids returning a row count as an id when duplicates exist.
        using (SqlCeCommand lookup = new SqlCeCommand("SELECT CityId FROM City WHERE CityName = @City AND StateId = @StateId", connection))
        {
            lookup.Parameters.AddWithValue("City", city);
            lookup.Parameters.AddWithValue("StateId", stateId);
            object existingId = lookup.ExecuteScalar();
            if (existingId != null && existingId != DBNull.Value)
            {
                return Convert.ToInt32(existingId);
            }
        }

        using (SqlCeCommand insert = new SqlCeCommand("INSERT INTO City(CityName, StateId) VALUES(@City, @StateId)", connection))
        {
            insert.Parameters.AddWithValue("City", city);
            insert.Parameters.AddWithValue("StateId", stateId);
            insert.ExecuteNonQuery();
        }

        // Must run on the same connection as the INSERT above.
        using (SqlCeCommand identity = new SqlCeCommand("SELECT @@IDENTITY", connection))
        {
            return Convert.ToInt32(identity.ExecuteScalar());
        }
    }
}
/// <summary>
/// Returns the <c>StateId</c> of the row in <c>State</c> matching
/// <paramref name="state"/>, inserting a new row first when none exists.
/// </summary>
/// <param name="state">State value to look up or insert.</param>
/// <returns>The id of the existing or newly inserted state row.</returns>
private int InsertState(string state)
{
    string connstr = System.Configuration.ConfigurationManager.ConnectionStrings["AddressInformation"].ConnectionString;

    // using blocks guarantee the connection and commands are released even when a query throws.
    using (SqlCeConnection connection = new SqlCeConnection(connstr))
    {
        connection.Open();

        // Fetch the id directly instead of counting first: one round trip fewer, and a
        // table that already holds duplicates can no longer return the row count as an id.
        using (SqlCeCommand lookup = new SqlCeCommand("SELECT StateId FROM State WHERE State = @State", connection))
        {
            lookup.Parameters.AddWithValue("State", state);
            object existingId = lookup.ExecuteScalar();
            if (existingId != null && existingId != DBNull.Value)
            {
                return Convert.ToInt32(existingId);
            }
        }

        using (SqlCeCommand insert = new SqlCeCommand("INSERT INTO State(State) VALUES(@State)", connection))
        {
            insert.Parameters.AddWithValue("State", state);
            insert.ExecuteNonQuery();
        }

        // Must run on the same connection as the INSERT above.
        using (SqlCeCommand identity = new SqlCeCommand("SELECT @@IDENTITY", connection))
        {
            return Convert.ToInt32(identity.ExecuteScalar());
        }
    }
}
/// <summary>
/// Inserts a row into <c>CityZipCode</c> linking the given city to the given zip code.
/// </summary>
/// <param name="zipCodeId">Id of the zip code row.</param>
/// <param name="cityId">Id of the city row.</param>
private void UpdateRelationships(int zipCodeId, int cityId)
{
    string connstr = System.Configuration.ConfigurationManager.ConnectionStrings["AddressInformation"].ConnectionString;

    // using blocks guarantee the connection and command are released even when the insert throws.
    using (SqlCeConnection connection = new SqlCeConnection(connstr))
    using (SqlCeCommand command = new SqlCeCommand("INSERT INTO CityZipCode(CityId, ZipCodeId) VALUES(@CityId, @ZipCodeId)", connection))
    {
        connection.Open();
        command.Parameters.AddWithValue("CityId", cityId);
        command.Parameters.AddWithValue("ZipCodeId", zipCodeId);
        command.ExecuteNonQuery();
    }
}
编辑:
只是为了澄清:我并不是简单地把 CSV 文件中的每一行原样插入数据库。我正在改变数据的组织方式,把每类数据插入各自的表中,并添加各实体之间的关系。
例如,一个城市可以有多个邮政编码,并且邮政编码有时可以覆盖多个城市,以便通过多对多关系来表示。城市和邮政编码只有一个州,所以这种关系是多对一的。
我有城市、邮政编码和州三张表,还有一张把城市与邮政编码关联起来的关系表。我需要修改关系表的结构,使同名的城市可以存在于多个州中。关系表应该包含城市、州和邮政编码,而不仅仅是城市和邮政编码。
我的最终目标是将带有密码保护的SQL Server CE数据库与另一个应用程序一起用于城市,州和邮政编码验证。我不想分发CSV数据库,因为任何人都可以改变它来通过验证。
你可以这样做,但这可能并不会更快。要想更快,请先查询出所有的州等数据,把它们放进字典以便快速查找,并记得在更新数据库时同步更新这些字典。 – Casperah 2013-04-04 16:11:12
我会试试看。谢谢你的提示。 – 2013-04-04 16:13:02
另外,建议在线程内重用连接。通常我会赞成使用内置的连接池等机制,但就你的情况而言,线程上要执行大量细粒度的操作,而且这些操作(希望)都很快,每次都重新建立连接的开销是不必要的——尤其是这是一个持续进行的操作,不应该像通过网络访问那样出现间歇性的连接失败。 – 2013-04-04 18:14:04