我正在使用Microsoft DocumentFormat.OpenXml SDK从Excel文件中读取数据。 虽然这样做,我正在考虑如果一个单元格有空白值(如果是,也请阅读)。使用Microsoft DocumentFormat.OpenXml SDK读取c#中的excel文件
现在,面临着其中workSheet.SheetDimension为空的Excel表之一的问题,因此代码抛出异常。使用
代码:
类OpenXMLHelper { //一个辅助函数来打开使用的OpenXML Excel文件,并且从一个 工作表//返回包含所有的数据的数据表。 // //我们在使用OLEDB读取Excel数据时遇到了很多问题(例如,ACE驱动程序不再存在于新服务器上, // OLEDB由于安全问题而不工作,并且公然忽略空白行工作表顶部),所以这是一个更稳定的数据读取方法。 //
public static DataTable ExcelWorksheetToDataTable(string pathFilename)
{
try
{
DataTable dt = new DataTable();
string dimensions = string.Empty;
using (FileStream fs = new FileStream(pathFilename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
using (SpreadsheetDocument document = SpreadsheetDocument.Open(fs, false))
{
// Find the sheet with the supplied name, and then use that
// Sheet object to retrieve a reference to the first worksheet.
//Sheet theSheet = document.WorkbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == worksheetName).FirstOrDefault();
//--Sheet theSheet = document.WorkbookPart.Workbook.Descendants<Sheet>().FirstOrDefault();
//--if (theSheet == null)
//-- throw new Exception("Couldn't find the worksheet: "+ theSheet.Id);
// Retrieve a reference to the worksheet part.
//WorksheetPart wsPart = (WorksheetPart)(document.WorkbookPart.GetPartById(theSheet.Id));
//--WorksheetPart wsPart = (WorksheetPart)(document.WorkbookPart.GetPartById(theSheet.Id));
WorkbookPart workbookPart = document.WorkbookPart;
WorksheetPart wsPart = workbookPart.WorksheetParts.FirstOrDefault();
Worksheet workSheet = wsPart.Worksheet;
dimensions = workSheet.SheetDimension.Reference.InnerText; // Get the dimensions of this worksheet, eg "B2:F4"
int numOfColumns = 0;
int numOfRows = 0;
CalculateDataTableSize(dimensions, ref numOfColumns, ref numOfRows);
//System.Diagnostics.Trace.WriteLine(string.Format("The worksheet \"{0}\" has dimensions \"{1}\", so we need a DataTable of size {2}x{3}.", worksheetName, dimensions, numOfColumns, numOfRows));
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
string[,] cellValues = new string[numOfColumns, numOfRows];
int colInx = 0;
int rowInx = 0;
string value = "";
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
// Iterate through each row of OpenXML data, and store each cell's value in the appropriate slot in our [,] string array.
foreach (Row row in rows)
{
for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
// *DON'T* assume there's going to be one XML element for each column in each row...
Cell cell = row.Descendants<Cell>().ElementAt(i);
if (cell.CellValue == null || cell.CellReference == null)
continue; // eg when an Excel cell contains a blank string
// Convert this Excel cell's CellAddress into a 0-based offset into our array (eg "G13" -> [6, 12])
colInx = GetColumnIndexByName(cell.CellReference); // eg "C" -> 2 (0-based)
rowInx = GetRowIndexFromCellAddress(cell.CellReference) - 1; // Needs to be 0-based
// Fetch the value in this cell
value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
value = stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
cellValues[colInx, rowInx] = value;
}
}
// Copy the array of strings into a DataTable.
// We don't (currently) make any attempt to work out which columns should be numeric, rather than string.
for (int col = 0; col < numOfColumns; col++)
{
//dt.Columns.Add("Column_" + col.ToString());
dt.Columns.Add(cellValues[col, 0]);
}
//foreach (Cell cell in rows.ElementAt(0))
//{
// dt.Columns.Add(GetCellValue(doc, cell));
//}
for (int row = 0; row < numOfRows; row++)
{
DataRow dataRow = dt.NewRow();
for (int col = 0; col < numOfColumns; col++)
{
dataRow.SetField(col, cellValues[col, row]);
}
dt.Rows.Add(dataRow);
}
dt.Rows.RemoveAt(0);
//#if DEBUG
// // Write out the contents of our DataTable to the Output window (for debugging)
// string str = "";
// for (rowInx = 0; rowInx < maxNumOfRows; rowInx++)
// {
// for (colInx = 0; colInx < maxNumOfColumns; colInx++)
// {
// object val = dt.Rows[rowInx].ItemArray[colInx];
// str += (val == null) ? "" : val.ToString();
// str += "\t";
// }
// str += "\n";
// }
// System.Diagnostics.Trace.WriteLine(str);
//#endif
return dt;
}
}
}
catch (Exception ex)
{
return null;
}
}
public static void CalculateDataTableSize(string dimensions, ref int numOfColumns, ref int numOfRows)
{
// How many columns & rows of data does this Worksheet contain ?
// We'll read in the Dimensions string from the Excel file, and calculate the size based on that.
// eg "B1:F4" -> we'll need 6 columns and 4 rows.
//
// (We deliberately ignore the top-left cell address, and just use the bottom-right cell address.)
try
{
string[] parts = dimensions.Split(':'); // eg "B1:F4"
if (parts.Length != 2)
throw new Exception("Couldn't find exactly *two* CellAddresses in the dimension");
numOfColumns = 1 + GetColumnIndexByName(parts[1]); // A=1, B=2, C=3 (1-based value), so F4 would return 6 columns
numOfRows = GetRowIndexFromCellAddress(parts[1]);
}
catch
{
throw new Exception("Could not calculate maximum DataTable size from the worksheet dimension: " + dimensions);
}
}
public static int GetRowIndexFromCellAddress(string cellAddress)
{
// Convert an Excel CellReference column into a 1-based row index
// eg "D42" -> 42
// "F123" -> 123
string rowNumber = System.Text.RegularExpressions.Regex.Replace(cellAddress, "[^0-9 _]", "");
return int.Parse(rowNumber);
}
public static int GetColumnIndexByName(string cellAddress)
{
// Convert an Excel CellReference column into a 0-based column index
// eg "D42" -> 3
// "F123" -> 5
var columnName = System.Text.RegularExpressions.Regex.Replace(cellAddress, "[^A-Z_]", "");
int number = 0, pow = 1;
for (int i = columnName.Length - 1; i >= 0; i--)
{
number += (columnName[i] - 'A' + 1) * pow;
pow *= 26;
}
return number - 1;
}
}[enter image description here][1]
只分析行不会获取所需的结果。 Issue: 行1:5单元格(全部具有值) 行2:6单元格(单元格1,2为空白)。 所以发生了什么是放置数据时,第1行将被打印,但第2行有2个空白单元格,在插入到数据表中时会向左移动。正因为如此,我考虑了SheetDimension。 任何建议如何解决。 代码片段可以更清晰。 –