用OLEDB方式讀取EXCEL的速度是非??斓?。但是當(dāng)Excel數(shù)據(jù)量很大時(shí)。會(huì)非常占用內(nèi)存,當(dāng)內(nèi)存不夠時(shí)會(huì)拋出內(nèi)存溢出的異常。
OLEDB方式將Excel作為一個(gè)數(shù)據(jù)源,直接用Sql語(yǔ)句操作數(shù)據(jù),并且不需要安裝Office Excel就可以使用。但缺點(diǎn)是不能靈活操作Excel,例如設(shè)置字體,單元格格式等。
一、讀取Excel
連接字符串的設(shè)置:讀取“.xls”時(shí)使用"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + pathName + ";Extended Properties='Excel 8.0;HDR=Yes;IMEX=1;'" 讀取“.xlsx”時(shí)使用 "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + pathName + ";Extended Properties='Excel 12.0;HDR=Yes;IMEX=1;'"
其中HDR和IMEX的設(shè)置:
HDR=Yes,這代表第一行是標(biāo)題,不做為數(shù)據(jù)使用,系統(tǒng)默認(rèn)的是YES
當(dāng) IMEX=0 時(shí)為“匯出模式”,這個(gè)模式開(kāi)啟的 Excel 檔案只能用來(lái)做“寫(xiě)入”用途。 0 ---輸出模式;
當(dāng) IMEX=1 時(shí)為“匯入模式”,這個(gè)模式開(kāi)啟的 Excel 檔案只能用來(lái)做“讀取”用途。 1---輸入模式;
當(dāng) IMEX=2 時(shí)為“鏈接模式”,這個(gè)模式開(kāi)啟的 Excel 檔案可同時(shí)支持“讀取”與“寫(xiě)入”用途。2----鏈接模式(完全更新能力)
GetOleDbSchemaTable:GetOleDbSchemaTable 的第一個(gè)參數(shù)是架構(gòu)參數(shù),它是一個(gè) OleDbSchemaGuid 類型的標(biāo)識(shí),指定了要返回的架構(gòu)信息的類型(如表、列和主鍵)。例如:System.Data.OleDb.OleDbSchemaGuid.Tables
第二個(gè)參數(shù)是一個(gè)限制對(duì)象數(shù)組,對(duì) DataTable 架構(gòu)中返回的行進(jìn)行過(guò)濾(例如,您可以指定對(duì)表的名稱、類型、所有者和/或架構(gòu)的限制)。制數(shù)組如下所示:{TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE}。 例如:new object[] { null, null, "Sheet1$", null }
下面貼上代碼
1、獲取Excel的工作表名集合
/// <summary>
/// C#中獲取Excel文件的表名
/// </summary>
/// <param name="excelFileName">路徑名</param>
/// <returns></returns>
public static List<string> GetExcelTableName(string pathName)
{
List<string> tableName = new List<string>();
if (File.Exists(pathName))
{
string strConn = string.Empty;
FileInfo file = new FileInfo(pathName);
string extension = file.Extension;
switch (extension)
{
case ".xls":
strConn = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + pathName + ";Extended Properties='Excel 8.0;HDR=Yes;IMEX=1;'";
break;
case ".xlsx":
strConn = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + pathName + ";Extended Properties='Excel 12.0;HDR=Yes;IMEX=1;'";
break;
default:
strConn = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + pathName + ";Extended Properties='Excel 8.0;HDR=Yes;IMEX=1;'";
break;
}
using (System.Data.OleDb.OleDbConnection conn = new System.Data.OleDb.OleDbConnection(strConn))
{
conn.Open();
System.Data.DataTable dt = conn.GetOleDbSchemaTable(System.Data.OleDb.OleDbSchemaGuid.Tables, null);
foreach (System.Data.DataRow row in dt.Rows)
{
string strSheetTableName = row["TABLE_NAME"].ToString();
//過(guò)濾無(wú)效SheetName
if (strSheetTableName.Contains("$") && strSheetTableName.Replace("'", "").EndsWith("$"))
{
strSheetTableName = strSheetTableName.Replace("'", ""); //可能會(huì)有 '1X$' 出現(xiàn)
strSheetTableName = strSheetTableName.Substring(0, strSheetTableName.Length - 1);
tableName.Add(strSheetTableName);
}
}
}
}
return tableName;
}
2、 獲取EXCEL工作表的列名 返回list集合
/// <summary>
/// 獲取EXCEL工作表的列名 返回list集合
/// </summary>
/// <param name="Path">Excel路徑名</param>
/// <returns></returns>
public static List<string> getExcelFileInfo(string pathName)
{
string strConn;
List<string> lstColumnName = new List<string>();
FileInfo file = new FileInfo(pathName);
if (!file.Exists) { throw new Exception("文件不存在"); }
string extension = file.Extension;
switch (extension)
{
case ".xls":
strConn = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + pathName + ";Extended Properties='Excel 8.0;HDR=Yes;IMEX=2;'";
break;
case ".xlsx":
strConn = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + pathName + ";Extended Properties='Excel 12.0;HDR=Yes;IMEX=2;'";
break;
default:
strConn = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + pathName + ";Extended Properties='Excel 8.0;HDR=Yes;IMEX=1;'";
break;
}
System.Data.OleDb.OleDbConnection conn = new System.Data.OleDb.OleDbConnection(strConn);
conn.Open();
System.Data.DataTable table = conn.GetOleDbSchemaTable(System.Data.OleDb.OleDbSchemaGuid.Tables, new object[] { null, null, null, null });
foreach (System.Data.DataRow drow in table.Rows)
{
string TableName = drow["Table_Name"].ToString();
if (TableName.Contains("$") && TableName.Replace("'", "").EndsWith("$"))
{
System.Data.DataTable tableColumns = conn.GetOleDbSchemaTable(System.Data.OleDb.OleDbSchemaGuid.Columns, new object[] { null, null, TableName, null });
foreach (System.Data.DataRow drowColumns in tableColumns.Rows)
{
string ColumnName = drowColumns["Column_Name"].ToString();
lstColumnName.Add(ColumnName);
}
}
}
return lstColumnName;
}
3、OLEDB方式讀取Excel,返回DataTable
在實(shí)際應(yīng)用中碰到了問(wèn)題。某一列的前幾十行全是數(shù)字,而在中間混雜了文字,結(jié)果讀取的時(shí)候沒(méi)有讀取到,為空白。后來(lái)查找資料發(fā)現(xiàn)原來(lái)OLEDB會(huì)根據(jù)表中數(shù)據(jù)的前8行來(lái)決定該列的類型。問(wèn)題解決:我把HDR設(shè)置為no,把原來(lái)的列名認(rèn)為是普通數(shù)據(jù),這樣只要保證列名不為純數(shù)字就行了,最后再重新組織以下列名就好了?;蛘咧苯釉贓xcel中把單元格設(shè)置為 文本。
/// <summary>
/// OLEDB方式讀取Excel
/// </summary>
/// <param name="pathName">Excel路徑</param>
/// <param name="sheetName">工作表名,默認(rèn)讀取第一個(gè)有數(shù)據(jù)的工作表(至少有2列數(shù)據(jù))</param>
/// <returns></returns>
public static System.Data.DataTable DBExcelToDataTable(string pathName,string sheetName="")
{
System.Data.DataTable dt = new System.Data.DataTable();
string ConnectionString = string.Empty;
FileInfo file = new FileInfo(pathName);
if (!file.Exists) { throw new Exception("文件不存在"); }
string extension = file.Extension;
switch (extension) // 連接字符串
{
case ".xls":
ConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + pathName + ";Extended Properties='Excel 8.0;HDR=no;IMEX=1;'";
break;
case ".xlsx":
ConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + pathName + ";Extended Properties='Excel 12.0;HDR=no;IMEX=1;'";
break;
default:
ConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + pathName + ";Extended Properties='Excel 8.0;HDR=no;IMEX=1;'";
break;
}
System.Data.OleDb.OleDbConnection con = new System.Data.OleDb.OleDbConnection(ConnectionString);
try
{
con.Open();
if (sheetName != "") //若指定了工作表名
{ //讀Excel的過(guò)程中,發(fā)現(xiàn)dt末尾有些行是空的,所以在sql語(yǔ)句中加了Where 條件篩選符合要求的數(shù)據(jù)。OLEDB會(huì)自動(dòng)生成列名F1,F2……Fn
System.Data.OleDb.OleDbCommand cmd = new System.Data.OleDb.OleDbCommand("select * from [" + sheetName + "$] where F1 is not null ", con);
System.Data.OleDb.OleDbDataAdapter apt = new System.Data.OleDb.OleDbDataAdapter(cmd);
try
{
apt.Fill(dt);
}
catch(Exception ex) { throw new Exception("該Excel文件中未找到指定工作表名," + ex.Message); }
dt.TableName = sheetName;
}
else
{
//默認(rèn)讀取第一個(gè)有數(shù)據(jù)的工作表
var tables = con.GetOleDbSchemaTable(System.Data.OleDb.OleDbSchemaGuid.Tables, new object[] { });
if (tables.Rows.Count == 0)
{ throw new Exception("Excel必須包含一個(gè)表"); }
foreach (System.Data.DataRow row in tables.Rows)
{
string strSheetTableName = row["TABLE_NAME"].ToString();
//過(guò)濾無(wú)效SheetName
if (strSheetTableName.Contains("$") && strSheetTableName.Replace("'", "").EndsWith("$"))
{
System.Data.DataTable tableColumns = con.GetOleDbSchemaTable(System.Data.OleDb.OleDbSchemaGuid.Columns, new object[] { null, null, strSheetTableName, null });
if (tableColumns.Rows.Count < 2) //工作表列數(shù)
continue;
System.Data.OleDb.OleDbCommand cmd = new System.Data.OleDb.OleDbCommand("select * from [" + strSheetTableName + "] where F1 is not null", con);
System.Data.OleDb.OleDbDataAdapter apt = new System.Data.OleDb.OleDbDataAdapter(cmd);
apt.Fill(dt);
dt.TableName = strSheetTableName.Replace("$", "").Replace("'", "");
break;
}
}
}
if (dt.Rows.Count < 2)
throw new Exception("表必須包含數(shù)據(jù)");
//重構(gòu)字段名
System.Data.DataRow headRow = dt.Rows[0];
foreach (System.Data.DataColumn c in dt.Columns)
{
string headValue = (headRow[c.ColumnName] == DBNull.Value || headRow[c.ColumnName] == null) ? "" : headRow[c.ColumnName].ToString().Trim();
if (headValue.Length == 0)
{ throw new Exception("必須輸入列標(biāo)題"); }
if (dt.Columns.Contains(headValue))
{ throw new Exception("不能用重復(fù)的列標(biāo)題:" + headValue); }
c.ColumnName = headValue;
}
dt.Rows.RemoveAt(0);
return dt;
}
catch (Exception ee)
{ throw ee; }
finally
{ con.Close(); }
}
測(cè)試讀了24000行23列的Excel,耗費(fèi)第一次3501毫秒,第二次2721,第三次2774……
一、寫(xiě)入EXCEL
向Excel中寫(xiě)入數(shù)據(jù)的情況下,就要使用insert語(yǔ)句直接向Excel中輸入了,當(dāng)然是用update更新,delete刪除也行。
起先在OLEDB連接Excel的擴(kuò)展屬性中我都用的HDR=Yes;IMEX=2。實(shí)際運(yùn)行起來(lái)時(shí)發(fā)現(xiàn)在寫(xiě)入Office 97-2003時(shí)沒(méi)問(wèn)題,但是在操作Office 2007,卻出現(xiàn)了"不能修改表“Sheet1”的設(shè)計(jì)。它在只讀數(shù)據(jù)庫(kù)中。"之后把IMEX設(shè)置為0,或者直接不加HDR和IMEX后居然都可以運(yùn)行了。
/// <summary>
/// OLEDB方式導(dǎo)出DataTable
/// </summary>
/// <param name="Path">路徑</param>
/// <param name="dt">DataTable</param>
public static void DTToExcel(string Path, System.Data.DataTable dt)
{
string strCon = string.Empty;
FileInfo file = new FileInfo(Path);
string extension = file.Extension;
switch (extension)
{
case ".xls":
strCon = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + Path + ";Extended Properties=Excel 8.0;";
//strCon = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + Path + ";Extended Properties='Excel 8.0;HDR=Yes;IMEX=0;'";
//strCon = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + Path + ";Extended Properties='Excel 8.0;HDR=Yes;IMEX=2;'";
break;
case ".xlsx":
//strCon = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + Path + ";Extended Properties=Excel 12.0;";
//strCon = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + Path + ";Extended Properties='Excel 12.0;HDR=Yes;IMEX=2;'"; //出現(xiàn)錯(cuò)誤了
strCon = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + Path + ";Extended Properties='Excel 12.0;HDR=Yes;IMEX=0;'";
break;
default:
strCon = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + Path + ";Extended Properties='Excel 8.0;HDR=Yes;IMEX=0;'";
break;
}
try
{
using (System.Data.OleDb.OleDbConnection con = new System.Data.OleDb.OleDbConnection(strCon))
{
con.Open();
StringBuilder strSQL = new StringBuilder();
System.Data.OleDb.OleDbCommand cmd;
try
{
cmd = new System.Data.OleDb.OleDbCommand(string.Format("drop table {0}", dt.TableName), con); //覆蓋文件時(shí)可能會(huì)出現(xiàn)Table 'Sheet1' already exists.所以這里先刪除了一下
cmd.ExecuteNonQuery();
}
catch { }
//創(chuàng)建表格字段
strSQL.Append("CREATE TABLE ").Append("[" + dt.TableName + "]");
strSQL.Append("(");
for (int i = 0; i < dt.Columns.Count; i++)
{
strSQL.Append("[" + dt.Columns[i].ColumnName + "] text,");
}
strSQL = strSQL.Remove(strSQL.Length - 1, 1);
strSQL.Append(")");
cmd = new System.Data.OleDb.OleDbCommand(strSQL.ToString(), con);
cmd.ExecuteNonQuery();
//添加數(shù)據(jù)
for (int i = 0; i < dt.Rows.Count; i++)
{
strSQL.Clear();
StringBuilder strvalue = new StringBuilder();
for (int j = 0; j < dt.Columns.Count; j++)
{
strvalue.Append("'" + dt.Rows[i[j].ToString().Replace("'", "''") + "'");
if (j != dt.Columns.Count - 1)
{
strvalue.Append(",");
}
else
{
}
}
cmd.CommandText = strSQL.Append(" insert into [" + dt.TableName + "] values (").Append(strvalue).Append(")").ToString();
cmd.ExecuteNonQuery();
}
con.Close();
}
}
catch { }
}
測(cè)試將24000行23列的DataTable寫(xiě)入Excel平均要00:02:40.1184609,2分40秒。還有就是不能在Excel里設(shè)置樣式。
由于上段時(shí)間要解析數(shù)據(jù),就從網(wǎng)上收集整理了這些方法,寫(xiě)在這里。方便以后還能用到
|