I have a CSV file with 3 million rows and 40 columns. I would like to move the CSV data into SQL Server using SqlBulkCopy. To do that, I load every column and row into a DataTable using a CSV reader. While inserting rows into the DataTable in a while loop, I get a memory exception. Note: 438,184 records (the value of i in the sample below) were inserted successfully; after that I got the memory exception.
SqlConnection con = new SqlConnection(connectionString);
con.Open();
SqlCommand SqlCmd = new SqlCommand();
SqlCmd.CommandTimeout = 0;
SqlCmd.Connection = con;
SqlTransaction transaction = con.BeginTransaction();
var fileStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
StreamReader streamReader = new StreamReader(fileStream);
CsvReader reader = new CsvReader(streamReader);
DataTable table = new DataTable();
reader.ReadHeaderRecord();
foreach (var c in reader.HeaderRecord)
{
    table.Columns.Add(c);
}
table.Rows.Clear();
int i = 0;
while (reader.HasMoreRecords)
{
    DataRecord record = reader.ReadDataRecord();

    // Copy all 40 fields of the record into a new DataTable row.
    var values = new object[table.Columns.Count];
    for (int col = 0; col < values.Length; col++)
    {
        values[col] = record[col];
    }
    table.Rows.Add(values);

    i++;
}
SqlBulkCopy copy = new SqlBulkCopy(con, SqlBulkCopyOptions.KeepIdentity, transaction);
copy.DestinationTableName = "SampleCSV";
copy.WriteToServer(table);
transaction.Commit();
con.Close();
Can anyone suggest how to solve this?
If you get a memory exception, it means you cannot load all the rows into a DataTable at once. You will need to execute a SqlBulkCopy every X rows and clear the DataTable before processing more.
For example, the following code executes the SqlBulkCopy every 100,000 rows loaded into the DataTable, then clears all rows before loading more. Note that the transaction is committed only once, after all batches have been written; a SqlTransaction can only be committed a single time.
int i = 0;
while (reader.HasMoreRecords)
{
    // Flush the DataTable to SQL Server every 100,000 rows, then clear it
    // so memory usage stays bounded.
    if (i > 0 && i % 100000 == 0)
    {
        ExecuteBulkCopy(con, transaction, table);
        table.Rows.Clear();
    }

    DataRecord record = reader.ReadDataRecord();

    // Copy all 40 fields of the record into a new DataTable row.
    var values = new object[table.Columns.Count];
    for (int col = 0; col < values.Length; col++)
    {
        values[col] = record[col];
    }
    table.Rows.Add(values);

    i++;
}
// INSERT the remaining rows
if (table.Rows.Count > 0)
{
    ExecuteBulkCopy(con, transaction, table);
}

// Commit once, after every batch has been written.
transaction.Commit();
public void ExecuteBulkCopy(SqlConnection con, SqlTransaction transaction, DataTable table)
{
    using (SqlBulkCopy copy = new SqlBulkCopy(con, SqlBulkCopyOptions.KeepIdentity, transaction))
    {
        copy.DestinationTableName = "SampleCSV";
        copy.WriteToServer(table);
    }
    // Do not commit here: the same transaction is reused for every batch
    // and is committed once by the caller.
}
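For completeness, here is a minimal sketch of how the surrounding setup and teardown could look with deterministic disposal (using blocks) and a single commit at the end. It assumes the same CsvReader API as in the question and the ExecuteBulkCopy helper above; treat it as an outline rather than drop-in code.
using (var con = new SqlConnection(connectionString))
{
    con.Open();
    using (SqlTransaction transaction = con.BeginTransaction())
    using (var streamReader = new StreamReader(path))
    {
        var reader = new CsvReader(streamReader); // same CsvReader API as in the question

        DataTable table = new DataTable();
        reader.ReadHeaderRecord();
        foreach (var c in reader.HeaderRecord)
        {
            table.Columns.Add(c);
        }

        // ... chunked loading loop from above (ExecuteBulkCopy every 100,000 rows) ...

        if (table.Rows.Count > 0)
        {
            ExecuteBulkCopy(con, transaction, table);
        }

        transaction.Commit(); // single commit after all batches
    }
}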
EDIT: Answer to the sub-question
Can I reduce this time if I use Bulk insert or any other?
SqlBulkCopy is the fastest way to insert. Even my library, .NET Bulk Operations, uses SqlBulkCopy under the hood.
20 minutes for 3 million rows is very slow, but it can also be normal depending on your table (char columns, triggers, indexes, etc.).
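On the index point in particular: loading into a table with nonclustered indexes is slower, and one common workaround is to disable them for the load and rebuild them afterwards. The snippet below is only an illustration of that idea; IX_SampleCSV_Col1 is a hypothetical index name, and whether this pays off depends entirely on your schema.
// Hypothetical nonclustered index name - adjust to your schema.
// Do NOT disable the clustered index; that makes the table unreadable.
using (var disable = new SqlCommand(
    "ALTER INDEX IX_SampleCSV_Col1 ON dbo.SampleCSV DISABLE;", con, transaction))
{
    disable.ExecuteNonQuery();
}

// ... run the SqlBulkCopy batches here ...

using (var rebuild = new SqlCommand(
    "ALTER INDEX IX_SampleCSV_Col1 ON dbo.SampleCSV REBUILD;", con, transaction))
{
    rebuild.ExecuteNonQuery();
}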
Here are the two settings that will have the biggest impact on performance.
Use BatchSize, for example 5,000. It is often faster to insert several smaller batches than one VERY big batch.
Use the TableLock option: new SqlBulkCopy(connectionString, SqlBulkCopyOptions.TableLock). By locking the table, the SqlBulkCopy will perform faster.
SqlBulkCopy copy = new SqlBulkCopy(con, SqlBulkCopyOptions.KeepIdentity | SqlBulkCopyOptions.TableLock, transaction);
// BatchSize is a property; 10,000 rows per batch is a reasonable starting point.
copy.BatchSize = 10000;
copy.DestinationTableName = "SampleCSV";
copy.WriteToServer(table);
transaction.Commit();
con.Close();