C# - Importing a CSV where empty strings do not get sent through bulk import to database

.net c# csv mysql sqlbulkcopy

Question

So I have a database of site location information which contains 27 columns. I have coded a bulk import feature that takes a CSV file and lets you modify information. Everything works well when the CSV file contains information in every column, but when there is an empty string/null it will push an empty string to the database and wipe out what was there before.

To make it quick to update selected attributes in the database, my client wants to supply only the primary key of the site (which I have coded the import to make updateable) and fill in any other attributes, without having to fill in the rest. So, for example, a CSV file for a location might look something like this:

SITE32,,,,BS,11111,,43607,123566789,123456789,2.2.2.2,1.1.1.1.1,1.1.1.1,0,Test,Test,Testing,2,12123,5002,N/A,4,00201,3,000,3703,5

Say this site already exists in the database; I just want to update the fields that are filled in. The ones left blank should just stay the same.

This is the code I have so far

       /// <summary>
       /// Click handler for the bulk-import button. Saves the uploaded CSV under C:\temp,
       /// parses it into a 27-column, all-string <see cref="DataTable"/> and hands that
       /// table to <c>RemoveAllNullColumnsFromDataTable</c>.
       /// </summary>
       /// <remarks>
       /// Parsing is a plain split on ',' and '\n'; quoted fields containing commas or
       /// line breaks are not supported. Empty CSV fields are stored as empty strings,
       /// not <see cref="DBNull"/>, so <c>IsNull</c> checks will not treat them as missing.
       /// </remarks>
       /// <param name="sender">The control that raised the event (unused).</param>
       /// <param name="e">Event data (unused).</param>
       protected void btnBulkSite_click(object sender, EventArgs e)
       {
           // Without an upload there is nothing to parse; stop here instead of
           // going on to dereference FileUpLoad1.PostedFile.
           if (!FileUpLoad1.HasFile)
           {
               btnBulkSite.Text = "No File Uploaded.";
               return;
           }

           FileUpLoad1.SaveAs(@"C:\temp\" + FileUpLoad1.FileName);
           btnBulkSite.Text = "File Uploaded: " + FileUpLoad1.FileName;

           // Column order must match the field order of each CSV line.
           string[] columnNames =
           {
               "SERVER_ID", "SITE_NAME", "SITE_ADDRESS", "SITE_CITY", "SITE_STATE",
               "SITE_ZIPCODE", "SITE_COUNTY", "SITE_INTERNALZIP", "SITE_PHONE_NUM",
               "SITE_FAX_NUM", "SERVER_SUBNET_ADDR", "SERVER_IP_ADDR", "SERVER_GATEWAY_ADDR",
               "COSTCENTER_NUM", "DCMF_NAME", "LU_ID", "XIDPU_ID", "TRAININGSITE_IND",
               "PBA_FICS_NUM", "PBA_CITY_ID", "REGION_NAME", "SITETYPE_ID", "PBA_OFFICE_ID",
               "SITEORIGIN_ID", "REGION_ID", "PBA_BANK_ID", "SITE_REGION_ID"
           };

           DataTable tblcsv = new DataTable();
           foreach (string columnName in columnNames)
           {
               tblcsv.Columns.Add(columnName, typeof(string));
           }

           // NOTE(review): the reader is intentionally not disposed here, because disposing a
           // StreamReader also closes the underlying stream, which this handler does not create.
           System.IO.StreamReader stream = new System.IO.StreamReader(FileUpLoad1.PostedFile.InputStream);
           string csvText = stream.ReadToEnd();

           foreach (string rawLine in csvText.Split('\n'))
           {
               // Files with Windows line endings leave a '\r' on every line after
               // splitting on '\n'; strip it so it does not end up in the last field
               // or turn a blank line into a one-field row.
               string csvRow = rawLine.TrimEnd('\r');
               if (string.IsNullOrEmpty(csvRow))
               {
                   continue;
               }

               string[] fields = csvRow.Split(',');

               // Fields beyond the last declared column (e.g. from a trailing comma)
               // are ignored rather than indexing past the end of the row.
               int fieldCount = Math.Min(fields.Length, tblcsv.Columns.Count);

               DataRow row = tblcsv.NewRow();
               for (int i = 0; i < fieldCount; i++)
               {
                   row[i] = fields[i];
               }
               tblcsv.Rows.Add(row);
           }

           RemoveAllNullColumnsFromDataTable(tblcsv);
       }

  /// <summary>
  /// Drops rows whose first column is null, then drops every column that is null in
  /// all remaining rows, and finally passes the table to <c>InsertCSVRecords</c>.
  /// </summary>
  /// <remarks>
  /// "Null" here means <see cref="DBNull"/> as reported by <c>DataRow.IsNull</c>;
  /// an empty string is a value and does not count as null.
  /// </remarks>
  /// <param name="tblcsv">The table to prune in place and then import.</param>
  public void RemoveAllNullColumnsFromDataTable(DataTable tblcsv)
  {
      // Iterate from the end: calling Delete() on a row that has not been accepted yet
      // takes it out of the collection straight away, so a forward index loop would
      // skip the row that slides into the deleted row's position.
      for (int h = tblcsv.Rows.Count - 1; h >= 0; h--)
      {
          if (tblcsv.Rows[h].IsNull(0))
          {
              tblcsv.Rows[h].Delete();
          }
      }
      tblcsv.AcceptChanges();

      // With no rows left, All(...) below would be true for every column and strip
      // the whole schema; there is also nothing to import, so stop here.
      if (tblcsv.Rows.Count == 0)
      {
          return;
      }

      // Snapshot the columns first so removing one does not disturb the enumeration.
      foreach (DataColumn column in tblcsv.Columns.Cast<DataColumn>().ToArray())
      {
          if (tblcsv.AsEnumerable().All(dr => dr.IsNull(column)))
          {
              tblcsv.Columns.Remove(column);
          }
      }
      tblcsv.AcceptChanges();

      InsertCSVRecords(tblcsv);
  }

        /// <summary>
        /// Bulk-updates the SITE_INFO table from <paramref name="csvdt"/>, matching rows
        /// on SERVER_ID and mapping each remaining table column to the database column
        /// of the same name.
        /// </summary>
        /// <remarks>
        /// Only columns that are actually present in <paramref name="csvdt"/> are mapped,
        /// so a table from which columns have been removed does not produce mappings
        /// that point at a missing source column.
        /// </remarks>
        /// <param name="csvdt">Rows to push to the database; must contain SERVER_ID.</param>
        public void InsertCSVRecords(DataTable csvdt)
        {
            // Every non-key column the import knows about, in CSV order.
            string[] updatableColumns =
            {
                "SITE_NAME", "SITE_ADDRESS", "SITE_CITY", "SITE_STATE", "SITE_ZIPCODE",
                "SITE_COUNTY", "SITE_INTERNALZIP", "SITE_PHONE_NUM", "SITE_FAX_NUM",
                "SERVER_SUBNET_ADDR", "SERVER_IP_ADDR", "SERVER_GATEWAY_ADDR",
                "COSTCENTER_NUM", "DCMF_NAME", "LU_ID", "XIDPU_ID", "TRAININGSITE_IND",
                "PBA_FICS_NUM", "PBA_CITY_ID", "REGION_NAME", "SITETYPE_ID", "PBA_OFFICE_ID",
                "SITEORIGIN_ID", "REGION_ID", "PBA_BANK_ID", "SITE_REGION_ID"
            };

            // NOTE(review): presumably initialises the `con` field used below - confirm.
            connection();

            var objbulk = new BulkOperation(con);
            objbulk.AllowUpdatePrimaryKeys = true;
            objbulk.DestinationTableName = "SITE_INFO";

            // NOTE(review): the trailing `true` presumably flags SERVER_ID as the key
            // used to match rows - confirm against the bulk-operations library.
            objbulk.ColumnMappings.Add("SERVER_ID", "SERVER_ID", true);

            foreach (string columnName in updatableColumns)
            {
                if (csvdt.Columns.Contains(columnName))
                {
                    objbulk.ColumnMappings.Add(columnName, columnName);
                }
            }

            con.Open();
            try
            {
                objbulk.BulkUpdate(csvdt);
            }
            finally
            {
                // Close the connection even when the bulk update throws.
                con.Close();
            }
        }

My logic is that once the information is imported from the CSV file it is moved to a datatable, and if it contains a null value then the column in the datatable is removed. And so there is nothing to map to the BulkUpdate column mappings and thus no data that should be pushed to the database for that column.

This however is not working for some reason and I don't know why... is there a better way?

Any help would be appreciated, thanks.

Accepted Answer

There are a multitude of solutions to get where you need.

One problem I see is that if the person is uploading items with multiple rows and those rows have different data (say one column is blank on one row but filled in on a second row), that will cause it to break since the column will have deleted itself.

SITE32,,1,2,,...
SITE32,1,2,,,...

In this case, columns 1, 3 and 4 (zero-based) would be deleted, since each of them has a null value in at least one row, which would defeat your intended purpose. Your logic as written will only work if every row contains data in the same columns.

I would recommend loading the data into a temporary table and finishing the task in SQL, which would give you more control over the data load process. If you have access to the database, write a stored procedure, then pass each row of the data table into the stored procedure and perform your upsert operation from there.

// Stages the CSV rows in a session-scoped temp table, then updates SITE_INFO so that a
// column is overwritten only where the staged value is non-empty.
// NOTE(review): `conn` is assumed to be a SqlConnection and `dt` the DataTable holding
// the 27 CSV columns in the order listed below - both come from the surrounding code.

// Non-key columns to update; SERVER_ID is the join key and is left untouched.
string[] updatableColumns =
{
    "SITE_NAME", "SITE_ADDRESS", "SITE_CITY", "SITE_STATE", "SITE_ZIPCODE",
    "SITE_COUNTY", "SITE_INTERNALZIP", "SITE_PHONE_NUM", "SITE_FAX_NUM",
    "SERVER_SUBNET_ADDR", "SERVER_IP_ADDR", "SERVER_GATEWAY_ADDR",
    "COSTCENTER_NUM", "DCMF_NAME", "LU_ID", "XIDPU_ID", "TRAININGSITE_IND",
    "PBA_FICS_NUM", "PBA_CITY_ID", "REGION_NAME", "SITETYPE_ID", "PBA_OFFICE_ID",
    "SITEORIGIN_ID", "REGION_ID", "PBA_BANK_ID", "SITE_REGION_ID"
};

SqlCommand cmd = new SqlCommand("", conn);

// Verbatim string: a regular C# string literal cannot span several lines.
cmd.CommandText = @"CREATE TABLE #tmp (
[SERVER_ID] NVARCHAR(MAX),
[SITE_NAME] NVARCHAR(MAX),
[SITE_ADDRESS] NVARCHAR(MAX),
[SITE_CITY] NVARCHAR(MAX),
[SITE_STATE] NVARCHAR(MAX),
[SITE_ZIPCODE] NVARCHAR(MAX),
[SITE_COUNTY] NVARCHAR(MAX),
[SITE_INTERNALZIP] NVARCHAR(MAX),
[SITE_PHONE_NUM] NVARCHAR(MAX),
[SITE_FAX_NUM] NVARCHAR(MAX),
[SERVER_SUBNET_ADDR] NVARCHAR(MAX),
[SERVER_IP_ADDR] NVARCHAR(MAX),
[SERVER_GATEWAY_ADDR] NVARCHAR(MAX),
[COSTCENTER_NUM] NVARCHAR(MAX),
[DCMF_NAME] NVARCHAR(MAX),
[LU_ID] NVARCHAR(MAX),
[XIDPU_ID] NVARCHAR(MAX),
[TRAININGSITE_IND] NVARCHAR(MAX),
[PBA_FICS_NUM] NVARCHAR(MAX),
[PBA_CITY_ID] NVARCHAR(MAX),
[REGION_NAME] NVARCHAR(MAX),
[SITETYPE_ID] NVARCHAR(MAX),
[PBA_OFFICE_ID] NVARCHAR(MAX),
[SITEORIGIN_ID] NVARCHAR(MAX),
[REGION_ID] NVARCHAR(MAX),
[PBA_BANK_ID] NVARCHAR(MAX),
[SITE_REGION_ID] NVARCHAR(MAX)
);";

conn.Open();
try
{
    cmd.ExecuteNonQuery();

    // The bulk copy must use the SAME open connection: #tmp is only visible to the
    // session that created it.
    SqlBulkCopy bc = new SqlBulkCopy(conn);
    bc.DestinationTableName = "#tmp";
    bc.BulkCopyTimeout = 600;
    bc.WriteToServer(dt);
    bc.Close();

    // One "keep the old value unless the staged value is non-empty" clause per column.
    // The names are compile-time constants, so concatenating them into SQL is safe.
    string[] setClauses = new string[updatableColumns.Length];
    for (int i = 0; i < updatableColumns.Length; i++)
    {
        string c = updatableColumns[i];
        setClauses[i] = "t." + c + " = CASE WHEN tmp." + c + " <> '' THEN tmp." + c + " ELSE t." + c + " END";
    }

    cmd.CommandText = "UPDATE t SET " + string.Join(", ", setClauses)
        + " FROM SITE_INFO t INNER JOIN #tmp AS tmp ON t.SERVER_ID = tmp.SERVER_ID";
    cmd.ExecuteNonQuery();
}
finally
{
    // Closing the connection also discards #tmp.
    conn.Close();
}

The above code needs to be adjusted and revised for your purposes. This is just an example. And please note that the connection cannot close between your operations or the temporary table will be deleted. You need to perform both the insert and update using the same connection.


Popular Answer

Build up individual update statements.
You can chain updates:

  -- Two independent single-row updates sent as one batch, separated by semicolons.
  UPDATE [test].[dbo].[Table_1]
     SET value1 = 'newOne'
   WHERE iden = 1;
  UPDATE [test].[dbo].[Table_1]
     SET value1 = 'newTwo'
   WHERE iden = 2;

But that would be open to SQL injection, so you should use parameters.
I am not sure whether you can chain multiple parameter-based updates in one command.
If not, and speed is an issue, I would run the updates asynchronously, so that you are building the next update while the current one is executing.



Licensed under: CC-BY-SA with attribution
Not affiliated with Stack Overflow
Is this KB legal? Yes, learn why
Licensed under: CC-BY-SA with attribution
Not affiliated with Stack Overflow
Is this KB legal? Yes, learn why