import text file(500 MB) data into SQL Server

asp.net c# sqlbulkcopy sql-server

Question

Here is my code that I am using for importing text files larger than 500 MB into database.

I want to do it in batches so that if any format related error occur in text file during execution at least half of it contents uploaded.

If any other suggestion for uploading such a large text file please specify.

private DataTable CreateDataTableFromFileLoop()
{
    string filename = "";

    if (fileuploadExcel.HasFile)
    {
       try
       {
           filename = Path.GetFileName(fuTextLoop.FileName);
           fuTextLoop.SaveAs(Server.MapPath("~/callText") + filename);
           //StatusLabel.Text = "Upload status: File uploaded!";
       }
       catch (Exception ex)
       {
           StatusLabel.Text = "Upload status: The file could not be uploaded. The following error occured: " + ex.Message;
       }
    }

    DataTable dt = new DataTable();
    DataColumn dc;
    DataRow dr;

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Sr No";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Mobile";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Name";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Fath_Hus_Name";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Address";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "City";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "PIN Code";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Contact Number";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Activation_date";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Subs_type";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Status";
    dc.Unique = false;
    dt.Columns.Add(dc);

    dc = new DataColumn();
    dc.DataType = System.Type.GetType("System.String");
    dc.ColumnName = "Subs_name";
    dc.Unique = false;
    dt.Columns.Add(dc);

    StreamReader sr = new StreamReader(Server.MapPath("~/callText") + filename);

    SqlConnection conn = new  SqlConnection("Server=.;Database=temp;Trusted_Connection=True;");

if (ddlSub.SelectedValue.ToString() == "Reliance")
{
   try
   {
        string input;
        string mob_chk;

        while ((input = sr.ReadLine()) != null)
        {
            string[] s = input.Split(new char[] { ',' });

            dr = dt.NewRow();
            SqlCommand comm = new SqlCommand("select Mobile from call where Mobile='" + s[1] + "'", conn);
            conn.Open();
            SqlDataReader sdr = comm.ExecuteReader();

            if (sdr.HasRows)
            {
                goto end_of_loop;
            }
            if (!sdr.HasRows)
            {
                dr["Sr No"] = s[0];
                dr["Mobile"] = s[1];
                dr["Name"] = s[3];
                dr["Fath_Hus_Name"] = s[4]+s[5]+s[6] + s[7];
                dr["Address"] = s[8]+s[9];
                dr["City"] = s[10];
                dr["PIN Code"] = s[11];
                dr["Contact Number"] = s[16];
                dr["Activation_date"] = s[18];
                dr["Subs_type"] = s[15];
                //dr["Status"] = s[10];
                dr["Subs_name"] = ddlSub.SelectedValue.ToString();
            }

            dt.Rows.Add(dr);
        end_of_loop:
            conn.Close();
        }

        sr.Close();

        dt.Rows[0].Delete();
   }
   catch (Exception ex)
   {
           StatusLabel.Text = "Upload status: The file could not be uploaded. The following error occured: " + ex.Message;
   }
}

if (dt.Rows.Count > 0)
{
   return dt;
}
else
{
   return null;
}

Accepted Answer

If the file will be processed entirely you can process it backwards and truncate it after each record has been processed, that way you can always process the entire file and free the disk space of the rows as you process them. And you can "safely" read the entire file when resuming from an error.

first you'd have to declare a FileStream to keep track of the file and pass that to the StreamReader

FileStram fs = new FileStream(Server.MapPath("~/callText") + filename, FileMode.Open);
StreamReader sr = new StreamReader(fs);

now you can read the file backwards until a \n or a \r is found and then "sr.ReadLine", but first you need the size of the file, so you know what size it should have after you are done with the row.

long oldLen = fs.Length;
fs.Seek(-2,SeekOrigin.End);//dont really begin from the end, because a line-end is very likely to be right there
//here goes a simple while to read the file backwards until you find '\n' or '\r'. 
...
//you should aso check for BOF
fs.Seek(1,SeekOrigin.Current); //this is case that sr.Readline() gets confused with the line-end I just found
//here you can input=sr.ReadLine() and all you have to do
...
...

now you can just truncate it with the old size minus the size of the row you just read.

fs.SetLength(oldLen - Encoding.Unicode.GetByteCount(input));//replace Unicode with whatever encoding the file has.

you could also accumulate the sizes of the rows you read and truncate the file once theres an error or the operation finished


Popular Answer

You're talking about text files, right. Because your code has fileuploadexcel...

Anyway - if it is text, by far the easiest way is to use http://www.filehelpers.com

It supports fixed and delimited formats and is fast and easy to use.

It has various errormodes, one of them allows you to do this

engine.ErrorManager.ErrorMode = ErrorMode.SaveAndContinue;

records = engine.ReadFile(...

if (engine.ErrorManager.HasErrors)
   foreach (ErrorInfo err in engine.ErrorManager.Errors)
   {
      Console.WriteLine(err.LineNumber); 
      Console.WriteLine(err.RecordString);
      Console.WriteLine(err.ExceptionInfo.ToString());`
   }

The final result is all records that match the format are imported...



Licensed under: CC-BY-SA with attribution
Not affiliated with Stack Overflow
Is this KB legal? Yes, learn why
Licensed under: CC-BY-SA with attribution
Not affiliated with Stack Overflow
Is this KB legal? Yes, learn why