将文本文件(500 MB)数据导入SQL Server

asp.net c# sqlbulkcopy sql-server

这是我用于将大于500 MB的文本文件导入数据库的代码。

我想分批进行,以便在执行期间如果文本文件中出现任何格式相关的错误,则至少有一半的内容被上传。

如果对上传如此大的文本文件有其他更好的建议,也请告知。

/// <summary>
/// Saves the file posted through <c>fuTextLoop</c> on the server, then parses it
/// line by line into a <see cref="DataTable"/> containing only the records whose
/// mobile number is not already present in the <c>call</c> table.
/// </summary>
/// <returns>
/// The populated table, or <c>null</c> when no file was posted, the file could not
/// be saved, the selected subscriber is not "Reliance", or no new rows were found.
/// </returns>
/// <remarks>
/// Lines with too few comma-separated fields are skipped and counted instead of
/// aborting the import, so one badly formatted line no longer discards the rest
/// of the file. Any other failure is reported through <c>StatusLabel</c> and the
/// rows collected so far are still returned.
/// </remarks>
private DataTable CreateDataTableFromFileLoop()
{
    // The highest field index read below is s[18], so a usable line has at least 19 fields.
    const int MinFieldCount = 19;

    // The file is read from fuTextLoop, so that is the control that must hold a file.
    if (!fuTextLoop.HasFile)
    {
        return null;
    }

    // NOTE(review): folder and file name are joined without a separator, exactly as
    // in the original code, so the file lands next to (not inside) "callText".
    // Switch to Path.Combine once the target folder is confirmed to exist.
    string path = Server.MapPath("~/callText") + Path.GetFileName(fuTextLoop.FileName);

    try
    {
        fuTextLoop.SaveAs(path);
    }
    catch (Exception ex)
    {
        StatusLabel.Text = "Upload status: The file could not be uploaded. The following error occured: " + ex.Message;
        // Without a saved file there is nothing to parse.
        return null;
    }

    DataTable dt = CreateCallTable();
    string subscriber = ddlSub.SelectedValue.ToString();

    if (subscriber == "Reliance")
    {
        int skipped = 0;

        try
        {
            // One reader, one connection and one prepared command for the whole file;
            // the using blocks release them even when a line or a query fails.
            using (StreamReader sr = new StreamReader(path))
            using (SqlConnection conn = new SqlConnection("Server=.;Database=temp;Trusted_Connection=True;"))
            using (SqlCommand comm = new SqlCommand("select Mobile from call where Mobile=@mobile", conn))
            {
                // Parameterized lookup: file content never becomes part of the SQL text.
                SqlParameter mobile = comm.Parameters.Add("@mobile", SqlDbType.VarChar);
                conn.Open();

                string input;
                while ((input = sr.ReadLine()) != null)
                {
                    string[] s = input.Split(',');

                    if (s.Length < MinFieldCount)
                    {
                        // Badly formatted line: skip it and keep importing.
                        skipped++;
                        continue;
                    }

                    mobile.Value = s[1];
                    if (comm.ExecuteScalar() != null)
                    {
                        // Mobile number already stored: nothing to import for this line.
                        continue;
                    }

                    DataRow dr = dt.NewRow();
                    dr["Sr No"] = s[0];
                    dr["Mobile"] = s[1];
                    dr["Name"] = s[3];
                    dr["Fath_Hus_Name"] = s[4] + s[5] + s[6] + s[7];
                    dr["Address"] = s[8] + s[9];
                    dr["City"] = s[10];
                    dr["PIN Code"] = s[11];
                    dr["Contact Number"] = s[16];
                    dr["Activation_date"] = s[18];
                    dr["Subs_type"] = s[15];
                    // "Status" is intentionally left empty, as in the original mapping.
                    dr["Subs_name"] = subscriber;
                    dt.Rows.Add(dr);
                }
            }

            // The first collected row is dropped, as before (presumably the file's
            // header line - TODO confirm). The guard avoids an exception on an empty table.
            if (dt.Rows.Count > 0)
            {
                dt.Rows[0].Delete();
            }

            if (skipped > 0)
            {
                StatusLabel.Text = "Upload status: " + skipped + " line(s) skipped because of format errors.";
            }
        }
        catch (Exception ex)
        {
            StatusLabel.Text = "Upload status: The file could not be uploaded. The following error occured: " + ex.Message;
        }
    }

    return dt.Rows.Count > 0 ? dt : null;
}

/// <summary>
/// Builds the empty staging table: one non-unique string column per imported field.
/// </summary>
private static DataTable CreateCallTable()
{
    string[] columnNames =
    {
        "Sr No", "Mobile", "Name", "Fath_Hus_Name", "Address", "City",
        "PIN Code", "Contact Number", "Activation_date", "Subs_type",
        "Status", "Subs_name"
    };

    DataTable table = new DataTable();
    foreach (string columnName in columnNames)
    {
        table.Columns.Add(columnName, typeof(string));
    }

    return table;
}

已采纳的答案

如果文件最终要被完整处理,可以从文件末尾向前逐条处理,并在每处理完一条记录后把它从文件中截掉。这样文件里始终只剩下尚未处理的记录,已处理行所占的磁盘空间也会随之释放;从错误中恢复后,也可以“安全地”重新读取整个剩余文件。

首先,您必须声明一个FileStream来跟踪文件并将其传递给StreamReader

// Open the uploaded file through a FileStream so its length can be read and changed,
// and layer a StreamReader on top of it for line-oriented reads.
FileStream fs = new FileStream(Server.MapPath("~/callText") + filename, FileMode.Open);
StreamReader sr = new StreamReader(fs);

现在可以从文件末尾向前读取,直到找到 \n 或 \r\n,然后调用 sr.ReadLine();但首先需要记下文件当前的大小,这样才能知道处理完这一行之后文件应该截断到多大。

// Remember the current length; it is the basis for the truncation done after the line is processed.
long oldLen = fs.Length;
fs.Seek(-2,SeekOrigin.End);// don't start at the very end: a line ending is very likely to sit right there
// A simple while loop goes here that reads the file backwards until it finds '\n' or '\r'.
...
// You should also check for the beginning of the file (BOF).
fs.Seek(1,SeekOrigin.Current); // step past the line ending just found so sr.ReadLine() is not confused by it
// Here you can do input = sr.ReadLine() and process the line.
...
...

现在你可以用旧尺寸减去你刚读过的行的大小来截断它。

// Truncate the file by the byte size of the line just read.
// Replace Unicode with whatever encoding the file actually uses.
// NOTE(review): input comes from ReadLine(), which does not include the line
// terminator, so the terminator's bytes are not subtracted here - confirm whether
// they should be added to the amount removed.
fs.SetLength(oldLen - Encoding.Unicode.GetByteCount(input));

您还可以累积读取的行的大小,并在错误或操作完成后截断文件


热门答案

你说的是文本文件,对吧?因为你的代码里用的是 fileuploadExcel……

无论如何 - 如果它是文本,到目前为止最简单的方法是使用http://www.filehelpers.com

它支持固定和分隔格式,并且快速且易于使用。

它有各种错误模式,其中一个允许你这样做

// Select the SaveAndContinue error mode before reading, so that the errors can be
// inspected through engine.ErrorManager afterwards.
engine.ErrorManager.ErrorMode = ErrorMode.SaveAndContinue;

// (arguments elided in the original snippet)
records = engine.ReadFile(...

// Print the line number, the raw record text and the exception details of every
// error the engine collected.
if (engine.ErrorManager.HasErrors)
{
   foreach (ErrorInfo err in engine.ErrorManager.Errors)
   {
      Console.WriteLine(err.LineNumber);
      Console.WriteLine(err.RecordString);
      Console.WriteLine(err.ExceptionInfo.ToString());
   }
}

最终结果是导入了与格式匹配的所有记录...



许可下: CC-BY-SA with attribution
不隶属于 Stack Overflow
这个KB合法吗? 是的,了解原因
许可下: CC-BY-SA with attribution
不隶属于 Stack Overflow
这个KB合法吗? 是的,了解原因