2015年9月22日 星期二

【MS-SQL】Batch process by Table-Valued Parameters

Entity Framework 大量 update 時使用 SaveChanges() 會是單筆一次 update 造成效能不彰,這裡參考 Yun-Long Lin 的 solution 是採用 資料表值參數 搭配 預存程式 的方式一次傳遞整個資料集,再於預存程序中進行 update,以降低 IO 需求。


資料表
USE [MyLab]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

CREATE TABLE [dbo].[Product](
 [id] [uniqueidentifier] NOT NULL CONSTRAINT [DF_Product_id]  DEFAULT (newid()),
 [name] [nvarchar](50) NOT NULL,
 [descrp] [nvarchar](max) NULL,
 [lastEditAt] [datetime] NOT NULL CONSTRAINT [DF_Product_lastEditAt]  DEFAULT (getdate()),
 CONSTRAINT [PK_Product] PRIMARY KEY CLUSTERED 
(
 [id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

GO

建使用者定義資料表類型
USE [MyLab]
GO

CREATE TYPE [dbo].[Product] AS TABLE(
 [id] [uniqueidentifier] NOT NULL,
 [name] [nvarchar](50) NOT NULL,
 [descrp] [nvarchar](max) NULL,
 [lastEditAt] [smalldatetime] NOT NULL
)
GO

預存程序(建之前 SSMS 要重連,不然似乎會抓不到上一步建的資料表類型)
USE [MyLab]
GO

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

CREATE Procedure [dbo].[sp_BatchEditProd]
(
  @fromClient Product readonly,
  @opMsg nvarchar(MAX) output 
)
As 
  UPDATE Product
  SET 
  name = cli.name,
  descrp = cli.descrp,
  lastEditAt = GETDATE() 
   
  FROM Product srv 
  INNER JOIN @fromClient cli
  ON srv.id = cli.id
GO

建測試資料
private static string BatchInsertProd(List lstOfProd)
{
    List lst = new List();
    for (int i = 0; i < 20000; i++)
    {
        Product prod = new Product();
        prod.id = Guid.NewGuid();
        prod.name = MyExts.GetRandomString(6);
        prod.descrp = MyExts.GetRandomString(25);
        prod.lastEditAt = DateTime.Now;
        lst.Add(prod);
    }
    db.Product.AddRange(lst);
    db.SaveChanges();

    return string.Empty;
}

亂數
public static string GetRandomString(int len)
{
    var str = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    var rand = new Random(Guid.NewGuid().GetHashCode());
    var builder = new StringBuilder();
    for (var i = 0; i < len; i++)
    {
        builder.Append(str[rand.Next(0, str.Length)]);
    }
    return builder.ToString();
}

最後就是使用 Stopwatch 比較兩個方案的耗時

SaveChanges
private static void UpdateProd(List lstOfProd)
{            
    StirData(lstOfProd);
    sw.Start();
    db.SaveChanges();
    sw.Stop();
    Console.WriteLine("UpdateProd m-sec: " + sw.ElapsedMilliseconds);
}

Solution
private static string BatchUpdateProd(List lstOfProd)
{
    StirData(lstOfProd);
    sw.Start();
    string sqlCmd = "sp_BatchEditProd @fromClient, @opMsg out";
    string outMsg = string.Empty;

    var dt = lstOfProd.ToDataTable();
    List lstOfPars = new List() {
                                       new SqlParameter() {
                                           Direction = ParameterDirection.Input,
                                           ParameterName = "fromClient",
                                           SqlDbType = SqlDbType.Structured,
                                           TypeName = "Product",
                                           Value = dt
                                       },
                                       new SqlParameter() {
                                           Direction = ParameterDirection.Output,
                                           ParameterName = "opMsg",
                                           SqlDbType = SqlDbType.NVarChar,
                                           Size = 40,
                                           Value = null
                                       }
                                   };
    db.Database.ExecuteSqlCommand(sqlCmd, lstOfPars.ToArray());
    sw.Stop();
    Console.WriteLine("BatchUpdateProd m-sec: " + sw.ElapsedMilliseconds);
    return outMsg;
} 

其中,StirData() 會製造 20000筆資料異動,與重設 Stopwatch
private static void StirData(List lstOfProd)
{
    sw.Reset();
    for (int i = 0; i < 20000; i++)
    {
        Product prod = lstOfProd[i];                
        prod.name = MyExts.GetRandomString(6);
        prod.descrp = MyExts.GetRandomString(25);
        prod.lastEditAt = DateTime.Now;
    }
} 

最後就是結果比較

ref: Yun-Long Lin - 使用資料表值參數進行資料的批次作業

沒有留言:

張貼留言