Insert dummy data in tables with relationship
Torno spesso sull’argomento relativo ai test di carico / performance dei database e sull’importanza di poter profilare l’utilizzo che avranno con una mole di dati quantomeno simile allo scenario di produzione.
Diventa importante, quindi, avere un qualsivoglia meccanismo di generazione automatica di dati in modo tale da demandare ad un processo automatico il popolamento delle nostre tabelle.
Ho già parlato qui di una possibile stored procedure che consenta di inserire dati “finti” sulla base del tipo di dati con cui è modellata ciascuna colonna.
Il problema, di questa implementazione, è l’impossibilità di inserire valori su colonne che dipendono da valori di una tabella padre.
Prendiamo, ad esempio, due ipotetiche tabelle che consentono di memorizzare l’intestazione e le righe di fatture.
Qualcosa come:
La necessità, sulla tabella “figlia”, è quella di inserire, nelle colonne di foreign key, soltanto valori che siano presenti nelle colonne primary key della tabella “padre”.
Effettuo il setup delle tabelle come mostrate in figura:
/* start setup database for test */
USE demo;
GO
if object_id('dbo.invoiceRow') is not null
begin
drop table dbo.invoiceRow
end
GO
if object_id('dbo.invoiceHeader') is not null
begin
drop table dbo.invoiceHeader
end
GO
create table invoiceHeader
(
idInvoiceHeader smallint identity(-32000,1),
idInvoiceHeader2 smallint not null,
invoiceDate date,
idCustomer int
)
alter table invoiceHeader add constraint PKInvoiceHeader primary key (idInvoiceHeader, idInvoiceHeader2)
GO
create table invoiceRow
(
idInvoiceRow smallint primary key identity(-32000,1),
idInvoiceHeader smallint,
idInvoiceHeader2 smallint,
idProduct int,
qty smallint,
price money
)
alter table invoiceRow add CONSTRAINT FKInvoiceHeader
FOREIGN KEY (idInvoiceHeader, idInvoiceHeader2)
REFERENCES InvoiceHeader (idInvoiceHeader, idInvoiceHeader2)
GO
/* end setup database for test */
Di seguito, invece, una possibile implementazione della procedura in grado di inserire valori in entrambe le tabelle:
IF EXISTS (SELECT * FROM sys.objects WHERE type = 'P' AND name = 'up_insertDummyDataWithFK')
DROP PROCEDURE dbo.up_insertDummyDataWithFK
GO
CREATE PROCEDURE dbo.up_insertDummyDataWithFK
(
@schemaName nvarchar( 250 ) ,
@tableName nvarchar( 250 ) ,
@numberOfRows int
)
AS
BEGIN
SET NOCOUNT ON;
DECLARE @i int = 0, @tsql varchar( max ) = '';
WHILE (@i < @numberOfRows)
BEGIN
/* check if exists relationship in the table */
IF EXISTS (SELECT TOP 1 1 FROM sys.foreign_keys WHERE parent_object_id = OBJECT_ID( @tableName ))
BEGIN
declare @TSqlFromForeignKeyValues nvarchar(max)
declare @nrFK tinyint
declare @x tinyint = 0
declare @PrimaryKeyValues table (columnName varchar(100), value nvarchar(100), tsqlStatement nvarchar(max))
/* retrieve primary keys from parent table */
INSERT @PrimaryKeyValues (columnName, tsqlStatement)
SELECT DISTINCT CU.COLUMN_NAME, 'SELECT TOP 1 @v = ' + CU.COLUMN_NAME + ' FROM ' + PK.TABLE_NAME + ' ORDER BY NEWID()' as tSQL
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS C
INNER JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS FK ON C.CONSTRAINT_NAME = FK.CONSTRAINT_NAME
INNER JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS PK ON C.UNIQUE_CONSTRAINT_NAME = PK.CONSTRAINT_NAME
INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE CU ON C.CONSTRAINT_NAME = CU.CONSTRAINT_NAME
INNER JOIN
(
SELECT i1.TABLE_NAME , i2.COLUMN_NAME
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS i1
INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE i2 ON i1.CONSTRAINT_NAME = i2.CONSTRAINT_NAME
WHERE i1.CONSTRAINT_TYPE = 'PRIMARY KEY'
) PT ON PT.TABLE_NAME = PK.TABLE_NAME
WHERE FK.TABLE_NAME = @tableName AND FK.TABLE_SCHEMA = @schemaName;
set @nrFK = (select @@rowcount)
--> select @nrFK as numberOfForeignKeys
declare @v nvarchar(100)
declare @cName varchar(100) = ''
/* for each primary key retrieve a valid value from parent table */
while (@x < @nrFK)
begin
set @TSqlFromForeignKeyValues =
(
select top 1 [@PrimaryKeyValues].tSQLStatement
from @PrimaryKeyValues
where [@PrimaryKeyValues].[value] is null
order by [@PrimaryKeyValues].tsqlStatement
)
if (@x > 0)
begin
declare @tsqlModified nvarchar(max)
set @tsqlModified = replace(@TSqlFromForeignKeyValues,'ORDER BY NEWID()',' WHERE ' + @cName + ' = ' + @v + ' ORDER BY NEWID()')
exec sp_executesql @tsqlModified, N'@v nvarchar(100) out', @v out
end
else
begin
exec sp_executesql @TSqlFromForeignKeyValues, N'@v nvarchar(100) out', @v out
end
set @cName = (select top 1 [@PrimaryKeyValues].columnName from @PrimaryKeyValues where [@PrimaryKeyValues].[value] is null order by [@PrimaryKeyValues].tsqlStatement)
update @PrimaryKeyValues set [@PrimaryKeyValues].[value] = @v where [@PrimaryKeyValues].tsqlStatement = @TSqlFromForeignKeyValues
set @x += 1
end
END
declare @FKColumnName nvarchar(max) = ''
declare @FKValues nvarchar(max) = ''
if exists(select top 1 1 from @PrimaryKeyValues)
begin
select
@FKColumnName = @FKColumnName + [@PrimaryKeyValues].columnName + ', ' ,
@FKValues = @FKValues + [@PrimaryKeyValues].[value] + ', ' from @PrimaryKeyValues
set @FKColumnName = left(@FKColumnName, len(@FKColumnName)-1)
set @FKColumnName = ', ' + @FKColumnName
set @FKValues = left(@FKValues, len(@FKValues)-1)
set @FKValues = ', ' + @FKValues
end
delete from @PrimaryKeyValues
/* retrieve column list without foreign keys */
declare @columnListWhitoutFK nvarchar(max)
set @columnListWhitoutFK = ''
SELECT @columnListWhitoutFK = @columnListWhitoutFK + T.columnName + ', ' FROM
(
SELECT C.name as columnName
FROM sys.columns C
JOIN sys.tables T on t.OBJECT_ID = c.OBJECT_ID
WHERE T.name = @tableName AND T.schema_id = schema_id(@schemaName) AND
C.is_identity = 0
EXCEPT
SELECT COL_NAME(fc.parent_object_id, fc.parent_column_id) AS FKColumnName
FROM sys.foreign_keys AS f
INNER JOIN sys.foreign_key_columns AS fc ON f.OBJECT_ID = fc.constraint_object_id
where OBJECT_NAME(f.parent_object_id) = @tableName and f.schema_id = schema_id(@schemaName)
) T
set @columnListWhitoutFK = left(@columnListWhitoutFK, len(@columnListWhitoutFK)-1)
/* generate random value for each column (no foreign keys) */
SELECT @tsql = @tsql + CASE
WHEN c.column_id = 1 THEN 'INSERT ' + @schemaName + '.[' + t.name + '] ' +
'(' + @columnListWhitoutFK + @FKColumnName + ') VALUES ( '
ELSE ''
END +
CASE
WHEN c.is_identity = 0 THEN CASE
WHEN y.name IN ( 'bit' , 'bigint' , 'int' , 'smallint' , 'tinyint' , 'float' , 'decimal' ,
'numeric' , 'money' , 'smallmoney' , 'real' )
THEN SUBSTRING( CAST(1000*RAND() AS VARCHAR(50)) , 1 , c.max_length)
WHEN y.name IN ( 'binary' , 'varbinary' )
THEN SUBSTRING( '0x546869732069732044756D6D792044617461' , 1, c.max_length )
WHEN y.name IN ( 'varchar' , 'char' , 'text', 'nchar' , 'nvarchar' , 'ntext' )
THEN '''' + SUBSTRING( 'some data some data some data' , 1 , c.max_length /2 ) + ''''
WHEN y.name IN ( 'date' , 'time' , 'datetime' , 'datetime2' , 'smalldatetime' , 'datetimeoffset' )
THEN '''' + CONVERT( varchar( 25 ) , GETDATE( ) , 121 ) + ''''
WHEN y.name IN ( 'uniqueidentifier' )
THEN '''' + CAST( NEWID( ) AS varchar( 36 )) + ''''
ELSE '' END + CASE
WHEN c.column_id = (SELECT MAX( column_id ) FROM sys.columns WHERE OBJECT_ID = c.OBJECT_ID )THEN @FKValues + ');'
ELSE ','
END
ELSE ''
END
FROM sys.tables AS t
INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
INNER JOIN sys.columns c ON t.OBJECT_ID = c.OBJECT_ID
INNER JOIN sys.types AS y ON c.user_type_id = y.user_type_id
WHERE
t.name = @tableName AND
s.name = @schemaName AND
c.name IN
(
SELECT
COLUMN_NAME as columnName
FROM INFORMATION_SCHEMA.COLUMNS
WHERE
TABLE_NAME = @tableName AND
TABLE_SCHEMA = @schemaName
EXCEPT
SELECT
COL_NAME( fc.parent_object_id , fc.parent_column_id )AS FKColumnName
FROM sys.foreign_keys AS f
INNER JOIN sys.foreign_key_columns AS fc ON f.OBJECT_ID = fc.constraint_object_id
WHERE
OBJECT_NAME( f.parent_object_id ) = @tableName AND
f.schema_id = SCHEMA_ID( @schemaName )
)
SET @i+=1;
/* insert the values */
--> SELECT @tsql
EXEC ( @tsql );
SET @tsql = '';
END
END;
GO
Un esempio di caricamento dati (attenzione: naturalmente va popolata prima la tabella “padre”):
/* test the procedure (before the parent, next the child table) */
EXEC dbo.up_insertDummyDataWithFK @schemaName = 'dbo', @tableName = 'invoiceHeader' , @numberOfRows = 10;
EXEC dbo.up_insertDummyDataWithFK @schemaName = 'dbo', @tableName = 'invoiceRow' , @numberOfRows = 50;
GO
Vedo qualche dato:
/* select the dummy data */
select count(*) as nrHeaders, (select count(*) from invoiceRow) as nrRows from invoiceHeader
select top 2 * from invoiceHeader
select top 2 * from invoiceRow
GO