Thinking about Data Compression
Nell’ultimo post, qui, ho parlato di come poter comprimere i dati, tramite SQLCLR, per quelle colonne che contengono informazioni che non possono essere compresse con gli strumenti nativi che SQL Server, dalla versione 2008 nelle edizioni Enterprise o Datacenter, mette a disposizione.
In questo post condivido un semplice modo per poter capire / studiare come la compressione, a livello di ROW o di PAGE, potrebbe incidere sul nostro database.
L’idea è quella di ottenere una serie di informazioni che possano fornirci le dimensioni attuali e le dimensioni che andremmo ad avere.
Qualcosa come:
Questo lo script:
SET NOCOUNT ON;
DECLARE @option varchar( 10 );
SET @option = 'Page';
/* SET @option = 'Row' */
IF OBJECT_ID( 'tempdb..#indexDetails' )IS NOT NULL
DROP TABLE #indexDetails;
SELECT
c.name AS schemaName ,
b.name AS objectName ,
a.name AS indexName ,
a.indid ,
dpages ,
CONVERT( numeric( 20 , 2 ) , ROUND( dpages * 8 / 1024.0 , 2 ))AS dataPagesMB,
reserved ,
CONVERT( numeric( 20 , 2 ) , ROUND( reserved * 8 / 1024.0 , 2 ))AS reservedMB,
rowcnt
INTO #indexDetails
FROM sys.sysindexes AS a
INNER JOIN sysobjects AS b ON a.id = b.id
INNER JOIN sys.schemas AS c ON b.uid = c.schema_id
WHERE
b.type = 'U' AND
(a.name NOT LIKE '_WA_Sys%' OR a.name IS NULL) AND
CONVERT( numeric( 20 , 2 ) , ROUND( dpages * 8 / 1024.0 , 2 )) >= 0
ORDER BY dpages;
IF OBJECT_ID( 'tempdb..#error' )IS NOT NULL
DROP TABLE #error;
CREATE TABLE #error
(
error_desc varchar( 2000 ) ,
err_number int ,
err_severity int ,
err_state int ,
err_procedure varchar( 126 ) ,
err_line int ,
err_message varchar( 2048 )
);
IF OBJECT_ID( 'tempdb..#dataCompressionDetails' )IS NOT NULL
DROP TABLE #dataCompressionDetails;
CREATE TABLE #dataCompressionDetails
(
object_name sysname ,
schema_name sysname ,
index_id int ,
partition_number int ,
size_with_current_compression_setting_KB bigint ,
size_with_requested_compression_setting_KB bigint ,
sample_size_with_current_compression_setting_KB bigint ,
sample_size_with_requested_compression_setting_KB bigint
);
DECLARE @TSql varchar( 8000 );
DECLARE myCursor CURSOR
FOR SELECT
'BEGIN TRY INSERT INTO #dataCompressionDetails EXEC sp_estimate_data_compression_savings @schema_name = ''' + c.name + ''',
@object_name = ''' + b.name + ''', @index_id = ' + CONVERT( varchar( 10 ) , a.indid ) + ', @partition_number = NULL,
@data_compression = ''' + @option + ''' ; END TRY BEGIN CATCH INSERT INTO #error SELECT
''[' + c.name + '].[' + b.name + '] failed in compression eval. The minimum row size
plus internal overhead exceeds the maximum allowable table row size (8060 bytes).'' AS Error_Desc ,
ERROR_NUMBER() AS ErrorNumber ,ERROR_SEVERITY() AS ErrorSeverity ,ERROR_STATE() AS ErrorState ,
ERROR_PROCEDURE() AS ErrorProcedure ,ERROR_LINE() AS ErrorLine ,ERROR_MESSAGE() AS ErrorMessage;
END CATCH;'
FROM sys.sysindexes AS a
INNER JOIN sysobjects AS b ON a.id = b.id
INNER JOIN sys.schemas AS c ON b.uid = c.schema_id
WHERE
b.type = 'U' AND
(a.name NOT LIKE '_WA_Sys%' OR a.name IS NULL) AND
CONVERT( numeric( 20 , 2 ) , ROUND( dpages * 8 / 1024.0 , 2 )) >= 0
ORDER BY
dpages;
OPEN myCursor;
FETCH NEXT FROM myCursor INTO @TSql;
WHILE @@FETCH_STATUS = 0
BEGIN
EXEC ( @TSql );
FETCH NEXT FROM myCursor INTO @TSql;
END;
CLOSE myCursor;
DEALLOCATE myCursor;
/* Total saving */
SELECT
cast(SUM( size_with_current_compression_setting_KB ) / 1024.0 as decimal(20,2)) AS [Original size MB] ,
cast(SUM( size_with_current_compression_setting_KB ) / 1024.0 / 1024.0 as decimal(20,2)) AS [Original size GB] ,
cast(SUM( size_with_requested_compression_setting_KB ) / 1024.0 as decimal(20,2)) AS [Compression size MB] ,
cast(SUM( size_with_requested_compression_setting_KB ) / 1024.0 / 1024.0 as decimal(20,2)) AS [Compression size GB] ,
cast(SUM( size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB ) / 1024.0 as decimal(20,2)) AS [Total Saving MB] ,
cast(SUM( size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB ) / 1024.0 / 1024.0 as decimal(20,2)) AS [Total Saving GB]
FROM #dataCompressionDetails;
/* Summary */
SELECT
a.schemaName + '.' + a.objectName AS [Table] ,
a.indexName AS [Index] ,
size_with_current_compression_setting_KB AS [Size current KB] ,
size_with_requested_compression_setting_KB AS [Size compression KB] ,
Compression_rate = CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) ,
size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB AS [Data Compression Saving KB] ,
CAST((size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB) / 1024.0 AS numeric( 20 , 2 ))AS [Data Compression Saving MB],
CAST((size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB) / 1024.0 / 1024.0 AS numeric( 20 , 2 ))AS [Data Compression Saving GB]
FROM #indexDetails AS a
INNER JOIN #dataCompressionDetails AS b ON a.objectName COLLATE Latin1_General_CI_AS = b.object_name COLLATE Latin1_General_CI_AS
AND a.indid = b.index_id
WHERE CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) <= 1
AND size_with_current_compression_setting_KB > 0
AND CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) < 1
ORDER BY
size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB DESC;
/* Total */
SELECT
schemaName + '.' + objectName AS [Table] ,
SUM( size_current_KB )AS [size current KB] ,
SUM( size_compression_KB )AS [Size compression KB] ,
CONVERT( numeric( 20 , 2 ) , ROUND( SUM( size_compression_KB ) * 1.0 / SUM( size_current_KB ) * 1.0 , 2 ))AS [Compression rate] ,
SUM( size_current_KB ) - SUM( size_compression_KB )AS [DataCompression saving KB] ,
CAST((SUM( size_current_KB ) - SUM( size_compression_KB )) / 1024.0 AS decimal( 20 , 2 ))AS [DataCompression saving MB],
CAST((SUM( size_current_KB ) - SUM( size_compression_KB )) / 1024.0 / 1024.0 AS decimal( 20 , 2 ))AS [DataCompression saving GB]
FROM(
SELECT
a.schemaName ,
a.objectName ,
a.indexName ,
size_with_current_compression_setting_KB AS size_current_KB ,
size_with_requested_compression_setting_KB AS size_compression_KB ,
Compression_rate = CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) ,
size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB AS DataCompression_Saving_KB
FROM #indexDetails AS a
INNER JOIN #dataCompressionDetails AS b ON a.objectName COLLATE Latin1_General_CI_AS = b.object_name COLLATE Latin1_General_CI_AS
AND a.indid = b.index_id
WHERE CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) <= 1
AND size_with_current_compression_setting_KB > 0
AND CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) < 1 )AS a
GROUP BY
schemaName ,
objectName;
/* No compression */
SELECT * FROM #error;