Example: Index fragmentation with insert/updates, measuring it and fixing it
This blog shows a very simple example to illustrate data fragmentation, measuring it, seeing behind the scene data and steps to address it. I recommend reading the overall series on index fragementation at
https://blogs.msdn.com/sqlserverstorageengine/archive/tags/Index+Fragmentation+Series/default.aspx
-- TSQL Script
create database indextest
go
use indextest
go
-- create the index after loading the data
drop table t_ci
go
create table t_ci (c1 int, c2 char (100), c3 int, c4 varchar(1000))
go
-- load the data
declare @i int
select @i = 0
while (@i < 1000)
begin
insert into t_ci values (@i, 'hello', @i+10000, replicate ('a', 100))
set @i = @i + 1
end
-- find fragmentation
select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent
from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')
OUTPUT
-- main cause of fragmentation is that initial page allocation is using mixed extent
avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent
---------------------------- -------------------------- -------------------- ------------------------------
40 7.25 4 94.127625401532
(1 row(s) affected)
--find extent allocations. Note, first 8 pages are allocated from mixed extents
-- in the output below, I removed some columns that are not relevant here.
dbcc extentinfo (0, 't_ci', 0)
OUTPUT
file_id page_id pg_alloc ext_size object_id index_id partition_number
----------- ----------- ----------- ----------- ----------- ----------- ----------------
1 153 1 1 2137058649 0 1
1 155 1 1 2137058649 0 1
1 156 1 1 2137058649 0 1
1 157 1 1 2137058649 0 1
1 158 1 1 2137058649 0 1
1 159 1 1 2137058649 0 1
1 180 1 1 2137058649 0 1
1 181 1 1 2137058649 0 1
1 208 8 8 2137058649 0 1
1 216 8 8 2137058649 0 1
1 224 5 8 2137058649 0 1
-- this shows all the pages alllocated to t_ci
-- I looked at the pages and I found total of 30 pages with 4 fragements. So the average size is 7.5
dbcc ind ('indextest', 't_ci', 0)
-- create the clustered index
create clustered index ci on t_ci(c1)
go
-- measure the fragementation
select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent
from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')
OUTPUT
-- you don't see any fragementation because SQL Server allocates uniform extents.
-- In the output below, I removed the row for non-leaf index pages
avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent
---------------------------- -------------------------- -------------------- ------------------------------
0 28 1 98.3726958240672
-- this shows all the pages alllocated to t_ci
-- I looked at the pages and I found that logical/physical order was perfactly alinged.
-- also, all pages were allocated from uniform extents except for PFS page.
dbcc ind ('indextest', 't_ci', 1)
-- here is the output of page allocations
dbcc extentinfo (0, 't_ci', 1)
file_id page_id pg_alloc ext_size object_id index_id partition_number
----------- ----------- ----------- ----------- ----------- ----------- ----------------
1 183 1 1 2137058649 1 1
1 232 8 8 2137058649 1 1
1 240 8 8 2137058649 1 1
1 248 8 8 2137058649 1 1
-- create the index and load the data. This is different from the previous example
-- as here we create the index on the empty table and then load the data.
drop table t_ci
go
create table t_ci (c1 int, c2 char (100), c3 int, c4 varchar(1000))
go
-- create the clustered index
create clustered index ci on t_ci(c1)
go
-- load the data
declare @i int
select @i = 0
while (@i < 1000)
begin
insert into t_ci values (@i, 'hello', @i+10000, replicate ('a', 100))
set @i = @i + 1
end
-- measure the fragementation
select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent
from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')
-- output: again, you see the fragementaion becuase first 8 page allcoations come from mixed page extents
-- This is because when you are loading into empty table, SQL Server cannot predict its size. So
-- it does mixed page allocations. This is different when you create the indec
-- after loading the data because at that time it knows the size of the index.
-- Also note that the average used percentage in pages was close to 100%. This means that
-- the pages were not split in half. This is one of the optimizations SQL Server does when
-- loading ordered data.
avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent
---------------------------- -------------------------- -------------------- ------------------------------
14.2857142857143 5.6 5 98.3726958240672
--find extent allocations. Note, first 8 pages are allocated from mixed extents
dbcc extentinfo (0, 't_ci', 1)
file_id page_id pg_alloc ext_size object_id index_id partition_number
----------- ----------- ----------- ----------- ----------- ----------- ----------------
1 180 1 1 5575058 1 1
1 182 1 1 5575058 1 1
1 183 1 1 5575058 1 1
1 142 1 1 5575058 1 1
1 143 1 1 5575058 1 1
1 153 1 1 5575058 1 1
1 154 1 1 5575058 1 1
1 155 1 1 5575058 1 1
1 192 8 8 5575058 1 1
1 200 8 8 5575058 1 1
1 208 5 8 5575058 1 1
drop index ci on t_ci
-- create the clustered index to start with unfragmented data
create clustered index ci on t_ci(c1)
go
-- update all rows such that each row 900 bytes. This will cause page splits thereby
-- lead to fragmentation
update t_ci set c4 = replicate ('b', 1000)
-- measure the fragementation
select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent
from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')
-- Severe fragementation because of page splits. Also note that the space usage in each page is around 50%
-- This affects the performance in two ways (1) you will need to read more pages to get to the
-- same number of rows as pages are 1/2 full. (2) Since there are 117 fragements, it will cause
-- more physical IOs if we are scanning for a range of keys.
--
-- Just to emphasize, the fragmentation happens because of DML operations. So if there is very little
-- DML activity on a table, it may not get fragmented. Other important point to understand
-- is that fragementaion ONLY impacts range queries but NOT singleton selects.So again if
-- you determine that most access thru an index are singleton SELECT, there is no performance benefit
-- in defragmenting it except of course you will be able to space by compacting the data on the page.
--
avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent
---------------------------- -------------------------- -------------------- ------------------------------
40.4332129963899 2.40869565217391 115 50.0636520879664
--find extent allocations. Note, that the pages are allocated from mixed extents only. This
-- is because the table is already bigger than the 8 page threshold
dbcc extentinfo (0, 't_ci', 1)
file_id page_id pg_alloc ext_size object_id index_id partition_number
----------- ----------- ----------- ----------- ----------- ----------- ----------------
1 183 1 1 5575058 1 1
1 440 8 8 5575058 1 1
1 448 8 8 5575058 1 1
1 456 8 8 5575058 1 1
1 464 8 8 5575058 1 1
1 472 8 8 5575058 1 1
1 480 8 8 5575058 1 1
1 488 8 8 5575058 1 1
1 496 8 8 5575058 1 1
1 504 8 8 5575058 1 1
1 512 8 8 5575058 1 1
1 520 8 8 5575058 1 1
1 528 8 8 5575058 1 1
1 536 8 8 5575058 1 1
1 544 8 8 5575058 1 1
1 552 8 8 5575058 1 1
1 560 8 8 5575058 1 1
1 568 8 8 5575058 1 1
1 576 7 8 5575058 1 1
-- you can use the following command to show all the pages alllocated to t_ci and their links.
dbcc ind ('indextest', 't_ci', 1)
-- Now do an index defrag.
alter index ci on t_ci reorganize
-- measure the fragementation
select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent
from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')
-- note that the fragmentation was reduced significantly and the space usage on pages is around 100%. In other
-- words, the defragementation makes the physical order same as the logical order and compacts the pages.
-- The reason you see the fragementation is that the 'Reorganize' command does not allocate new extents
-- as index rebuild command. It works with the already allocated pages and moves the data around and then
-- deallocates pages not needed. If you choose to 'rebuild' the index, then the fragmentation will be completely
-- removed but you must question if it is worth it? Index Reorganize takes very little space (one extra page) and
-- breaks the work into smaller transactions. So you minimze large log chain and space overhead.On the flip side,
-- it is single threaded and fully logged operation.
avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent
---------------------------- -------------------------- -------------------- ------------------------------
3.4965034965035 10.2142857142857 14 98.3726958240672
Comments
- Anonymous
June 01, 2009
PingBack from http://uniformstores.info/story.php?id=19093