排查适用于 SQL Server 的 Azure 扩展的问题

适用范围:SQL Server

查询 Azure Resource Graph 以识别已启用 Azure Arc 的服务器上 SQL Server 的 Azure 扩展的状态。 本文演示标识不正常的扩展的查询。

提示

如果尚不熟悉,请先了解 Azure Resource Graph:

识别不正常的扩展

此查询返回已安装扩展但运行不正常的服务器上的 SQL Server 实例。 日期被硬编码到查询中。 它返回扩展状态不正常的资源,或者扩展上次上传时间不在 2024 年 5 月 (2024/05) 或 2024 年 6 月 (2024/06) 的资源。 替换资源的这些日期。

resources
| where type == "microsoft.hybridcompute/machines/extensions"
| where properties.type in ("WindowsAgent.SqlServer","LinuxAgent.SqlServer")
| where properties.instanceView.status.message !contains "SQL Server Extension Agent: Healthy" or (properties.instanceView.status.message !contains "timestampUTC : 2024/05" and properties.instanceView.status.message !contains "timestampUTC : 2024/06") or properties.instanceView.status.message !contains "uploadStatus : OK"
| project id, resourceGroup, subscriptionId, 
    ExtensionHealth = iif(properties.instanceView.status.message !contains "SQL Server Extension Agent: Healthy", "Unhealthy", "Healthy"),
    LastUploadTimestamp = iif(indexof(properties.instanceView.status.message,"timestampUTC : ") > 0, iif(properties.instanceView.status.message !contains "timestampUTC : 2024/06", substring(properties.instanceView.status.message,indexof(properties.instanceView.status.message,"timestampUTC : ") + 15, 10),"Recent"),"no timestamp"),
    LastUploadStatus = iif(indexof(properties.instanceView.status.message,"uploadStatus : OK") > 0, "OK", "Unhealthy"),
    Message = properties.instanceView.status.message

若要确定可能的具体问题,请查看查询结果中 Message 属性的值。

识别不正常的扩展 (PowerShell)

此示例在 PowerShell 中运行。 使用 PowerShell,可使用未硬编码的日期运行。 该示例返回扩展状态为不正常的资源,或者扩展上次上传时间不在本月或上个月的资源。

# PowerShell script to execute an Azure Resource Graph query using Azure CLI
# where the extension status is unhealthy or the extension last upload time isn't in this month or the previous month.

# Requires the Az.ResourceGraph PowerShell module

# Login to Azure if needed
#az login

$currentYear = (Get-Date).Year
$currentMonth = "{0:D2}" -f (Get-Date).Month
$previousMonth = "{0:D2}" -f ((Get-Date).Month-1)
$currentDay = "{0:D2}" -f (Get-Date).Day
$currentYearMonth = "$currentYear/$currentMonth"
$previousYearMonth = "$currentYear/$previousMonth"
$currentDate = "$currentYear/$currentMonth/$currentDay"

# Define the Azure Resource Graph query
$query = @"
Resources
| where type == 'microsoft.hybridcompute/machines/extensions' 
| where properties.type in ('WindowsAgent.SqlServer','LinuxAgent.SqlServer') 
| where properties.instanceView.status.message !contains 'SQL Server Extension Agent: Healthy' 
    or (properties.instanceView.status.message !contains 'timestampUTC : $previousYearMonth' 
            and properties.instanceView.status.message !contains 'timestampUTC : $currentYearMonth') 
    or properties.instanceView.status.message !contains 'uploadStatus : OK' 
| project id, resourceGroup, subscriptionId, 
    ExtensionHealth = iif(properties.instanceView.status.message !contains 'SQL Server Extension Agent: Healthy', 'Unhealthy', 'Healthy'), 
    LastUploadTimestamp = iif(indexof(properties.instanceView.status.message,'timestampUTC : ') > 0, iif(properties.instanceView.status.message !contains 'timestampUTC : $currentYearMonth', substring(properties.instanceView.status.message,indexof(properties.instanceView.status.message,'timestampUTC : ') + 15, 10),'Recent'),'no timestamp'),
    LastUploadStatus = iif(indexof(properties.instanceView.status.message,'uploadStatus : OK') > 0, 'OK', 'Unhealthy'), 
    Message = properties.instanceView.status.message
"@

# Execute the Azure Resource Graph query
$result = Search-AzGraph -Query $query

# Output the results
$result | Format-Table -Property ExtensionHealth, LastUploadTimestamp, LastUploadStatus, Message

若要确定可能的具体问题,请查看结果中 Message 列的值。

识别缺少更新的扩展

识别最近未更新状态的扩展。 此查询按扩展上次更新状态的天数排序,返回 SQL Server 的 Azure 扩展列表。 值为“-1”表示扩展已崩溃,并且扩展状态中存在调用堆栈。

// Show the timestamp extracted
// If an extension has crashed (i.e. no heartbeat), fill timestamp with "1900/01/01, 00:00:00.000"
//
resources
| where type =~ 'microsoft.hybridcompute/machines/extensions'
| extend extensionStatus = parse_json(properties).instanceView.status.message
| extend timestampExtracted = extract(@"timestampUTC\s*:\s*(\d{4}/\d{2}/\d{2}, \d{2}:\d{2}:\d{2}\.\d{3})", 1, tostring(extensionStatus))
| extend timestampNullFilled = iff(isnull(timestampExtracted) or timestampExtracted == "", "1900/01/01, 00:00:00.000", timestampExtracted)
| extend timestampKustoFormattedString = strcat(replace(",", "", replace("/", "-", replace("/", "-", timestampNullFilled))), "Z")
| extend agentHeartbeatUtcTimestamp = todatetime(timestampKustoFormattedString)
| extend agentHeartbeatLagInDays = datetime_diff('day', now(), agentHeartbeatUtcTimestamp)
| project id, extensionStatus, agentHeartbeatUtcTimestamp, agentHeartbeatLagInDays
| limit 100
| order by ['agentHeartbeatLagInDays'] asc

此查询按扩展名上次更新状态的天数分组,返回扩展计数。 值为“-1”表示扩展已崩溃,并且扩展状态中存在调用堆栈。

// Aggregate by timestamp
//
// -1: Crashed extension with no heartbeat, we got a stacktrace instead
//  0: Healthy
// >1: Stale/Offline
//
resources
| where type =~ 'microsoft.hybridcompute/machines/extensions'
| extend extensionStatus = parse_json(properties).instanceView.status.message
| extend timestampExtracted = extract(@"timestampUTC\s*:\s*(\d{4}/\d{2}/\d{2}, \d{2}:\d{2}:\d{2}\.\d{3})", 1, tostring(extensionStatus))
| extend timestampNullFilled = iff(isnull(timestampExtracted) or timestampExtracted == "", "1900/01/01, 00:00:00.000", timestampExtracted)
| extend timestampKustoFormattedString = strcat(replace(",", "", replace("/", "-", replace("/", "-", timestampNullFilled))), "Z")
| extend agentHeartbeatUtcTimestamp = todatetime(timestampKustoFormattedString)
| extend agentHeartbeatLagInDays = iff(agentHeartbeatUtcTimestamp == todatetime("1900/01/01, 00:00:00.000Z"), -1, datetime_diff('day', now(), agentHeartbeatUtcTimestamp))
| summarize numExtensions = count() by agentHeartbeatLagInDays
| order by numExtensions desc