NUMA 노드에서 메모리 할당
다음 샘플 코드에서는 NUMA 함수 GetNumaHighestNodeNumber, GetNumaProcessorNode 및 VirtualAllocExNuma의 사용을 보여 줍니다. 또한 QueryWorkingSetEx 함수를 사용하여 페이지가 할당된 NUMA 노드를 검색하는 방법을 보여 줍니다.
#define _WIN32_WINNT 0x0600
#include <windows.h>
#include <stdlib.h>
#include <stdio.h>
#include <psapi.h>
#include <tchar.h>
SIZE_T AllocationSize;
DWORD PageSize;
void DumpNumaNodeInfo (PVOID Buffer, SIZE_T Size);
void __cdecl wmain (int argc, const wchar_t* argv[])
{
ULONG HighestNodeNumber;
ULONG NumberOfProcessors;
PVOID* Buffers = NULL;
if (argc > 1)
{
(void)swscanf_s (argv[1], L"%Ix", &AllocationSize);
}
if (AllocationSize == 0)
{
AllocationSize = 16*1024*1024;
}
//
// Get the number of processors and system page size.
//
SYSTEM_INFO SystemInfo;
GetSystemInfo (&SystemInfo);
NumberOfProcessors = SystemInfo.dwNumberOfProcessors;
PageSize = SystemInfo.dwPageSize;
//
// Get the highest node number.
//
if (!GetNumaHighestNodeNumber (&HighestNodeNumber))
{
_tprintf (_T("GetNumaHighestNodeNumber failed: %d\n"), GetLastError());
goto Exit;
}
if (HighestNodeNumber == 0)
{
_putts (_T("Not a NUMA system - exiting"));
goto Exit;
}
//
// Allocate array of pointers to memory blocks.
//
Buffers = (PVOID*) malloc (sizeof(PVOID)*NumberOfProcessors);
if (Buffers == NULL)
{
_putts (_T("Allocating array of buffers failed"));
goto Exit;
}
ZeroMemory (Buffers, sizeof(PVOID)*NumberOfProcessors);
//
// For each processor, get its associated NUMA node and allocate some memory from it.
//
for (UCHAR i = 0; i < NumberOfProcessors; i++)
{
UCHAR NodeNumber;
if (!GetNumaProcessorNode (i, &NodeNumber))
{
_tprintf (_T("GetNumaProcessorNode failed: %d\n"), GetLastError());
goto Exit;
}
_tprintf (_T("CPU %u: node %u\n"), (ULONG)i, NodeNumber);
PCHAR Buffer = (PCHAR)VirtualAllocExNuma(
GetCurrentProcess(),
NULL,
AllocationSize,
MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE,
NodeNumber
);
if (Buffer == NULL)
{
_tprintf (_T("VirtualAllocExNuma failed: %d, node %u\n"), GetLastError(), NodeNumber);
goto Exit;
}
PCHAR BufferEnd = Buffer + AllocationSize - 1;
SIZE_T NumPages = ((SIZE_T)BufferEnd)/PageSize - ((SIZE_T)Buffer)/PageSize + 1;
_putts (_T("Allocated virtual memory:"));
_tprintf (_T("%p - %p (%6Iu pages), preferred node %u\n"), Buffer, BufferEnd, NumPages, NodeNumber);
Buffers[i] = Buffer;
//
// At this point, virtual pages are allocated but no valid physical
// pages are associated with them yet.
//
// The FillMemory call below will touch every page in the buffer, faulting
// them into our working set. When this happens physical pages will be allocated
// from the preferred node we specified in VirtualAllocExNuma, or any node
// if the preferred one is out of pages.
//
FillMemory (Buffer, AllocationSize, 'x');
//
// Check the actual node number for the physical pages that are still valid
// (if system is low on physical memory, some pages could have been trimmed already).
//
DumpNumaNodeInfo (Buffer, AllocationSize);
_putts(_T(""));
}
Exit:
if (Buffers != NULL)
{
for (UINT i = 0; i < NumberOfProcessors; i++)
{
if (Buffers[i] != NULL)
{
VirtualFree (Buffers[i], 0, MEM_RELEASE);
}
}
free (Buffers);
}
}
void DumpRegion (PVOID StartPtr, PVOID EndPtr, BOOL Valid, DWORD Node)
{
DWORD_PTR StartPage = ((DWORD_PTR)StartPtr)/PageSize;
DWORD_PTR EndPage = ((DWORD_PTR)EndPtr)/PageSize;
DWORD_PTR NumPages = (EndPage - StartPage) + 1;
if (!Valid)
{
_tprintf (_T("%p - %p (%6Iu pages): no valid pages\n"), StartPtr, EndPtr, NumPages);
}
else
{
_tprintf (_T("%p - %p (%6Iu pages): node %u\n"), StartPtr, EndPtr, NumPages, Node);
}
}
void DumpNumaNodeInfo (PVOID Buffer, SIZE_T Size)
{
DWORD_PTR StartPage = ((DWORD_PTR)Buffer)/PageSize;
DWORD_PTR EndPage = ((DWORD_PTR)Buffer + Size - 1)/PageSize;
DWORD_PTR NumPages = (EndPage - StartPage) + 1;
PCHAR StartPtr = (PCHAR)(PageSize*StartPage);
_putts (_T("Checking NUMA node:"));
PPSAPI_WORKING_SET_EX_INFORMATION WsInfo = (PPSAPI_WORKING_SET_EX_INFORMATION)
malloc (NumPages*sizeof(PSAPI_WORKING_SET_EX_INFORMATION));
if (WsInfo == NULL)
{
_putts (_T("Could not allocate array of PSAPI_WORKING_SET_EX_INFORMATION structures"));
return;
}
for (DWORD_PTR i = 0; i < NumPages; i++)
{
WsInfo[i].VirtualAddress = StartPtr + i*PageSize;
}
BOOL bResult = QueryWorkingSetEx(
GetCurrentProcess(),
WsInfo,
(DWORD)NumPages*sizeof(PSAPI_WORKING_SET_EX_INFORMATION)
);
if (!bResult)
{
_tprintf (_T("QueryWorkingSetEx failed: %d\n"), GetLastError());
free (WsInfo);
return;
}
PCHAR RegionStart = NULL;
BOOL RegionIsValid = false;
DWORD RegionNode = 0;
for (DWORD_PTR i = 0; i < NumPages; i++)
{
PCHAR Address = (PCHAR)WsInfo[i].VirtualAddress;
BOOL IsValid = WsInfo[i].VirtualAttributes.Valid;
DWORD Node = WsInfo[i].VirtualAttributes.Node;
if (i == 0)
{
RegionStart = Address;
RegionIsValid = IsValid;
RegionNode = Node;
}
if (IsValid != RegionIsValid || Node != RegionNode)
{
DumpRegion (RegionStart, Address - 1, RegionIsValid, RegionNode);
RegionStart = Address;
RegionIsValid = IsValid;
RegionNode = Node;
}
if (i == (NumPages - 1))
{
DumpRegion (RegionStart, Address + PageSize - 1, IsValid, Node);
}
}
free (WsInfo);
}
관련 항목