NLS: Internationalized domain name mitigation sample
The sample application described in this topic demonstrates how certain NLS functions can be used to mitigate some of the security issues associated with internationalized domain names (IDNs). This sample demonstrates the following NLS API functions:
- GetLocaleInfoEx, with the LCType parameter set to LOCALE_SSCRIPTS
- GetStringScripts
- VerifyScripts
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// IdnMitigation.cpp
#include "stdafx.h"
#include "windows.h"
#include <stdio.h>
#include <tchar.h>
#include "malloc.h"
// Write a string to stdout, one code unit at a time: values below 0x80 are
// printed as characters, everything else as a \xNNNN escape (four lowercase
// hex digits). A newline is written after the last code unit.
//   pInput - null-terminated wide string to print
void DumpString(LPCWSTR pInput)
{
    for (LPCWSTR pCursor = pInput; *pCursor != 0; ++pCursor)
    {
        const WCHAR unit = *pCursor;

        if (unit >= 0x80)
        {
            // Non-ASCII: show the numeric value instead of the glyph
            wprintf(L"\\x%4.4x", unit);
        }
        else
        {
            wprintf(L"%c", unit);
        }
    }

    wprintf(L"\n");
}
// Test a string to see if it's expected for a locale.
//
// Prints the input string, the scripts reported for the locale
// (LOCALE_SSCRIPTS), the scripts found in the string, and then a verdict
// based on VerifyScripts.
//   strTest   - null-terminated string to examine
//   strLocale - locale name (for example L"en-US") to compare against
// If GetLocaleInfoEx or GetStringScripts reports an error, an error line is
// printed and the function returns without producing a verdict.
void TestString(LPCWSTR strTest, LPCWSTR strLocale)
{
    // We could count the strings, but we really don't expect large script combinations.
    // Both buffers start out as empty strings so they are always safe to print,
    // even on the path where GetStringScripts returns 0 without an error.
    WCHAR pLocaleScripts[255] = { 0 };
    WCHAR pStringScripts[255] = { 0 };

    // Derive the capacities from the arrays so the sizes cannot drift apart
    const int cchLocaleScripts = (int)(sizeof(pLocaleScripts) / sizeof(pLocaleScripts[0]));
    const int cchStringScripts = (int)(sizeof(pStringScripts) / sizeof(pStringScripts[0]));

    // Show the inputs
    wprintf(L"Testing string with locale %s:\n", strLocale);
    DumpString(strTest);

    // Get the expected scripts
    if (GetLocaleInfoEx(strLocale, LOCALE_SSCRIPTS, pLocaleScripts, cchLocaleScripts) == 0)
    {
        // Unexpected Error. GetLastError returns a DWORD, so print it as unsigned long.
        wprintf(L"ERROR in GetLocaleInfoEx: %lu\n", (unsigned long)GetLastError());
        return;
    }

    // Get the actual scripts. We're expecting inherited and common characters (like ,:, etc.)
    // A zero return is only treated as a failure when the last error is not ERROR_SUCCESS.
    if (GetStringScripts(0, strTest, -1, pStringScripts, cchStringScripts) == 0)
    {
        // Read the error code once so the value tested is the value reported
        const unsigned long lastError = (unsigned long)GetLastError();
        if (lastError != ERROR_SUCCESS)
        {
            // Unexpected Error
            wprintf(L"ERROR in GetStringScripts: %lu\n", lastError);
            return;
        }
    }

    // Show what we found
    wprintf(L"Locale Scripts: %s\n", pLocaleScripts);
    wprintf(L"String Scripts: %s\n", pStringScripts);

    // Test the output. The first argument is a flags value, so "no flags" is 0, not NULL.
    if (VerifyScripts(0, pLocaleScripts, -1, pStringScripts, -1))
    {
        wprintf(L"These script(s) are expected in this locale\n");
    }
    else if (VerifyScripts(VS_ALLOW_LATIN, pLocaleScripts, -1, pStringScripts, -1))
    {
        // Only passes once Latin is additionally allowed
        wprintf(L"These script(s) are not expected for this locale, unless Latin is allowed\n");
    }
    else
    {
        wprintf(L"These script(s) are not expected in this locale\n");
    }

    wprintf(L"\n");
}
// Entry point: runs TestString over a fixed set of sample strings, each paired
// with a locale name. Always returns 0.
int __cdecl wmain(int argc, WCHAR* argv[])
{
    // The command line is not used by this sample
    (void)argc;
    (void)argv;

    // String literals are read-only, so they are held through pointers to const
    LPCWSTR strLatin = L"This is an entirely Latn string, even with characters like these: ÀäëêðąĐĽŌņǖǾǽỨ";
    // The private-use character is written as the escape \xf8d9 so that it stays
    // visible in the source and is not lost by editors or fonts that cannot show it
    LPCWSTR strMixed = L"This string has Сугі׀׀іс, Hebrew, and GRΕΕΚ 十 Chinese 工口 and PUA \xf8d9 characters. Depending on font it may look like Latin";
    LPCWSTR strChinese = L"香港特別行政區";
    LPCWSTR strCyrillic = L"русский";
    LPCWSTR strCyrlLatn = L"русский (Russian)";

    TestString(strLatin, L"en-US");
    TestString(strMixed, L"en-US");
    TestString(strChinese, L"zh-HK");
    TestString(strCyrillic, L"ru-RU");
    // The same mixed Cyrillic/Latin string is checked against two different locales
    TestString(strCyrlLatn, L"ru-RU");
    TestString(strCyrlLatn, L"en-US");

    return 0;
}
/* This code example produces the following output:
Testing string with locale en-US:
This is an entirely Latn string, even with characters like these: \x00c0\x00e4\x00eb\x00ea\x00f0\x0105\x0110\x013d\x014c\x0146\x01d6\x01fe\x01fd\x1ee8
Locale Scripts: Latn;
String Scripts: Latn;
These script(s) are expected in this locale
Testing string with locale en-US:
This string has \x0421\x0443\x0433\x0456\x05c0\x05c0\x0456\x0441, Hebrew, and GR\x0395\x0395\x039a \x5341 Chinese \x5de5\x53e3 and PUA \xf8d9 characters. Depending on font it may look like Latin
Locale Scripts: Latn;
String Scripts: Cyrl;Grek;Hani;Hebr;Latn;Zzzz;
These script(s) are not expected in this locale
Testing string with locale zh-HK:
\x9999\x6e2f\x7279\x5225\x884c\x653f\x5340
Locale Scripts: Hani;
String Scripts: Hani;
These script(s) are expected in this locale
Testing string with locale ru-RU:
\x0440\x0443\x0441\x0441\x043a\x0438\x0439
Locale Scripts: Cyrl;
String Scripts: Cyrl;
These script(s) are expected in this locale
Testing string with locale ru-RU:
\x0440\x0443\x0441\x0441\x043a\x0438\x0439 (Russian)
Locale Scripts: Cyrl;
String Scripts: Cyrl;Latn;
These script(s) are not expected for this locale, unless Latin is allowed
Testing string with locale en-US:
\x0440\x0443\x0441\x0441\x043a\x0438\x0439 (Russian)
Locale Scripts: Latn;
String Scripts: Cyrl;Latn;
These script(s) are not expected in this locale
*/