字串索引
System.Globalization.StringInfo 類別會提供可以將文字字串分隔為文字項目以及逐一查看這些文字項目的方法。文字項目是顯示為單一字元的文字單位,又稱為字素 (Grapheme)。文字項目可以是基底字元、Surrogate 字組或結合字元序列。如需 Surrogate 字組和結合字元序列的詳細資訊,請參閱 Surrogate 字組和結合字元序列的 Unicode 支援。
您可以使用 StringInfo.GetTextElementEnumerator 方法來建立列舉程式 (Enumerator),逐一查看字串中的文字項目。您可以使用 StringInfo.ParseCombiningCharacters 方法來傳回指定字串中每個基底字元、高 Surrogate 或控制字元的索引。
在下列程式碼範例中,將建立含有結合字元序列的阿拉伯文字元字串。例如,在 strCombining
中,Unicode 字碼 U+0625 表示阿拉伯文基底字元 (Arabic letter Alef with Hamza below,即下方帶有 Hamza 的阿拉伯字母 Alef),而 Unicode 字碼 U+0650 表示阿拉伯文結合字元 (Arabic Kasra)。這些字碼可一起用來表示結合字元序列,因此必須將它們剖析為單一文字項目。接著,將會建立含有 Surrogate 字組的字串。例如,在 strSurrogates
中,Unicode 字碼 U+DACE 表示高 Surrogate 而 Unicode 字碼 U+DEFF 代表低 Surrogate。這些字碼可一起用來表示 Surrogate 字組,因此必須將它們剖析為單一文字項目。每個字串都會先使用 ParseCombiningCharacters 方法剖析一次,然後再使用 GetTextElementEnumerator 方法剖析一次。這兩種方法可正確剖析索引處 0、2、3、5 和 6 的 strCombining
文字項目。這兩種方法可正確剖析索引處 0、2、4、5 和 6 的 strSurrogates
文字項目。以下將顯示剖析作業的結果。
Imports System
Imports System.IO
Imports System.Globalization
Imports System.Text
Imports Microsoft.VisualBasic
Public Class StringInfoSample

    ''' <summary>
    ''' Builds two sample strings (one containing Arabic combining character
    ''' sequences, one containing surrogate pairs) and prints the text
    ''' elements of each.
    ''' </summary>
    Public Shared Sub Main()
        ' Creates a string with text elements at <0;2;3;5;6>.
        ' The Unicode code points specify Arabic
        ' combining character sequences.
        Dim strCombining As String = ChrW(&H625) & ChrW(&H650) & _
            ChrW(&H64A) & ChrW(&H647) & ChrW(&H64E) & ChrW(&H627) & _
            ChrW(&H628) & ChrW(&H64C)

        ' Creates a string with text elements at <0;2;4;5;6>.
        ' The Unicode code points specify private surrogate pairs.
        Dim strSurrogates As String = ChrW(&HDACE) & ChrW(&HDEFF) & _
            ChrW(&HDAAF) & ChrW(&HDEFC) & "a" & ChrW(&HD8BF) & ChrW(&HDD99)

        EnumerateTextElements(strCombining)
        EnumerateTextElements(strSurrogates)
    End Sub

    ''' <summary>
    ''' Writes every text element of <paramref name="str"/> to the console
    ''' twice: first located with StringInfo.ParseCombiningCharacters, then
    ''' with a TextElementEnumerator. Each line shows the element number, its
    ''' first and last Char index, and the element text.
    ''' </summary>
    ''' <param name="str">The string to split into text elements.</param>
    ''' <exception cref="ArgumentNullException">
    ''' <paramref name="str"/> is Nothing.
    ''' </exception>
    Public Shared Sub EnumerateTextElements(str As String)
        If str Is Nothing Then
            Throw New ArgumentNullException("str")
        End If

        ' Parses the string using the ParseCombiningCharacters() method.
        ' The message is kept on a single line: a line continuation placed
        ' inside a string literal becomes part of the text (or fails to compile).
        Console.WriteLine(ControlChars.NewLine & _
            "Parsing '{0}' Using ParseCombiningCharacters()...", str)

        Dim startIndices As Integer() = StringInfo.ParseCombiningCharacters(str)
        For i As Integer = 0 To startIndices.Length - 1
            Dim first As Integer = startIndices(i)

            ' An element ends where the next one starts; the last element
            ' runs to the end of the string. Folding the last element into
            ' the loop also makes an empty string print nothing instead of
            ' indexing past the end of an empty array.
            Dim pastEnd As Integer = str.Length
            If i + 1 < startIndices.Length Then
                pastEnd = startIndices(i + 1)
            End If

            Console.WriteLine("Text Element {0} ({1}..{2})= {3}", i, _
                first, pastEnd - 1, str.Substring(first, pastEnd - first))
        Next i

        ' Parses the string using the GetTextElementEnumerator method.
        Console.WriteLine(ControlChars.NewLine & _
            "Parsing '{0}' Using TextElementEnumerator...", str)

        Dim elements As TextElementEnumerator = _
            StringInfo.GetTextElementEnumerator(str)
        Dim elementNumber As Integer = 0

        ' The enumerator starts before the first element, so MoveNext() must
        ' be called before anything is read. ("Continue" is a reserved word
        ' in Visual Basic and cannot be used as a plain variable name, so the
        ' loop tests MoveNext() directly.)
        While elements.MoveNext()
            ' GetTextElement() returns the current element as a String;
            ' Current returns the same element typed as Object.
            Dim element As String = elements.GetTextElement()
            Console.WriteLine("Text Element {0} ({1}..{2})= {3}", _
                elementNumber, elements.ElementIndex, _
                elements.ElementIndex + element.Length - 1, element)
            elementNumber += 1
        End While
    End Sub

End Class
using System;
using System.IO;
using System.Globalization;
using System.Text;
public class StringInfoSample
{
    /// <summary>
    /// Builds two sample strings (one containing Arabic combining character
    /// sequences, one containing surrogate pairs) and prints the text
    /// elements of each.
    /// </summary>
    public static void Main()
    {
        // Creates a string with text elements at <0;2;3;5;6>.
        // The Unicode code points specify Arabic
        // combining character sequences.
        string strCombining =
            "\u0625\u0650\u064A\u0647\u064E\u0627\u0628\u064C";

        // Creates a string with text elements at <0;2;4;5;6>.
        // The Unicode code points specify private surrogate pairs.
        string strSurrogates = "\uDACE\uDEFF\uDAAF\uDEFCa\uD8BF\uDD99";

        EnumerateTextElements(strCombining);
        EnumerateTextElements(strSurrogates);
    }

    /// <summary>
    /// Writes every text element of <paramref name="str"/> to the console
    /// twice: first located with StringInfo.ParseCombiningCharacters, then
    /// with a TextElementEnumerator. Each line shows the element number, its
    /// first and last char index, and the element text.
    /// </summary>
    /// <param name="str">The string to split into text elements.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="str"/> is null.
    /// </exception>
    public static void EnumerateTextElements(string str)
    {
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }

        // Parses the string using the ParseCombiningCharacters() method.
        Console.WriteLine(
            "\r\nParsing '{0}' Using ParseCombiningCharacters()...", str);

        int[] startIndices = StringInfo.ParseCombiningCharacters(str);
        for (int i = 0; i < startIndices.Length; i++)
        {
            int first = startIndices[i];

            // An element ends where the next one starts; the last element
            // runs to the end of the string. Folding the last element into
            // the loop also makes an empty string print nothing instead of
            // indexing past the end of an empty array.
            int pastEnd = (i + 1 < startIndices.Length)
                ? startIndices[i + 1]
                : str.Length;

            // The format string stays on one line: a regular string literal
            // cannot contain a raw line break.
            Console.WriteLine(
                "Text Element {0} ({1}..{2})= {3}",
                i, first, pastEnd - 1, str.Substring(first, pastEnd - first));
        }

        // Parses the string using the GetTextElementEnumerator method.
        Console.WriteLine(
            "\r\nParsing '{0}' Using TextElementEnumerator...", str);

        TextElementEnumerator elements = StringInfo.GetTextElementEnumerator(str);
        int elementNumber = 0;

        // The enumerator starts before the first element, so MoveNext() must
        // be called before anything is read.
        while (elements.MoveNext())
        {
            // GetTextElement() returns the current element as a string;
            // Current returns the same element typed as object.
            string element = elements.GetTextElement();
            Console.WriteLine(
                "Text Element {0} ({1}..{2})= {3}",
                elementNumber,
                elements.ElementIndex,
                elements.ElementIndex + element.Length - 1,
                element);
            elementNumber++;
        }
    }
}
注意事項 |
---|
如果您在主控台應用程式中執行這個程式碼,由於主控台環境並不支援所有的 Unicode 字元,因此指定的 Unicode 文字項目將無法正確顯示。 |