3.1.5.2.4 GetWindowsSortKey Pseudocode

Article
06/24/2021
This algorithm specifies the generation of sort keys for a specific UTF-16 string.<4>
 STRUCTURE CharacterWeightType
 (
      ScriptMember:    8 bit integer
      PrimaryWeight:   8 bit integer
      DiacriticWeight: 8 bit integer
      CaseWeight:      8 bit integer
 )
  
 STRUCTURE UnicodeWeightType
 (
      ScriptMember:    8 bit integer
      PrimaryWeight:   8 bit integer
      ThirdByteWeight: 8 bit integer
 )
  
 STRUCTURE SpecialWeightType
 (
      Position:       16 bit integer
      ScriptMember:    8 bit integer
      PrimaryWeight:   8 bit integer
 )
  
 STRUCTURE ExtraWeightType
 (
      W6:              8 bit integer
      W7:              8 bit integer
 )
 SET constant LCID_KOREAN to 0x0412
 SET constant LCID_KOREAN_UNICODE_SORT to 0x010412
 SET constant LCID_HUNGARIAN to 0x040e
  
 SET constant SORTKEY_SEPARATOR to 0x01
 SET constant SORTKEY_TERMINATOR to 0x00
  
 SET global KoreanScriptMap to InitKoreanScriptMap
  
 //
 //  Script Member Values.
 //
 SET constant UNSORTABLE       to 0
 SET constant NONSPACE_MARK    to 1
 SET constant EXPANSION        to 2
 SET constant EASTASIA_SPECIAL to 3
 SET constant JAMO_SPECIAL     to 4
 SET constant EXTENSION_A      to 5
 SET constant PUNCTUATION      to 6
  
 SET constant SYMBOL_1         to 7
 SET constant SYMBOL_2         to 8
 SET constant SYMBOL_3         to 9
 SET constant SYMBOL_4         to 10
 SET constant SYMBOL_5         to 11
 SET constant SYMBOL_6         to 12
  
 SET constant DIGIT            to 13
  
 SET constant LATIN            to 14
 SET constant KANA             to 34
 SET constant IDEOGRAPH        to 128
  
 IF Windows version is Windows Vista, Windows Server 2008, Windows 7, or    
    Windows Server 2008 R2 THEN
 SET constant MAX_SPECIAL_CASE to SYMBOL_6
  
 ELSE
 SET constant MAX_SPECIAL_CASE to SYMBOL_5
 ENDIF
  
 IF Windows version is Windows 7 or Windows Server 2008 R2 THEN
     COMMENT Set the constant for the first script member of the Unicode
     COMMENT Private Use Area (PUA) range
     SET constant PUA3BYTESTART to 0xA9
     COMMENT Set the constant for the last script member of the Unicode  
     COMMENT Private Use Area (PUA) range
     SET constant PUA3BYTEEND to 0xAF
  
     COMMENT Set the constant for the first script member of CJK  
     COMMENT (Chinese/Japanese/Korean) 3 byte weight range
     SET constant CJK3BYTESTART to 0xC0
     COMMENT Set the constant for the last script member of CJK 
     COMMENT (Chinese/Japanese/Korean) 3 byte weight range
     SET constant CJK3BYTEEND to 0xF9
 ENDIF
 SET constant FIRST_SCRIPT     to LATIN
 SET constant MAX_SCRIPTS      to 256
  
 //
 //  Values for CJK Unified Ideographs Extension A range.
 //    0x3400 thru 0x4dbf
 //
 SET constant SCRIPT_MEMBER_EXT_A  to 254       // SM for Extension A
 SET constant PRIMARY_WEIGHT_EXT_A to 255       // AW for Extension A
  
 //
 //  Lowest weight values.
 //  Used to remove trailing DW and CW values.
 //  Also used to keep illegal values out of sort keys.
 //
  
 SET constant MIN_DW to 2
 SET constant MIN_CW to 2
  
 //
 //  Bit mask values.
 //
 //  Case Weight (CW) - 8 bits:
 //    bit 0   => width
 //    bit 1,2 => small kana, sei-on
 //    bit 3,4 => upper/lower case
 //    bit 5   => kana
 //    bit 6,7 => contraction
 //
  
  
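 IF Windows version is Windows Vista, Windows Server 2008, Windows 7, or
    Windows Server 2008 R2 THEN
     COMMENT Contractions of 2 through 8 characters are supported.
     SET constant CONTRACTION_8_MASK to 0xc0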
     SET constant CONTRACTION_7_MASK to 0xc0
     SET constant CONTRACTION_6_MASK to 0xc0
     SET constant CONTRACTION_5_MASK to 0x80
     SET constant CONTRACTION_4_MASK to 0x80
     SET constant CONTRACTION_3_MASK to 0x40
     SET constant CONTRACTION_2_MASK to 0x40
  
     SET constant CONTRACTION_MASK to 0xc0
  
 ELSE
     COMMENT Otherwise, only 2-character or 3-character contractions 
 //  are supported.
 SET constant CONTRACTION_3_MASK to 0xc0  
 //  Bit-mask to check 2 character contraction or 3 character contraction
 SET constant CONTRACTION_2_MASK to 0x80  
 //  Bit-mask to check 2 character contraction
 ENDIF
  
 SET constant CASE_UPPER_MASK to 0xe7  // zero out case bits
 SET constant CASE_KANA_MASK  to 0xdf  // zero out kana bit
 SET constant CASE_WIDTH_MASK to 0xfe  // zero out width bit
  
 //
 //  Masks to isolate the various bits in the case weight.
 //
 //  NOTE: Bit 2 needs to always equal 1 to avoid getting
 //        a byte value of either 0 or 1.
 //
  
 SET constant CASE_EXTRA_WEIGHT_MASK to 0xc4
 SET constant ISOLATE_KANA to
              (~CASE_KANA_MASK) | CASE_EXTRA_WEIGHT_MASK
 SET constant ISOLATE_WIDTH to 
              (~CASE_WIDTH_MASK) | CASE_EXTRA_WEIGHT_MASK
  
 //
 //  Values for East Asia special case primary weights.
 //
 SET constant PW_REPEAT      to 0
 SET constant PW_CHO_ON      to 1
 SET constant MAX_SPECIAL_PW to PW_CHO_ON
  
 //
 //  Values for weight 5 - East Asia Extra Weights.
 //
 SET constant WT_FIVE_KANA to 3
 SET constant WT_FIVE_REPEAT to 4
 SET constant WT_FIVE_CHO_ON to 5
  
 //
 //  PW Mask for Cho-On:
 //  Leaves bit 7 on in PW, so it becomes Repeat
 //  if it follows Kana N.
 //
 SET constant CHO_ON_PW_MASK to 0x87
  
 //
 //  Special weight values
 //
 SET constant MAP_INVALID_WEIGHT to 0xff
  
 //
 //  Some Significant Values for Korean Jamo.
 //  The L, V & T syllables in the 0x1100 Unicode range
 //  can be composed to characters in the 0xac00 range.
 //  See The Unicode Standard for details.
 //
 SET constant NLS_CHAR_FIRST_JAMO       to 0x1100 
 //  Begin Jamo range
 SET constant NLS_CHAR_LAST_JAMO        to 0x11f9 
 //  End Jamo range
 SET constant NLS_CHAR_FIRST_VOWEL_JAMO to 0x1160 
 //  First Vowel Jamo
 SET constant
     NLS_CHAR_FIRST_TRAILING_JAMO to 0x11a8   
 //  First Trailing Jamo
 SET constant
     NLS_JAMO_VOWEL_COUNT to 21           
 //  Number of vowel Jamo (V)
 SET constant
     NLS_JAMO_TRAILING_COUNT to 28     
 //  Number of trailing Jamo (T)
 SET constant
     NLS_HANGUL_FIRST_COMPOSED to 0xac00      
 //  Begin composed range
  
 //
 //  Values for Unicode Weight extra weights (e.g. Jamo (old Hangul)).
 //  The following uses SM for extra UW weights.
 //
 SET constant ScriptMember_Extra_UnicodeWeight to 255
 //  Leading Weight / Vowel Weight / Trailing Weight
 //  according to the current Jamo class.
 //
 STRUCTURE JamoSortInfoType
 (
      // true for an old Hangul sequence
      OldHangulFlag : Boolean
      
      // true if U+1160 (Hangul Jungseong Filler) used
      FillerUsed : Boolean
  
      // index to the prior modern Hangul syllable (L)
      LeadingIndex : 8 bit integer
  
      // index to the prior modern Hangul syllable (V)
      VowelIndex : 8 bit integer
  
      // index to the prior modern Hangul syllable (T)
      TrailingIndex : 8 bit integer
  
      // Weight to offset from other old hangul (L)
      LeadingWeight : 8 bit integer
  
      // Weight to offset from other old hangul (V)
      VowelWeight : 8 bit integer
  
      // Weight to offset from other old hangul (T)
      TrailingWeight : 8 bit integer
 )
  
 // This is the raw data record type from the data table
 STRUCTURE JamoStateDataType
 (
      // true for an old Hangul sequence
      OldHangulFlag : Boolean
  
      // index to the prior modern Hangul syllable (L)
      LeadingIndex : 8 bit integer
  
      // index to the prior modern Hangul syllable (V)
      VowelIndex : 8 bit integer
  
      // index to the prior modern Hangul syllable (T)
      TrailingIndex : 8 bit integer
  
      // weight to distinguish from old Hangul
      ExtraWeight : 8 bit integer
  
      // number of additional records in this state
      TransitionCount : 8 bit integer
  
      // Current record in unisort.txt Jamo table:
      JamoRecord : data record
  
      // SORTTABLES\JAMOSORT\[Character] section 
 )
 COMMENT GetWindowsSortKey
 COMMENT
 COMMENT  On Entry:  SourceString - Unicode String to compute a
 COMMENT                            sort key for
 COMMENT             SortLocale   - Locale to determine correct 
 COMMENT                            linguistic sort
 COMMENT             Flags        - Bit Flag to control behavior
 COMMENT                            of sort key generation. 
 COMMENT                             
 COMMENT  NORM_IGNORENONSPACE    Ignore diacritic weight
 COMMENT  NORM_IGNORECASE:       Ignore case weight
 COMMENT  NORM_IGNOREKANATYPE:   Ignore Japanese Katakana/Hiragana
 COMMENT                         difference
 COMMENT  NORM_IGNOREWIDTH:      Ignore Chinese/Japanese/Korean
 COMMENT                         half-width and full-width difference.
 COMMENT
 COMMENT  On Exit:   SortKey      - Byte array containing the
 COMMENT                            computed sort key.
 COMMENT
  
 PROCEDURE GetWindowsSortKey(IN SourceString : Unicode String,
                             IN SortLocale :   LCID,
                             IN Flags : 32 bit integer,
                             OUT SortKey : BYTE String)
  
 COMMENT Compute flags for sort conditions
 COMMENT Based on the case/kana/width flags,
 COMMENT   turn off bits in case mask when comparing case weight.
  
 SET CaseMask to 0xff
  
 If (NORM_IGNORECASE bit is on in Flags) THEN
     SET CaseMask to CaseMask LOGICAL AND with CASE_UPPER_MASK
 ENDIF
  
 If (NORM_IGNOREKANATYPE bit is on in Flags) THEN
     SET CaseMask to CaseMask LOGICAL AND with CASE_KANA_MASK
 ENDIF
  
 If (NORM_IGNOREWIDTH bit is on in Flags) THEN
     SET CaseMask to CaseMask LOGICAL AND with CASE_WIDTH_MASK
 ENDIF
  
 COMMENT Windows 7 and Windows Server 2008 R2 use 3-byte 
 COMMENT (instead of 2-byte) sequence for Unicode Weights
 COMMENT for Private Use Area (PUA) and some Chinese/Japanese/Korean (
 COMMENT CJK) script members.
 COMMENT Does this sort have a 3-byte Unicode Weight (CJK sorts)?
 IF Windows version is Windows 7 or Windows Server 2008 R2 THEN
    COMMENT Check if the locale can have 3-byte Unicode weight
    SET Is3ByteWeightLocale to CALL Check3ByteWeightLocale(SortLocale)
 ENDIF
  
  
 IF Windows version is Windows Vista, Windows Server 2008, Windows 7, or 
 Windows Server 2008 R2 THEN
     COMMENT For Windows Vista, Windows Server 2008, Windows 7, and 
     COMMENT Windows Server 2008 R2, the algorithm
     COMMENT does not remap the script for Korean locale
     SET IsKoreanLocale to false
 ELSE
  
  
    IF SortLocale is LCID_KOREAN or
       SortLocale is LCID_KOREAN_UNICODE_SORT THEN
          SET IsKoreanLocale to true
           IF KoreanScriptMap is null THEN
               CALL InitKoreanScriptMap
           ENDIF
    ELSE
        SET IsKoreanLocale to false
    ENDIF
 ENDIF
  
 //
 //  Allocate buffer to hold different levels of sort key weights.
 //  UnicodeWeights/ExtraWeights/SpecialWeights will eventually
 //  be collected together, in that order, into the returned
 //  Sortkey byte string.
 //
 //  Maximum expansion size is 3 times the input size
 //
  
 // Unicode Weight => 4 word (16 bit) length
 // (extension A and Jamo need extra words)
 SET UnicodeWeights to new empty string of UnicodeWeightType
  
 SET DiacriticWeights to new empty string of BYTE
 SET CaseWeights to new empty string of BYTE
  
 // Extra Weight=>4 byte length (4 weights, 1 byte each) FE Special
 SET ExtraWeights to new empty string of ExtraWeightType
  
 // Special Weight => dword length (2 words each of 16 bits)
 SET SpecialWeights to new empty string of SpecialWeightType
  
 //
 // Go through the string, code point by code point,
 // testing for contractions and Hungarian special character sequence
 //
  
 // loop presumes 0 based index for source string
 FOR SourceIndex is 0 to Length(SourceString) -1
     //
     // Get weights
     // CharacterWeight will contain all of the weight information
     // for the character tested.
     //
  
     SET CharacterWeight to CALL GetCharacterWeights
         WITH (SortLocale, SourceString[SourceIndex])
  
     SET ScriptMember to CharacterWeight.ScriptMember
  
     // Special case weights have script members less than or equal to
     // MAX_SPECIAL_CASE (SYMBOL_5 = 11 or SYMBOL_6 = 12, depending on
     // the Windows version)
     IF ScriptMember is greater than MAX_SPECIAL_CASE  THEN
  
         //
         //  No special case on character, but has to check for
         //  contraction characters and Hungarian special 
         //  character sequence characters.
         //
  
         SET HasHungarianSpecialCharacterSequence to CALL                                   
             TestHungarianCharacterSequences
                  WITH (SortLocale, SourceString, SourceIndex)
  
         SET Result to CALL GetContractionType WITH (CharacterWeight)
  
         CASE Result OF
            
            "3-character Contraction":
                COMMENT This is only possible for Windows versions that 
                COMMENT are Windows NT 4.0 through Windows Server 2003
                Set ContractionFound to CALL SortkeyContractionHandler  
                  WITH (SortLocale, SourceString, SourceIndex,   
                        HasHungarianSpecialCharacterSequence, 3, 
                         UnicodeWeights, DiacriticWeights, CaseWeights)
                 IF ContractionFound is true THEN
                     COMMENT Break out of the case statement
                     BREAK
                 ENDIF
                COMMENT If no contraction is found, fall through into additional cases.
                FALLTHROUGH
  
            "2-character Contraction":
                COMMENT This is only possible for Windows versions that are 
                COMMENT Windows NT 4.0 through Windows Server 2003
                Set ContractionFound to CALL SortkeyContractionHandler  
                 WITH (SortLocale, SourceString, SourceIndex,             
                       HasHungarianSpecialCharacterSequence, 2,
                        UnicodeWeights, DiacriticWeights, CaseWeights)
                IF ContractionFound is true THEN
                    COMMENT Break out of the case statement
                    BREAK
                ENDIF
                COMMENT If no contraction is found, fall through into the OTHER case.
                COMMENT Since "3-character contraction" or "2-character contraction" 
                COMMENT are the only two possible values for 
                COMMENT Windows NT 4.0 through Windows Server 2003, all calls to 
                COMMENT SortkeyContractionHandler  will return false.
                COMMENT So, the fallthrough will go directly to the OTHERS section
                FALLTHROUGH
  
            "6-character contraction, 7-character contraction, or 8-character contraction":
                Set ContractionFound to CALL SortkeyContractionHandler  
                 WITH (SortLocale, SourceString, SourceIndex,  
                       HasHungarianSpecialCharacterSequence, 8,
                        UnicodeWeights, DiacriticWeights, CaseWeights)
                IF ContractionFound is true THEN
                    COMMENT Break out of the case statement
                    BREAK
                ELSE
                    Set ContractionFound to CALL SortkeyContractionHandler 
                     WITH (SortLocale, SourceString, SourceIndex,  
                           HasHungarianSpecialCharacterSequence, 7,
                            UnicodeWeights, DiacriticWeights, CaseWeights)
                ENDIF
                IF ContractionFound is true THEN
                    COMMENT Break out of the case statement
                    BREAK
                ELSE
                    Set ContractionFound to CALL SortkeyContractionHandler  
                     WITH (SortLocale, SourceString, SourceIndex,  
                           HasHungarianSpecialCharacterSequence, 6,
                            UnicodeWeights, DiacriticWeights, CaseWeights)
                ENDIF
                IF ContractionFound is true THEN
                    COMMENT Break out of the case statement
                    BREAK
                ENDIF
                COMMENT If no contraction is found, fall through into additional cases.
                FALLTHROUGH
  
            "4-character contraction or 5-character contraction":
                Set ContractionFound to CALL SortkeyContractionHandler  
                 WITH (SortLocale, SourceString, SourceIndex,             
                       HasHungarianSpecialCharacterSequence, 5,
                        UnicodeWeights, DiacriticWeights, CaseWeights)
                IF ContractionFound is true THEN
                    COMMENT Break out of the case statement
                    BREAK
                ELSE
                    Set ContractionFound to CALL SortkeyContractionHandler 
                     WITH (SortLocale, SourceString, SourceIndex,  
                           HasHungarianSpecialCharacterSequence, 4,
                            UnicodeWeights, DiacriticWeights, CaseWeights)
                ENDIF
                IF ContractionFound is true THEN
                    COMMENT Break out of the case statement
                    BREAK
                ENDIF
                COMMENT If no contraction is found, fall through into additional cases.
                FALLTHROUGH
  
            "2-character contraction or 3-character contraction":
                Set ContractionFound to CALL SortkeyContractionHandler  
                 WITH (SortLocale, SourceString, SourceIndex,      
                       HasHungarianSpecialCharacterSequence, 3,
                        UnicodeWeights, DiacriticWeights, CaseWeights)
                IF ContractionFound is true THEN
                    COMMENT Break out of the case statement
                    BREAK
                ELSE
                    Set ContractionFound to CALL SortkeyContractionHandler 
                     WITH (SortLocale, SourceString, SourceIndex,    
                           HasHungarianSpecialCharacterSequence, 2,
                            UnicodeWeights, DiacriticWeights, CaseWeights)
                ENDIF
                IF ContractionFound is true THEN
                    COMMENT Break out of the case statement
                    BREAK
                ENDIF
                COMMENT If no contraction is found, fall through into additional cases.
                FALLTHROUGH
  
  
            OTHERS :
               IF Windows version is greater than Windows Server 2008 R2 or Windows 7 
                 THEN
                   COMMENT In Windows Server 2008 R2 or Windows 7, 
                   COMMENT Private Use Area (PUA) code points 
                   COMMENT and some CJK (Chinese/Japanese/Korean) sorts 
                   COMMENT might need 3 byte weights
                   COMMENT Store normal Unicode weight first. Note that there is no 
                   COMMENT adjustment of Korean weight anymore.
                   SET UnicodeWeight to 
                      CorrectUnicodeWeight(CharacterWeight, FALSE)
                   COMMENT Assume 3-byte Unicode Weight is not used first. 
                    COMMENT  The algorithm will check this later.
                    SET UnicodeWeight.ThirdByteWeight to 0
  
                   IF (ScriptMember is equal to or greater than PUA3BYTESTART)  
                      AND                       
                      (ScriptMember is less than or equal to PUA3BYTEEND) THEN
                        SET IsScriptMemberPUA3ByteWeight to true
                   ELSE
                       SET IsScriptMemberPUA3ByteWeight to false
                   ENDIF
     
                     
                   IF (ScriptMember is equal to or greater than CJK3BYTESTART) AND
                      (ScriptMember is less than or equal to CJK3BYTEEND) THEN
                       SET IsScriptMemberCJK3ByteWeight to true
                   ELSE
                    SET IsScriptMemberCJK3ByteWeight to false
                   ENDIF
                   IF (IsScriptMemberPUA3ByteWeight is true) OR 
                      (Is3ByteWeightLocale AND 
                       IsScriptMemberCJK3ByteWeight is true) THEN
                       COMMENT PUA code points and some CJK sorts need 3 byte weights
                       SET UnicodeWeight.ThirdByteWeight to CharacterWeight.DiacriticWeight
                   ELSE
                     
                       COMMENT Normal Diacritic Weight
                       APPEND CharacterWeight.DiacriticWeight to DiacriticWeights as a BYTE
                   ENDIF
                   APPEND UnicodeWeight to UnicodeWeights
  
                    SET CaseWeight to GetCaseWeight(CharacterWeight)
                    APPEND CaseWeight to CaseWeights as a BYTE
  
               ELSE
  
                   SET UnicodeWeight to 
                      CorrectUnicodeWeight(CharacterWeight, IsKoreanLocale)
                   APPEND UnicodeWeight to UnicodeWeights
                   APPEND CharacterWeight.DiacriticWeight to DiacriticWeights                                                      
                          as a BYTE
                    SET CaseWeight to GetCaseWeight(CharacterWeight)
                    APPEND CaseWeight to CaseWeights as a BYTE
               ENDIF
        ENDCASE
     ELSE
        CALL SpecialCaseHandler WITH (SourceString, SourceIndex,
                   UnicodeWeights, ExtraWeights, SpecialWeights,
                   SortLocale, IsKoreanLocale)
     ENDIF
 ENDFOR
  
 //
 //  Store the Unicode Weights in the destination buffer.
 //
 FOR each UnicodeWeight in UnicodeWeights
     //
     //  Copy Unicode weight to destination buffer.
     //
     APPEND UnicodeWeight.ScriptMember to SortKey as a BYTE
     APPEND UnicodeWeight.PrimaryWeight to SortKey as a BYTE
    IF Windows version is greater than Windows Server 2008 R2 or Windows 7 THEN
        IF UnicodeWeight.ThirdByteWeight is not 0 THEN
            COMMENT When 3-byte Unicode Weight is used, append the additional 
            COMMENT BYTE into SortKey
            APPEND UnicodeWeight.ThirdByteWeight to SortKey as a BYTE
        ENDIF
     ENDIF
  
 ENDFOR
  
 //
 //  Copy Separator to destination buffer.
 //
 APPEND SORTKEY_SEPARATOR to SortKey as a BYTE
  
 //
 //  Store Diacritic Weights in the destination buffer.
 //
 IF (NORM_IGNORENONSPACE bit is not turned on in Flags) THEN
     IF (IsReverseDW is TRUE) THEN
        //
        //  Reverse diacritics:
        //    - remove diacritics from left  to right.
        //    - store  diacritics from right to left.
        //
        FOR each DiacriticWeight in
            DiacriticWeights in the "first in first out" order
           IF DiacriticWeight <= MIN_DW THEN
              REMOVE DiacriticWeight from DiacriticWeights
           ELSE
              BREAK from the current FOR loop
           ENDIF
        ENDFOR
  
        FOR each DiacriticWeight in
            DiacriticWeights in the "last in first out" order
           //
            //  Copy diacritic weight to destination buffer.
           //
           APPEND DiacriticWeight to SortKey as a BYTE
        ENDFOR
     ELSE
        //
        //  Regular diacritics:
        //    - remove diacritics from right to left.
        //    - store  diacritics from left  to right.
        FOR each DiacriticWeight in
            DiacriticWeights in the "last in first out" order
            IF DiacriticWeight <= MIN_DW THEN
               REMOVE DiacriticWeight from DiacriticWeights
            ELSE
               BREAK from the current FOR loop
            ENDIF
        ENDFOR
  
        FOR each DiacriticWeight in
            DiacriticWeights in the order of "first in first out"
            //
             //  Copy diacritic weight to destination buffer.
            //
            APPEND DiacriticWeight to SortKey as a BYTE
        ENDFOR
     ENDIF
 ENDIF
  
 //
 //  Copy Separator to destination buffer.
 //
 APPEND SORTKEY_SEPARATOR to SortKey as a BYTE
  
 //
 //  Store case Weights
 //
 //    - Eliminate minimum CW.
 //    - Copy case weights to destination buffer.
 //
 IF (NORM_IGNORECASE bit is not turned on in Flags
      OR NORM_IGNOREWIDTH bit is not turned on in Flags) THEN
     FOR each CaseWeight in CaseWeights
         in the "last in first out" order
         IF CaseWeight <= MIN_CW THEN
            REMOVE CaseWeight from CaseWeights
         ELSE
            BREAK from the current FOR loop
         ENDIF
     ENDFOR
  
     FOR each CaseWeight in CaseWeights
        //
         //  Copy case weight to destination buffer.
        //
        APPEND CaseWeight to SortKey as a BYTE
     ENDFOR
 ENDIF
  
 //
 //  Copy Separator to destination buffer.
 //
 APPEND SORTKEY_SEPARATOR to SortKey as a BYTE
  
 //
 //  Store the Extra Weights in the destination buffer for
 //  EAST ASIA Special.
 //
 //    - Eliminate unnecessary XW.
 //    - Copy extra weights to destination buffer.
 //
 IF Length(ExtraWeights) is greater than 0 THEN
     IF (NORM_IGNORENONSPACE bit is turned on in Flags) THEN
        APPEND 0xff to SortKey as a BYTE
        APPEND 0x02 to SortKey as a BYTE
     ENDIF
  
    // Append W6 group to SortKey
    // Trim unused values from the end of the string
    SET EndExtraWeight to Length(ExtraWeights) - 1
  
    WHILE EndExtraWeight greater than 0 and
         ExtraWeights[EndExtraWeight].W6 == 0xe4
       DECREMENT EndExtraWeight
    ENDWHILE
  
    SET ExtraWeightIndex to 0
    WHILE ExtraWeightIndex is less than or equal to EndExtraWeight
       APPEND ExtraWeights[ExtraWeightIndex].W6
         to SortKey as a BYTE
       INCREMENT ExtraWeightIndex
    ENDWHILE
  
    // Append W6 separator
    APPEND 0xff to SortKey as a BYTE
  
    // Append W7 group to SortKey
    // Trim unused values from the end of the string
    SET EndExtraWeight to Length(ExtraWeights) - 1
    WHILE EndExtraWeight greater than 0 and
          ExtraWeights[EndExtraWeight].W7 == 0xe4
       DECREMENT EndExtraWeight
    ENDWHILE
  
    SET ExtraWeightIndex to 0
    WHILE ExtraWeightIndex is less than or equal to EndExtraWeight
       APPEND ExtraWeights[ExtraWeightIndex].W7 to SortKey as a BYTE
       INCREMENT ExtraWeightIndex
    ENDWHILE
  
    // Append W7 separator
    APPEND 0xff to SortKey as a BYTE
 ENDIF
  
 //
 //  Copy Separator to destination buffer.
 //
 APPEND SORTKEY_SEPARATOR to SortKey as a BYTE
  
 //
 //  Store the Special Weights in the destination buffer.
 //
 //    - Copy special weights to destination buffer.
 //
 FOR each SpecialWeight in SpecialWeights
    // High byte (most significant)
    SET Byte1 to SpecialWeight.Position >> 8
    // Low byte (least significant)
    SET Byte2 to SpecialWeight.Position & 0xff
    APPEND Byte1 to SortKey as a BYTE
    APPEND Byte2 to SortKey as a BYTE
    APPEND SpecialWeight.ScriptMember to SortKey as a BYTE
    APPEND SpecialWeight.PrimaryWeight to SortKey as a BYTE
 ENDFOR
  
 //
 //  Copy terminator to destination buffer.
 //
 APPEND SORTKEY_TERMINATOR to SortKey as a BYTE
  
 RETURN SortKey
Partager via

3.1.5.2.4 GetWindowsSortKey Pseudocode

Ressources supplémentaires