]> Cypherpunks.ru repositories - gostls13.git/commitdiff
unicode: correctly handle negative runes
authorDavid Benjamin <davidben@google.com>
Fri, 25 Dec 2020 17:02:55 +0000 (12:02 -0500)
committerEmmanuel Odeke <emmanuel@orijtech.com>
Wed, 24 Feb 2021 04:00:46 +0000 (04:00 +0000)
Is and isExcludingLatin did not handle negative runes when dispatching
to is16. TestNegativeRune covers this along with the existing uint32
casts in IsGraphic, etc. (For tests, I picked the smallest non-Latin-1
code point in each range.)

Updates #43254

Change-Id: I17261b91f0d2b5b5125d19219411b45c480df74f
Reviewed-on: https://go-review.googlesource.com/c/go/+/280493
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Rob Pike <r@golang.org>
Trust: Emmanuel Odeke <emmanuel@orijtech.com>

src/unicode/letter.go
src/unicode/letter_test.go

index a57566f0a527cf7132f52ea1bd99caf02087dfab..268e457a875311bb01b34a6d7e106bae1dde4428 100644 (file)
@@ -154,7 +154,8 @@ func is32(ranges []Range32, r uint32) bool {
 // Is reports whether the rune is in the specified table of ranges.
 func Is(rangeTab *RangeTable, r rune) bool {
        r16 := rangeTab.R16
-       if len(r16) > 0 && r <= rune(r16[len(r16)-1].Hi) {
+       // Compare as uint32 to correctly handle negative runes.
+       if len(r16) > 0 && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
                return is16(r16, uint16(r))
        }
        r32 := rangeTab.R32
@@ -166,7 +167,8 @@ func Is(rangeTab *RangeTable, r rune) bool {
 
 func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
        r16 := rangeTab.R16
-       if off := rangeTab.LatinOffset; len(r16) > off && r <= rune(r16[len(r16)-1].Hi) {
+       // Compare as uint32 to correctly handle negative runes.
+       if off := rangeTab.LatinOffset; len(r16) > off && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
                return is16(r16[off:], uint16(r))
        }
        r32 := rangeTab.R32
index 19ee535d57e15e3dee16fa73cf1e368781f330d9..a91e3a326fd2903b0081697d9e8a15241bfc8e74 100644 (file)
@@ -563,3 +563,82 @@ func TestSpecialCaseNoMapping(t *testing.T) {
                t.Errorf("got %q; want %q", got, want)
        }
 }
+
+func TestNegativeRune(t *testing.T) {
+       // Issue 43254
+       // These tests cover negative rune handling by testing values which,
+       // when cast to uint8 or uint16, look like a particular valid rune.
+       // This package has Latin-1-specific optimizations, so we test all of
+       // Latin-1 and representative non-Latin-1 values in the character
+       // categories covered by IsGraphic, etc.
+       nonLatin1 := []uint32{
+               // Lu: LATIN CAPITAL LETTER A WITH MACRON
+               0x0100,
+               // Ll: LATIN SMALL LETTER A WITH MACRON
+               0x0101,
+               // Lt: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+               0x01C5,
+               // M: COMBINING GRAVE ACCENT
+               0x0300,
+               // Nd: ARABIC-INDIC DIGIT ZERO
+               0x0660,
+               // P: GREEK QUESTION MARK
+               0x037E,
+               // S: MODIFIER LETTER LEFT ARROWHEAD
+               0x02C2,
+               // Z: OGHAM SPACE MARK
+               0x1680,
+       }
+       for i := 0; i < MaxLatin1+len(nonLatin1); i++ {
+               base := uint32(i)
+               if i >= MaxLatin1 {
+                       base = nonLatin1[i-MaxLatin1]
+               }
+
+               // Note r is negative, but uint8(r) == uint8(base) and
+               // uint16(r) == uint16(base).
+               r := rune(base - 1<<31)
+               if Is(Letter, r) {
+                       t.Errorf("Is(Letter, 0x%x - 1<<31) = true, want false", base)
+               }
+               if IsControl(r) {
+                       t.Errorf("IsControl(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsDigit(r) {
+                       t.Errorf("IsDigit(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsGraphic(r) {
+                       t.Errorf("IsGraphic(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsLetter(r) {
+                       t.Errorf("IsLetter(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsLower(r) {
+                       t.Errorf("IsLower(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsMark(r) {
+                       t.Errorf("IsMark(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsNumber(r) {
+                       t.Errorf("IsNumber(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsPrint(r) {
+                       t.Errorf("IsPrint(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsPunct(r) {
+                       t.Errorf("IsPunct(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsSpace(r) {
+                       t.Errorf("IsSpace(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsSymbol(r) {
+                       t.Errorf("IsSymbol(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsTitle(r) {
+                       t.Errorf("IsTitle(0x%x - 1<<31) = true, want false", base)
+               }
+               if IsUpper(r) {
+                       t.Errorf("IsUpper(0x%x - 1<<31) = true, want false", base)
+               }
+       }
+}