1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// TestMemmove exercises the built-in copy (backed by runtime memmove)
// for every (length n, src offset x, dst offset y) combination up to
// some size, checking the bytes before, inside, and after the
// destination window. src is filled with values >= 128 and dst with
// values < 128 so copied and untouched bytes are distinguishable.
// NOTE(review): this chunk is missing interior lines (the `size`
// setup and closing braces are not visible) — comments describe only
// the visible code.
19 func TestMemmove(t *testing.T) {
28 src := make([]byte, size)
29 dst := make([]byte, size)
// Distinct fill patterns: src bytes have the high bit set, dst bytes do not.
30 for i := 0; i < size; i++ {
31 src[i] = byte(128 + (i & 127))
33 for i := 0; i < size; i++ {
34 dst[i] = byte(i & 127)
// Exhaustively try every copy length and every src/dst offset pair.
36 for n := 0; n <= size; n++ {
37 for x := 0; x <= size-n; x++ { // offset in src
38 for y := 0; y <= size-n; y++ { // offset in dst
39 copy(dst[y:y+n], src[x:x+n])
// Bytes before the destination window must be untouched.
40 for i := 0; i < y; i++ {
41 if dst[i] != byte(i&127) {
42 t.Fatalf("prefix dst[%d] = %d", i, dst[i])
// The copied window must mirror src[x:x+n] exactly.
45 for i := y; i < y+n; i++ {
46 if dst[i] != byte(128+((i-y+x)&127)) {
47 t.Fatalf("copied dst[%d] = %d", i, dst[i])
49 dst[i] = byte(i & 127) // reset dst
// Bytes after the destination window must be untouched.
51 for i := y + n; i < size; i++ {
52 if dst[i] != byte(i&127) {
53 t.Fatalf("suffix dst[%d] = %d", i, dst[i])
// TestMemmoveAlias is like TestMemmove but copies within a single
// buffer, so source and destination may overlap — the case where
// memmove's direction handling matters. buf[i] is initialized to
// byte(i) (the fill loop body is not visible in this chunk — the
// checks below imply it), so every position is identifiable.
// NOTE(review): interior lines and closing braces are missing here;
// comments cover only the visible code.
61 func TestMemmoveAlias(t *testing.T) {
70 buf := make([]byte, size)
71 for i := 0; i < size; i++ {
// Try every overlapping (length, src offset, dst offset) combination.
74 for n := 0; n <= size; n++ {
75 for x := 0; x <= size-n; x++ { // src offset
76 for y := 0; y <= size-n; y++ { // dst offset
77 copy(buf[y:y+n], buf[x:x+n])
// Prefix before the destination window must be untouched.
78 for i := 0; i < y; i++ {
79 if buf[i] != byte(i) {
80 t.Fatalf("prefix buf[%d] = %d", i, buf[i])
// Destination window must hold the original bytes from the source window.
83 for i := y; i < y+n; i++ {
84 if buf[i] != byte(i-y+x) {
85 t.Fatalf("copied buf[%d] = %d", i, buf[i])
87 buf[i] = byte(i) // reset buf
// Suffix after the destination window must be untouched.
89 for i := y + n; i < size; i++ {
90 if buf[i] != byte(i) {
91 t.Fatalf("suffix buf[%d] = %d", i, buf[i])
// TestMemmoveLarge0x180000 runs the randomized copy check (testSize)
// on a 0x180000-byte (1.5 MiB) buffer. It is skipped in -short mode
// off the build dashboard, and (per the visible t.Skip) under the
// race detector — the condition guarding that skip is not visible in
// this chunk.
99 func TestMemmoveLarge0x180000(t *testing.T) {
100 if testing.Short() && testenv.Builder() == "" {
106 t.Skip("skipping large memmove test under race detector")
108 testSize(t, 0x180000)
// TestMemmoveOverlapLarge0x120000 runs the overlapping-copy check
// (testOverlap) on a 0x120000-byte (1.125 MiB) buffer, with the same
// short-mode / race-detector skips as the large non-overlap test.
// NOTE(review): the skip conditions between lines 100 and 108 of the
// original are not fully visible here.
111 func TestMemmoveOverlapLarge0x120000(t *testing.T) {
112 if testing.Short() && testenv.Builder() == "" {
118 t.Skip("skipping large memmove test under race detector")
120 testOverlap(t, 0x120000)
// testSize cross-checks the built-in copy against the byte-by-byte
// reference implementation copyref on random data. Rather than being
// exhaustive, it samples lengths by halving (n >>= 1) and offsets by
// multiplicative stepping (x*7+1, y*9+1) to keep large sizes fast.
// NOTE(review): lines between the two copies and the Fatalf (the
// comparison producing offset p) are not visible in this chunk.
123 func testSize(t *testing.T, size int) {
124 src := make([]byte, size)
125 dst := make([]byte, size)
// Random contents; errors deliberately ignored (rand.Read on these
// sources does not fail in practice).
126 _, _ = rand.Read(src)
127 _, _ = rand.Read(dst)
129 ref := make([]byte, size)
132 for n := size - 50; n > 1; n >>= 1 {
133 for x := 0; x <= size-n; x = x*7 + 1 { // offset in src
134 for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst
135 copy(dst[y:y+n], src[x:x+n])
136 copyref(ref[y:y+n], src[x:x+n])
139 t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, y+n, p, dst[p], ref[p])
// testOverlap cross-checks overlapping copies within one buffer
// against reference implementations: copyref (forward) and copybw
// (backward) — presumably selected by overlap direction, though the
// conditional choosing between them is not visible in this chunk.
// Offsets are sampled the same way as testSize.
146 func testOverlap(t *testing.T, size int) {
147 src := make([]byte, size)
148 test := make([]byte, size)
149 ref := make([]byte, size)
150 _, _ = rand.Read(src)
152 for n := size - 50; n > 1; n >>= 1 {
153 for x := 0; x <= size-n; x = x*7 + 1 { // offset in src
154 for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst
// Overlapping copy under test.
158 copy(test[y:y+n], test[x:x+n])
// Reference copies: forward and backward variants.
160 copyref(ref[y:y+n], ref[x:x+n])
162 copybw(ref[y:y+n], ref[x:x+n])
166 t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, y+n, p, test[p], ref[p])
// copyref is a trivial byte-by-byte forward reference copy used to
// validate the optimized memmove. The loop body (presumably
// dst[i] = v) is not visible in this chunk.
175 func copyref(dst, src []byte) {
176 for i, v := range src {
// copybw is a byte-by-byte reference copy that walks backward (high
// index to low), for validating overlapping copies where dst > src.
// The loop body is not visible in this chunk.
182 func copybw(dst, src []byte) {
186 for i := len(src) - 1; i >= 0; i-- {
191 // Returns offset of difference
// matchLen scans a against b and returns the offset of the first
// differing byte (capped at max, per the parameter — the lines using
// b and max are not visible in this chunk).
192 func matchLen(a, b []byte, max int) int {
195 for i, av := range a {
// cmpb compares a and b via matchLen; the visible call finds the
// first mismatch offset within len(a). The return handling is not
// visible in this chunk.
203 func cmpb(a, b []byte) int {
204 l := matchLen(a, b, len(a))
211 // Ensure that memmove writes pointers atomically, so the GC won't
212 // observe a partially updated pointer.
// TestMemmoveAtomicity stresses pointer-word atomicity: a writer
// repeatedly clears the destination with MemclrNoHeapPointers and
// (presumably, in lines not visible here) copies a pointer array into
// it, while the checker reads each slot and requires it to be either
// nil or the fully written pointer &x — never a torn value. The test
// is intentionally racy and is skipped under the race detector.
// NOTE(review): many interior lines (dst setup, the goroutine body,
// the handshake stores to `ready`) are missing from this chunk;
// comments are limited to what is visible.
213 func TestMemmoveAtomicity(t *testing.T) {
215 t.Skip("skip under the race detector -- this test is intentionally racy")
// Cover both copy directions and a spread of element counts.
220 for _, backward := range []bool{true, false} {
221 for _, n := range []int{3, 4, 5, 6, 7, 8, 9, 10, 15, 25, 49} {
224 // test copying [N]*int.
225 sz := uintptr(n * PtrSize)
226 name := fmt.Sprint(sz)
232 t.Run(name, func(t *testing.T) {
233 // Use overlapping src and dst to force forward/backward copy.
235 src := s[n-1 : 2*n-1]
// ready is the handshake between the copier goroutine and the checker.
247 var ready atomic.Uint32
249 sp := unsafe.Pointer(&src[0])
250 dp := unsafe.Pointer(&dst[0])
252 for i := 0; i < 10000; i++ {
// Clear dst without write barriers before each copy attempt.
254 MemclrNoHeapPointers(dp, sz)
// Spin until the copier signals it has started / finished a phase.
259 for ready.Load() == 0 {
263 for ready.Load() != 2 {
// A slot must be all-nil or all-&x; anything else is a torn pointer write.
266 if p != nil && p != &x {
267 t.Fatalf("got partially updated pointer %p at dst[%d], want either nil or %p", p, i, &x)
// benchmarkSizes runs fn as a sub-benchmark once per buffer size in
// sizes, naming each sub-benchmark after the size. (Any b.SetBytes
// call inside the closure is not visible in this chunk.)
276 func benchmarkSizes(b *testing.B, sizes []int, fn func(b *testing.B, n int)) {
277 for _, n := range sizes {
278 b.Run(fmt.Sprint(n), func(b *testing.B) {
// bufSizes covers every small length 0..16 (where memmove has
// per-length special cases) plus powers of two up to 4096.
285 var bufSizes = []int{
286 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
287 32, 64, 128, 256, 512, 1024, 2048, 4096,
// bufSizesOverlap omits the tiny lengths: the overlap benchmarks
// shift by 16 bytes, so lengths must be large enough to overlap.
289 var bufSizesOverlap = []int{
290 32, 64, 128, 256, 512, 1024, 2048, 4096,
// The benchmarks below time the built-in copy (runtime memmove) over
// the size tables above, varying alignment and overlap:
//   - BenchmarkMemmove: aligned, non-overlapping copy.
//   - *Overlap variants: copy within one buffer shifted by 16 bytes.
//   - Unaligned{Dst,Src} variants: offset one side by 1 byte.
//   - UnalignedSrcDst: both sides offset by 0/1/4/7, forward ("f_")
//     and backward ("b_") directions.
// NOTE(review): several loop bodies and closing braces are not
// visible in this chunk.
293 func BenchmarkMemmove(b *testing.B) {
294 benchmarkSizes(b, bufSizes, func(b *testing.B, n int) {
297 for i := 0; i < b.N; i++ {
303 func BenchmarkMemmoveOverlap(b *testing.B) {
304 benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) {
305 x := make([]byte, n+16)
306 for i := 0; i < b.N; i++ {
// Destination 16 bytes ahead of source within the same buffer.
307 copy(x[16:n+16], x[:n])
312 func BenchmarkMemmoveUnalignedDst(b *testing.B) {
313 benchmarkSizes(b, bufSizes, func(b *testing.B, n int) {
314 x := make([]byte, n+1)
316 for i := 0; i < b.N; i++ {
322 func BenchmarkMemmoveUnalignedDstOverlap(b *testing.B) {
323 benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) {
324 x := make([]byte, n+16)
325 for i := 0; i < b.N; i++ {
// Overlapping copy with the source misaligned by one byte.
326 copy(x[16:n+16], x[1:n+1])
331 func BenchmarkMemmoveUnalignedSrc(b *testing.B) {
332 benchmarkSizes(b, bufSizes, func(b *testing.B, n int) {
334 y := make([]byte, n+1)
335 for i := 0; i < b.N; i++ {
341 func BenchmarkMemmoveUnalignedSrcDst(b *testing.B) {
342 for _, n := range []int{16, 64, 256, 4096, 65536} {
// One backing allocation split in half so x and y have a fixed
// relative alignment.
343 buf := make([]byte, (n+8)*2)
344 x := buf[:len(buf)/2]
345 y := buf[len(buf)/2:]
346 for _, off := range []int{0, 1, 4, 7} {
// "f_" = forward (y into x), "b_" = backward (x into y).
347 b.Run(fmt.Sprint("f_", n, off), func(b *testing.B) {
349 for i := 0; i < b.N; i++ {
350 copy(x[off:n+off], y[off:n+off])
354 b.Run(fmt.Sprint("b_", n, off), func(b *testing.B) {
356 for i := 0; i < b.N; i++ {
357 copy(y[off:n+off], x[off:n+off])
364 func BenchmarkMemmoveUnalignedSrcOverlap(b *testing.B) {
365 benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) {
366 x := make([]byte, n+1)
367 for i := 0; i < b.N; i++ {
// One-byte overlap shift with unaligned source.
368 copy(x[1:n+1], x[:n])
// TestMemclr exhaustively clears every sub-slice mem[x:x+n] with
// MemclrBytes and verifies the cleared window is zero while the
// bytes before and after it are untouched. (The fill value and the
// comparison expressions inside the check loops are not visible in
// this chunk.)
373 func TestMemclr(t *testing.T) {
378 mem := make([]byte, size)
379 for i := 0; i < size; i++ {
// Try every clear length n at every offset x.
382 for n := 0; n < size; n++ {
383 for x := 0; x <= size-n; x++ { // offset in mem
384 MemclrBytes(mem[x : x+n])
// Prefix before the cleared window must be untouched.
385 for i := 0; i < x; i++ {
387 t.Fatalf("overwrite prefix mem[%d] = %d", i, mem[i])
// The cleared window itself must be all zero.
390 for i := x; i < x+n; i++ {
392 t.Fatalf("failed clear mem[%d] = %d", i, mem[i])
// Suffix after the cleared window must be untouched.
396 for i := x + n; i < size; i++ {
398 t.Fatalf("overwrite suffix mem[%d] = %d", i, mem[i])
// BenchmarkMemclr times memory clearing at small sizes (5..65536
// bytes) and at megabyte sizes (1M..64M); BenchmarkMemclrUnaligned
// repeats both sets with the buffer start offset by 0/1/4/7 bytes.
// BenchmarkGoMemclr times the compiler-generated clear for small
// sizes. NOTE(review): the cleared-slice expressions inside the inner
// loops are not visible in this chunk.
405 func BenchmarkMemclr(b *testing.B) {
406 for _, n := range []int{5, 16, 64, 256, 4096, 65536} {
408 b.Run(fmt.Sprint(n), func(b *testing.B) {
410 for i := 0; i < b.N; i++ {
// Megabyte-scale clears, reported as throughput via SetBytes.
415 for _, m := range []int{1, 4, 8, 16, 64} {
416 x := make([]byte, m<<20)
417 b.Run(fmt.Sprint(m, "M"), func(b *testing.B) {
418 b.SetBytes(int64(m << 20))
419 for i := 0; i < b.N; i++ {
426 func BenchmarkMemclrUnaligned(b *testing.B) {
427 for _, off := range []int{0, 1, 4, 7} {
428 for _, n := range []int{5, 16, 64, 256, 4096, 65536} {
// Over-allocate by off so the cleared region can start unaligned.
429 x := make([]byte, n+off)
430 b.Run(fmt.Sprint(off, n), func(b *testing.B) {
432 for i := 0; i < b.N; i++ {
439 for _, off := range []int{0, 1, 4, 7} {
440 for _, m := range []int{1, 4, 8, 16, 64} {
441 x := make([]byte, (m<<20)+off)
442 b.Run(fmt.Sprint(off, m, "M"), func(b *testing.B) {
443 b.SetBytes(int64(m << 20))
444 for i := 0; i < b.N; i++ {
452 func BenchmarkGoMemclr(b *testing.B) {
453 benchmarkSizes(b, []int{5, 16, 64, 256}, func(b *testing.B, n int) {
455 for i := 0; i < b.N; i++ {
// BenchmarkMemclrRange clears a realistic mix of lengths per
// iteration instead of one fixed size: each RunData holds a list of
// lengths (1–2 KiB, 2–8 KiB, 4–16 KiB, 128–256 KiB buckets), and one
// benchmark iteration clears the shared buffer once per length. The
// sub-benchmark name encodes the min/max of the bucket in K or M
// units. NOTE(review): the RunData field declarations, the minLen
// assignment, and the `total` accumulation are not visible in this
// chunk.
463 func BenchmarkMemclrRange(b *testing.B) {
464 type RunData struct {
468 benchSizes := []RunData{
469 {[]int{1043, 1078, 1894, 1582, 1044, 1165, 1467, 1100, 1919, 1562, 1932, 1645,
470 1412, 1038, 1576, 1200, 1029, 1336, 1095, 1494, 1350, 1025, 1502, 1548, 1316, 1296,
471 1868, 1639, 1546, 1626, 1642, 1308, 1726, 1665, 1678, 1187, 1515, 1598, 1353, 1237,
472 1977, 1452, 2012, 1914, 1514, 1136, 1975, 1618, 1536, 1695, 1600, 1733, 1392, 1099,
473 1358, 1996, 1224, 1783, 1197, 1838, 1460, 1556, 1554, 2020}}, // 1kb-2kb
474 {[]int{3964, 5139, 6573, 7775, 6553, 2413, 3466, 5394, 2469, 7336, 7091, 6745,
475 4028, 5643, 6164, 3475, 4138, 6908, 7559, 3335, 5660, 4122, 3945, 2082, 7564, 6584,
476 5111, 2288, 6789, 2797, 4928, 7986, 5163, 5447, 2999, 4968, 3174, 3202, 7908, 8137,
477 4735, 6161, 4646, 7592, 3083, 5329, 3687, 2754, 3599, 7231, 6455, 2549, 8063, 2189,
478 7121, 5048, 4277, 6626, 6306, 2815, 7473, 3963, 7549, 7255}}, // 2kb-8kb
479 {[]int{16304, 15936, 15760, 4736, 9136, 11184, 10160, 5952, 14560, 15744,
480 6624, 5872, 13088, 14656, 14192, 10304, 4112, 10384, 9344, 4496, 11392, 7024,
481 5200, 10064, 14784, 5808, 13504, 10480, 8512, 4896, 13264, 5600}}, // 4kb-16kb
482 {[]int{164576, 233136, 220224, 183280, 214112, 217248, 228560, 201728}}, // 128kb-256kb
485 for _, t := range benchSizes {
// Find the largest and smallest length in this bucket.
490 for _, clrLen := range t.data {
491 maxLen = max(maxLen, clrLen)
492 if clrLen < minLen || minLen == 0 {
// One buffer sized for the largest clear in the bucket.
497 buffer := make([]byte, maxLen)
// Human-readable "min max" label, scaled to M or K when large enough
// (maxLen is rounded up to the next unit boundary).
500 if minLen >= (1 << 20) {
501 text = fmt.Sprint(minLen>>20, "M ", (maxLen+(1<<20-1))>>20, "M")
502 } else if minLen >= (1 << 10) {
503 text = fmt.Sprint(minLen>>10, "K ", (maxLen+(1<<10-1))>>10, "K")
505 text = fmt.Sprint(minLen, " ", maxLen)
507 b.Run(text, func(b *testing.B) {
508 b.SetBytes(int64(total))
// One benchmark iteration = one clear per length in the bucket.
509 for i := 0; i < b.N; i++ {
510 for _, clrLen := range t.data {
511 MemclrBytes(buffer[:clrLen])
// The BenchmarkClearFatN family times clearing a fixed-size value by
// assigning its zero value (*p = [N/4]uint32{}), which the compiler
// lowers to its "clear fat" code paths for each size N in bytes.
// Odd sizes (7, 11, 13, 14, 15) use a different element type whose
// declaration is not visible in this chunk; sizes divisible by 4 use
// [N/4]uint32. The heap pointer p (rather than a local) keeps the
// compiler from eliding the clear — the sink assignments between the
// visible lines are not shown in this chunk.
518 func BenchmarkClearFat7(b *testing.B) {
522 for i := 0; i < b.N; i++ {
527 func BenchmarkClearFat8(b *testing.B) {
528 p := new([8 / 4]uint32)
531 for i := 0; i < b.N; i++ {
536 func BenchmarkClearFat11(b *testing.B) {
540 for i := 0; i < b.N; i++ {
545 func BenchmarkClearFat12(b *testing.B) {
546 p := new([12 / 4]uint32)
549 for i := 0; i < b.N; i++ {
550 *p = [12 / 4]uint32{}
554 func BenchmarkClearFat13(b *testing.B) {
558 for i := 0; i < b.N; i++ {
563 func BenchmarkClearFat14(b *testing.B) {
567 for i := 0; i < b.N; i++ {
572 func BenchmarkClearFat15(b *testing.B) {
576 for i := 0; i < b.N; i++ {
581 func BenchmarkClearFat16(b *testing.B) {
582 p := new([16 / 4]uint32)
585 for i := 0; i < b.N; i++ {
586 *p = [16 / 4]uint32{}
590 func BenchmarkClearFat24(b *testing.B) {
591 p := new([24 / 4]uint32)
594 for i := 0; i < b.N; i++ {
595 *p = [24 / 4]uint32{}
599 func BenchmarkClearFat32(b *testing.B) {
600 p := new([32 / 4]uint32)
603 for i := 0; i < b.N; i++ {
604 *p = [32 / 4]uint32{}
608 func BenchmarkClearFat40(b *testing.B) {
609 p := new([40 / 4]uint32)
612 for i := 0; i < b.N; i++ {
613 *p = [40 / 4]uint32{}
617 func BenchmarkClearFat48(b *testing.B) {
618 p := new([48 / 4]uint32)
621 for i := 0; i < b.N; i++ {
622 *p = [48 / 4]uint32{}
626 func BenchmarkClearFat56(b *testing.B) {
627 p := new([56 / 4]uint32)
630 for i := 0; i < b.N; i++ {
631 *p = [56 / 4]uint32{}
635 func BenchmarkClearFat64(b *testing.B) {
636 p := new([64 / 4]uint32)
639 for i := 0; i < b.N; i++ {
640 *p = [64 / 4]uint32{}
644 func BenchmarkClearFat72(b *testing.B) {
645 p := new([72 / 4]uint32)
648 for i := 0; i < b.N; i++ {
649 *p = [72 / 4]uint32{}
653 func BenchmarkClearFat128(b *testing.B) {
654 p := new([128 / 4]uint32)
657 for i := 0; i < b.N; i++ {
658 *p = [128 / 4]uint32{}
662 func BenchmarkClearFat256(b *testing.B) {
663 p := new([256 / 4]uint32)
666 for i := 0; i < b.N; i++ {
667 *p = [256 / 4]uint32{}
671 func BenchmarkClearFat512(b *testing.B) {
672 p := new([512 / 4]uint32)
675 for i := 0; i < b.N; i++ {
676 *p = [512 / 4]uint32{}
680 func BenchmarkClearFat1024(b *testing.B) {
681 p := new([1024 / 4]uint32)
684 for i := 0; i < b.N; i++ {
685 *p = [1024 / 4]uint32{}
689 func BenchmarkClearFat1032(b *testing.B) {
690 p := new([1032 / 4]uint32)
693 for i := 0; i < b.N; i++ {
694 *p = [1032 / 4]uint32{}
698 func BenchmarkClearFat1040(b *testing.B) {
699 p := new([1040 / 4]uint32)
702 for i := 0; i < b.N; i++ {
703 *p = [1040 / 4]uint32{}
// The BenchmarkCopyFatN family times copying a fixed-size value by
// whole-value assignment (presumably *p = x — the assignment lines
// are not visible in this chunk), exercising the compiler's "copy
// fat" code paths for each size N in bytes. Sizes divisible by 4 use
// [N/4]uint32; the odd sizes (7, 11, 13, 14, 15) use a source/dest
// type whose declaration is not visible here. p is heap-allocated
// via new so the copy cannot be optimized away into registers.
707 func BenchmarkCopyFat7(b *testing.B) {
712 for i := 0; i < b.N; i++ {
717 func BenchmarkCopyFat8(b *testing.B) {
719 p := new([8 / 4]uint32)
722 for i := 0; i < b.N; i++ {
727 func BenchmarkCopyFat11(b *testing.B) {
732 for i := 0; i < b.N; i++ {
737 func BenchmarkCopyFat12(b *testing.B) {
739 p := new([12 / 4]uint32)
742 for i := 0; i < b.N; i++ {
747 func BenchmarkCopyFat13(b *testing.B) {
752 for i := 0; i < b.N; i++ {
757 func BenchmarkCopyFat14(b *testing.B) {
762 for i := 0; i < b.N; i++ {
767 func BenchmarkCopyFat15(b *testing.B) {
772 for i := 0; i < b.N; i++ {
777 func BenchmarkCopyFat16(b *testing.B) {
779 p := new([16 / 4]uint32)
782 for i := 0; i < b.N; i++ {
787 func BenchmarkCopyFat24(b *testing.B) {
789 p := new([24 / 4]uint32)
792 for i := 0; i < b.N; i++ {
797 func BenchmarkCopyFat32(b *testing.B) {
799 p := new([32 / 4]uint32)
802 for i := 0; i < b.N; i++ {
807 func BenchmarkCopyFat64(b *testing.B) {
809 p := new([64 / 4]uint32)
812 for i := 0; i < b.N; i++ {
817 func BenchmarkCopyFat72(b *testing.B) {
819 p := new([72 / 4]uint32)
822 for i := 0; i < b.N; i++ {
// From 128 bytes up, the source x is declared explicitly as a local
// array of the matching size.
827 func BenchmarkCopyFat128(b *testing.B) {
828 var x [128 / 4]uint32
829 p := new([128 / 4]uint32)
832 for i := 0; i < b.N; i++ {
837 func BenchmarkCopyFat256(b *testing.B) {
838 var x [256 / 4]uint32
839 p := new([256 / 4]uint32)
842 for i := 0; i < b.N; i++ {
847 func BenchmarkCopyFat512(b *testing.B) {
848 var x [512 / 4]uint32
849 p := new([512 / 4]uint32)
852 for i := 0; i < b.N; i++ {
857 func BenchmarkCopyFat520(b *testing.B) {
858 var x [520 / 4]uint32
859 p := new([520 / 4]uint32)
862 for i := 0; i < b.N; i++ {
867 func BenchmarkCopyFat1024(b *testing.B) {
868 var x [1024 / 4]uint32
869 p := new([1024 / 4]uint32)
872 for i := 0; i < b.N; i++ {
877 func BenchmarkCopyFat1032(b *testing.B) {
878 var x [1032 / 4]uint32
879 p := new([1032 / 4]uint32)
882 for i := 0; i < b.N; i++ {
887 func BenchmarkCopyFat1040(b *testing.B) {
888 var x [1040 / 4]uint32
889 p := new([1040 / 4]uint32)
892 for i := 0; i < b.N; i++ {
897 // BenchmarkIssue18740 ensures that memmove uses 4 and 8 byte load/store to move 4 and 8 bytes.
898 // It used to do 2 2-byte load/stores, which leads to a pipeline stall
899 // when we try to read the result with one 4-byte load.
// Each sub-benchmark repeatedly reads a small buffer through a
// little-endian decoder of the matching width; the struct's name and
// nbyte field declarations are not visible in this chunk. The inner
// loop walks 4096 bytes in nbyte steps per outer iteration.
900 func BenchmarkIssue18740(b *testing.B) {
901 benchmarks := []struct {
904 f func([]byte) uint64
906 {"2byte", 2, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint16(buf)) }},
907 {"4byte", 4, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint32(buf)) }},
908 {"8byte", 8, func(buf []byte) uint64 { return binary.LittleEndian.Uint64(buf) }},
912 for _, bm := range benchmarks {
913 buf := make([]byte, bm.nbyte)
914 b.Run(bm.name, func(b *testing.B) {
915 for j := 0; j < b.N; j++ {
916 for i := 0; i < 4096; i += bm.nbyte {
// memclrSink receives the cleared slice in each KnownSize benchmark
// (the assignments are not visible in this chunk) so the compiler
// cannot eliminate the clear as dead code.
925 var memclrSink []int8
// The BenchmarkMemclrKnownSizeN family times clearing arrays whose
// length is a compile-time constant, letting the compiler pick its
// specialized constant-size clear sequence for each N. The array
// declarations and clear loops between the visible lines are not
// shown in this chunk.
927 func BenchmarkMemclrKnownSize1(b *testing.B) {
931 for i := 0; i < b.N; i++ {
939 func BenchmarkMemclrKnownSize2(b *testing.B) {
943 for i := 0; i < b.N; i++ {
951 func BenchmarkMemclrKnownSize4(b *testing.B) {
955 for i := 0; i < b.N; i++ {
963 func BenchmarkMemclrKnownSize8(b *testing.B) {
967 for i := 0; i < b.N; i++ {
975 func BenchmarkMemclrKnownSize16(b *testing.B) {
979 for i := 0; i < b.N; i++ {
987 func BenchmarkMemclrKnownSize32(b *testing.B) {
991 for i := 0; i < b.N; i++ {
999 func BenchmarkMemclrKnownSize64(b *testing.B) {
1003 for i := 0; i < b.N; i++ {
1011 func BenchmarkMemclrKnownSize112(b *testing.B) {
1015 for i := 0; i < b.N; i++ {
1024 func BenchmarkMemclrKnownSize128(b *testing.B) {
1028 for i := 0; i < b.N; i++ {
1037 func BenchmarkMemclrKnownSize192(b *testing.B) {
1041 for i := 0; i < b.N; i++ {
1050 func BenchmarkMemclrKnownSize248(b *testing.B) {
1054 for i := 0; i < b.N; i++ {
1063 func BenchmarkMemclrKnownSize256(b *testing.B) {
1067 for i := 0; i < b.N; i++ {
1075 func BenchmarkMemclrKnownSize512(b *testing.B) {
1079 for i := 0; i < b.N; i++ {
1087 func BenchmarkMemclrKnownSize1024(b *testing.B) {
1091 for i := 0; i < b.N; i++ {
1099 func BenchmarkMemclrKnownSize4096(b *testing.B) {
1103 for i := 0; i < b.N; i++ {
1111 func BenchmarkMemclrKnownSize512KiB(b *testing.B) {
1115 for i := 0; i < b.N; i++ {