BLT X12, X9, f_loop4_check
// Check alignment - if alignment differs we have to do one byte at a time.
- AND $3, X10, X5
- AND $3, X11, X6
+ AND $7, X10, X5
+ AND $7, X11, X6
BNE X5, X6, f_loop8_unaligned_check
BEQZ X5, f_loop_check
// Move one byte at a time until we reach 8 byte alignment.
+ SUB X5, X9, X5
SUB X5, X12, X12
f_align:
ADD $-1, X5
BLT X12, X9, b_loop4_check
// Check alignment - if alignment differs we have to do one byte at a time.
- AND $3, X10, X5
- AND $3, X11, X6
+ AND $7, X10, X5
+ AND $7, X11, X6
BNE X5, X6, b_loop8_unaligned_check
BEQZ X5, b_loop_check
})
}
+func BenchmarkMemmoveUnalignedSrcDst(b *testing.B) {
+ for _, n := range []int{16, 64, 256, 4096, 65536} {
+ buf := make([]byte, (n+8)*2)
+ x := buf[:len(buf)/2]
+ y := buf[len(buf)/2:]
+ for _, off := range []int{0, 1, 4, 7} {
+ b.Run(fmt.Sprint("f_", n, off), func(b *testing.B) {
+ b.SetBytes(int64(n))
+ for i := 0; i < b.N; i++ {
+ copy(x[off:n+off], y[off:n+off])
+ }
+ })
+
+ b.Run(fmt.Sprint("b_", n, off), func(b *testing.B) {
+ b.SetBytes(int64(n))
+ for i := 0; i < b.N; i++ {
+ copy(y[off:n+off], x[off:n+off])
+ }
+ })
+ }
+ }
+}
+
func BenchmarkMemmoveUnalignedSrcOverlap(b *testing.B) {
benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) {
x := make([]byte, n+1)