//go:noescape
func Or(ptr *uint32, val uint32)
+//go:noescape
+func And32(ptr *uint32, val uint32) uint32
+
+//go:noescape
+func Or32(ptr *uint32, val uint32) uint32
+
+//go:noescape
+func And64(ptr *uint64, val uint64) uint64
+
+//go:noescape
+func Or64(ptr *uint64, val uint64) uint64
+
+//go:noescape
+func Anduintptr(ptr *uintptr, val uintptr) uintptr
+
+//go:noescape
+func Oruintptr(ptr *uintptr, val uintptr) uintptr
+
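Editorial note: unlike the pre-existing And/Or visible above, which discard the old word, all six new entry points are fetch-and-op primitives and return the value the word held before the mask was applied. A usage sketch of what that buys a caller (the helper name is mine, and this will not compile outside the runtime tree, since the package is internal):

	// claimBit sets bit i in *word and reports whether this caller was
	// the one to flip it on, judged by the old value Or32 hands back.
	func claimBit(word *uint32, i uint) bool {
		old := atomic.Or32(word, 1<<i)
		return old&(1<<i) == 0 // bit was clear before, so we claimed it
	}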
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
LOCK
ANDL BX, (AX)
RET
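(On the NOTE above: OR and AND are idempotent, (x|m)|m == x|m and (x&m)&m == x&m, so applying the same mask twice is indistinguishable from applying it once; XOR is instead its own inverse, (x^m)^m == x, so a repeated XOR silently undoes the first one.)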
+
+// func And32(ptr *uint32, val uint32) old uint32
+TEXT ·And32(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), CX
+casloop:
+ MOVL CX, DX
+ MOVL (BX), AX
+ ANDL AX, DX
+ LOCK
+ CMPXCHGL DX, (BX)
+ JNZ casloop
+ MOVL AX, ret+8(FP)
+ RET
+
+// func Or32(ptr *uint32, val uint32) old uint32
+TEXT ·Or32(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), CX
+casloop:
+ MOVL CX, DX
+ MOVL (BX), AX
+ ORL AX, DX
+ LOCK
+ CMPXCHGL DX, (BX)
+ JNZ casloop
+ MOVL AX, ret+8(FP)
+ RET
+
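For readers not fluent in 386 assembly, the loop shape in And32/Or32 is: load the current word into AX, build old&val (or old|val) in DX, and let LOCK CMPXCHGL store DX only if the word still equals AX; an interleaved writer clears ZF and JNZ retries. The old value riding in AX becomes the return value. A minimal, runnable Go model of the same loop (mine, not the CL's; sync/atomic stands in for the locked instructions):

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	// or32 mimics ·Or32: retry the compare-and-swap until no other writer
	// slipped in between our load and store, then return the pre-Or value.
	func or32(ptr *uint32, val uint32) uint32 {
		for {
			old := atomic.LoadUint32(ptr)
			if atomic.CompareAndSwapUint32(ptr, old, old|val) {
				return old
			}
		}
	}

	func main() {
		x := uint32(0b1010)
		old := or32(&x, 0b0110)
		fmt.Printf("old=%04b new=%04b\n", old, x) // old=1010 new=1110
	}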
+// func And64(ptr *uint64, val uint64) old uint64
+TEXT ·And64(SB), NOSPLIT, $0-20
+ MOVL ptr+0(FP), BP
+ // DI:SI = v
+ MOVL val_lo+4(FP), SI
+ MOVL val_hi+8(FP), DI
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+casloop:
+ // CX:BX = DX:AX (*addr) & DI:SI (mask)
+ MOVL AX, BX
+ MOVL DX, CX
+ ANDL SI, BX
+ ANDL DI, CX
+ LOCK
+ CMPXCHG8B 0(BP)
+ JNZ casloop
+ MOVL AX, ret_lo+12(FP)
+ MOVL DX, ret_hi+16(FP)
+ RET
+
+// func Or64(ptr *uint64, val uint64) old uint64
+TEXT ·Or64(SB), NOSPLIT, $0-20
+ MOVL ptr+0(FP), BP
+ // DI:SI = v
+ MOVL val_lo+4(FP), SI
+ MOVL val_hi+8(FP), DI
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+casloop:
+ // CX:BX = DX:AX (*addr) | DI:SI (mask)
+ MOVL AX, BX
+ MOVL DX, CX
+ ORL SI, BX
+ ORL DI, CX
+ LOCK
+ CMPXCHG8B 0(BP)
+ JNZ casloop
+ MOVL AX, ret_lo+12(FP)
+ MOVL DX, ret_hi+16(FP)
+ RET
+
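The 64-bit variants lean on CMPXCHG8B, whose operands are implicit: it compares DX:AX against the 8-byte memory operand and, if equal, stores CX:BX. Two details are worth noting. First, the initial MOVL pair is not an atomic 64-bit load; a torn read simply fails the compare and costs one retry. Second, on failure CMPXCHG8B itself refreshes DX:AX from memory, which is why those loads sit outside casloop and the loop never reloads. In Go terms (same imports as the model above; again mine, not the CL's):

	// and64 models ·And64: load once, then reload only when the CAS fails,
	// mirroring CMPXCHG8B refreshing DX:AX on a failed compare.
	func and64(ptr *uint64, val uint64) uint64 {
		old := atomic.LoadUint64(ptr)
		for !atomic.CompareAndSwapUint64(ptr, old, old&val) {
			old = atomic.LoadUint64(ptr)
		}
		return old
	}

The uintptr stubs just below tail-jump to the 32-bit bodies: uintptr is 4 bytes on 386, so the $0-12 frames line up and the JMP can reuse the caller's arguments in place.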
+// func Anduintptr(ptr *uintptr, val uintptr) old uintptr
+TEXT ·Anduintptr(SB), NOSPLIT, $0-12
+ JMP ·And32(SB)
+
+// func Oruintptr(ptr *uintptr, val uintptr) old uintptr
+TEXT ·Oruintptr(SB), NOSPLIT, $0-12
+ JMP ·Or32(SB)
//go:noescape
func Or(ptr *uint32, val uint32)
+//go:noescape
+func And32(ptr *uint32, val uint32) uint32
+
+//go:noescape
+func Or32(ptr *uint32, val uint32) uint32
+
+//go:noescape
+func And64(ptr *uint64, val uint64) uint64
+
+//go:noescape
+func Or64(ptr *uint64, val uint64) uint64
+
+//go:noescape
+func Anduintptr(ptr *uintptr, val uintptr) uintptr
+
+//go:noescape
+func Oruintptr(ptr *uintptr, val uintptr) uintptr
+
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
LOCK
ANDL BX, (AX)
RET
+
+// func Or32(ptr *uint32, val uint32) old uint32
+TEXT ·Or32(SB), NOSPLIT, $0-20
+ MOVQ ptr+0(FP), BX
+ MOVL val+8(FP), CX
+casloop:
+ MOVL CX, DX
+ MOVL (BX), AX
+ ORL AX, DX
+ LOCK
+ CMPXCHGL DX, (BX)
+ JNZ casloop
+ MOVL AX, ret+16(FP)
+ RET
+
+// func And32(ptr *uint32, val uint32) old uint32
+TEXT ·And32(SB), NOSPLIT, $0-20
+ MOVQ ptr+0(FP), BX
+ MOVL val+8(FP), CX
+casloop:
+ MOVL CX, DX
+ MOVL (BX), AX
+ ANDL AX, DX
+ LOCK
+ CMPXCHGL DX, (BX)
+ JNZ casloop
+ MOVL AX, ret+16(FP)
+ RET
+
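On amd64 the loop bodies are the same shape as on 386, only with 64-bit pointer loads. The $0-20 frames of the 32-bit pair deserve a second look: the 8-byte ptr sits at +0 and the 4-byte val at +8, but the result lands at +16 rather than +12, because in ABI0 the result area starts at the argument size rounded up to pointer alignment; the 20-byte frame then follows from 16+4.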
+// func Or64(ptr *uint64, val uint64) old uint64
+TEXT ·Or64(SB), NOSPLIT, $0-24
+ MOVQ ptr+0(FP), BX
+ MOVQ val+8(FP), CX
+casloop:
+ MOVQ CX, DX
+ MOVQ (BX), AX
+ ORQ AX, DX
+ LOCK
+ CMPXCHGQ DX, (BX)
+ JNZ casloop
+ MOVQ AX, ret+16(FP)
+ RET
+
+// func And64(ptr *uint64, val uint64) old uint64
+TEXT ·And64(SB), NOSPLIT, $0-24
+ MOVQ ptr+0(FP), BX
+ MOVQ val+8(FP), CX
+casloop:
+ MOVQ CX, DX
+ MOVQ (BX), AX
+ ANDQ AX, DX
+ LOCK
+ CMPXCHGQ DX, (BX)
+ JNZ casloop
+ MOVQ AX, ret+16(FP)
+ RET
+
+// func Anduintptr(ptr *uintptr, val uintptr) old uintptr
+TEXT ·Anduintptr(SB), NOSPLIT, $0-24
+ JMP ·And64(SB)
+
+// func Oruintptr(ptr *uintptr, val uintptr) old uintptr
+TEXT ·Oruintptr(SB), NOSPLIT, $0-24
+ JMP ·Or64(SB)
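Why a loop at all on amd64? A LOCK-prefixed ANDL/ORL updates memory atomically but leaves no copy of the old contents in a register; among x86's locked read-modify-write instructions only XCHG, XADD, and CMPXCHG report the prior value, so fetch-and-AND/OR has to go through the CMPXCHG loop (the void Or/And above stay single instructions). The uintptr stubs mirror 386, jumping to the 64-bit bodies since uintptr is 8 bytes here. A hypothetical caller of the uintptr flavor (the names and the bit assignment are mine, and again this is runtime-internal):

	// dirtyBit is an invented low tag bit on a word-sized flags field.
	const dirtyBit uintptr = 1

	// clearDirty clears the bit and reports whether it was set before,
	// in one atomic step, using the old value Anduintptr returns.
	func clearDirty(flags *uintptr) bool {
		old := atomic.Anduintptr(flags, ^dirtyBit)
		return old&dirtyBit != 0
	}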
-//go:build ppc64 || ppc64le || riscv64 || wasm
+//go:build 386 || amd64 || ppc64 || ppc64le || riscv64 || wasm
//
// Copyright 2023 The Go Authors. All rights reserved.
}
}
}
+
+func BenchmarkAnd32(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.And32(&x[63], uint32(i))
+ }
+}
+
+func BenchmarkAnd32Parallel(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint32(0)
+ for pb.Next() {
+ atomic.And32(&x[63], i)
+ i++
+ }
+ })
+}
+
+func BenchmarkAnd64(b *testing.B) {
+ var x [128]uint64 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.And64(&x[63], uint64(i))
+ }
+}
+
+func BenchmarkAnd64Parallel(b *testing.B) {
+ var x [128]uint64 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint64(0)
+ for pb.Next() {
+ atomic.And64(&x[63], i)
+ i++
+ }
+ })
+}
+
+func BenchmarkOr32(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Or32(&x[63], uint32(i))
+ }
+}
+
+func BenchmarkOr32Parallel(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint32(0)
+ for pb.Next() {
+ atomic.Or32(&x[63], i)
+ i++
+ }
+ })
+}
+
+func BenchmarkOr64(b *testing.B) {
+ var x [128]uint64 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Or64(&x[63], uint64(i))
+ }
+}
+
+func BenchmarkOr64Parallel(b *testing.B) {
+ var x [128]uint64 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint64(0)
+ for pb.Next() {
+ atomic.Or64(&x[63], i)
+ i++
+ }
+ })
+}
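Two notes on the benchmarks. Indexing x[63] in a 128-element array parks the contended word near the middle of x, so no unrelated variable shares its cache line, and sink = &x keeps x alive so the loop cannot be optimized away; the Parallel variants then measure exactly the cost of many cores hammering that one line. To reproduce locally, something like go test -run='^$' -bench='And|Or' -cpu=1,8 runtime/internal/atomic should work (that is the package path this CL touches; adjust it if the package has since moved).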