Cypherpunks.ru repositories - gostls13.git/commitdiff
runtime/internal/atomic: add LSE atomics instructions to arm64
author    Jonathan Swinney <jswinney@amazon.com>
Sat, 17 Apr 2021 01:12:28 +0000 (01:12 +0000)
committer Cherry Mui <cherryyz@google.com>
Wed, 12 May 2021 16:18:01 +0000 (16:18 +0000)
As a follow-up to an earlier change[1] that added ARMv8 LSE instructions
to the compiler-generated atomic intrinsics, make the same change in the
runtime library. Since not all ARMv8 systems support LSE instructions,
the new instructions are guarded by a branch on a CPU feature flag.

[1]: golang.org/cl/234217 commit: ecc3f5112eba
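
The change is a per-call dispatch: each atomic routine first reads the
cached ARM64 "HasATOMICS" feature flag and, when it is set, executes a
single LSE instruction (SWPAL, CASAL, LDADDAL, LDCLRAL, LDORAL) instead
of an LDAXR/STLXR retry loop. A minimal sketch of the detection side,
using golang.org/x/sys/cpu as a public stand-in for the runtime's
internal/cpu package (the runtime itself reads the flag from assembly,
as the diff below shows):

package main

import (
	"fmt"

	"golang.org/x/sys/cpu" // public mirror of the internal/cpu feature flags
)

func main() {
	// The flag is filled in at startup from the OS/CPU feature reporting
	// (e.g. HWCAP on linux/arm64). Where it is true, the runtime atomics
	// take the one-instruction LSE fast paths added by this commit;
	// otherwise they keep using the load/store-exclusive loops.
	if cpu.ARM64.HasATOMICS {
		fmt.Println("LSE atomics available: SWPAL/CASAL/LDADDAL fast paths")
	} else {
		fmt.Println("no LSE support: LDAXR/STLXR retry loops")
	}
}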

Change-Id: I0e2fb22e78d5eddb6547863667a8865946679a00
Reviewed-on: https://go-review.googlesource.com/c/go/+/310591
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Cherry Mui <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Heschi Kreinick <heschi@google.com>

src/runtime/internal/atomic/atomic_arm64.go
src/runtime/internal/atomic/atomic_arm64.s

diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go
index 131c687e1baa757dc0887135914069c0daa7c5dd..3c8736997f4550caa0696470b4db8b494dc82707 100644
--- a/src/runtime/internal/atomic/atomic_arm64.go
+++ b/src/runtime/internal/atomic/atomic_arm64.go
@@ -7,7 +7,14 @@
 
 package atomic
 
-import "unsafe"
+import (
+       "internal/cpu"
+       "unsafe"
+)
+
+const (
+       offsetARM64HasATOMICS = unsafe.Offsetof(cpu.ARM64.HasATOMICS)
+)
 
 //go:noescape
 func Xadd(ptr *uint32, delta int32) uint32
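
The constant added above is what lets the assembly test the flag: the
toolchain writes package-level constants into go_asm.h (included at the
top of the .s file below), so offsetARM64HasATOMICS becomes
const_offsetARM64HasATOMICS, and
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4 loads the
HasATOMICS byte straight out of the internal/cpu.ARM64 variable. A small
stand-alone sketch of the offset computation, with a hypothetical
stand-in struct since internal/cpu is not importable outside the
standard library:

package main

import (
	"fmt"
	"unsafe"
)

// arm64Features is a hypothetical stand-in for internal/cpu's ARM64
// struct; the real layout is defined in internal/cpu and may differ.
type arm64Features struct {
	HasAES     bool
	HasATOMICS bool
}

var features arm64Features

// Mirrors offsetARM64HasATOMICS from the diff: unsafe.Offsetof is a
// compile-time constant giving the byte offset of the flag within the
// struct, which the build exports to assembly through go_asm.h.
const offsetHasATOMICS = unsafe.Offsetof(features.HasATOMICS)

func main() {
	fmt.Println(offsetHasATOMICS) // 1 for this stand-in layout
}

In the assembly hunks below, a zero flag byte branches with CBZ to the
load_store_loop fallback; a non-zero byte falls through to the single
LSE instruction.
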
diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s
index 587e7f05e2525bf1aaa88d4110dc4f2457b96574..e9467afecd707c8a6f67fbd8f0df3bd931c1e928 100644
--- a/src/runtime/internal/atomic/atomic_arm64.s
+++ b/src/runtime/internal/atomic/atomic_arm64.s
@@ -2,6 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+#include "go_asm.h"
 #include "textflag.h"
 
 TEXT ·Casint32(SB), NOSPLIT, $0-17
@@ -127,10 +128,15 @@ TEXT ·Store64(SB), NOSPLIT, $0-16
 TEXT ·Xchg(SB), NOSPLIT, $0-20
        MOVD    ptr+0(FP), R0
        MOVW    new+8(FP), R1
-again:
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       SWPALW  R1, (R0), R2
+       MOVW    R2, ret+16(FP)
+       RET
+load_store_loop:
        LDAXRW  (R0), R2
        STLXRW  R1, (R0), R3
-       CBNZ    R3, again
+       CBNZ    R3, load_store_loop
        MOVW    R2, ret+16(FP)
        RET
 
@@ -142,10 +148,15 @@ again:
 TEXT ·Xchg64(SB), NOSPLIT, $0-24
        MOVD    ptr+0(FP), R0
        MOVD    new+8(FP), R1
-again:
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       SWPALD  R1, (R0), R2
+       MOVD    R2, ret+16(FP)
+       RET
+load_store_loop:
        LDAXR   (R0), R2
        STLXR   R1, (R0), R3
-       CBNZ    R3, again
+       CBNZ    R3, load_store_loop
        MOVD    R2, ret+16(FP)
        RET
 
@@ -160,12 +171,20 @@ TEXT ·Cas(SB), NOSPLIT, $0-17
        MOVD    ptr+0(FP), R0
        MOVW    old+8(FP), R1
        MOVW    new+12(FP), R2
-again:
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       MOVD    R1, R3
+       CASALW  R3, (R0), R2
+       CMP     R1, R3
+       CSET    EQ, R0
+       MOVB    R0, ret+16(FP)
+       RET
+load_store_loop:
        LDAXRW  (R0), R3
        CMPW    R1, R3
        BNE     ok
        STLXRW  R2, (R0), R3
-       CBNZ    R3, again
+       CBNZ    R3, load_store_loop
 ok:
        CSET    EQ, R0
        MOVB    R0, ret+16(FP)
@@ -183,12 +202,20 @@ TEXT ·Cas64(SB), NOSPLIT, $0-25
        MOVD    ptr+0(FP), R0
        MOVD    old+8(FP), R1
        MOVD    new+16(FP), R2
-again:
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       MOVD    R1, R3
+       CASALD  R3, (R0), R2
+       CMP     R1, R3
+       CSET    EQ, R0
+       MOVB    R0, ret+24(FP)
+       RET
+load_store_loop:
        LDAXR   (R0), R3
        CMP     R1, R3
        BNE     ok
        STLXR   R2, (R0), R3
-       CBNZ    R3, again
+       CBNZ    R3, load_store_loop
 ok:
        CSET    EQ, R0
        MOVB    R0, ret+24(FP)
@@ -201,11 +228,17 @@ ok:
 TEXT ·Xadd(SB), NOSPLIT, $0-20
        MOVD    ptr+0(FP), R0
        MOVW    delta+8(FP), R1
-again:
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       LDADDALW        R1, (R0), R2
+       ADD     R1, R2
+       MOVW    R2, ret+16(FP)
+       RET
+load_store_loop:
        LDAXRW  (R0), R2
        ADDW    R2, R1, R2
        STLXRW  R2, (R0), R3
-       CBNZ    R3, again
+       CBNZ    R3, load_store_loop
        MOVW    R2, ret+16(FP)
        RET
 
@@ -216,11 +249,17 @@ again:
 TEXT ·Xadd64(SB), NOSPLIT, $0-24
        MOVD    ptr+0(FP), R0
        MOVD    delta+8(FP), R1
-again:
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       LDADDALD        R1, (R0), R2
+       ADD     R1, R2
+       MOVD    R2, ret+16(FP)
+       RET
+load_store_loop:
        LDAXR   (R0), R2
        ADD     R2, R1, R2
        STLXR   R2, (R0), R3
-       CBNZ    R3, again
+       CBNZ    R3, load_store_loop
        MOVD    R2, ret+16(FP)
        RET
 
@@ -236,37 +275,59 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
 TEXT ·And8(SB), NOSPLIT, $0-9
        MOVD    ptr+0(FP), R0
        MOVB    val+8(FP), R1
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       MVN     R1, R2
+       LDCLRALB        R2, (R0), R3
+       RET
+load_store_loop:
        LDAXRB  (R0), R2
        AND     R1, R2
        STLXRB  R2, (R0), R3
-       CBNZ    R3, -3(PC)
+       CBNZ    R3, load_store_loop
        RET
 
 TEXT ·Or8(SB), NOSPLIT, $0-9
        MOVD    ptr+0(FP), R0
        MOVB    val+8(FP), R1
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       LDORALB R1, (R0), R2
+       RET
+load_store_loop:
        LDAXRB  (R0), R2
        ORR     R1, R2
        STLXRB  R2, (R0), R3
-       CBNZ    R3, -3(PC)
+       CBNZ    R3, load_store_loop
        RET
 
 // func And(addr *uint32, v uint32)
 TEXT ·And(SB), NOSPLIT, $0-12
        MOVD    ptr+0(FP), R0
        MOVW    val+8(FP), R1
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       MVN     R1, R2
+       LDCLRALW        R2, (R0), R3
+       RET
+load_store_loop:
        LDAXRW  (R0), R2
        AND     R1, R2
        STLXRW  R2, (R0), R3
-       CBNZ    R3, -3(PC)
+       CBNZ    R3, load_store_loop
        RET
 
 // func Or(addr *uint32, v uint32)
 TEXT ·Or(SB), NOSPLIT, $0-12
        MOVD    ptr+0(FP), R0
        MOVW    val+8(FP), R1
+       MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+       CBZ     R4, load_store_loop
+       LDORALW R1, (R0), R2
+       RET
+load_store_loop:
        LDAXRW  (R0), R2
        ORR     R1, R2
        STLXRW  R2, (R0), R3
-       CBNZ    R3, -3(PC)
+       CBNZ    R3, load_store_loop
        RET