#include "textflag.h"
+// See memclrNoHeapPointers Go doc for important implementation constraints.
+
// void runtime·memclrNoHeapPointers(void*, uintptr)
-TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16
- MOV ptr+0(FP), T1
- MOV n+8(FP), T2
- ADD T1, T2, T4
+TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT,$0-16
+ // X10 = ptr
+ // X11 = n
+
+ // If less than 8 bytes, do single byte zeroing.
+ MOV $8, X9
+ BLT X11, X9, check4
- // If less than eight bytes, do one byte at a time.
- SLTU $8, T2, T3
- BNE T3, ZERO, outcheck
+ // Check alignment
+ AND $7, X10, X5
+ BEQZ X5, aligned
- // Do one byte at a time until eight-aligned.
- JMP aligncheck
+ // Zero one byte at a time until we reach 8 byte alignment.
+ SUB X5, X9, X5
+ SUB X5, X11, X11
align:
- MOVB ZERO, (T1)
- ADD $1, T1
-aligncheck:
- AND $7, T1, T3
- BNE T3, ZERO, align
-
- // Do eight bytes at a time as long as there is room.
- ADD $-7, T4, T5
- JMP wordscheck
-words:
- MOV ZERO, (T1)
- ADD $8, T1
-wordscheck:
- SLTU T5, T1, T3
- BNE T3, ZERO, words
-
- JMP outcheck
-out:
- MOVB ZERO, (T1)
- ADD $1, T1
-outcheck:
- BNE T1, T4, out
+ SUB $1, X5
+ MOVB ZERO, 0(X10)
+ ADD $1, X10
+ BNEZ X5, align
+
+aligned:
+ // X9 already contains $8
+ BLT X11, X9, check4
+ MOV $16, X9
+ BLT X11, X9, zero8
+ MOV $32, X9
+ BLT X11, X9, zero16
+ MOV $64, X9
+ BLT X11, X9, zero32
+loop64:
+ MOV ZERO, 0(X10)
+ MOV ZERO, 8(X10)
+ MOV ZERO, 16(X10)
+ MOV ZERO, 24(X10)
+ MOV ZERO, 32(X10)
+ MOV ZERO, 40(X10)
+ MOV ZERO, 48(X10)
+ MOV ZERO, 56(X10)
+ ADD $64, X10
+ SUB $64, X11
+ BGE X11, X9, loop64
+ BEQZ X11, done
+
+check32:
+ MOV $32, X9
+ BLT X11, X9, check16
+zero32:
+ MOV ZERO, 0(X10)
+ MOV ZERO, 8(X10)
+ MOV ZERO, 16(X10)
+ MOV ZERO, 24(X10)
+ ADD $32, X10
+ SUB $32, X11
+ BEQZ X11, done
+
+check16:
+ MOV $16, X9
+ BLT X11, X9, check8
+zero16:
+ MOV ZERO, 0(X10)
+ MOV ZERO, 8(X10)
+ ADD $16, X10
+ SUB $16, X11
+ BEQZ X11, done
+
+check8:
+ MOV $8, X9
+ BLT X11, X9, check4
+zero8:
+ MOV ZERO, 0(X10)
+ ADD $8, X10
+ SUB $8, X11
+ BEQZ X11, done
+
+check4:
+ MOV $4, X9
+ BLT X11, X9, loop1
+zero4:
+ MOVB ZERO, 0(X10)
+ MOVB ZERO, 1(X10)
+ MOVB ZERO, 2(X10)
+ MOVB ZERO, 3(X10)
+ ADD $4, X10
+ SUB $4, X11
+
+loop1:
+ BEQZ X11, done
+ MOVB ZERO, 0(X10)
+ ADD $1, X10
+ SUB $1, X11
+ JMP loop1
done:
RET