summary | shortlog | log | commit | commitdiff | tree | raw | patch | inline | side by side (from parent 1: e816eb5)
Use ADD with constants, instead of ADDI. Also use SUB with a positive constant
rather than ADD with a negative constant. The resulting assembly is still the
same.
Change-Id: Ife10bf5ae4122e525f0e7d41b5e463e748236a9c
Reviewed-on: https://go-review.googlesource.com/c/go/+/540136
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: M Zhuo <mzh@golangcn.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Heschi Kreinick <heschi@google.com>
Run-TryBot: Joel Sing <joel@sing.id.au>
MOV X16, 2*8(X5) // z[2]
MOV X19, 3*8(X5) // z[3]
MOV X16, 2*8(X5) // z[2]
MOV X19, 3*8(X5) // z[3]
- ADDI $32, X5
- ADDI $32, X7
+ ADD $32, X5
+ ADD $32, X7
ADD $8, X7, X7
SUB X7, X5, X5
align:
ADD $8, X7, X7
SUB X7, X5, X5
align:
MOVBU 0(X10), X8
MOVBU 0(X12), X9
BNE X8, X9, cmp
MOVBU 0(X10), X8
MOVBU 0(X12), X9
BNE X8, X9, cmp
BNE X17, X18, cmp8b
ADD $32, X10
ADD $32, X12
BNE X17, X18, cmp8b
ADD $32, X10
ADD $32, X12
BGE X5, X6, compare32
BEQZ X5, cmp_len
BGE X5, X6, compare32
BEQZ X5, cmp_len
BNE X17, X18, cmp8b
ADD $16, X10
ADD $16, X12
BNE X17, X18, cmp8b
ADD $16, X10
ADD $16, X12
BEQZ X5, cmp_len
check8_unaligned:
BEQZ X5, cmp_len
check8_unaligned:
BNE X29, X30, cmp1h
ADD $8, X10
ADD $8, X12
BNE X29, X30, cmp1h
ADD $8, X10
ADD $8, X12
BGE X5, X6, compare8_unaligned
BEQZ X5, cmp_len
BGE X5, X6, compare8_unaligned
BEQZ X5, cmp_len
BNE X19, X20, cmp1d
ADD $4, X10
ADD $4, X12
BNE X19, X20, cmp1d
ADD $4, X10
ADD $4, X12
BGE X5, X6, compare4_unaligned
compare1:
BGE X5, X6, compare4_unaligned
compare1:
BNE X8, X9, cmp
ADD $1, X10
ADD $1, X12
BNE X8, X9, cmp
ADD $1, X10
ADD $1, X12
JMP compare1
// Compare 8 bytes of memory in X15/X16 that are known to differ.
JMP compare1
// Compare 8 bytes of memory in X15/X16 that are known to differ.
ADD $8, X9, X9
SUB X9, X12, X12
align:
ADD $8, X9, X9
SUB X9, X12, X12
align:
MOVBU 0(X10), X19
MOVBU 0(X11), X20
BNE X19, X20, not_eq
MOVBU 0(X10), X19
MOVBU 0(X11), X20
BNE X19, X20, not_eq
BNE X16, X17, not_eq
ADD $32, X10
ADD $32, X11
BNE X16, X17, not_eq
ADD $32, X10
ADD $32, X11
BGE X12, X9, loop32
BEQZ X12, eq
BGE X12, X9, loop32
BEQZ X12, eq
BNE X21, X22, not_eq
ADD $16, X10
ADD $16, X11
BNE X21, X22, not_eq
ADD $16, X10
ADD $16, X11
BGE X12, X23, loop16
BEQZ X12, eq
BGE X12, X23, loop16
BEQZ X12, eq
BNE X16, X17, not_eq
ADD $4, X10
ADD $4, X11
BNE X16, X17, not_eq
ADD $4, X10
ADD $4, X11
BGE X12, X23, loop4
loop1:
BGE X12, X23, loop4
loop1:
BNE X19, X20, not_eq
ADD $1, X10
ADD $1, X11
BNE X19, X20, not_eq
ADD $1, X10
ADD $1, X11
AND $0xff, X13
MOV X10, X12 // store base for later
ADD X10, X11 // end
AND $0xff, X13
MOV X10, X12 // store base for later
ADD X10, X11 // end
AND $0xff, X12
MOV X10, X13 // store base for later
ADD X10, X11 // end
AND $0xff, X12
MOV X10, X13 // store base for later
ADD X10, X11 // end
// func rt0_go()
TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
// X2 = stack; A0 = argc; A1 = argv
// func rt0_go()
TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
// X2 = stack; A0 = argc; A1 = argv
MOV A0, 8(X2) // argc
MOV A1, 16(X2) // argv
MOV A0, 8(X2) // argc
MOV A1, 16(X2) // argv
// create a new goroutine to start program
MOV $runtime·mainPC(SB), T0 // entry
// create a new goroutine to start program
MOV $runtime·mainPC(SB), T0 // entry
MOV T0, 8(X2)
MOV ZERO, 0(X2)
CALL runtime·newproc(SB)
MOV T0, 8(X2)
MOV ZERO, 0(X2)
CALL runtime·newproc(SB)
// switch to crashstack
MOV (g_stack+stack_hi)(g), X11
// switch to crashstack
MOV (g_stack+stack_hi)(g), X11
MOV X11, X2
// call target function
MOV X11, X2
// call target function
MOV (g_sched+gobuf_sp)(g), X2
// Create a stack frame on g0 to call newstack.
MOV ZERO, -8(X2) // Zero saved LR in frame
MOV (g_sched+gobuf_sp)(g), X2
// Create a stack frame on g0 to call newstack.
MOV ZERO, -8(X2) // Zero saved LR in frame
CALL runtime·newstack(SB)
// Not reached, but make sure the return PC from the call to newstack
CALL runtime·newstack(SB)
// Not reached, but make sure the return PC from the call to newstack
MOV 0(CTXT), T1 // code pointer
MOV (g_sched+gobuf_sp)(g), X2 // sp = m->g0->sched.sp
// we don't need special macro for regabi since arg0(X10) = g
MOV 0(CTXT), T1 // code pointer
MOV (g_sched+gobuf_sp)(g), X2 // sp = m->g0->sched.sp
// we don't need special macro for regabi since arg0(X10) = g
MOV X10, 8(X2) // setup g
MOV ZERO, 0(X2) // clear return address
JALR RA, T1
MOV X10, 8(X2) // setup g
MOV ZERO, 0(X2) // clear return address
JALR RA, T1
// Now on a scheduling stack (a pthread-created stack).
g0:
// Save room for two of our pointers.
// Now on a scheduling stack (a pthread-created stack).
g0:
// Save room for two of our pointers.
MOV X9, 0(X2) // save old g on stack
MOV (g_stack+stack_hi)(X9), X9
SUB X8, X9, X8
MOV X9, 0(X2) // save old g on stack
MOV (g_stack+stack_hi)(X9), X9
SUB X8, X9, X8
SUB X5, X9, X5
SUB X5, X11, X11
align:
SUB X5, X9, X5
SUB X5, X11, X11
align:
MOVB ZERO, 0(X10)
ADD $1, X10
BNEZ X5, align
MOVB ZERO, 0(X10)
ADD $1, X10
BNEZ X5, align
MOV ZERO, 48(X10)
MOV ZERO, 56(X10)
ADD $64, X10
MOV ZERO, 48(X10)
MOV ZERO, 56(X10)
ADD $64, X10
BGE X11, X9, loop64
BEQZ X11, done
BGE X11, X9, loop64
BEQZ X11, done
MOV ZERO, 16(X10)
MOV ZERO, 24(X10)
ADD $32, X10
MOV ZERO, 16(X10)
MOV ZERO, 24(X10)
ADD $32, X10
MOV ZERO, 0(X10)
MOV ZERO, 8(X10)
ADD $16, X10
MOV ZERO, 0(X10)
MOV ZERO, 8(X10)
ADD $16, X10
zero8:
MOV ZERO, 0(X10)
ADD $8, X10
zero8:
MOV ZERO, 0(X10)
ADD $8, X10
MOVB ZERO, 2(X10)
MOVB ZERO, 3(X10)
ADD $4, X10
MOVB ZERO, 2(X10)
MOVB ZERO, 3(X10)
ADD $4, X10
loop1:
BEQZ X11, done
MOVB ZERO, 0(X10)
ADD $1, X10
loop1:
BEQZ X11, done
MOVB ZERO, 0(X10)
ADD $1, X10
SUB X5, X9, X5
SUB X5, X12, X12
f_align:
SUB X5, X9, X5
SUB X5, X12, X12
f_align:
MOVB 0(X11), X14
MOVB X14, 0(X10)
ADD $1, X10
MOVB 0(X11), X14
MOVB X14, 0(X10)
ADD $1, X10
MOV X21, 56(X10)
ADD $64, X10
ADD $64, X11
MOV X21, 56(X10)
ADD $64, X10
ADD $64, X11
BGE X12, X9, f_loop64
BEQZ X12, done
BGE X12, X9, f_loop64
BEQZ X12, done
MOV X17, 24(X10)
ADD $32, X10
ADD $32, X11
MOV X17, 24(X10)
ADD $32, X10
ADD $32, X11
BGE X12, X9, f_loop32
BEQZ X12, done
BGE X12, X9, f_loop32
BEQZ X12, done
MOV X15, 8(X10)
ADD $16, X10
ADD $16, X11
MOV X15, 8(X10)
ADD $16, X10
ADD $16, X11
BGE X12, X9, f_loop16
BEQZ X12, done
BGE X12, X9, f_loop16
BEQZ X12, done
MOV X14, 0(X10)
ADD $8, X10
ADD $8, X11
MOV X14, 0(X10)
ADD $8, X10
ADD $8, X11
BGE X12, X9, f_loop8
BEQZ X12, done
JMP f_loop4_check
BGE X12, X9, f_loop8
BEQZ X12, done
JMP f_loop4_check
MOVB X21, 7(X10)
ADD $8, X10
ADD $8, X11
MOVB X21, 7(X10)
ADD $8, X10
ADD $8, X11
BGE X12, X9, f_loop8_unaligned
f_loop4_check:
BGE X12, X9, f_loop8_unaligned
f_loop4_check:
MOVB X17, 3(X10)
ADD $4, X10
ADD $4, X11
MOVB X17, 3(X10)
ADD $4, X10
ADD $4, X11
BGE X12, X9, f_loop4
f_loop1:
BGE X12, X9, f_loop4
f_loop1:
MOVB X14, 0(X10)
ADD $1, X10
ADD $1, X11
MOVB X14, 0(X10)
ADD $1, X10
ADD $1, X11
// Move one byte at a time until we reach 8 byte alignment.
SUB X5, X12, X12
b_align:
// Move one byte at a time until we reach 8 byte alignment.
SUB X5, X12, X12
b_align:
- ADD $-1, X5
- ADD $-1, X10
- ADD $-1, X11
+ SUB $1, X5
+ SUB $1, X10
+ SUB $1, X11
MOVB 0(X11), X14
MOVB X14, 0(X10)
BNEZ X5, b_align
MOVB 0(X11), X14
MOVB X14, 0(X10)
BNEZ X5, b_align
MOV $64, X9
BLT X12, X9, b_loop32_check
b_loop64:
MOV $64, X9
BLT X12, X9, b_loop32_check
b_loop64:
- ADD $-64, X10
- ADD $-64, X11
+ SUB $64, X10
+ SUB $64, X11
MOV 0(X11), X14
MOV 8(X11), X15
MOV 16(X11), X16
MOV 0(X11), X14
MOV 8(X11), X15
MOV 16(X11), X16
MOV X19, 40(X10)
MOV X20, 48(X10)
MOV X21, 56(X10)
MOV X19, 40(X10)
MOV X20, 48(X10)
MOV X21, 56(X10)
BGE X12, X9, b_loop64
BEQZ X12, done
BGE X12, X9, b_loop64
BEQZ X12, done
MOV $32, X9
BLT X12, X9, b_loop16_check
b_loop32:
MOV $32, X9
BLT X12, X9, b_loop16_check
b_loop32:
- ADD $-32, X10
- ADD $-32, X11
+ SUB $32, X10
+ SUB $32, X11
MOV 0(X11), X14
MOV 8(X11), X15
MOV 16(X11), X16
MOV 0(X11), X14
MOV 8(X11), X15
MOV 16(X11), X16
MOV X15, 8(X10)
MOV X16, 16(X10)
MOV X17, 24(X10)
MOV X15, 8(X10)
MOV X16, 16(X10)
MOV X17, 24(X10)
BGE X12, X9, b_loop32
BEQZ X12, done
BGE X12, X9, b_loop32
BEQZ X12, done
MOV $16, X9
BLT X12, X9, b_loop8_check
b_loop16:
MOV $16, X9
BLT X12, X9, b_loop8_check
b_loop16:
- ADD $-16, X10
- ADD $-16, X11
+ SUB $16, X10
+ SUB $16, X11
MOV 0(X11), X14
MOV 8(X11), X15
MOV X14, 0(X10)
MOV X15, 8(X10)
MOV 0(X11), X14
MOV 8(X11), X15
MOV X14, 0(X10)
MOV X15, 8(X10)
BGE X12, X9, b_loop16
BEQZ X12, done
BGE X12, X9, b_loop16
BEQZ X12, done
MOV $8, X9
BLT X12, X9, b_loop4_check
b_loop8:
MOV $8, X9
BLT X12, X9, b_loop4_check
b_loop8:
- ADD $-8, X10
- ADD $-8, X11
+ SUB $8, X10
+ SUB $8, X11
MOV 0(X11), X14
MOV X14, 0(X10)
MOV 0(X11), X14
MOV X14, 0(X10)
BGE X12, X9, b_loop8
BEQZ X12, done
JMP b_loop4_check
BGE X12, X9, b_loop8
BEQZ X12, done
JMP b_loop4_check
MOV $8, X9
BLT X12, X9, b_loop4_check
b_loop8_unaligned:
MOV $8, X9
BLT X12, X9, b_loop4_check
b_loop8_unaligned:
- ADD $-8, X10
- ADD $-8, X11
+ SUB $8, X10
+ SUB $8, X11
MOVB 0(X11), X14
MOVB 1(X11), X15
MOVB 2(X11), X16
MOVB 0(X11), X14
MOVB 1(X11), X15
MOVB 2(X11), X16
MOVB X19, 5(X10)
MOVB X20, 6(X10)
MOVB X21, 7(X10)
MOVB X19, 5(X10)
MOVB X20, 6(X10)
MOVB X21, 7(X10)
BGE X12, X9, b_loop8_unaligned
b_loop4_check:
MOV $4, X9
BLT X12, X9, b_loop1
b_loop4:
BGE X12, X9, b_loop8_unaligned
b_loop4_check:
MOV $4, X9
BLT X12, X9, b_loop1
b_loop4:
- ADD $-4, X10
- ADD $-4, X11
+ SUB $4, X10
+ SUB $4, X11
MOVB 0(X11), X14
MOVB 1(X11), X15
MOVB 2(X11), X16
MOVB 0(X11), X14
MOVB 1(X11), X15
MOVB 2(X11), X16
MOVB X15, 1(X10)
MOVB X16, 2(X10)
MOVB X17, 3(X10)
MOVB X15, 1(X10)
MOVB X16, 2(X10)
MOVB X17, 3(X10)
BGE X12, X9, b_loop4
b_loop1:
BEQZ X12, done
BGE X12, X9, b_loop4
b_loop1:
BEQZ X12, done
- ADD $-1, X10
- ADD $-1, X11
+ SUB $1, X10
+ SUB $1, X11
MOVB 0(X11), X14
MOVB X14, 0(X10)
MOVB 0(X11), X14
MOVB X14, 0(X10)
}
p("MOV X1, -%d(X2)", l.stack)
}
p("MOV X1, -%d(X2)", l.stack)
- p("ADD $-%d, X2", l.stack)
+ p("SUB $%d, X2", l.stack)
l.save()
p("CALL ·asyncPreempt2(SB)")
l.restore()
l.save()
p("CALL ·asyncPreempt2(SB)")
l.restore()
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
MOV X1, -464(X2)
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
MOV X1, -464(X2)
MOV X5, 8(X2)
MOV X6, 16(X2)
MOV X7, 24(X2)
MOV X5, 8(X2)
MOV X6, 16(X2)
MOV X7, 24(X2)
MOV (g_sched+gobuf_sp)(T1), X2
noswitch:
MOV (g_sched+gobuf_sp)(T1), X2
noswitch:
- ADDI $-24, X2 // Space for result
+ SUB $24, X2 // Space for result
ANDI $~7, X2 // Align for C code
MOV $8(X2), A1
ANDI $~7, X2 // Align for C code
MOV $8(X2), A1
MOV (g_sched+gobuf_sp)(T1), X2
noswitch:
MOV (g_sched+gobuf_sp)(T1), X2
noswitch:
- ADDI $-24, X2 // Space for result
+ SUB $24, X2 // Space for result
ANDI $~7, X2 // Align for C code
MOV $8(X2), A1
ANDI $~7, X2 // Align for C code
MOV $8(X2), A1