]> Cypherpunks.ru repositories - gostls13.git/blobdiff - src/cmd/internal/obj/arm64/obj7.go
cmd/internal/obj/arm64: fix frame pointer restore in epilogue
[gostls13.git] / src / cmd / internal / obj / arm64 / obj7.go
index e41fb3bb7531cb01e7c8319343c2c2cb5730d3ea..0ab5939b8429106b4f74dc1c038187f813bd96ed 100644 (file)
@@ -1,5 +1,5 @@
 // cmd/7l/noop.c, cmd/7l/obj.c, cmd/ld/pass.c from Vita Nuova.
-// https://code.google.com/p/ken-cc/source/browse/
+// https://bitbucket.org/plan9-from-bell-labs/9-cc/src/master/
 //
 //     Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
 //     Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
@@ -35,24 +35,122 @@ import (
        "cmd/internal/objabi"
        "cmd/internal/src"
        "cmd/internal/sys"
+       "internal/abi"
        "internal/buildcfg"
        "log"
        "math"
 )
 
-var complements = []obj.As{
-       AADD:  ASUB,
-       AADDW: ASUBW,
-       ASUB:  AADD,
-       ASUBW: AADDW,
-       ACMP:  ACMN,
-       ACMPW: ACMNW,
-       ACMN:  ACMP,
-       ACMNW: ACMPW,
+// zrReplace is the set of instructions for which $0 in the From operand
+// should be replaced with REGZERO.
+var zrReplace = map[obj.As]bool{
+       AMOVD:  true,
+       AMOVW:  true,
+       AMOVWU: true,
+       AMOVH:  true,
+       AMOVHU: true,
+       AMOVB:  true,
+       AMOVBU: true,
+       ASBC:   true,
+       ASBCW:  true,
+       ASBCS:  true,
+       ASBCSW: true,
+       AADC:   true,
+       AADCW:  true,
+       AADCS:  true,
+       AADCSW: true,
+       AFMOVD: true,
+       AFMOVS: true,
+       AMSR:   true,
 }
 
 func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
-       // MOV  g_stackguard(g), R1
+       if c.ctxt.Flag_maymorestack != "" {
+               p = c.cursym.Func().SpillRegisterArgs(p, c.newprog)
+
+               // Save LR and make room for FP, REGCTXT. Leave room
+               // for caller's saved FP.
+               const frameSize = 32
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGLINK
+               p.To.Type = obj.TYPE_MEM
+               p.Scond = C_XPRE
+               p.To.Offset = -frameSize
+               p.To.Reg = REGSP
+               p.Spadj = frameSize
+
+               // Save FP.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGFP
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = REGSP
+               p.To.Offset = -8
+
+               p = obj.Appendp(p, c.newprog)
+               p.As = ASUB
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = 8
+               p.Reg = REGSP
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGFP
+
+               // Save REGCTXT (for simplicity we do this whether or
+               // not we need it.)
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGCTXT
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = REGSP
+               p.To.Offset = 8
+
+               // BL maymorestack
+               p = obj.Appendp(p, c.newprog)
+               p.As = ABL
+               p.To.Type = obj.TYPE_BRANCH
+               // See ../x86/obj6.go
+               p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI())
+
+               // Restore REGCTXT.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = REGSP
+               p.From.Offset = 8
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGCTXT
+
+               // Restore FP.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = REGSP
+               p.From.Offset = -8
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGFP
+
+               // Restore LR and SP.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_MEM
+               p.Scond = C_XPOST
+               p.From.Offset = frameSize
+               p.From.Reg = REGSP
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGLINK
+               p.Spadj = -frameSize
+
+               p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog)
+       }
+
+       // Jump back to here after morestack returns.
+       startPred := p
+
+       // MOV  g_stackguard(g), RT1
        p = obj.Appendp(p, c.newprog)
 
        p.As = AMOVD
@@ -63,7 +161,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
                p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1
        }
        p.To.Type = obj.TYPE_REG
-       p.To.Reg = REG_R1
+       p.To.Reg = REGRT1
 
        // Mark the stack bound check and morestack call async nonpreemptible.
        // If we get preempted here, when resumed the preemption request is
@@ -72,41 +170,33 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
        p = c.ctxt.StartUnsafePoint(p, c.newprog)
 
        q := (*obj.Prog)(nil)
-       if framesize <= objabi.StackSmall {
+       if framesize <= abi.StackSmall {
                // small stack: SP < stackguard
-               //      MOV     SP, R2
-               //      CMP     stackguard, R2
-               p = obj.Appendp(p, c.newprog)
-
-               p.As = AMOVD
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = REGSP
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
+               //      CMP     stackguard, SP
 
                p = obj.Appendp(p, c.newprog)
                p.As = ACMP
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R1
-               p.Reg = REG_R2
-       } else if framesize <= objabi.StackBig {
+               p.From.Reg = REGRT1
+               p.Reg = REGSP
+       } else if framesize <= abi.StackBig {
                // large stack: SP-framesize < stackguard-StackSmall
-               //      SUB     $(framesize-StackSmall), SP, R2
-               //      CMP     stackguard, R2
+               //      SUB     $(framesize-StackSmall), SP, RT2
+               //      CMP     stackguard, RT2
                p = obj.Appendp(p, c.newprog)
 
                p.As = ASUB
                p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(framesize) - objabi.StackSmall
+               p.From.Offset = int64(framesize) - abi.StackSmall
                p.Reg = REGSP
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
+               p.To.Reg = REGRT2
 
                p = obj.Appendp(p, c.newprog)
                p.As = ACMP
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R1
-               p.Reg = REG_R2
+               p.From.Reg = REGRT1
+               p.Reg = REGRT2
        } else {
                // Such a large stack we need to protect against underflow.
                // The runtime guarantees SP > objabi.StackBig, but
@@ -115,18 +205,18 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
                // stack guard to incorrectly succeed. We explicitly
                // guard against underflow.
                //
-               //      SUBS    $(framesize-StackSmall), SP, R2
+               //      SUBS    $(framesize-StackSmall), SP, RT2
                //      // On underflow, jump to morestack
                //      BLO     label_of_call_to_morestack
-               //      CMP     stackguard, R2
+               //      CMP     stackguard, RT2
 
                p = obj.Appendp(p, c.newprog)
                p.As = ASUBS
                p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(framesize) - objabi.StackSmall
+               p.From.Offset = int64(framesize) - abi.StackSmall
                p.Reg = REGSP
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
+               p.To.Reg = REGRT2
 
                p = obj.Appendp(p, c.newprog)
                q = p
@@ -136,8 +226,8 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
                p = obj.Appendp(p, c.newprog)
                p.As = ACMP
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R1
-               p.Reg = REG_R2
+               p.From.Reg = REGRT1
+               p.Reg = REGRT2
        }
 
        // BLS  do-morestack
@@ -161,17 +251,20 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
        pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog)
        pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog)
 
+       if q != nil {
+               q.To.SetTarget(pcdata)
+       }
+       bls.To.SetTarget(pcdata)
+
+       spill := c.cursym.Func().SpillRegisterArgs(pcdata, c.newprog)
+
        // MOV  LR, R3
-       movlr := obj.Appendp(pcdata, c.newprog)
+       movlr := obj.Appendp(spill, c.newprog)
        movlr.As = AMOVD
        movlr.From.Type = obj.TYPE_REG
        movlr.From.Reg = REGLINK
        movlr.To.Type = obj.TYPE_REG
        movlr.To.Reg = REG_R3
-       if q != nil {
-               q.To.SetTarget(movlr)
-       }
-       bls.To.SetTarget(movlr)
 
        debug := movlr
        if false {
@@ -196,13 +289,15 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
        }
        call.To.Sym = c.ctxt.Lookup(morestack)
 
+       // The instructions which unspill regs should be preemptible.
        pcdata = c.ctxt.EndUnsafePoint(call, c.newprog, -1)
+       unspill := c.cursym.Func().UnspillRegisterArgs(pcdata, c.newprog)
 
        // B    start
-       jmp := obj.Appendp(pcdata, c.newprog)
+       jmp := obj.Appendp(unspill, c.newprog)
        jmp.As = AB
        jmp.To.Type = obj.TYPE_BRANCH
-       jmp.To.SetTarget(c.cursym.Func().Text.Link)
+       jmp.To.SetTarget(startPred.Link)
        jmp.Spadj = +framesize
 
        return end
@@ -214,18 +309,13 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
        p.From.Class = 0
        p.To.Class = 0
 
-       // $0 results in C_ZCON, which matches both C_REG and various
-       // C_xCON, however the C_REG cases in asmout don't expect a
-       // constant, so they will use the register fields and assemble
-       // a R0. To prevent that, rewrite $0 as ZR.
-       if p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 {
+       // Previously we rewrote $0 to ZR, but we have now removed this change.
+       // In order to be compatible with some previous legal instruction formats,
+       // reserve the previous conversion for some specific instructions.
+       if p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 && zrReplace[p.As] {
                p.From.Type = obj.TYPE_REG
                p.From.Reg = REGZERO
        }
-       if p.To.Type == obj.TYPE_CONST && p.To.Offset == 0 {
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REGZERO
-       }
 
        // Rewrite BR/BL to symbol as TYPE_BRANCH.
        switch p.As {
@@ -240,8 +330,33 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
                break
        }
 
-       // Rewrite float constants to values stored in memory.
+       // Rewrite float and vector constants to values stored in memory.
        switch p.As {
+       case AVMOVS:
+               if p.From.Type == obj.TYPE_CONST {
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Sym = c.ctxt.Int32Sym(p.From.Offset)
+                       p.From.Name = obj.NAME_EXTERN
+                       p.From.Offset = 0
+               }
+
+       case AVMOVD:
+               if p.From.Type == obj.TYPE_CONST {
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Sym = c.ctxt.Int64Sym(p.From.Offset)
+                       p.From.Name = obj.NAME_EXTERN
+                       p.From.Offset = 0
+               }
+
+       case AVMOVQ:
+               if p.From.Type == obj.TYPE_CONST {
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Sym = c.ctxt.Int128Sym(p.GetFrom3().Offset, p.From.Offset)
+                       p.From.Name = obj.NAME_EXTERN
+                       p.From.Offset = 0
+                       p.RestArgs = nil
+               }
+
        case AFMOVS:
                if p.From.Type == obj.TYPE_FCONST {
                        f64 := p.From.Val.(float64)
@@ -276,34 +391,6 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
                        p.From.Name = obj.NAME_EXTERN
                        p.From.Offset = 0
                }
-
-               break
-       }
-
-       // Rewrite negative immediates as positive immediates with
-       // complementary instruction.
-       switch p.As {
-       case AADD, ASUB, ACMP, ACMN:
-               if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && p.From.Offset != -1<<63 {
-                       p.From.Offset = -p.From.Offset
-                       p.As = complements[p.As]
-               }
-       case AADDW, ASUBW, ACMPW, ACMNW:
-               if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && int32(p.From.Offset) != -1<<31 {
-                       p.From.Offset = -p.From.Offset
-                       p.As = complements[p.As]
-               }
-       }
-
-       // For 32-bit instruction with constant, rewrite
-       // the high 32-bit to be a repetition of the low
-       // 32-bit, so that the BITCON test can be shared
-       // for both 32-bit and 64-bit. 32-bit ops will
-       // zero the high 32-bit of the destination register
-       // anyway.
-       if (isANDWop(p.As) || isADDWop(p.As) || p.As == AMOVW) && p.From.Type == obj.TYPE_CONST {
-               v := p.From.Offset & 0xffffffff
-               p.From.Offset = v | v<<32
        }
 
        if c.ctxt.Flag_dynlink {
@@ -321,9 +408,9 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) {
                //     CALL REGTMP
                var sym *obj.LSym
                if p.As == obj.ADUFFZERO {
-                       sym = c.ctxt.Lookup("runtime.duffzero")
+                       sym = c.ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
                } else {
-                       sym = c.ctxt.Lookup("runtime.duffcopy")
+                       sym = c.ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
                }
                offset := p.To.Offset
                p.As = AMOVD
@@ -520,7 +607,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                }
                        }
 
-                       if p.Mark&LEAF != 0 && c.autosize < objabi.StackSmall {
+                       if p.Mark&LEAF != 0 && c.autosize < abi.StackSmall {
                                // A leaf function with a small stack can be marked
                                // NOSPLIT, avoiding a stack check.
                                p.From.Sym.Set(obj.AttrNoSplit, true)
@@ -533,66 +620,85 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        var prologueEnd *obj.Prog
 
                        aoffset := c.autosize
-                       if aoffset > 0xF0 {
-                               aoffset = 0xF0
+                       if aoffset > 0xf0 {
+                               // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned.
+                               // so the maximum aoffset value is 0xf0.
+                               aoffset = 0xf0
                        }
 
                        // Frame is non-empty. Make sure to save link register, even if
                        // it is a leaf function, so that traceback works.
                        q = p
                        if c.autosize > aoffset {
-                               // Frame size is too large for a MOVD.W instruction.
-                               // Store link register before decrementing SP, so if a signal comes
-                               // during the execution of the function prologue, the traceback
-                               // code will not see a half-updated stack frame.
-                               // This sequence is not async preemptible, as if we open a frame
-                               // at the current SP, it will clobber the saved LR.
-                               q = c.ctxt.StartUnsafePoint(q, c.newprog)
+                               // Frame size is too large for a MOVD.W instruction. Store the frame pointer
+                               // register and link register before decrementing SP, so if a signal comes
+                               // during the execution of the function prologue, the traceback code will
+                               // not see a half-updated stack frame.
 
-                               q = obj.Appendp(q, c.newprog)
-                               q.Pos = p.Pos
-                               q.As = ASUB
-                               q.From.Type = obj.TYPE_CONST
-                               q.From.Offset = int64(c.autosize)
-                               q.Reg = REGSP
-                               q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REGTMP
+                               // SUB $autosize, RSP, R20
+                               q1 = obj.Appendp(q, c.newprog)
+                               q1.Pos = p.Pos
+                               q1.As = ASUB
+                               q1.From.Type = obj.TYPE_CONST
+                               q1.From.Offset = int64(c.autosize)
+                               q1.Reg = REGSP
+                               q1.To.Type = obj.TYPE_REG
+                               q1.To.Reg = REG_R20
 
-                               prologueEnd = q
+                               prologueEnd = q1
 
-                               q = obj.Appendp(q, c.newprog)
-                               q.Pos = p.Pos
-                               q.As = AMOVD
-                               q.From.Type = obj.TYPE_REG
-                               q.From.Reg = REGLINK
-                               q.To.Type = obj.TYPE_MEM
-                               q.To.Reg = REGTMP
+                               // STP (R29, R30), -8(R20)
+                               q1 = obj.Appendp(q1, c.newprog)
+                               q1.Pos = p.Pos
+                               q1.As = ASTP
+                               q1.From.Type = obj.TYPE_REGREG
+                               q1.From.Reg = REGFP
+                               q1.From.Offset = REGLINK
+                               q1.To.Type = obj.TYPE_MEM
+                               q1.To.Reg = REG_R20
+                               q1.To.Offset = -8
 
-                               q1 = obj.Appendp(q, c.newprog)
+                               // This is not async preemptible, as if we open a frame
+                               // at the current SP, it will clobber the saved LR.
+                               q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
+
+                               // MOVD R20, RSP
+                               q1 = obj.Appendp(q1, c.newprog)
                                q1.Pos = p.Pos
                                q1.As = AMOVD
                                q1.From.Type = obj.TYPE_REG
-                               q1.From.Reg = REGTMP
+                               q1.From.Reg = REG_R20
                                q1.To.Type = obj.TYPE_REG
                                q1.To.Reg = REGSP
                                q1.Spadj = c.autosize
 
+                               q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
+
                                if buildcfg.GOOS == "ios" {
                                        // iOS does not support SA_ONSTACK. We will run the signal handler
                                        // on the G stack. If we write below SP, it may be clobbered by
-                                       // the signal handler. So we save LR after decrementing SP.
+                                       // the signal handler. So we save FP and LR after decrementing SP.
+                                       // STP (R29, R30), -8(RSP)
                                        q1 = obj.Appendp(q1, c.newprog)
                                        q1.Pos = p.Pos
-                                       q1.As = AMOVD
-                                       q1.From.Type = obj.TYPE_REG
-                                       q1.From.Reg = REGLINK
+                                       q1.As = ASTP
+                                       q1.From.Type = obj.TYPE_REGREG
+                                       q1.From.Reg = REGFP
+                                       q1.From.Offset = REGLINK
                                        q1.To.Type = obj.TYPE_MEM
                                        q1.To.Reg = REGSP
+                                       q1.To.Offset = -8
                                }
-
-                               q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
                        } else {
-                               // small frame, update SP and save LR in a single MOVD.W instruction
+                               // small frame, update SP and save LR in a single MOVD.W instruction.
+                               // So if a signal comes during the execution of the function prologue,
+                               // the traceback code will not see a half-updated stack frame.
+                               // Also, on Linux, in a cgo binary we may get a SIGSETXID signal
+                               // early on before the signal stack is set, as glibc doesn't allow
+                               // us to block SIGSETXID. So it is important that we don't write below
+                               // the SP until the signal stack is set.
+                               // Luckily, all the functions from thread entry to setting the signal
+                               // stack have small frames.
                                q1 = obj.Appendp(q, c.newprog)
                                q1.As = AMOVD
                                q1.Pos = p.Pos
@@ -605,20 +711,20 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                q1.Spadj = aoffset
 
                                prologueEnd = q1
+
+                               // Frame pointer.
+                               q1 = obj.Appendp(q1, c.newprog)
+                               q1.Pos = p.Pos
+                               q1.As = AMOVD
+                               q1.From.Type = obj.TYPE_REG
+                               q1.From.Reg = REGFP
+                               q1.To.Type = obj.TYPE_MEM
+                               q1.To.Reg = REGSP
+                               q1.To.Offset = -8
                        }
 
                        prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
 
-                       // Frame pointer.
-                       q1 = obj.Appendp(q1, c.newprog)
-                       q1.Pos = p.Pos
-                       q1.As = AMOVD
-                       q1.From.Type = obj.TYPE_REG
-                       q1.From.Reg = REGFP
-                       q1.To.Type = obj.TYPE_MEM
-                       q1.To.Reg = REGSP
-                       q1.To.Offset = -8
-
                        q1 = obj.Appendp(q1, c.newprog)
                        q1.Pos = p.Pos
                        q1.As = ASUB
@@ -631,38 +737,38 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        if c.cursym.Func().Text.From.Sym.Wrapper() {
                                // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
                                //
-                               //      MOV  g_panic(g), R1
+                               //      MOV  g_panic(g), RT1
                                //      CBNZ checkargp
                                // end:
                                //      NOP
                                // ... function body ...
                                // checkargp:
-                               //      MOV  panic_argp(R1), R2
-                               //      ADD  $(autosize+8), RSP, R3
-                               //      CMP  R2, R3
+                               //      MOV  panic_argp(RT1), RT2
+                               //      ADD  $(autosize+8), RSP, R20
+                               //      CMP  RT2, R20
                                //      BNE  end
-                               //      ADD  $8, RSP, R4
-                               //      MOVD R4, panic_argp(R1)
+                               //      ADD  $8, RSP, R20
+                               //      MOVD R20, panic_argp(RT1)
                                //      B    end
                                //
                                // The NOP is needed to give the jumps somewhere to land.
                                // It is a liblink NOP, not an ARM64 NOP: it encodes to 0 instruction bytes.
                                q = q1
 
-                               // MOV g_panic(g), R1
+                               // MOV g_panic(g), RT1
                                q = obj.Appendp(q, c.newprog)
                                q.As = AMOVD
                                q.From.Type = obj.TYPE_MEM
                                q.From.Reg = REGG
                                q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic
                                q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REG_R1
+                               q.To.Reg = REGRT1
 
-                               // CBNZ R1, checkargp
+                               // CBNZ RT1, checkargp
                                cbnz := obj.Appendp(q, c.newprog)
                                cbnz.As = ACBNZ
                                cbnz.From.Type = obj.TYPE_REG
-                               cbnz.From.Reg = REG_R1
+                               cbnz.From.Reg = REGRT1
                                cbnz.To.Type = obj.TYPE_BRANCH
 
                                // Empty branch target at the top of the function body
@@ -674,33 +780,33 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                for last = end; last.Link != nil; last = last.Link {
                                }
 
-                               // MOV panic_argp(R1), R2
+                               // MOV panic_argp(RT1), RT2
                                mov := obj.Appendp(last, c.newprog)
                                mov.As = AMOVD
                                mov.From.Type = obj.TYPE_MEM
-                               mov.From.Reg = REG_R1
+                               mov.From.Reg = REGRT1
                                mov.From.Offset = 0 // Panic.argp
                                mov.To.Type = obj.TYPE_REG
-                               mov.To.Reg = REG_R2
+                               mov.To.Reg = REGRT2
 
                                // CBNZ branches to the MOV above
                                cbnz.To.SetTarget(mov)
 
-                               // ADD $(autosize+8), SP, R3
+                               // ADD $(autosize+8), SP, R20
                                q = obj.Appendp(mov, c.newprog)
                                q.As = AADD
                                q.From.Type = obj.TYPE_CONST
                                q.From.Offset = int64(c.autosize) + 8
                                q.Reg = REGSP
                                q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REG_R3
+                               q.To.Reg = REG_R20
 
-                               // CMP R2, R3
+                               // CMP RT2, R20
                                q = obj.Appendp(q, c.newprog)
                                q.As = ACMP
                                q.From.Type = obj.TYPE_REG
-                               q.From.Reg = REG_R2
-                               q.Reg = REG_R3
+                               q.From.Reg = REGRT2
+                               q.Reg = REG_R20
 
                                // BNE end
                                q = obj.Appendp(q, c.newprog)
@@ -708,22 +814,22 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                q.To.Type = obj.TYPE_BRANCH
                                q.To.SetTarget(end)
 
-                               // ADD $8, SP, R4
+                               // ADD $8, SP, R20
                                q = obj.Appendp(q, c.newprog)
                                q.As = AADD
                                q.From.Type = obj.TYPE_CONST
                                q.From.Offset = 8
                                q.Reg = REGSP
                                q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REG_R4
+                               q.To.Reg = REG_R20
 
-                               // MOV R4, panic_argp(R1)
+                               // MOV R20, panic_argp(RT1)
                                q = obj.Appendp(q, c.newprog)
                                q.As = AMOVD
                                q.From.Type = obj.TYPE_REG
-                               q.From.Reg = REG_R4
+                               q.From.Reg = REG_R20
                                q.To.Type = obj.TYPE_MEM
-                               q.To.Reg = REG_R1
+                               q.To.Reg = REGRT1
                                q.To.Offset = 0 // Panic.argp
 
                                // B end
@@ -744,65 +850,48 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        p.To = obj.Addr{}
                        if c.cursym.Func().Text.Mark&LEAF != 0 {
                                if c.autosize != 0 {
+                                       // Restore frame pointer.
+                                       // ADD $framesize-8, RSP, R29
                                        p.As = AADD
                                        p.From.Type = obj.TYPE_CONST
-                                       p.From.Offset = int64(c.autosize)
+                                       p.From.Offset = int64(c.autosize) - 8
+                                       p.Reg = REGSP
                                        p.To.Type = obj.TYPE_REG
-                                       p.To.Reg = REGSP
-                                       p.Spadj = -c.autosize
+                                       p.To.Reg = REGFP
 
-                                       // Frame pointer.
+                                       // Pop stack frame.
+                                       // ADD $framesize, RSP, RSP
                                        p = obj.Appendp(p, c.newprog)
-                                       p.As = ASUB
+                                       p.As = AADD
                                        p.From.Type = obj.TYPE_CONST
-                                       p.From.Offset = 8
-                                       p.Reg = REGSP
+                                       p.From.Offset = int64(c.autosize)
                                        p.To.Type = obj.TYPE_REG
-                                       p.To.Reg = REGFP
+                                       p.To.Reg = REGSP
+                                       p.Spadj = -c.autosize
                                }
                        } else {
-                               /* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/
-
-                               // Frame pointer.
-                               p.As = AMOVD
+                               aoffset := c.autosize
+                               // LDP -8(RSP), (R29, R30)
+                               p.As = ALDP
                                p.From.Type = obj.TYPE_MEM
-                               p.From.Reg = REGSP
                                p.From.Offset = -8
-                               p.To.Type = obj.TYPE_REG
+                               p.From.Reg = REGSP
+                               p.To.Type = obj.TYPE_REGREG
                                p.To.Reg = REGFP
-                               p = obj.Appendp(p, c.newprog)
+                               p.To.Offset = REGLINK
 
-                               aoffset := c.autosize
-
-                               if aoffset <= 0xF0 {
-                                       p.As = AMOVD
-                                       p.From.Type = obj.TYPE_MEM
-                                       p.Scond = C_XPOST
-                                       p.From.Offset = int64(aoffset)
-                                       p.From.Reg = REGSP
-                                       p.To.Type = obj.TYPE_REG
-                                       p.To.Reg = REGLINK
-                                       p.Spadj = -aoffset
-                               } else {
-                                       p.As = AMOVD
-                                       p.From.Type = obj.TYPE_MEM
-                                       p.From.Offset = 0
-                                       p.From.Reg = REGSP
-                                       p.To.Type = obj.TYPE_REG
-                                       p.To.Reg = REGLINK
-
-                                       q = newprog()
-                                       q.As = AADD
-                                       q.From.Type = obj.TYPE_CONST
-                                       q.From.Offset = int64(aoffset)
-                                       q.To.Type = obj.TYPE_REG
-                                       q.To.Reg = REGSP
-                                       q.Link = p.Link
-                                       q.Spadj = int32(-q.From.Offset)
-                                       q.Pos = p.Pos
-                                       p.Link = q
-                                       p = q
-                               }
+                               // ADD $aoffset, RSP, RSP
+                               q = newprog()
+                               q.As = AADD
+                               q.From.Type = obj.TYPE_CONST
+                               q.From.Offset = int64(aoffset)
+                               q.To.Type = obj.TYPE_REG
+                               q.To.Reg = REGSP
+                               q.Spadj = -aoffset
+                               q.Pos = p.Pos
+                               q.Link = p.Link
+                               p.Link = q
+                               p = q
                        }
 
                        // If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',
@@ -976,8 +1065,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 
                if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
                        f := c.cursym.Func()
-                       if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 {
-                               c.cursym.Func().FuncFlag |= objabi.FuncFlag_SPWRITE
+                       if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
+                               c.cursym.Func().FuncFlag |= abi.FuncFlagSPWrite
                                if ctxt.Debugvlog || !ctxt.IsAsm {
                                        ctxt.Logf("auto-SPWRITE: %s %v\n", c.cursym.Name, p)
                                        if !ctxt.IsAsm {
@@ -988,6 +1077,24 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                }
                        }
                }
+               if p.From.Type == obj.TYPE_SHIFT && (p.To.Reg == REG_RSP || p.Reg == REG_RSP) {
+                       offset := p.From.Offset
+                       op := offset & (3 << 22)
+                       if op != SHIFT_LL {
+                               ctxt.Diag("illegal combination: %v", p)
+                       }
+                       r := (offset >> 16) & 31
+                       shift := (offset >> 10) & 63
+                       if shift > 4 {
+                               // the shift amount is out of range, in order to avoid repeated error
+                               // reportings, don't call ctxt.Diag, because asmout case 27 has the
+                               // same check.
+                               shift = 7
+                       }
+                       p.From.Type = obj.TYPE_REG
+                       p.From.Reg = int16(REG_LSL + r + (shift&7)<<5)
+                       p.From.Offset = 0
+               }
        }
 }