Cypherpunks.ru repositories - gostls13.git/blobdiff - src/cmd/internal/obj/arm64/obj7.go
cmd/internal/obj/arm64: fix frame pointer restore in epilogue
[gostls13.git] / src / cmd / internal / obj / arm64 / obj7.go
index 0baf51973ade229dbd5ae975a25233304bcd9e59..0ab5939b8429106b4f74dc1c038187f813bd96ed 100644 (file)
@@ -1,5 +1,5 @@
 // cmd/7l/noop.c, cmd/7l/obj.c, cmd/ld/pass.c from Vita Nuova.
-// https://code.google.com/p/ken-cc/source/browse/
+// https://bitbucket.org/plan9-from-bell-labs/9-cc/src/master/
 //
 //     Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
 //     Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
@@ -35,22 +35,122 @@ import (
        "cmd/internal/objabi"
        "cmd/internal/src"
        "cmd/internal/sys"
+       "internal/abi"
+       "internal/buildcfg"
+       "log"
        "math"
 )
 
-var complements = []obj.As{
-       AADD:  ASUB,
-       AADDW: ASUBW,
-       ASUB:  AADD,
-       ASUBW: AADDW,
-       ACMP:  ACMN,
-       ACMPW: ACMNW,
-       ACMN:  ACMP,
-       ACMNW: ACMPW,
+// zrReplace is the set of instructions for which $0 in the From operand
+// should be replaced with REGZERO.
+var zrReplace = map[obj.As]bool{
+       AMOVD:  true,
+       AMOVW:  true,
+       AMOVWU: true,
+       AMOVH:  true,
+       AMOVHU: true,
+       AMOVB:  true,
+       AMOVBU: true,
+       ASBC:   true,
+       ASBCW:  true,
+       ASBCS:  true,
+       ASBCSW: true,
+       AADC:   true,
+       AADCW:  true,
+       AADCS:  true,
+       AADCSW: true,
+       AFMOVD: true,
+       AFMOVS: true,
+       AMSR:   true,
 }
 
 func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
-       // MOV  g_stackguard(g), R1
+       if c.ctxt.Flag_maymorestack != "" {
+               p = c.cursym.Func().SpillRegisterArgs(p, c.newprog)
+
+               // Save LR and make room for FP, REGCTXT. Leave room
+               // for caller's saved FP.
+               const frameSize = 32
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGLINK
+               p.To.Type = obj.TYPE_MEM
+               p.Scond = C_XPRE
+               p.To.Offset = -frameSize
+               p.To.Reg = REGSP
+               p.Spadj = frameSize
+
+               // Save FP.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGFP
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = REGSP
+               p.To.Offset = -8
+
+               p = obj.Appendp(p, c.newprog)
+               p.As = ASUB
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = 8
+               p.Reg = REGSP
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGFP
+
+               // Save REGCTXT (for simplicity we do this whether or
+               // not we need it.)
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGCTXT
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = REGSP
+               p.To.Offset = 8
+
+               // BL maymorestack
+               p = obj.Appendp(p, c.newprog)
+               p.As = ABL
+               p.To.Type = obj.TYPE_BRANCH
+               // See ../x86/obj6.go
+               p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI())
+
+               // Restore REGCTXT.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = REGSP
+               p.From.Offset = 8
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGCTXT
+
+               // Restore FP.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = REGSP
+               p.From.Offset = -8
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGFP
+
+               // Restore LR and SP.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_MEM
+               p.Scond = C_XPOST
+               p.From.Offset = frameSize
+               p.From.Reg = REGSP
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGLINK
+               p.Spadj = -frameSize
+
+               p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog)
+       }
+
+       // Jump back to here after morestack returns.
+       startPred := p
+
+       // MOV  g_stackguard(g), RT1
        p = obj.Appendp(p, c.newprog)
 
        p.As = AMOVD
@@ -61,7 +161,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
                p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1
        }
        p.To.Type = obj.TYPE_REG
-       p.To.Reg = REG_R1
+       p.To.Reg = REGRT1
 
        // Mark the stack bound check and morestack call async nonpreemptible.
        // If we get preempted here, when resumed the preemption request is
@@ -70,92 +170,64 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
        p = c.ctxt.StartUnsafePoint(p, c.newprog)
 
        q := (*obj.Prog)(nil)
-       if framesize <= objabi.StackSmall {
+       if framesize <= abi.StackSmall {
                // small stack: SP < stackguard
-               //      MOV     SP, R2
-               //      CMP     stackguard, R2
-               p = obj.Appendp(p, c.newprog)
-
-               p.As = AMOVD
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = REGSP
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
+               //      CMP     stackguard, SP
 
                p = obj.Appendp(p, c.newprog)
                p.As = ACMP
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R1
-               p.Reg = REG_R2
-       } else if framesize <= objabi.StackBig {
+               p.From.Reg = REGRT1
+               p.Reg = REGSP
+       } else if framesize <= abi.StackBig {
                // large stack: SP-framesize < stackguard-StackSmall
-               //      SUB     $(framesize-StackSmall), SP, R2
-               //      CMP     stackguard, R2
+               //      SUB     $(framesize-StackSmall), SP, RT2
+               //      CMP     stackguard, RT2
                p = obj.Appendp(p, c.newprog)
 
                p.As = ASUB
                p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(framesize) - objabi.StackSmall
+               p.From.Offset = int64(framesize) - abi.StackSmall
                p.Reg = REGSP
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
+               p.To.Reg = REGRT2
 
                p = obj.Appendp(p, c.newprog)
                p.As = ACMP
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R1
-               p.Reg = REG_R2
+               p.From.Reg = REGRT1
+               p.Reg = REGRT2
        } else {
-               // Such a large stack we need to protect against wraparound
-               // if SP is close to zero.
-               //      SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall)
-               // The +StackGuard on both sides is required to keep the left side positive:
-               // SP is allowed to be slightly below stackguard. See stack.h.
-               //      CMP     $StackPreempt, R1
-               //      BEQ     label_of_call_to_morestack
-               //      ADD     $StackGuard, SP, R2
-               //      SUB     R1, R2
-               //      MOV     $(framesize+(StackGuard-StackSmall)), R3
-               //      CMP     R3, R2
-               p = obj.Appendp(p, c.newprog)
-
-               p.As = ACMP
-               p.From.Type = obj.TYPE_CONST
-               p.From.Offset = objabi.StackPreempt
-               p.Reg = REG_R1
+               // Such a large stack we need to protect against underflow.
+               // The runtime guarantees SP > abi.StackBig, but
+               // framesize is large enough that SP-framesize may
+               // underflow, causing a direct comparison with the
+               // stack guard to incorrectly succeed. We explicitly
+               // guard against underflow.
+               //
+               //      SUBS    $(framesize-StackSmall), SP, RT2
+               //      // On underflow, jump to morestack
+               //      BLO     label_of_call_to_morestack
+               //      CMP     stackguard, RT2
 
                p = obj.Appendp(p, c.newprog)
-               q = p
-               p.As = ABEQ
-               p.To.Type = obj.TYPE_BRANCH
-
-               p = obj.Appendp(p, c.newprog)
-               p.As = AADD
+               p.As = ASUBS
                p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(objabi.StackGuard)
+               p.From.Offset = int64(framesize) - abi.StackSmall
                p.Reg = REGSP
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
-
-               p = obj.Appendp(p, c.newprog)
-               p.As = ASUB
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R1
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
+               p.To.Reg = REGRT2
 
                p = obj.Appendp(p, c.newprog)
-               p.As = AMOVD
-               p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(framesize) + (int64(objabi.StackGuard) - objabi.StackSmall)
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R3
+               q = p
+               p.As = ABLO
+               p.To.Type = obj.TYPE_BRANCH
 
                p = obj.Appendp(p, c.newprog)
                p.As = ACMP
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R3
-               p.Reg = REG_R2
+               p.From.Reg = REGRT1
+               p.Reg = REGRT2
        }
 
        // BLS  do-morestack
@@ -179,17 +251,20 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
        pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog)
        pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog)
 
+       if q != nil {
+               q.To.SetTarget(pcdata)
+       }
+       bls.To.SetTarget(pcdata)
+
+       spill := c.cursym.Func().SpillRegisterArgs(pcdata, c.newprog)
+
        // MOV  LR, R3
-       movlr := obj.Appendp(pcdata, c.newprog)
+       movlr := obj.Appendp(spill, c.newprog)
        movlr.As = AMOVD
        movlr.From.Type = obj.TYPE_REG
        movlr.From.Reg = REGLINK
        movlr.To.Type = obj.TYPE_REG
        movlr.To.Reg = REG_R3
-       if q != nil {
-               q.To.SetTarget(movlr)
-       }
-       bls.To.SetTarget(movlr)
 
        debug := movlr
        if false {
@@ -214,13 +289,15 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
        }
        call.To.Sym = c.ctxt.Lookup(morestack)
 
+       // The instructions which unspill regs should be preemptible.
        pcdata = c.ctxt.EndUnsafePoint(call, c.newprog, -1)
+       unspill := c.cursym.Func().UnspillRegisterArgs(pcdata, c.newprog)
 
        // B    start
-       jmp := obj.Appendp(pcdata, c.newprog)
+       jmp := obj.Appendp(unspill, c.newprog)
        jmp.As = AB
        jmp.To.Type = obj.TYPE_BRANCH
-       jmp.To.SetTarget(c.cursym.Func().Text.Link)
+       jmp.To.SetTarget(startPred.Link)
        jmp.Spadj = +framesize
 
        return end
@@ -232,18 +309,13 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
        p.From.Class = 0
        p.To.Class = 0
 
-       // $0 results in C_ZCON, which matches both C_REG and various
-       // C_xCON, however the C_REG cases in asmout don't expect a
-       // constant, so they will use the register fields and assemble
-       // a R0. To prevent that, rewrite $0 as ZR.
-       if p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 {
+       // Previously every $0 operand was rewritten to ZR; that blanket rewrite
+       // has been removed. To stay compatible with instruction forms that were
+       // legal before, keep the rewrite only for the instructions in zrReplace.
+       if p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 && zrReplace[p.As] {
                p.From.Type = obj.TYPE_REG
                p.From.Reg = REGZERO
        }
-       if p.To.Type == obj.TYPE_CONST && p.To.Offset == 0 {
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REGZERO
-       }
 
        // Rewrite BR/BL to symbol as TYPE_BRANCH.
        switch p.As {
@@ -258,8 +330,33 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
                break
        }
 
-       // Rewrite float constants to values stored in memory.
+       // Rewrite float and vector constants to values stored in memory.
        switch p.As {
+       case AVMOVS:
+               if p.From.Type == obj.TYPE_CONST {
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Sym = c.ctxt.Int32Sym(p.From.Offset)
+                       p.From.Name = obj.NAME_EXTERN
+                       p.From.Offset = 0
+               }
+
+       case AVMOVD:
+               if p.From.Type == obj.TYPE_CONST {
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Sym = c.ctxt.Int64Sym(p.From.Offset)
+                       p.From.Name = obj.NAME_EXTERN
+                       p.From.Offset = 0
+               }
+
+       case AVMOVQ:
+               if p.From.Type == obj.TYPE_CONST {
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Sym = c.ctxt.Int128Sym(p.GetFrom3().Offset, p.From.Offset)
+                       p.From.Name = obj.NAME_EXTERN
+                       p.From.Offset = 0
+                       p.RestArgs = nil
+               }
+
        case AFMOVS:
                if p.From.Type == obj.TYPE_FCONST {
                        f64 := p.From.Val.(float64)
@@ -294,34 +391,6 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
                        p.From.Name = obj.NAME_EXTERN
                        p.From.Offset = 0
                }
-
-               break
-       }
-
-       // Rewrite negative immediates as positive immediates with
-       // complementary instruction.
-       switch p.As {
-       case AADD, ASUB, ACMP, ACMN:
-               if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && p.From.Offset != -1<<63 {
-                       p.From.Offset = -p.From.Offset
-                       p.As = complements[p.As]
-               }
-       case AADDW, ASUBW, ACMPW, ACMNW:
-               if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && int32(p.From.Offset) != -1<<31 {
-                       p.From.Offset = -p.From.Offset
-                       p.As = complements[p.As]
-               }
-       }
-
-       // For 32-bit logical instruction with constant,
-       // rewrite the high 32-bit to be a repetition of
-       // the low 32-bit, so that the BITCON test can be
-       // shared for both 32-bit and 64-bit. 32-bit ops
-       // will zero the high 32-bit of the destination
-       // register anyway.
-       if isANDWop(p.As) && p.From.Type == obj.TYPE_CONST {
-               v := p.From.Offset & 0xffffffff
-               p.From.Offset = v | v<<32
        }
 
        if c.ctxt.Flag_dynlink {
@@ -339,9 +408,9 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) {
                //     CALL REGTMP
                var sym *obj.LSym
                if p.As == obj.ADUFFZERO {
-                       sym = c.ctxt.Lookup("runtime.duffzero")
+                       sym = c.ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
                } else {
-                       sym = c.ctxt.Lookup("runtime.duffcopy")
+                       sym = c.ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
                }
                offset := p.To.Offset
                p.As = AMOVD
@@ -538,6 +607,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                }
                        }
 
+                       if p.Mark&LEAF != 0 && c.autosize < abi.StackSmall {
+                               // A leaf function with a small stack can be marked
+                               // NOSPLIT, avoiding a stack check.
+                               p.From.Sym.Set(obj.AttrNoSplit, true)
+                       }
+
                        if !p.From.Sym.NoSplit() {
                                p = c.stacksplit(p, c.autosize) // emit split check
                        }
@@ -545,66 +620,85 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        var prologueEnd *obj.Prog
 
                        aoffset := c.autosize
-                       if aoffset > 0xF0 {
-                               aoffset = 0xF0
+                       if aoffset > 0xf0 {
+                               // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned.
+                               // so the maximum aoffset value is 0xf0.
+                               aoffset = 0xf0
                        }
 
                        // Frame is non-empty. Make sure to save link register, even if
                        // it is a leaf function, so that traceback works.
                        q = p
                        if c.autosize > aoffset {
-                               // Frame size is too large for a MOVD.W instruction.
-                               // Store link register before decrementing SP, so if a signal comes
-                               // during the execution of the function prologue, the traceback
-                               // code will not see a half-updated stack frame.
-                               // This sequence is not async preemptible, as if we open a frame
-                               // at the current SP, it will clobber the saved LR.
-                               q = c.ctxt.StartUnsafePoint(q, c.newprog)
+                               // Frame size is too large for a MOVD.W instruction. Store the frame pointer
+                               // register and link register before decrementing SP, so if a signal comes
+                               // during the execution of the function prologue, the traceback code will
+                               // not see a half-updated stack frame.
 
-                               q = obj.Appendp(q, c.newprog)
-                               q.Pos = p.Pos
-                               q.As = ASUB
-                               q.From.Type = obj.TYPE_CONST
-                               q.From.Offset = int64(c.autosize)
-                               q.Reg = REGSP
-                               q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REGTMP
+                               // SUB $autosize, RSP, R20
+                               q1 = obj.Appendp(q, c.newprog)
+                               q1.Pos = p.Pos
+                               q1.As = ASUB
+                               q1.From.Type = obj.TYPE_CONST
+                               q1.From.Offset = int64(c.autosize)
+                               q1.Reg = REGSP
+                               q1.To.Type = obj.TYPE_REG
+                               q1.To.Reg = REG_R20
 
-                               prologueEnd = q
+                               prologueEnd = q1
 
-                               q = obj.Appendp(q, c.newprog)
-                               q.Pos = p.Pos
-                               q.As = AMOVD
-                               q.From.Type = obj.TYPE_REG
-                               q.From.Reg = REGLINK
-                               q.To.Type = obj.TYPE_MEM
-                               q.To.Reg = REGTMP
+                               // STP (R29, R30), -8(R20)
+                               q1 = obj.Appendp(q1, c.newprog)
+                               q1.Pos = p.Pos
+                               q1.As = ASTP
+                               q1.From.Type = obj.TYPE_REGREG
+                               q1.From.Reg = REGFP
+                               q1.From.Offset = REGLINK
+                               q1.To.Type = obj.TYPE_MEM
+                               q1.To.Reg = REG_R20
+                               q1.To.Offset = -8
 
-                               q1 = obj.Appendp(q, c.newprog)
+                               // This is not async preemptible, as if we open a frame
+                               // at the current SP, it will clobber the saved LR.
+                               q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
+
+                               // MOVD R20, RSP
+                               q1 = obj.Appendp(q1, c.newprog)
                                q1.Pos = p.Pos
                                q1.As = AMOVD
                                q1.From.Type = obj.TYPE_REG
-                               q1.From.Reg = REGTMP
+                               q1.From.Reg = REG_R20
                                q1.To.Type = obj.TYPE_REG
                                q1.To.Reg = REGSP
                                q1.Spadj = c.autosize
 
-                               if objabi.GOOS == "ios" {
+                               q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
+
+                               if buildcfg.GOOS == "ios" {
                                        // iOS does not support SA_ONSTACK. We will run the signal handler
                                        // on the G stack. If we write below SP, it may be clobbered by
-                                       // the signal handler. So we save LR after decrementing SP.
+                                       // the signal handler. So we save FP and LR after decrementing SP.
+                                       // STP (R29, R30), -8(RSP)
                                        q1 = obj.Appendp(q1, c.newprog)
                                        q1.Pos = p.Pos
-                                       q1.As = AMOVD
-                                       q1.From.Type = obj.TYPE_REG
-                                       q1.From.Reg = REGLINK
+                                       q1.As = ASTP
+                                       q1.From.Type = obj.TYPE_REGREG
+                                       q1.From.Reg = REGFP
+                                       q1.From.Offset = REGLINK
                                        q1.To.Type = obj.TYPE_MEM
                                        q1.To.Reg = REGSP
+                                       q1.To.Offset = -8
                                }
-
-                               q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
                        } else {
-                               // small frame, update SP and save LR in a single MOVD.W instruction
+                               // small frame, update SP and save LR in a single MOVD.W instruction.
+                               // So if a signal comes during the execution of the function prologue,
+                               // the traceback code will not see a half-updated stack frame.
+                               // Also, on Linux, in a cgo binary we may get a SIGSETXID signal
+                               // early on before the signal stack is set, as glibc doesn't allow
+                               // us to block SIGSETXID. So it is important that we don't write below
+                               // the SP until the signal stack is set.
+                               // Luckily, all the functions from thread entry to setting the signal
+                               // stack have small frames.
                                q1 = obj.Appendp(q, c.newprog)
                                q1.As = AMOVD
                                q1.Pos = p.Pos
@@ -617,11 +711,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                q1.Spadj = aoffset
 
                                prologueEnd = q1
-                       }
 
-                       prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
-
-                       if objabi.Framepointer_enabled {
+                               // Frame pointer.
                                q1 = obj.Appendp(q1, c.newprog)
                                q1.Pos = p.Pos
                                q1.As = AMOVD
@@ -630,52 +721,54 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                q1.To.Type = obj.TYPE_MEM
                                q1.To.Reg = REGSP
                                q1.To.Offset = -8
-
-                               q1 = obj.Appendp(q1, c.newprog)
-                               q1.Pos = p.Pos
-                               q1.As = ASUB
-                               q1.From.Type = obj.TYPE_CONST
-                               q1.From.Offset = 8
-                               q1.Reg = REGSP
-                               q1.To.Type = obj.TYPE_REG
-                               q1.To.Reg = REGFP
                        }
 
+                       prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
+
+                       q1 = obj.Appendp(q1, c.newprog)
+                       q1.Pos = p.Pos
+                       q1.As = ASUB
+                       q1.From.Type = obj.TYPE_CONST
+                       q1.From.Offset = 8
+                       q1.Reg = REGSP
+                       q1.To.Type = obj.TYPE_REG
+                       q1.To.Reg = REGFP
+
                        if c.cursym.Func().Text.From.Sym.Wrapper() {
                                // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
                                //
-                               //      MOV  g_panic(g), R1
+                               //      MOV  g_panic(g), RT1
                                //      CBNZ checkargp
                                // end:
                                //      NOP
                                // ... function body ...
                                // checkargp:
-                               //      MOV  panic_argp(R1), R2
-                               //      ADD  $(autosize+8), RSP, R3
-                               //      CMP  R2, R3
+                               //      MOV  panic_argp(RT1), RT2
+                               //      ADD  $(autosize+8), RSP, R20
+                               //      CMP  RT2, R20
                                //      BNE  end
-                               //      ADD  $8, RSP, R4
-                               //      MOVD R4, panic_argp(R1)
+                               //      ADD  $8, RSP, R20
+                               //      MOVD R20, panic_argp(RT1)
                                //      B    end
                                //
                                // The NOP is needed to give the jumps somewhere to land.
                                // It is a liblink NOP, not an ARM64 NOP: it encodes to 0 instruction bytes.
                                q = q1
 
-                               // MOV g_panic(g), R1
+                               // MOV g_panic(g), RT1
                                q = obj.Appendp(q, c.newprog)
                                q.As = AMOVD
                                q.From.Type = obj.TYPE_MEM
                                q.From.Reg = REGG
                                q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic
                                q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REG_R1
+                               q.To.Reg = REGRT1
 
-                               // CBNZ R1, checkargp
+                               // CBNZ RT1, checkargp
                                cbnz := obj.Appendp(q, c.newprog)
                                cbnz.As = ACBNZ
                                cbnz.From.Type = obj.TYPE_REG
-                               cbnz.From.Reg = REG_R1
+                               cbnz.From.Reg = REGRT1
                                cbnz.To.Type = obj.TYPE_BRANCH
 
                                // Empty branch target at the top of the function body
@@ -687,33 +780,33 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                for last = end; last.Link != nil; last = last.Link {
                                }
 
-                               // MOV panic_argp(R1), R2
+                               // MOV panic_argp(RT1), RT2
                                mov := obj.Appendp(last, c.newprog)
                                mov.As = AMOVD
                                mov.From.Type = obj.TYPE_MEM
-                               mov.From.Reg = REG_R1
+                               mov.From.Reg = REGRT1
                                mov.From.Offset = 0 // Panic.argp
                                mov.To.Type = obj.TYPE_REG
-                               mov.To.Reg = REG_R2
+                               mov.To.Reg = REGRT2
 
                                // CBNZ branches to the MOV above
                                cbnz.To.SetTarget(mov)
 
-                               // ADD $(autosize+8), SP, R3
+                               // ADD $(autosize+8), SP, R20
                                q = obj.Appendp(mov, c.newprog)
                                q.As = AADD
                                q.From.Type = obj.TYPE_CONST
                                q.From.Offset = int64(c.autosize) + 8
                                q.Reg = REGSP
                                q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REG_R3
+                               q.To.Reg = REG_R20
 
-                               // CMP R2, R3
+                               // CMP RT2, R20
                                q = obj.Appendp(q, c.newprog)
                                q.As = ACMP
                                q.From.Type = obj.TYPE_REG
-                               q.From.Reg = REG_R2
-                               q.Reg = REG_R3
+                               q.From.Reg = REGRT2
+                               q.Reg = REG_R20
 
                                // BNE end
                                q = obj.Appendp(q, c.newprog)
@@ -721,22 +814,22 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                q.To.Type = obj.TYPE_BRANCH
                                q.To.SetTarget(end)
 
-                               // ADD $8, SP, R4
+                               // ADD $8, SP, R20
                                q = obj.Appendp(q, c.newprog)
                                q.As = AADD
                                q.From.Type = obj.TYPE_CONST
                                q.From.Offset = 8
                                q.Reg = REGSP
                                q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REG_R4
+                               q.To.Reg = REG_R20
 
-                               // MOV R4, panic_argp(R1)
+                               // MOV R20, panic_argp(RT1)
                                q = obj.Appendp(q, c.newprog)
                                q.As = AMOVD
                                q.From.Type = obj.TYPE_REG
-                               q.From.Reg = REG_R4
+                               q.From.Reg = REG_R20
                                q.To.Type = obj.TYPE_MEM
-                               q.To.Reg = REG_R1
+                               q.To.Reg = REGRT1
                                q.To.Offset = 0 // Panic.argp
 
                                // B end
@@ -757,67 +850,70 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        p.To = obj.Addr{}
                        if c.cursym.Func().Text.Mark&LEAF != 0 {
                                if c.autosize != 0 {
+                                       // Restore frame pointer.
+                                       // ADD $framesize-8, RSP, R29
+                                       p.As = AADD
+                                       p.From.Type = obj.TYPE_CONST
+                                       p.From.Offset = int64(c.autosize) - 8
+                                       p.Reg = REGSP
+                                       p.To.Type = obj.TYPE_REG
+                                       p.To.Reg = REGFP
+
+                                       // Pop stack frame.
+                                       // ADD $framesize, RSP, RSP
+                                       p = obj.Appendp(p, c.newprog)
                                        p.As = AADD
                                        p.From.Type = obj.TYPE_CONST
                                        p.From.Offset = int64(c.autosize)
                                        p.To.Type = obj.TYPE_REG
                                        p.To.Reg = REGSP
                                        p.Spadj = -c.autosize
-
-                                       if objabi.Framepointer_enabled {
-                                               p = obj.Appendp(p, c.newprog)
-                                               p.As = ASUB
-                                               p.From.Type = obj.TYPE_CONST
-                                               p.From.Offset = 8
-                                               p.Reg = REGSP
-                                               p.To.Type = obj.TYPE_REG
-                                               p.To.Reg = REGFP
-                                       }
                                }
                        } else {
-                               /* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/
-
-                               if objabi.Framepointer_enabled {
-                                       p.As = AMOVD
-                                       p.From.Type = obj.TYPE_MEM
-                                       p.From.Reg = REGSP
-                                       p.From.Offset = -8
-                                       p.To.Type = obj.TYPE_REG
-                                       p.To.Reg = REGFP
-                                       p = obj.Appendp(p, c.newprog)
-                               }
-
                                aoffset := c.autosize
+                               // LDP -8(RSP), (R29, R30)
+                               p.As = ALDP
+                               p.From.Type = obj.TYPE_MEM
+                               p.From.Offset = -8
+                               p.From.Reg = REGSP
+                               p.To.Type = obj.TYPE_REGREG
+                               p.To.Reg = REGFP
+                               p.To.Offset = REGLINK
 
-                               if aoffset <= 0xF0 {
-                                       p.As = AMOVD
-                                       p.From.Type = obj.TYPE_MEM
-                                       p.Scond = C_XPOST
-                                       p.From.Offset = int64(aoffset)
-                                       p.From.Reg = REGSP
-                                       p.To.Type = obj.TYPE_REG
-                                       p.To.Reg = REGLINK
-                                       p.Spadj = -aoffset
-                               } else {
-                                       p.As = AMOVD
-                                       p.From.Type = obj.TYPE_MEM
-                                       p.From.Offset = 0
-                                       p.From.Reg = REGSP
-                                       p.To.Type = obj.TYPE_REG
-                                       p.To.Reg = REGLINK
+                               // ADD $aoffset, RSP, RSP
+                               q = newprog()
+                               q.As = AADD
+                               q.From.Type = obj.TYPE_CONST
+                               q.From.Offset = int64(aoffset)
+                               q.To.Type = obj.TYPE_REG
+                               q.To.Reg = REGSP
+                               q.Spadj = -aoffset
+                               q.Pos = p.Pos
+                               q.Link = p.Link
+                               p.Link = q
+                               p = q
+                       }
 
+                       // If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',
+                       // so that if you are debugging a low-level crash where PC and LR are zero,
+                       // you can look at R27 to see what jumped to the zero.
+                       // This is useful when bringing up Go on a new system.
+                       // (There is similar code in ../ppc64/obj9.go:/if.false.)
+                       const debugRETZERO = false
+                       if debugRETZERO {
+                               if p.As != obj.ARET {
                                        q = newprog()
-                                       q.As = AADD
-                                       q.From.Type = obj.TYPE_CONST
-                                       q.From.Offset = int64(aoffset)
-                                       q.To.Type = obj.TYPE_REG
-                                       q.To.Reg = REGSP
-                                       q.Link = p.Link
-                                       q.Spadj = int32(-q.From.Offset)
                                        q.Pos = p.Pos
+                                       q.Link = p.Link
                                        p.Link = q
                                        p = q
                                }
+                               p.As = AADR
+                               p.From.Type = obj.TYPE_BRANCH
+                               p.From.Offset = 0
+                               p.To.Type = obj.TYPE_REG
+                               p.To.Reg = REGTMP
+
                        }
 
                        if p.As != obj.ARET {
@@ -865,110 +961,139 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        }
 
                case obj.ADUFFCOPY:
-                       if objabi.Framepointer_enabled {
-                               //  ADR ret_addr, R27
-                               //  STP (FP, R27), -24(SP)
-                               //  SUB 24, SP, FP
-                               //  DUFFCOPY
-                               // ret_addr:
-                               //  SUB 8, SP, FP
-
-                               q1 := p
-                               // copy DUFFCOPY from q1 to q4
-                               q4 := obj.Appendp(p, c.newprog)
-                               q4.Pos = p.Pos
-                               q4.As = obj.ADUFFCOPY
-                               q4.To = p.To
-
-                               q1.As = AADR
-                               q1.From.Type = obj.TYPE_BRANCH
-                               q1.To.Type = obj.TYPE_REG
-                               q1.To.Reg = REG_R27
-
-                               q2 := obj.Appendp(q1, c.newprog)
-                               q2.Pos = p.Pos
-                               q2.As = ASTP
-                               q2.From.Type = obj.TYPE_REGREG
-                               q2.From.Reg = REGFP
-                               q2.From.Offset = int64(REG_R27)
-                               q2.To.Type = obj.TYPE_MEM
-                               q2.To.Reg = REGSP
-                               q2.To.Offset = -24
-
-                               // maintaine FP for DUFFCOPY
-                               q3 := obj.Appendp(q2, c.newprog)
-                               q3.Pos = p.Pos
-                               q3.As = ASUB
-                               q3.From.Type = obj.TYPE_CONST
-                               q3.From.Offset = 24
-                               q3.Reg = REGSP
-                               q3.To.Type = obj.TYPE_REG
-                               q3.To.Reg = REGFP
-
-                               q5 := obj.Appendp(q4, c.newprog)
-                               q5.Pos = p.Pos
-                               q5.As = ASUB
-                               q5.From.Type = obj.TYPE_CONST
-                               q5.From.Offset = 8
-                               q5.Reg = REGSP
-                               q5.To.Type = obj.TYPE_REG
-                               q5.To.Reg = REGFP
-                               q1.From.SetTarget(q5)
-                               p = q5
-                       }
+                       //  ADR ret_addr, R27
+                       //  STP (FP, R27), -24(SP)
+                       //  SUB 24, SP, FP
+                       //  DUFFCOPY
+                       // ret_addr:
+                       //  SUB 8, SP, FP
+
+                       q1 := p
+                       // copy DUFFCOPY from q1 to q4
+                       q4 := obj.Appendp(p, c.newprog)
+                       q4.Pos = p.Pos
+                       q4.As = obj.ADUFFCOPY
+                       q4.To = p.To
+
+                       q1.As = AADR
+                       q1.From.Type = obj.TYPE_BRANCH
+                       q1.To.Type = obj.TYPE_REG
+                       q1.To.Reg = REG_R27
+
+                       q2 := obj.Appendp(q1, c.newprog)
+                       q2.Pos = p.Pos
+                       q2.As = ASTP
+                       q2.From.Type = obj.TYPE_REGREG
+                       q2.From.Reg = REGFP
+                       q2.From.Offset = int64(REG_R27)
+                       q2.To.Type = obj.TYPE_MEM
+                       q2.To.Reg = REGSP
+                       q2.To.Offset = -24
+
+                       // maintain FP for DUFFCOPY
+                       q3 := obj.Appendp(q2, c.newprog)
+                       q3.Pos = p.Pos
+                       q3.As = ASUB
+                       q3.From.Type = obj.TYPE_CONST
+                       q3.From.Offset = 24
+                       q3.Reg = REGSP
+                       q3.To.Type = obj.TYPE_REG
+                       q3.To.Reg = REGFP
+
+                       q5 := obj.Appendp(q4, c.newprog)
+                       q5.Pos = p.Pos
+                       q5.As = ASUB
+                       q5.From.Type = obj.TYPE_CONST
+                       q5.From.Offset = 8
+                       q5.Reg = REGSP
+                       q5.To.Type = obj.TYPE_REG
+                       q5.To.Reg = REGFP
+                       q1.From.SetTarget(q5)
+                       p = q5
 
                case obj.ADUFFZERO:
-                       if objabi.Framepointer_enabled {
-                               //  ADR ret_addr, R27
-                               //  STP (FP, R27), -24(SP)
-                               //  SUB 24, SP, FP
-                               //  DUFFZERO
-                               // ret_addr:
-                               //  SUB 8, SP, FP
-
-                               q1 := p
-                               // copy DUFFZERO from q1 to q4
-                               q4 := obj.Appendp(p, c.newprog)
-                               q4.Pos = p.Pos
-                               q4.As = obj.ADUFFZERO
-                               q4.To = p.To
-
-                               q1.As = AADR
-                               q1.From.Type = obj.TYPE_BRANCH
-                               q1.To.Type = obj.TYPE_REG
-                               q1.To.Reg = REG_R27
-
-                               q2 := obj.Appendp(q1, c.newprog)
-                               q2.Pos = p.Pos
-                               q2.As = ASTP
-                               q2.From.Type = obj.TYPE_REGREG
-                               q2.From.Reg = REGFP
-                               q2.From.Offset = int64(REG_R27)
-                               q2.To.Type = obj.TYPE_MEM
-                               q2.To.Reg = REGSP
-                               q2.To.Offset = -24
-
-                               // maintaine FP for DUFFZERO
-                               q3 := obj.Appendp(q2, c.newprog)
-                               q3.Pos = p.Pos
-                               q3.As = ASUB
-                               q3.From.Type = obj.TYPE_CONST
-                               q3.From.Offset = 24
-                               q3.Reg = REGSP
-                               q3.To.Type = obj.TYPE_REG
-                               q3.To.Reg = REGFP
-
-                               q5 := obj.Appendp(q4, c.newprog)
-                               q5.Pos = p.Pos
-                               q5.As = ASUB
-                               q5.From.Type = obj.TYPE_CONST
-                               q5.From.Offset = 8
-                               q5.Reg = REGSP
-                               q5.To.Type = obj.TYPE_REG
-                               q5.To.Reg = REGFP
-                               q1.From.SetTarget(q5)
-                               p = q5
+                       //  ADR ret_addr, R27
+                       //  STP (FP, R27), -24(SP)
+                       //  SUB 24, SP, FP
+                       //  DUFFZERO
+                       // ret_addr:
+                       //  SUB 8, SP, FP
+
+                       q1 := p
+                       // copy DUFFZERO from q1 to q4
+                       q4 := obj.Appendp(p, c.newprog)
+                       q4.Pos = p.Pos
+                       q4.As = obj.ADUFFZERO
+                       q4.To = p.To
+
+                       q1.As = AADR
+                       q1.From.Type = obj.TYPE_BRANCH
+                       q1.To.Type = obj.TYPE_REG
+                       q1.To.Reg = REG_R27
+
+                       q2 := obj.Appendp(q1, c.newprog)
+                       q2.Pos = p.Pos
+                       q2.As = ASTP
+                       q2.From.Type = obj.TYPE_REGREG
+                       q2.From.Reg = REGFP
+                       q2.From.Offset = int64(REG_R27)
+                       q2.To.Type = obj.TYPE_MEM
+                       q2.To.Reg = REGSP
+                       q2.To.Offset = -24
+
+                       // maintain FP for DUFFZERO
+                       q3 := obj.Appendp(q2, c.newprog)
+                       q3.Pos = p.Pos
+                       q3.As = ASUB
+                       q3.From.Type = obj.TYPE_CONST
+                       q3.From.Offset = 24
+                       q3.Reg = REGSP
+                       q3.To.Type = obj.TYPE_REG
+                       q3.To.Reg = REGFP
+
+                       q5 := obj.Appendp(q4, c.newprog)
+                       q5.Pos = p.Pos
+                       q5.As = ASUB
+                       q5.From.Type = obj.TYPE_CONST
+                       q5.From.Offset = 8
+                       q5.Reg = REGSP
+                       q5.To.Type = obj.TYPE_REG
+                       q5.To.Reg = REGFP
+                       q1.From.SetTarget(q5)
+                       p = q5
+               }
+
+               if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
+                       f := c.cursym.Func()
+                       if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
+                               c.cursym.Func().FuncFlag |= abi.FuncFlagSPWrite
+                               if ctxt.Debugvlog || !ctxt.IsAsm {
+                                       ctxt.Logf("auto-SPWRITE: %s %v\n", c.cursym.Name, p)
+                                       if !ctxt.IsAsm {
+                                               ctxt.Diag("invalid auto-SPWRITE in non-assembly")
+                                               ctxt.DiagFlush()
+                                               log.Fatalf("bad SPWRITE")
+                                       }
+                               }
+                       }
+               }
+               if p.From.Type == obj.TYPE_SHIFT && (p.To.Reg == REG_RSP || p.Reg == REG_RSP) {
+                       offset := p.From.Offset
+                       op := offset & (3 << 22)
+                       if op != SHIFT_LL {
+                               ctxt.Diag("illegal combination: %v", p)
                        }
+                       r := (offset >> 16) & 31
+                       shift := (offset >> 10) & 63
+                       if shift > 4 {
+                               // The shift amount is out of range. To avoid reporting the same
+                               // error twice, don't call ctxt.Diag here: asmout case 27 has the
+                               // same check.
+                               shift = 7
+                       }
+                       p.From.Type = obj.TYPE_REG
+                       p.From.Reg = int16(REG_LSL + r + (shift&7)<<5)
+                       p.From.Offset = 0
                }
        }
 }