Cypherpunks.ru repositories - gostls13.git/blobdiff - src/cmd/internal/obj/arm64/obj7.go
cmd/internal/obj/arm64: fix frame pointer restore in epilogue
[gostls13.git] / src / cmd / internal / obj / arm64 / obj7.go
index 71fc523fe030f68bf0c0bb414ba33189f610547a..0ab5939b8429106b4f74dc1c038187f813bd96ed 100644 (file)
@@ -1,5 +1,5 @@
 // cmd/7l/noop.c, cmd/7l/obj.c, cmd/ld/pass.c from Vita Nuova.
-// https://code.google.com/p/ken-cc/source/browse/
+// https://bitbucket.org/plan9-from-bell-labs/9-cc/src/master/
 //
 //     Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
 //     Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
@@ -33,23 +33,124 @@ package arm64
 import (
        "cmd/internal/obj"
        "cmd/internal/objabi"
+       "cmd/internal/src"
        "cmd/internal/sys"
+       "internal/abi"
+       "internal/buildcfg"
+       "log"
        "math"
 )
 
-var complements = []obj.As{
-       AADD:  ASUB,
-       AADDW: ASUBW,
-       ASUB:  AADD,
-       ASUBW: AADDW,
-       ACMP:  ACMN,
-       ACMPW: ACMNW,
-       ACMN:  ACMP,
-       ACMNW: ACMPW,
+// zrReplace is the set of instructions for which $0 in the From operand
+// should be replaced with REGZERO.
+var zrReplace = map[obj.As]bool{
+       AMOVD:  true,
+       AMOVW:  true,
+       AMOVWU: true,
+       AMOVH:  true,
+       AMOVHU: true,
+       AMOVB:  true,
+       AMOVBU: true,
+       ASBC:   true,
+       ASBCW:  true,
+       ASBCS:  true,
+       ASBCSW: true,
+       AADC:   true,
+       AADCW:  true,
+       AADCS:  true,
+       AADCSW: true,
+       AFMOVD: true,
+       AFMOVS: true,
+       AMSR:   true,
 }
 
 func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
-       // MOV  g_stackguard(g), R1
+       if c.ctxt.Flag_maymorestack != "" {
+               p = c.cursym.Func().SpillRegisterArgs(p, c.newprog)
+
+               // Save LR and make room for FP, REGCTXT. Leave room
+               // for caller's saved FP.
+               const frameSize = 32
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGLINK
+               p.To.Type = obj.TYPE_MEM
+               p.Scond = C_XPRE
+               p.To.Offset = -frameSize
+               p.To.Reg = REGSP
+               p.Spadj = frameSize
+
+               // Save FP.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGFP
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = REGSP
+               p.To.Offset = -8
+
+               p = obj.Appendp(p, c.newprog)
+               p.As = ASUB
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = 8
+               p.Reg = REGSP
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGFP
+
+               // Save REGCTXT (for simplicity we do this whether or
+               // not we need it.)
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGCTXT
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = REGSP
+               p.To.Offset = 8
+
+               // BL maymorestack
+               p = obj.Appendp(p, c.newprog)
+               p.As = ABL
+               p.To.Type = obj.TYPE_BRANCH
+               // See ../x86/obj6.go
+               p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI())
+
+               // Restore REGCTXT.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = REGSP
+               p.From.Offset = 8
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGCTXT
+
+               // Restore FP.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = REGSP
+               p.From.Offset = -8
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGFP
+
+               // Restore LR and SP.
+               p = obj.Appendp(p, c.newprog)
+               p.As = AMOVD
+               p.From.Type = obj.TYPE_MEM
+               p.Scond = C_XPOST
+               p.From.Offset = frameSize
+               p.From.Reg = REGSP
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGLINK
+               p.Spadj = -frameSize
+
+               p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog)
+       }
+
+       // Jump back to here after morestack returns.
+       startPred := p
+
+       // MOV  g_stackguard(g), RT1
        p = obj.Appendp(p, c.newprog)
 
        p.As = AMOVD
@@ -60,95 +161,73 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
                p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1
        }
        p.To.Type = obj.TYPE_REG
-       p.To.Reg = REG_R1
+       p.To.Reg = REGRT1
+
+       // Mark the stack bound check and morestack call async nonpreemptible.
+       // If we get preempted here, when resumed the preemption request is
+       // cleared, but we'll still call morestack, which will double the stack
+       // unnecessarily. See issue #35470.
+       p = c.ctxt.StartUnsafePoint(p, c.newprog)
 
        q := (*obj.Prog)(nil)
-       if framesize <= objabi.StackSmall {
+       if framesize <= abi.StackSmall {
                // small stack: SP < stackguard
-               //      MOV     SP, R2
-               //      CMP     stackguard, R2
-               p = obj.Appendp(p, c.newprog)
-
-               p.As = AMOVD
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = REGSP
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
+               //      CMP     stackguard, SP
 
                p = obj.Appendp(p, c.newprog)
                p.As = ACMP
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R1
-               p.Reg = REG_R2
-       } else if framesize <= objabi.StackBig {
+               p.From.Reg = REGRT1
+               p.Reg = REGSP
+       } else if framesize <= abi.StackBig {
                // large stack: SP-framesize < stackguard-StackSmall
-               //      SUB     $(framesize-StackSmall), SP, R2
-               //      CMP     stackguard, R2
+               //      SUB     $(framesize-StackSmall), SP, RT2
+               //      CMP     stackguard, RT2
                p = obj.Appendp(p, c.newprog)
 
                p.As = ASUB
                p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(framesize) - objabi.StackSmall
+               p.From.Offset = int64(framesize) - abi.StackSmall
                p.Reg = REGSP
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
+               p.To.Reg = REGRT2
 
                p = obj.Appendp(p, c.newprog)
                p.As = ACMP
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R1
-               p.Reg = REG_R2
+               p.From.Reg = REGRT1
+               p.Reg = REGRT2
        } else {
-               // Such a large stack we need to protect against wraparound
-               // if SP is close to zero.
-               //      SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall)
-               // The +StackGuard on both sides is required to keep the left side positive:
-               // SP is allowed to be slightly below stackguard. See stack.h.
-               //      CMP     $StackPreempt, R1
-               //      BEQ     label_of_call_to_morestack
-               //      ADD     $StackGuard, SP, R2
-               //      SUB     R1, R2
-               //      MOV     $(framesize+(StackGuard-StackSmall)), R3
-               //      CMP     R3, R2
-               p = obj.Appendp(p, c.newprog)
-
-               p.As = ACMP
-               p.From.Type = obj.TYPE_CONST
-               p.From.Offset = objabi.StackPreempt
-               p.Reg = REG_R1
-
-               p = obj.Appendp(p, c.newprog)
-               q = p
-               p.As = ABEQ
-               p.To.Type = obj.TYPE_BRANCH
+               // Such a large stack we need to protect against underflow.
+               // The runtime guarantees SP > abi.StackBig, but
+               // framesize is large enough that SP-framesize may
+               // underflow, causing a direct comparison with the
+               // stack guard to incorrectly succeed. We explicitly
+               // guard against underflow.
+               //
+               //      SUBS    $(framesize-StackSmall), SP, RT2
+               //      // On underflow, jump to morestack
+               //      BLO     label_of_call_to_morestack
+               //      CMP     stackguard, RT2
 
                p = obj.Appendp(p, c.newprog)
-               p.As = AADD
+               p.As = ASUBS
                p.From.Type = obj.TYPE_CONST
-               p.From.Offset = objabi.StackGuard
+               p.From.Offset = int64(framesize) - abi.StackSmall
                p.Reg = REGSP
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
+               p.To.Reg = REGRT2
 
                p = obj.Appendp(p, c.newprog)
-               p.As = ASUB
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R1
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R2
-
-               p = obj.Appendp(p, c.newprog)
-               p.As = AMOVD
-               p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(framesize) + (objabi.StackGuard - objabi.StackSmall)
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R3
+               q = p
+               p.As = ABLO
+               p.To.Type = obj.TYPE_BRANCH
 
                p = obj.Appendp(p, c.newprog)
                p.As = ACMP
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R3
-               p.Reg = REG_R2
+               p.From.Reg = REGRT1
+               p.Reg = REGRT2
        }
 
        // BLS  do-morestack
@@ -156,8 +235,10 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
        bls.As = ABLS
        bls.To.Type = obj.TYPE_BRANCH
 
+       end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1)
+
        var last *obj.Prog
-       for last = c.cursym.Func.Text; last.Link != nil; last = last.Link {
+       for last = c.cursym.Func().Text; last.Link != nil; last = last.Link {
        }
 
        // Now we are at the end of the function, but logically
@@ -167,25 +248,23 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
        spfix.As = obj.ANOP
        spfix.Spadj = -framesize
 
-       pcdata := obj.Appendp(spfix, c.newprog)
-       pcdata.Pos = c.cursym.Func.Text.Pos
-       pcdata.As = obj.APCDATA
-       pcdata.From.Type = obj.TYPE_CONST
-       pcdata.From.Offset = objabi.PCDATA_StackMapIndex
-       pcdata.To.Type = obj.TYPE_CONST
-       pcdata.To.Offset = -1 // pcdata starts at -1 at function entry
+       pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog)
+       pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog)
+
+       if q != nil {
+               q.To.SetTarget(pcdata)
+       }
+       bls.To.SetTarget(pcdata)
+
+       spill := c.cursym.Func().SpillRegisterArgs(pcdata, c.newprog)
 
        // MOV  LR, R3
-       movlr := obj.Appendp(pcdata, c.newprog)
+       movlr := obj.Appendp(spill, c.newprog)
        movlr.As = AMOVD
        movlr.From.Type = obj.TYPE_REG
        movlr.From.Reg = REGLINK
        movlr.To.Type = obj.TYPE_REG
        movlr.To.Reg = REG_R3
-       if q != nil {
-               q.Pcond = movlr
-       }
-       bls.Pcond = movlr
 
        debug := movlr
        if false {
@@ -205,23 +284,23 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
        switch {
        case c.cursym.CFunc():
                morestack = "runtime.morestackc"
-       case !c.cursym.Func.Text.From.Sym.NeedCtxt():
+       case !c.cursym.Func().Text.From.Sym.NeedCtxt():
                morestack = "runtime.morestack_noctxt"
        }
        call.To.Sym = c.ctxt.Lookup(morestack)
 
+       // The instructions which unspill regs should be preemptible.
+       pcdata = c.ctxt.EndUnsafePoint(call, c.newprog, -1)
+       unspill := c.cursym.Func().UnspillRegisterArgs(pcdata, c.newprog)
+
        // B    start
-       jmp := obj.Appendp(call, c.newprog)
+       jmp := obj.Appendp(unspill, c.newprog)
        jmp.As = AB
        jmp.To.Type = obj.TYPE_BRANCH
-       jmp.Pcond = c.cursym.Func.Text.Link
+       jmp.To.SetTarget(startPred.Link)
        jmp.Spadj = +framesize
 
-       // placeholder for bls's jump target
-       // p = obj.Appendp(ctxt, p)
-       // p.As = obj.ANOP
-
-       return bls
+       return end
 }
 
 func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
@@ -230,18 +309,13 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
        p.From.Class = 0
        p.To.Class = 0
 
-       // $0 results in C_ZCON, which matches both C_REG and various
-       // C_xCON, however the C_REG cases in asmout don't expect a
-       // constant, so they will use the register fields and assemble
-       // a R0. To prevent that, rewrite $0 as ZR.
-       if p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 {
+       // $0 in the From operand is no longer rewritten to ZR in general.
+       // To stay compatible with previously legal instruction forms, the
+       // rewrite is kept only for the instructions listed in zrReplace.
+       if p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 && zrReplace[p.As] {
                p.From.Type = obj.TYPE_REG
                p.From.Reg = REGZERO
        }
-       if p.To.Type == obj.TYPE_CONST && p.To.Offset == 0 {
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REGZERO
-       }
 
        // Rewrite BR/BL to symbol as TYPE_BRANCH.
        switch p.As {
@@ -256,11 +330,40 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
                break
        }
 
-       // Rewrite float constants to values stored in memory.
+       // Rewrite float and vector constants to values stored in memory.
        switch p.As {
+       case AVMOVS:
+               if p.From.Type == obj.TYPE_CONST {
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Sym = c.ctxt.Int32Sym(p.From.Offset)
+                       p.From.Name = obj.NAME_EXTERN
+                       p.From.Offset = 0
+               }
+
+       case AVMOVD:
+               if p.From.Type == obj.TYPE_CONST {
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Sym = c.ctxt.Int64Sym(p.From.Offset)
+                       p.From.Name = obj.NAME_EXTERN
+                       p.From.Offset = 0
+               }
+
+       case AVMOVQ:
+               if p.From.Type == obj.TYPE_CONST {
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Sym = c.ctxt.Int128Sym(p.GetFrom3().Offset, p.From.Offset)
+                       p.From.Name = obj.NAME_EXTERN
+                       p.From.Offset = 0
+                       p.RestArgs = nil
+               }
+
        case AFMOVS:
                if p.From.Type == obj.TYPE_FCONST {
-                       f32 := float32(p.From.Val.(float64))
+                       f64 := p.From.Val.(float64)
+                       f32 := float32(f64)
+                       if c.chipfloat7(f64) > 0 {
+                               break
+                       }
                        if math.Float32bits(f32) == 0 {
                                p.From.Type = obj.TYPE_REG
                                p.From.Reg = REGZERO
@@ -275,6 +378,9 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
        case AFMOVD:
                if p.From.Type == obj.TYPE_FCONST {
                        f64 := p.From.Val.(float64)
+                       if c.chipfloat7(f64) > 0 {
+                               break
+                       }
                        if math.Float64bits(f64) == 0 {
                                p.From.Type = obj.TYPE_REG
                                p.From.Reg = REGZERO
@@ -285,37 +391,6 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
                        p.From.Name = obj.NAME_EXTERN
                        p.From.Offset = 0
                }
-
-               break
-       }
-
-       // Rewrite negative immediates as positive immediates with
-       // complementary instruction.
-       switch p.As {
-       case AADD, ASUB, ACMP, ACMN:
-               if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && p.From.Offset != -1<<63 {
-                       p.From.Offset = -p.From.Offset
-                       p.As = complements[p.As]
-               }
-       case AADDW, ASUBW, ACMPW, ACMNW:
-               if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && int32(p.From.Offset) != -1<<31 {
-                       p.From.Offset = -p.From.Offset
-                       p.As = complements[p.As]
-               }
-       }
-
-       // For 32-bit logical instruction with constant,
-       // rewrite the high 32-bit to be a repetition of
-       // the low 32-bit, so that the BITCON test can be
-       // shared for both 32-bit and 64-bit. 32-bit ops
-       // will zero the high 32-bit of the destination
-       // register anyway.
-       switch p.As {
-       case AANDW, AORRW, AEORW, AANDSW:
-               if p.From.Type == obj.TYPE_CONST {
-                       v := p.From.Offset & 0xffffffff
-                       p.From.Offset = v | v<<32
-               }
        }
 
        if c.ctxt.Flag_dynlink {
@@ -333,9 +408,9 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) {
                //     CALL REGTMP
                var sym *obj.LSym
                if p.As == obj.ADUFFZERO {
-                       sym = c.ctxt.Lookup("runtime.duffzero")
+                       sym = c.ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
                } else {
-                       sym = c.ctxt.Lookup("runtime.duffcopy")
+                       sym = c.ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
                }
                offset := p.To.Offset
                p.As = AMOVD
@@ -435,13 +510,13 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) {
 }
 
 func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
-       if cursym.Func.Text == nil || cursym.Func.Text.Link == nil {
+       if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
                return
        }
 
        c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym}
 
-       p := c.cursym.Func.Text
+       p := c.cursym.Func().Text
        textstksiz := p.To.Offset
        if textstksiz == -8 {
                // Historical way to mark NOFRAME.
@@ -457,82 +532,32 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                }
        }
 
-       c.cursym.Func.Args = p.To.Val.(int32)
-       c.cursym.Func.Locals = int32(textstksiz)
+       c.cursym.Func().Args = p.To.Val.(int32)
+       c.cursym.Func().Locals = int32(textstksiz)
 
        /*
         * find leaf subroutines
-        * strip NOPs
-        * expand RET
         */
-       q := (*obj.Prog)(nil)
-       var q1 *obj.Prog
-       for p := c.cursym.Func.Text; p != nil; p = p.Link {
+       for p := c.cursym.Func().Text; p != nil; p = p.Link {
                switch p.As {
                case obj.ATEXT:
                        p.Mark |= LEAF
 
-               case obj.ARET:
-                       break
-
-               case obj.ANOP:
-                       q1 = p.Link
-                       q.Link = q1 /* q is non-nop */
-                       q1.Mark |= p.Mark
-                       continue
-
                case ABL,
                        obj.ADUFFZERO,
                        obj.ADUFFCOPY:
-                       c.cursym.Func.Text.Mark &^= LEAF
-                       fallthrough
-
-               case ACBNZ,
-                       ACBZ,
-                       ACBNZW,
-                       ACBZW,
-                       ATBZ,
-                       ATBNZ,
-                       AB,
-                       ABEQ,
-                       ABNE,
-                       ABCS,
-                       ABHS,
-                       ABCC,
-                       ABLO,
-                       ABMI,
-                       ABPL,
-                       ABVS,
-                       ABVC,
-                       ABHI,
-                       ABLS,
-                       ABGE,
-                       ABLT,
-                       ABGT,
-                       ABLE,
-                       AADR, /* strange */
-                       AADRP:
-                       q1 = p.Pcond
-
-                       if q1 != nil {
-                               for q1.As == obj.ANOP {
-                                       q1 = q1.Link
-                                       p.Pcond = q1
-                               }
-                       }
-
-                       break
+                       c.cursym.Func().Text.Mark &^= LEAF
                }
-
-               q = p
        }
 
+       var q *obj.Prog
+       var q1 *obj.Prog
        var retjmp *obj.LSym
-       for p := c.cursym.Func.Text; p != nil; p = p.Link {
+       for p := c.cursym.Func().Text; p != nil; p = p.Link {
                o := p.As
                switch o {
                case obj.ATEXT:
-                       c.cursym.Func.Text = p
+                       c.cursym.Func().Text = p
                        c.autosize = int32(textstksiz)
 
                        if p.Mark&LEAF != 0 && c.autosize == 0 {
@@ -546,83 +571,134 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                c.autosize += 8
                        }
 
-                       if c.autosize != 0 && c.autosize&(16-1) != 0 {
-                               // The frame includes an LR.
-                               // If the frame size is 8, it's only an LR,
-                               // so there's no potential for breaking references to
-                               // local variables by growing the frame size,
-                               // because there are no local variables.
-                               // But otherwise, if there is a non-empty locals section,
-                               // the author of the code is responsible for making sure
-                               // that the frame size is 8 mod 16.
-                               if c.autosize == 8 {
-                                       c.autosize += 8
-                                       c.cursym.Func.Locals += 8
+                       if c.autosize != 0 {
+                               extrasize := int32(0)
+                               if c.autosize%16 == 8 {
+                                       // Allocate extra 8 bytes on the frame top to save FP
+                                       extrasize = 8
+                               } else if c.autosize&(16-1) == 0 {
+                                       // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16
+                                       extrasize = 16
                                } else {
-                                       c.ctxt.Diag("%v: unaligned frame size %d - must be 8 mod 16 (or 0)", p, c.autosize-8)
+                                       c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8)
                                }
+                               c.autosize += extrasize
+                               c.cursym.Func().Locals += extrasize
+
+                               // low 32 bits for autosize
+                               // high 32 bits for extrasize
+                               p.To.Offset = int64(c.autosize) | int64(extrasize)<<32
+                       } else {
+                               // NOFRAME
+                               p.To.Offset = 0
                        }
-                       if c.autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 {
+
+                       if c.autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 {
                                if c.ctxt.Debugvlog {
-                                       c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func.Text.From.Sym.Name)
+                                       c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func().Text.From.Sym.Name)
                                }
-                               c.cursym.Func.Text.Mark |= LEAF
+                               c.cursym.Func().Text.Mark |= LEAF
                        }
 
-                       // FP offsets need an updated p.To.Offset.
-                       p.To.Offset = int64(c.autosize) - 8
-
-                       if cursym.Func.Text.Mark&LEAF != 0 {
+                       if cursym.Func().Text.Mark&LEAF != 0 {
                                cursym.Set(obj.AttrLeaf, true)
                                if p.From.Sym.NoFrame() {
                                        break
                                }
                        }
 
+                       if p.Mark&LEAF != 0 && c.autosize < abi.StackSmall {
+                               // A leaf function with a small stack can be marked
+                               // NOSPLIT, avoiding a stack check.
+                               p.From.Sym.Set(obj.AttrNoSplit, true)
+                       }
+
                        if !p.From.Sym.NoSplit() {
                                p = c.stacksplit(p, c.autosize) // emit split check
                        }
 
+                       var prologueEnd *obj.Prog
+
                        aoffset := c.autosize
-                       if aoffset > 0xF0 {
-                               aoffset = 0xF0
+                       if aoffset > 0xf0 {
+                               // The MOVD.W offset variant range is -0x100 to 0xf8 and SP must stay
+                               // 16-byte aligned, so the maximum aoffset value is 0xf0.
+                               aoffset = 0xf0
                        }
 
                        // Frame is non-empty. Make sure to save link register, even if
                        // it is a leaf function, so that traceback works.
                        q = p
                        if c.autosize > aoffset {
-                               // Frame size is too large for a MOVD.W instruction.
-                               // Store link register before decrementing SP, so if a signal comes
-                               // during the execution of the function prologue, the traceback
-                               // code will not see a half-updated stack frame.
-                               q = obj.Appendp(q, c.newprog)
-                               q.Pos = p.Pos
-                               q.As = ASUB
-                               q.From.Type = obj.TYPE_CONST
-                               q.From.Offset = int64(c.autosize)
-                               q.Reg = REGSP
-                               q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REGTMP
-
-                               q = obj.Appendp(q, c.newprog)
-                               q.Pos = p.Pos
-                               q.As = AMOVD
-                               q.From.Type = obj.TYPE_REG
-                               q.From.Reg = REGLINK
-                               q.To.Type = obj.TYPE_MEM
-                               q.To.Reg = REGTMP
+                               // Frame size is too large for a MOVD.W instruction. Store the frame pointer
+                               // register and link register before decrementing SP, so if a signal comes
+                               // during the execution of the function prologue, the traceback code will
+                               // not see a half-updated stack frame.
 
+                               // SUB $autosize, RSP, R20
                                q1 = obj.Appendp(q, c.newprog)
                                q1.Pos = p.Pos
+                               q1.As = ASUB
+                               q1.From.Type = obj.TYPE_CONST
+                               q1.From.Offset = int64(c.autosize)
+                               q1.Reg = REGSP
+                               q1.To.Type = obj.TYPE_REG
+                               q1.To.Reg = REG_R20
+
+                               prologueEnd = q1
+
+                               // STP (R29, R30), -8(R20)
+                               q1 = obj.Appendp(q1, c.newprog)
+                               q1.Pos = p.Pos
+                               q1.As = ASTP
+                               q1.From.Type = obj.TYPE_REGREG
+                               q1.From.Reg = REGFP
+                               q1.From.Offset = REGLINK
+                               q1.To.Type = obj.TYPE_MEM
+                               q1.To.Reg = REG_R20
+                               q1.To.Offset = -8
+
+                               // This is not async preemptible, as if we open a frame
+                               // at the current SP, it will clobber the saved LR.
+                               q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
+
+                               // MOVD R20, RSP
+                               q1 = obj.Appendp(q1, c.newprog)
+                               q1.Pos = p.Pos
                                q1.As = AMOVD
                                q1.From.Type = obj.TYPE_REG
-                               q1.From.Reg = REGTMP
+                               q1.From.Reg = REG_R20
                                q1.To.Type = obj.TYPE_REG
                                q1.To.Reg = REGSP
                                q1.Spadj = c.autosize
+
+                               q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
+
+                               if buildcfg.GOOS == "ios" {
+                                       // iOS does not support SA_ONSTACK. We will run the signal handler
+                                       // on the G stack. If we write below SP, it may be clobbered by
+                                       // the signal handler. So we save FP and LR after decrementing SP.
+                                       // STP (R29, R30), -8(RSP)
+                                       q1 = obj.Appendp(q1, c.newprog)
+                                       q1.Pos = p.Pos
+                                       q1.As = ASTP
+                                       q1.From.Type = obj.TYPE_REGREG
+                                       q1.From.Reg = REGFP
+                                       q1.From.Offset = REGLINK
+                                       q1.To.Type = obj.TYPE_MEM
+                                       q1.To.Reg = REGSP
+                                       q1.To.Offset = -8
+                               }
                        } else {
-                               // small frame, update SP and save LR in a single MOVD.W instruction
+                               // small frame, update SP and save LR in a single MOVD.W instruction.
+                               // So if a signal comes during the execution of the function prologue,
+                               // the traceback code will not see a half-updated stack frame.
+                               // Also, on Linux, in a cgo binary we may get a SIGSETXID signal
+                               // early on before the signal stack is set, as glibc doesn't allow
+                               // us to block SIGSETXID. So it is important that we don't write below
+                               // the SP until the signal stack is set.
+                               // Luckily, all the functions from thread entry to setting the signal
+                               // stack have small frames.
                                q1 = obj.Appendp(q, c.newprog)
                                q1.As = AMOVD
                                q1.Pos = p.Pos
@@ -633,43 +709,66 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                q1.To.Offset = int64(-aoffset)
                                q1.To.Reg = REGSP
                                q1.Spadj = aoffset
+
+                               prologueEnd = q1
+
+                               // Frame pointer.
+                               q1 = obj.Appendp(q1, c.newprog)
+                               q1.Pos = p.Pos
+                               q1.As = AMOVD
+                               q1.From.Type = obj.TYPE_REG
+                               q1.From.Reg = REGFP
+                               q1.To.Type = obj.TYPE_MEM
+                               q1.To.Reg = REGSP
+                               q1.To.Offset = -8
                        }
 
-                       if c.cursym.Func.Text.From.Sym.Wrapper() {
+                       prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
+
+                       q1 = obj.Appendp(q1, c.newprog)
+                       q1.Pos = p.Pos
+                       q1.As = ASUB
+                       q1.From.Type = obj.TYPE_CONST
+                       q1.From.Offset = 8
+                       q1.Reg = REGSP
+                       q1.To.Type = obj.TYPE_REG
+                       q1.To.Reg = REGFP
+
+                       if c.cursym.Func().Text.From.Sym.Wrapper() {
                                // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
                                //
-                               //      MOV  g_panic(g), R1
+                               //      MOV  g_panic(g), RT1
                                //      CBNZ checkargp
                                // end:
                                //      NOP
                                // ... function body ...
                                // checkargp:
-                               //      MOV  panic_argp(R1), R2
-                               //      ADD  $(autosize+8), RSP, R3
-                               //      CMP  R2, R3
+                               //      MOV  panic_argp(RT1), RT2
+                               //      ADD  $(autosize+8), RSP, R20
+                               //      CMP  RT2, R20
                                //      BNE  end
-                               //      ADD  $8, RSP, R4
-                               //      MOVD R4, panic_argp(R1)
+                               //      ADD  $8, RSP, R20
+                               //      MOVD R20, panic_argp(RT1)
                                //      B    end
                                //
                                // The NOP is needed to give the jumps somewhere to land.
                                // It is a liblink NOP, not an ARM64 NOP: it encodes to 0 instruction bytes.
                                q = q1
 
-                               // MOV g_panic(g), R1
+                               // MOV g_panic(g), RT1
                                q = obj.Appendp(q, c.newprog)
                                q.As = AMOVD
                                q.From.Type = obj.TYPE_MEM
                                q.From.Reg = REGG
                                q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic
                                q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REG_R1
+                               q.To.Reg = REGRT1
 
-                               // CBNZ R1, checkargp
+                               // CBNZ RT1, checkargp
                                cbnz := obj.Appendp(q, c.newprog)
                                cbnz.As = ACBNZ
                                cbnz.From.Type = obj.TYPE_REG
-                               cbnz.From.Reg = REG_R1
+                               cbnz.From.Reg = REGRT1
                                cbnz.To.Type = obj.TYPE_BRANCH
 
                                // Empty branch target at the top of the function body
@@ -681,63 +780,63 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                for last = end; last.Link != nil; last = last.Link {
                                }
 
-                               // MOV panic_argp(R1), R2
+                               // MOV panic_argp(RT1), RT2
                                mov := obj.Appendp(last, c.newprog)
                                mov.As = AMOVD
                                mov.From.Type = obj.TYPE_MEM
-                               mov.From.Reg = REG_R1
+                               mov.From.Reg = REGRT1
                                mov.From.Offset = 0 // Panic.argp
                                mov.To.Type = obj.TYPE_REG
-                               mov.To.Reg = REG_R2
+                               mov.To.Reg = REGRT2
 
                                // CBNZ branches to the MOV above
-                               cbnz.Pcond = mov
+                               cbnz.To.SetTarget(mov)
 
-                               // ADD $(autosize+8), SP, R3
+                               // ADD $(autosize+8), SP, R20
                                q = obj.Appendp(mov, c.newprog)
                                q.As = AADD
                                q.From.Type = obj.TYPE_CONST
                                q.From.Offset = int64(c.autosize) + 8
                                q.Reg = REGSP
                                q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REG_R3
+                               q.To.Reg = REG_R20
 
-                               // CMP R2, R3
+                               // CMP RT2, R20
                                q = obj.Appendp(q, c.newprog)
                                q.As = ACMP
                                q.From.Type = obj.TYPE_REG
-                               q.From.Reg = REG_R2
-                               q.Reg = REG_R3
+                               q.From.Reg = REGRT2
+                               q.Reg = REG_R20
 
                                // BNE end
                                q = obj.Appendp(q, c.newprog)
                                q.As = ABNE
                                q.To.Type = obj.TYPE_BRANCH
-                               q.Pcond = end
+                               q.To.SetTarget(end)
 
-                               // ADD $8, SP, R4
+                               // ADD $8, SP, R20
                                q = obj.Appendp(q, c.newprog)
                                q.As = AADD
                                q.From.Type = obj.TYPE_CONST
                                q.From.Offset = 8
                                q.Reg = REGSP
                                q.To.Type = obj.TYPE_REG
-                               q.To.Reg = REG_R4
+                               q.To.Reg = REG_R20
 
-                               // MOV R4, panic_argp(R1)
+                               // MOV R20, panic_argp(RT1)
                                q = obj.Appendp(q, c.newprog)
                                q.As = AMOVD
                                q.From.Type = obj.TYPE_REG
-                               q.From.Reg = REG_R4
+                               q.From.Reg = REG_R20
                                q.To.Type = obj.TYPE_MEM
-                               q.To.Reg = REG_R1
+                               q.To.Reg = REGRT1
                                q.To.Offset = 0 // Panic.argp
 
                                // B end
                                q = obj.Appendp(q, c.newprog)
                                q.As = AB
                                q.To.Type = obj.TYPE_BRANCH
-                               q.Pcond = end
+                               q.To.SetTarget(end)
                        }
 
                case obj.ARET:
@@ -749,8 +848,20 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 
                        retjmp = p.To.Sym
                        p.To = obj.Addr{}
-                       if c.cursym.Func.Text.Mark&LEAF != 0 {
+                       if c.cursym.Func().Text.Mark&LEAF != 0 {
                                if c.autosize != 0 {
+                                       // Restore frame pointer.
+                                       // ADD $framesize-8, RSP, R29
+                                       p.As = AADD
+                                       p.From.Type = obj.TYPE_CONST
+                                       p.From.Offset = int64(c.autosize) - 8
+                                       p.Reg = REGSP
+                                       p.To.Type = obj.TYPE_REG
+                                       p.To.Reg = REGFP
+
+                                       // Pop stack frame.
+                                       // ADD $framesize, RSP, RSP
+                                       p = obj.Appendp(p, c.newprog)
                                        p.As = AADD
                                        p.From.Type = obj.TYPE_CONST
                                        p.From.Offset = int64(c.autosize)
@@ -759,33 +870,50 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                        p.Spadj = -c.autosize
                                }
                        } else {
-                               /* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/
                                aoffset := c.autosize
-
-                               if aoffset > 0xF0 {
-                                       aoffset = 0xF0
-                               }
-                               p.As = AMOVD
+                               // LDP -8(RSP), (R29, R30)
+                               p.As = ALDP
                                p.From.Type = obj.TYPE_MEM
-                               p.Scond = C_XPOST
-                               p.From.Offset = int64(aoffset)
+                               p.From.Offset = -8
                                p.From.Reg = REGSP
-                               p.To.Type = obj.TYPE_REG
-                               p.To.Reg = REGLINK
-                               p.Spadj = -aoffset
-                               if c.autosize > aoffset {
+                               p.To.Type = obj.TYPE_REGREG
+                               p.To.Reg = REGFP
+                               p.To.Offset = REGLINK
+
+                               // ADD $aoffset, RSP, RSP
+                               q = newprog()
+                               q.As = AADD
+                               q.From.Type = obj.TYPE_CONST
+                               q.From.Offset = int64(aoffset)
+                               q.To.Type = obj.TYPE_REG
+                               q.To.Reg = REGSP
+                               q.Spadj = -aoffset
+                               q.Pos = p.Pos
+                               q.Link = p.Link
+                               p.Link = q
+                               p = q
+                       }
+
+                       // If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',
+                       // so that if you are debugging a low-level crash where PC and LR are zero,
+                       // you can look at R27 to see what jumped to the zero.
+                       // This is useful when bringing up Go on a new system.
+                       // (There is similar code in ../ppc64/obj9.go:/if.false.)
+                       const debugRETZERO = false
+                       if debugRETZERO {
+                               if p.As != obj.ARET {
                                        q = newprog()
-                                       q.As = AADD
-                                       q.From.Type = obj.TYPE_CONST
-                                       q.From.Offset = int64(c.autosize) - int64(aoffset)
-                                       q.To.Type = obj.TYPE_REG
-                                       q.To.Reg = REGSP
-                                       q.Link = p.Link
-                                       q.Spadj = int32(-q.From.Offset)
                                        q.Pos = p.Pos
+                                       q.Link = p.Link
                                        p.Link = q
                                        p = q
                                }
+                               p.As = AADR
+                               p.From.Type = obj.TYPE_BRANCH
+                               p.From.Offset = 0
+                               p.To.Type = obj.TYPE_REG
+                               p.To.Reg = REGTMP
+
                        }
 
                        if p.As != obj.ARET {
@@ -818,7 +946,154 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                        p.Spadj = int32(+p.From.Offset)
                                }
                        }
-                       break
+
+               case obj.AGETCALLERPC:
+                       if cursym.Leaf() {
+                               /* MOVD LR, Rd */
+                               p.As = AMOVD
+                               p.From.Type = obj.TYPE_REG
+                               p.From.Reg = REGLINK
+                       } else {
+                               /* MOVD (RSP), Rd */
+                               p.As = AMOVD
+                               p.From.Type = obj.TYPE_MEM
+                               p.From.Reg = REGSP
+                       }
+
+               case obj.ADUFFCOPY:
+                       //  ADR ret_addr, R27
+                       //  STP (FP, R27), -24(SP)
+                       //  SUB 24, SP, FP
+                       //  DUFFCOPY
+                       // ret_addr:
+                       //  SUB 8, SP, FP
+
+                       q1 := p
+                       // copy DUFFCOPY from q1 to q4
+                       q4 := obj.Appendp(p, c.newprog)
+                       q4.Pos = p.Pos
+                       q4.As = obj.ADUFFCOPY
+                       q4.To = p.To
+
+                       q1.As = AADR
+                       q1.From.Type = obj.TYPE_BRANCH
+                       q1.To.Type = obj.TYPE_REG
+                       q1.To.Reg = REG_R27
+
+                       q2 := obj.Appendp(q1, c.newprog)
+                       q2.Pos = p.Pos
+                       q2.As = ASTP
+                       q2.From.Type = obj.TYPE_REGREG
+                       q2.From.Reg = REGFP
+                       q2.From.Offset = int64(REG_R27)
+                       q2.To.Type = obj.TYPE_MEM
+                       q2.To.Reg = REGSP
+                       q2.To.Offset = -24
+
+                       // maintain FP for DUFFCOPY
+                       q3 := obj.Appendp(q2, c.newprog)
+                       q3.Pos = p.Pos
+                       q3.As = ASUB
+                       q3.From.Type = obj.TYPE_CONST
+                       q3.From.Offset = 24
+                       q3.Reg = REGSP
+                       q3.To.Type = obj.TYPE_REG
+                       q3.To.Reg = REGFP
+
+                       q5 := obj.Appendp(q4, c.newprog)
+                       q5.Pos = p.Pos
+                       q5.As = ASUB
+                       q5.From.Type = obj.TYPE_CONST
+                       q5.From.Offset = 8
+                       q5.Reg = REGSP
+                       q5.To.Type = obj.TYPE_REG
+                       q5.To.Reg = REGFP
+                       q1.From.SetTarget(q5)
+                       p = q5
+
+               case obj.ADUFFZERO:
+                       //  ADR ret_addr, R27
+                       //  STP (FP, R27), -24(SP)
+                       //  SUB 24, SP, FP
+                       //  DUFFZERO
+                       // ret_addr:
+                       //  SUB 8, SP, FP
+
+                       q1 := p
+                       // copy DUFFZERO from q1 to q4
+                       q4 := obj.Appendp(p, c.newprog)
+                       q4.Pos = p.Pos
+                       q4.As = obj.ADUFFZERO
+                       q4.To = p.To
+
+                       q1.As = AADR
+                       q1.From.Type = obj.TYPE_BRANCH
+                       q1.To.Type = obj.TYPE_REG
+                       q1.To.Reg = REG_R27
+
+                       q2 := obj.Appendp(q1, c.newprog)
+                       q2.Pos = p.Pos
+                       q2.As = ASTP
+                       q2.From.Type = obj.TYPE_REGREG
+                       q2.From.Reg = REGFP
+                       q2.From.Offset = int64(REG_R27)
+                       q2.To.Type = obj.TYPE_MEM
+                       q2.To.Reg = REGSP
+                       q2.To.Offset = -24
+
+                       // maintain FP for DUFFZERO
+                       q3 := obj.Appendp(q2, c.newprog)
+                       q3.Pos = p.Pos
+                       q3.As = ASUB
+                       q3.From.Type = obj.TYPE_CONST
+                       q3.From.Offset = 24
+                       q3.Reg = REGSP
+                       q3.To.Type = obj.TYPE_REG
+                       q3.To.Reg = REGFP
+
+                       q5 := obj.Appendp(q4, c.newprog)
+                       q5.Pos = p.Pos
+                       q5.As = ASUB
+                       q5.From.Type = obj.TYPE_CONST
+                       q5.From.Offset = 8
+                       q5.Reg = REGSP
+                       q5.To.Type = obj.TYPE_REG
+                       q5.To.Reg = REGFP
+                       q1.From.SetTarget(q5)
+                       p = q5
+               }
+
+               if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
+                       f := c.cursym.Func()
+                       if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
+                               c.cursym.Func().FuncFlag |= abi.FuncFlagSPWrite
+                               if ctxt.Debugvlog || !ctxt.IsAsm {
+                                       ctxt.Logf("auto-SPWRITE: %s %v\n", c.cursym.Name, p)
+                                       if !ctxt.IsAsm {
+                                               ctxt.Diag("invalid auto-SPWRITE in non-assembly")
+                                               ctxt.DiagFlush()
+                                               log.Fatalf("bad SPWRITE")
+                                       }
+                               }
+                       }
+               }
+               if p.From.Type == obj.TYPE_SHIFT && (p.To.Reg == REG_RSP || p.Reg == REG_RSP) {
+                       offset := p.From.Offset
+                       op := offset & (3 << 22)
+                       if op != SHIFT_LL {
+                               ctxt.Diag("illegal combination: %v", p)
+                       }
+                       r := (offset >> 16) & 31
+                       shift := (offset >> 10) & 63
+                       if shift > 4 {
+                               // The shift amount is out of range. Don't call ctxt.Diag here:
+                               // asmout case 27 performs the same check, and reporting it in
+                               // both places would produce a duplicate error.
+                               shift = 7
+                       }
+                       p.From.Type = obj.TYPE_REG
+                       p.From.Reg = int16(REG_LSL + r + (shift&7)<<5)
+                       p.From.Offset = 0
                }
        }
 }
@@ -838,10 +1113,11 @@ var unaryDst = map[obj.As]bool{
 }
 
 var Linkarm64 = obj.LinkArch{
-       Arch:       sys.ArchARM64,
-       Init:       buildop,
-       Preprocess: preprocess,
-       Assemble:   span7,
-       Progedit:   progedit,
-       UnaryDst:   unaryDst,
+       Arch:           sys.ArchARM64,
+       Init:           buildop,
+       Preprocess:     preprocess,
+       Assemble:       span7,
+       Progedit:       progedit,
+       UnaryDst:       unaryDst,
+       DWARFRegisters: ARM64DWARFRegisters,
 }