1 // Inferno utils/6l/pass.c
2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 // Portions Copyright © 1997-1999 Vita Nuova Limited
7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 // Portions Copyright © 2004,2006 Bruce Ellis
9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 // Portions Copyright © 2009 The Go Authors. All rights reserved.
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// CanUse1InsnTLS reports whether references to the TLS base can use the
// 1-instruction form on this OS/arch/flag combination (see the long
// comment in progedit for the two code sequences).
// NOTE(review): interior lines are elided in this excerpt (the embedded
// original line numbers jump), so several branches and returns of this
// function are not visible here.
45 func CanUse1InsnTLS(ctxt *obj.Link) bool {
47 // Android uses a global variable for the tls offset.
51 if ctxt.Arch.Family == sys.I386 {
52 switch ctxt.Headtype {
62 switch ctxt.Headtype {
63 case objabi.Hplan9, objabi.Hwindows:
65 case objabi.Hlinux, objabi.Hfreebsd:
66 return !ctxt.Flag_shared
// progedit is the per-instruction rewrite pass. From the visible code it:
//   - normalizes TLS references between the 1- and 2-instruction forms
//     (and Android's runtime.tls_g indirection),
//   - rewrites CALL/JMP/RET to external/static symbols as TYPE_BRANCH,
//   - rewrites the 3rd argument of CMPPS-family instructions from 0 to $0,
//   - rewrites MOV $addr forms as LEA and materializes float constants
//     in memory (or as XORPS when the constant is ±0),
//   - dispatches to rewriteToUseGot (-dynlink) and rewriteToPcrel
//     (-shared on 386).
// NOTE(review): many interior lines are elided in this excerpt; the
// visible statements are not the complete function body.
72 func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
73 // Thread-local storage references use the TLS pseudo-register.
74 // As a register, TLS refers to the thread-local storage base, and it
75 // can only be loaded into another register:
79 // An offset from the thread-local storage base is written off(reg)(TLS*1).
80 // Semantically it is off(reg), but the (TLS*1) annotation marks this as
81 // indexing from the loaded TLS base. This emits a relocation so that
82 // if the linker needs to adjust the offset, it can. For example:
85 // MOVQ 0(AX)(TLS*1), CX // load g into CX
87 // On systems that support direct access to the TLS memory, this
88 // pair of instructions can be reduced to a direct TLS memory reference:
90 // MOVQ 0(TLS), CX // load g into CX
92 // The 2-instruction and 1-instruction forms correspond to the two code
93 // sequences for loading a TLS variable in the local exec model given in "ELF
94 // Handling For Thread-Local Storage".
96 // We apply this rewrite on systems that support the 1-instruction form.
97 // The decision is made using only the operating system and the -shared flag,
98 // not the link mode. If some link modes on a particular operating system
99 // require the 2-instruction form, then all builds for that operating system
100 // will use the 2-instruction form, so that the link mode decision can be
101 // delayed to link time.
103 // In this way, all supported systems use identical instructions to
104 // access TLS, and they are rewritten appropriately first here in
105 // liblink and then finally using relocations in the linker.
107 // When -shared is passed, we leave the code in the 2-instruction form but
108 // assemble (and relocate) them in different ways to generate the initial
109 // exec code sequence. It's a bit of a fluke that this is possible without
110 // rewriting the instructions more comprehensively, and it only does because
111 // we only support a single TLS variable (g).
113 if CanUse1InsnTLS(ctxt) {
114 // Reduce 2-instruction sequence to 1-instruction sequence.
117 // ... off(BX)(TLS*1) ...
122 // TODO(rsc): Remove the Hsolaris special case. It exists only to
123 // guarantee we are producing byte-identical binaries as before this code.
124 // But it should be unnecessary.
125 if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
128 if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
131 p.From.Index = REG_NONE
134 if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
137 p.To.Index = REG_NONE
140 // load_g, below, always inserts the 1-instruction sequence. Rewrite it
141 // as the 2-instruction sequence if necessary.
145 // MOVQ 0(BX)(TLS*1), BX
146 if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
147 q := obj.Appendp(p, newprog)
150 q.From.Type = obj.TYPE_MEM
151 q.From.Reg = p.To.Reg
152 q.From.Index = REG_TLS
153 q.From.Scale = 2 // TODO: use 1
155 p.From.Type = obj.TYPE_REG
157 p.From.Index = REG_NONE
162 // Android uses a tls offset determined at runtime. Rewrite
165 // MOVQ runtime.tls_g(SB), BX
166 if isAndroid && (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
167 p.From.Type = obj.TYPE_MEM
168 p.From.Name = obj.NAME_EXTERN
169 p.From.Reg = REG_NONE
170 p.From.Sym = ctxt.Lookup("runtime.tls_g")
171 p.From.Index = REG_NONE
175 if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
176 if p.From.Scale == 1 && p.From.Index == REG_TLS {
179 if p.To.Scale == 1 && p.To.Index == REG_TLS {
184 // Rewrite 0 to $0 in 3rd argument to CMPPS etc.
185 // That's what the tables expect.
187 case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
188 if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
189 p.To.Type = obj.TYPE_CONST
193 // Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
195 case obj.ACALL, obj.AJMP, obj.ARET:
196 if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
197 p.To.Type = obj.TYPE_BRANCH
201 // Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
202 if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
206 p.From.Type = obj.TYPE_MEM
209 p.From.Type = obj.TYPE_MEM
213 // Rewrite float constants to values stored in memory.
215 // Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
217 if p.From.Type == obj.TYPE_FCONST {
218 // f == 0 can't be used here due to -0, so use Float64bits
219 if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
220 if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
244 if p.From.Type == obj.TYPE_FCONST {
245 f32 := float32(p.From.Val.(float64))
246 p.From.Type = obj.TYPE_MEM
247 p.From.Name = obj.NAME_EXTERN
248 p.From.Sym = ctxt.Float32Sym(f32)
253 // Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
254 if p.From.Type == obj.TYPE_FCONST {
255 // f == 0 can't be used here due to -0, so use Float64bits
256 if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
257 if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
281 if p.From.Type == obj.TYPE_FCONST {
282 f64 := p.From.Val.(float64)
283 p.From.Type = obj.TYPE_MEM
284 p.From.Name = obj.NAME_EXTERN
285 p.From.Sym = ctxt.Float64Sym(f64)
290 if ctxt.Flag_dynlink {
291 rewriteToUseGot(ctxt, p, newprog)
294 if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
295 rewriteToPcrel(ctxt, p, newprog)
// rewriteToUseGot (below) handles -dynlink builds: global-data accesses are
// routed through the GOT, DUFFZERO/DUFFCOPY become an indirect load of the
// runtime symbol's GOT entry plus a LEA, and on 386 almost any CALL gets the
// GOT pointer loaded into BX first.
// NOTE(review): interior lines are elided in this excerpt (embedded original
// line numbers jump); the visible statements are not the complete body.
299 // Rewrite p, if necessary, to access global data via the global offset table.
300 func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
303 if ctxt.Arch.Family == sys.AMD64 {
311 if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
312 // Special case: clobber the destination register with
313 // the PC so we don't have to clobber CX.
314 // The SSA backend depends on CX not being clobbered across LEAL.
315 // See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
320 if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
323 // $MOV runtime.duffxxx@GOT, $reg
324 // $LEA $offset($reg), $reg
326 // (we use LEAx rather than ADDx because ADDx clobbers
327 // flags and duffzero on 386 does not otherwise do so).
329 if p.As == obj.ADUFFZERO {
330 sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
332 sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
334 offset := p.To.Offset
336 p.From.Type = obj.TYPE_MEM
337 p.From.Name = obj.NAME_GOTREF
339 p.To.Type = obj.TYPE_REG
343 p1 := obj.Appendp(p, newprog)
345 p1.From.Type = obj.TYPE_MEM
346 p1.From.Offset = offset
348 p1.To.Type = obj.TYPE_REG
350 p2 := obj.Appendp(p1, newprog)
352 p2.To.Type = obj.TYPE_REG
356 // We only care about global data: NAME_EXTERN means a global
357 // symbol in the Go sense, and p.Sym.Local is true for a few
358 // internally defined symbols.
359 if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
360 // $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
362 p.From.Type = obj.TYPE_ADDR
364 if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
365 // $MOV $sym, Rx becomes $MOV sym@GOT, Rx
366 // $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
367 // On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
371 if p.To.Type != obj.TYPE_REG || pAs != mov {
372 if ctxt.Arch.Family == sys.AMD64 {
373 ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
378 p.To.Type = obj.TYPE_REG
381 p.To.Name = obj.NAME_NONE
383 p.From.Type = obj.TYPE_MEM
384 p.From.Name = obj.NAME_GOTREF
386 if p.From.Offset != 0 {
387 q = obj.Appendp(p, newprog)
389 q.From.Type = obj.TYPE_MEM
390 q.From.Reg = p.To.Reg
391 q.From.Offset = p.From.Offset
396 q = obj.Appendp(q, newprog)
399 q.From.Type = obj.TYPE_REG
403 if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
404 ctxt.Diag("don't know how to handle %v with -dynlink", p)
407 // MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
408 // MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
409 // An addition may be inserted between the two MOVs if there is an offset.
410 if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
411 if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
412 ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
415 } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
420 if p.As == obj.ACALL {
421 // When dynlinking on 386, almost any call might end up being a call
422 // to a PLT, so make sure the GOT pointer is loaded into BX.
423 // RegTo2 is set on the replacement call insn to stop it being
424 // processed when it is in turn passed to progedit.
426 // We disable open-coded defers in buildssa() on 386 ONLY with shared
427 // libraries because of this extra code added before deferreturn calls.
428 if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
431 p1 := obj.Appendp(p, newprog)
432 p2 := obj.Appendp(p1, newprog)
435 p1.From.Type = obj.TYPE_MEM
436 p1.From.Name = obj.NAME_STATIC
437 p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
438 p1.To.Type = obj.TYPE_REG
444 if p.RestArgs != nil {
445 p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
449 // p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
450 // in ../pass.go complain, so set it back to TYPE_MEM here, until p2
451 // itself gets passed to progedit.
452 p2.To.Type = obj.TYPE_MEM
459 if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
462 if source.Type != obj.TYPE_MEM {
463 ctxt.Diag("don't know how to handle %v with -dynlink", p)
465 p1 := obj.Appendp(p, newprog)
466 p2 := obj.Appendp(p1, newprog)
469 p1.From.Type = obj.TYPE_MEM
470 p1.From.Sym = source.Sym
471 p1.From.Name = obj.NAME_GOTREF
472 p1.To.Type = obj.TYPE_REG
478 if p.From.Name == obj.NAME_EXTERN {
480 p2.From.Name = obj.NAME_NONE
482 } else if p.To.Name == obj.NAME_EXTERN {
484 p2.To.Name = obj.NAME_NONE
// rewriteToPcrel handles -shared on 386: any instruction whose operand names
// an EXTERN/STATIC/GOTREF symbol gets a CALL __x86.get_pc_thunk.XX inserted
// before it so the address can be formed PC-relatively.
// NOTE(review): interior lines are elided in this excerpt; the visible
// statements are not the complete function body.
492 func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
493 // RegTo2 is set on the instructions we insert here so they don't get
498 if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
501 // Any Prog (aside from the above special cases) with an Addr with Name ==
502 // NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
503 // inserted before it.
504 isName := func(a *obj.Addr) bool {
505 if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
508 if a.Sym.Type == objabi.STLSBSS {
511 return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
514 if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
515 // Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
516 // to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
518 if p.To.Type != obj.TYPE_REG {
519 q := obj.Appendp(p, newprog)
521 q.From.Type = obj.TYPE_REG
525 p.To.Type = obj.TYPE_REG
528 p.To.Name = obj.NAME_NONE
532 if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
535 var dst int16 = REG_CX
536 if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
538 // Why? See the comment near the top of rewriteToUseGot above.
539 // AMOVLs might be introduced by the GOT rewrites.
541 q := obj.Appendp(p, newprog)
543 r := obj.Appendp(q, newprog)
546 thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
547 q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
548 q.To.Type = obj.TYPE_MEM
549 q.To.Name = obj.NAME_EXTERN
553 r.RestArgs = p.RestArgs
562 if p.GetFrom3() != nil && isName(p.GetFrom3()) {
563 r.GetFrom3().Reg = dst
570 markBit = 1 << 0 // used in errorCheck to avoid duplicate work
// preprocess expands the function prologue and epilogue for cursym.
// From the visible code it: computes the frame size (reserving room for a
// saved base pointer on framed AMD64 functions), marks small frames that
// only make zero-argument runtime / duff calls as nosplit, loads g and
// emits the stack-split check via stacksplit, inserts the wrapper
// g._panic.argp adjustment sequence, then walks the body rewriting
// FP/SP-relative offsets while tracking deltasp, flagging SP writes
// (SPWRITE) and expanding RET into the frame-teardown sequence.
// NOTE(review): many interior lines are elided in this excerpt; the
// visible statements are not the complete function body.
573 func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
574 if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
578 p := cursym.Func().Text
579 autoffset := int32(p.To.Offset)
585 for q := p; q != nil; q = q.Link {
586 if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
593 if ctxt.Arch.Family == sys.AMD64 &&
594 !p.From.Sym.NoFrame() && // (1) below
595 !(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below
596 !(autoffset == 0 && !hasCall) { // (3) below
597 // Make room to save a base pointer.
598 // There are 2 cases we must avoid:
599 // 1) If noframe is set (which we do for functions which tail call).
600 // 2) Scary runtime internals which would be all messed up by frame pointers.
601 // We detect these using a heuristic: frameless nosplit functions.
602 // TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic.
603 // For performance, we also want to avoid:
604 // 3) Frameless leaf functions
605 bpsize = ctxt.Arch.PtrSize
606 autoffset += int32(bpsize)
607 p.To.Offset += int64(bpsize)
612 textarg := int64(p.To.Val.(int32))
613 cursym.Func().Args = int32(textarg)
614 cursym.Func().Locals = int32(p.To.Offset)
616 // TODO(rsc): Remove.
617 if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
618 cursym.Func().Locals = 0
621 // TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
622 if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() {
625 for q := p; q != nil; q = q.Link {
628 // Treat common runtime calls that take no arguments
629 // the same as duffcopy and duffzero.
630 if !isZeroArgRuntimeCall(q.To.Sym) {
635 case obj.ADUFFCOPY, obj.ADUFFZERO:
636 if autoffset >= objabi.StackSmall-8 {
644 p.From.Sym.Set(obj.AttrNoSplit, true)
649 if !p.From.Sym.NoSplit() || p.From.Sym.Wrapper() {
650 if ctxt.Arch.Family == sys.AMD64 && buildcfg.Experiment.RegabiG && cursym.ABI() == obj.ABIInternal {
651 regg = REGG // use the g register directly in ABIInternal
653 p = obj.Appendp(p, newprog)
655 if ctxt.Arch.Family == sys.AMD64 {
656 // Using this register means that stacksplit works w/ //go:registerparams even when !buildcfg.Experiment.RegabiG
657 regg = REGG // == REG_R14
659 p = load_g(ctxt, p, newprog, regg) // load g into regg
662 var regEntryTmp0, regEntryTmp1 int16
663 if ctxt.Arch.Family == sys.AMD64 {
664 regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1
666 regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI
669 if !cursym.Func().Text.From.Sym.NoSplit() {
670 p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg), regg) // emit split check
673 // Delve debugger would like the next instruction to be noted as the end of the function prologue.
674 // TODO: are there other cases (e.g., wrapper functions) that need marking?
675 markedPrologue := false
678 if autoffset%int32(ctxt.Arch.RegSize) != 0 {
679 ctxt.Diag("unaligned stack size %d", autoffset)
681 p = obj.Appendp(p, newprog)
683 p.From.Type = obj.TYPE_CONST
684 p.From.Offset = int64(autoffset)
686 p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
687 markedPrologue = true
692 p = obj.Appendp(p, newprog)
695 p.From.Type = obj.TYPE_REG
697 p.To.Type = obj.TYPE_MEM
700 p.To.Offset = int64(autoffset) - int64(bpsize)
702 p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
705 // Move current frame to BP
706 p = obj.Appendp(p, newprog)
709 p.From.Type = obj.TYPE_MEM
712 p.From.Offset = int64(autoffset) - int64(bpsize)
713 p.To.Type = obj.TYPE_REG
717 if cursym.Func().Text.From.Sym.Wrapper() {
718 // if g._panic != nil && g._panic.argp == FP {
719 // g._panic.argp = bottom-of-frame
722 // MOVQ g_panic(g), regEntryTmp0
723 // TESTQ regEntryTmp0, regEntryTmp0
727 // ... rest of function ...
729 // LEAQ (autoffset+8)(SP), regEntryTmp1
730 // CMPQ panic_argp(regEntryTmp0), regEntryTmp1
732 // MOVQ SP, panic_argp(regEntryTmp0)
735 // The NOP is needed to give the jumps somewhere to land.
736 // It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
738 // The layout is chosen to help static branch prediction:
739 // Both conditional jumps are unlikely, so they are arranged to be forward jumps.
741 // MOVQ g_panic(g), regEntryTmp0
742 p = obj.Appendp(p, newprog)
744 p.From.Type = obj.TYPE_MEM
746 p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
747 p.To.Type = obj.TYPE_REG
748 p.To.Reg = regEntryTmp0
749 if ctxt.Arch.Family == sys.I386 {
753 // TESTQ regEntryTmp0, regEntryTmp0
754 p = obj.Appendp(p, newprog)
756 p.From.Type = obj.TYPE_REG
757 p.From.Reg = regEntryTmp0
758 p.To.Type = obj.TYPE_REG
759 p.To.Reg = regEntryTmp0
760 if ctxt.Arch.Family == sys.I386 {
764 // JNE checkargp (checkargp to be resolved later)
765 jne := obj.Appendp(p, newprog)
767 jne.To.Type = obj.TYPE_BRANCH
771 end := obj.Appendp(jne, newprog)
774 // Fast forward to end of function.
776 for last = end; last.Link != nil; last = last.Link {
779 // LEAQ (autoffset+8)(SP), regEntryTmp1
780 p = obj.Appendp(last, newprog)
782 p.From.Type = obj.TYPE_MEM
784 p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
785 p.To.Type = obj.TYPE_REG
786 p.To.Reg = regEntryTmp1
787 if ctxt.Arch.Family == sys.I386 {
791 // Set jne branch target.
794 // CMPQ panic_argp(regEntryTmp0), regEntryTmp1
795 p = obj.Appendp(p, newprog)
797 p.From.Type = obj.TYPE_MEM
798 p.From.Reg = regEntryTmp0
799 p.From.Offset = 0 // Panic.argp
800 p.To.Type = obj.TYPE_REG
801 p.To.Reg = regEntryTmp1
802 if ctxt.Arch.Family == sys.I386 {
807 p = obj.Appendp(p, newprog)
809 p.To.Type = obj.TYPE_BRANCH
812 // MOVQ SP, panic_argp(regEntryTmp0)
813 p = obj.Appendp(p, newprog)
815 p.From.Type = obj.TYPE_REG
817 p.To.Type = obj.TYPE_MEM
818 p.To.Reg = regEntryTmp0
819 p.To.Offset = 0 // Panic.argp
820 if ctxt.Arch.Family == sys.I386 {
825 p = obj.Appendp(p, newprog)
827 p.To.Type = obj.TYPE_BRANCH
830 // Reset p for following code.
835 for p = cursym.Func().Text; p != nil; p = p.Link {
836 pcsize := ctxt.Arch.RegSize
839 p.From.Offset += int64(deltasp) - int64(bpsize)
841 p.From.Offset += int64(deltasp) + int64(pcsize)
843 if p.GetFrom3() != nil {
844 switch p.GetFrom3().Name {
846 p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
848 p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
853 p.To.Offset += int64(deltasp) - int64(bpsize)
855 p.To.Offset += int64(deltasp) + int64(pcsize)
860 if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
862 if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 {
863 f.FuncFlag |= objabi.FuncFlag_SPWRITE
864 if ctxt.Debugvlog || !ctxt.IsAsm {
865 ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
867 ctxt.Diag("invalid auto-SPWRITE in non-assembly")
869 log.Fatalf("bad SPWRITE")
876 case APUSHL, APUSHFL:
881 case APUSHQ, APUSHFQ:
886 case APUSHW, APUSHFW:
907 p.Spadj = int32(p.From.Offset)
908 deltasp += int32(p.From.Offset)
915 if autoffset != deltasp {
916 ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
920 to := p.To // Keep To attached to RET for retjmp below
923 // Restore caller's BP
926 p.From.Type = obj.TYPE_MEM
929 p.From.Offset = int64(autoffset) - int64(bpsize)
930 p.To.Type = obj.TYPE_REG
932 p = obj.Appendp(p, newprog)
936 p.From.Type = obj.TYPE_CONST
937 p.From.Offset = int64(-autoffset)
939 p = obj.Appendp(p, newprog)
943 // If there are instructions following
944 // this ARET, they come from a branch
945 // with the same stackframe, so undo
950 if p.To.Sym != nil { // retjmp
// isZeroArgRuntimeCall reports whether s is a runtime call that uses no
// stack before performing its own stack check; preprocess treats such
// calls like duffcopy/duffzero when deciding to mark small frames nosplit.
// NOTE(review): interior lines (nil guard, returns, closing brace) are
// elided in this excerpt.
956 func isZeroArgRuntimeCall(s *obj.LSym) bool {
961 case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
964 if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
965 // These functions do take arguments (in registers),
966 // but use no stack before they do a stack check. We
967 // should include them. See issue 31219.
// indir_cx rewrites a into an indirect memory reference.
// NOTE(review): the line assigning the base register (presumably CX,
// going by the name — confirm against the full source) is elided here.
973 func indir_cx(ctxt *obj.Link, a *obj.Addr) {
974 a.Type = obj.TYPE_MEM
// NOTE(review): interior lines of load_g are elided in this excerpt;
// the instruction fields written between the visible assignments (As,
// registers, offsets) are not shown.
978 // Append code to p to load g into cx.
979 // Overwrites p with the first instruction (no first appendp).
980 // Overwriting p is unusual but it lets use this in both the
981 // prologue (caller must call appendp first) and in the epilogue.
982 // Returns last new instruction.
983 func load_g(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, rg int16) *obj.Prog {
985 if ctxt.Arch.PtrSize == 4 {
988 p.From.Type = obj.TYPE_MEM
991 p.To.Type = obj.TYPE_REG
995 progedit(ctxt, p, newprog)
998 progedit(ctxt, p, newprog)
1001 if p.From.Index == REG_TLS {
// NOTE(review): interior lines of stacksplit are elided in this excerpt
// (the embedded original line numbers jump); the visible statements are
// not the complete function body.
1008 // Append code to p to check for stack split.
1009 // Appends to (does not overwrite) p.
1010 // Assumes g is in rg.
1011 // Returns last new instruction.
1012 func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32, rg int16) *obj.Prog {
1018 if ctxt.Arch.Family == sys.I386 {
1025 tmp := int16(REG_AX) // use AX for 32-bit
1026 if ctxt.Arch.Family == sys.AMD64 {
1027 // Avoid register parameters.
1028 tmp = int16(REGENTRYTMP0)
1032 if framesize <= objabi.StackSmall {
1033 // small stack: SP <= stackguard
1034 // CMPQ SP, stackguard
1035 p = obj.Appendp(p, newprog)
1038 p.From.Type = obj.TYPE_REG
1040 p.To.Type = obj.TYPE_MEM
1042 p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
1044 p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
1047 // Mark the stack bound check and morestack call async nonpreemptible.
1048 // If we get preempted here, when resumed the preemption request is
1049 // cleared, but we'll still call morestack, which will double the stack
1050 // unnecessarily. See issue #35470.
1051 p = ctxt.StartUnsafePoint(p, newprog)
1052 } else if framesize <= objabi.StackBig {
1053 // large stack: SP-framesize <= stackguard-StackSmall
1054 // LEAQ -xxx(SP), tmp
1055 // CMPQ tmp, stackguard
1056 p = obj.Appendp(p, newprog)
1059 p.From.Type = obj.TYPE_MEM
1061 p.From.Offset = -(int64(framesize) - objabi.StackSmall)
1062 p.To.Type = obj.TYPE_REG
1065 p = obj.Appendp(p, newprog)
1067 p.From.Type = obj.TYPE_REG
1069 p.To.Type = obj.TYPE_MEM
1071 p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
1073 p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
1076 p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
1078 // Such a large stack we need to protect against underflow.
1079 // The runtime guarantees SP > objabi.StackBig, but
1080 // framesize is large enough that SP-framesize may
1081 // underflow, causing a direct comparison with the
1082 // stack guard to incorrectly succeed. We explicitly
1083 // guard against underflow.
1086 // SUBQ $(framesize - StackSmall), tmp
1087 // // If subtraction wrapped (carry set), morestack.
1088 // JCS label-of-call-to-morestack
1089 // CMPQ tmp, stackguard
1091 p = obj.Appendp(p, newprog)
1094 p.From.Type = obj.TYPE_REG
1096 p.To.Type = obj.TYPE_REG
1099 p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
1101 p = obj.Appendp(p, newprog)
1103 p.From.Type = obj.TYPE_CONST
1104 p.From.Offset = int64(framesize) - objabi.StackSmall
1105 p.To.Type = obj.TYPE_REG
1108 p = obj.Appendp(p, newprog)
1110 p.To.Type = obj.TYPE_BRANCH
1113 p = obj.Appendp(p, newprog)
1115 p.From.Type = obj.TYPE_REG
1117 p.To.Type = obj.TYPE_MEM
1119 p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
1121 p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
1126 jls := obj.Appendp(p, newprog)
1128 jls.To.Type = obj.TYPE_BRANCH
1130 end := ctxt.EndUnsafePoint(jls, newprog, -1)
1133 for last = cursym.Func().Text; last.Link != nil; last = last.Link {
1136 // Now we are at the end of the function, but logically
1137 // we are still in function prologue. We need to fix the
1138 // SP data and PCDATA.
1139 spfix := obj.Appendp(last, newprog)
1141 spfix.Spadj = -framesize
1143 pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
1144 spill := ctxt.StartUnsafePoint(pcdata, newprog)
1145 pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)
1147 call := obj.Appendp(pcdata, newprog)
1148 call.Pos = cursym.Func().Text.Pos
1150 call.To.Type = obj.TYPE_BRANCH
1151 call.To.Name = obj.NAME_EXTERN
1152 morestack := "runtime.morestack"
1154 case cursym.CFunc():
1155 morestack = "runtime.morestackc"
1156 case !cursym.Func().Text.From.Sym.NeedCtxt():
1157 morestack = "runtime.morestack_noctxt"
1159 call.To.Sym = ctxt.Lookup(morestack)
1160 // When compiling 386 code for dynamic linking, the call needs to be adjusted
1161 // to follow PIC rules. This in turn can insert more instructions, so we need
1162 // to keep track of the start of the call (where the jump will be to) and the
1163 // end (which following instructions are appended to).
1165 progedit(ctxt, callend, newprog)
1166 for ; callend.Link != nil; callend = callend.Link {
1167 progedit(ctxt, callend.Link, newprog)
1170 pcdata = cursym.Func().UnspillRegisterArgs(callend, newprog)
1171 pcdata = ctxt.EndUnsafePoint(pcdata, newprog, -1)
1173 jmp := obj.Appendp(pcdata, newprog)
1175 jmp.To.Type = obj.TYPE_BRANCH
1176 jmp.To.SetTarget(cursym.Func().Text.Link)
1177 jmp.Spadj = +framesize
1179 jls.To.SetTarget(spill)
1181 q1.To.SetTarget(spill)
// isR15 reports whether r is R15 or its byte form R15B.
// (excerpt: closing brace elided)
1187 func isR15(r int16) bool {
1188 return r == REG_R15 || r == REG_R15B
// addrMentionsR15 reports whether a references R15 through its base or
// index register. (excerpt: lines 1191-1193 elided — likely a nil guard
// for a; confirm against the full source)
1190 func addrMentionsR15(a *obj.Addr) bool {
1194 return isR15(a.Reg) || isR15(a.Index)
// progMentionsR15 reports whether any operand of p (From, To, the Reg
// field, or the third operand) references R15.
// (excerpt: closing brace elided)
1196 func progMentionsR15(p *obj.Prog) bool {
1197 return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
// NOTE(review): interior lines (the early returns inside each branch and
// the final return) are elided in this excerpt.
1200 // progOverwritesR15 reports whether p writes to R15 and does not depend on
1201 // the previous value of R15.
1202 func progOverwritesR15(p *obj.Prog) bool {
1203 if !(p.To.Type == obj.TYPE_REG && isR15(p.To.Reg)) {
1204 // Not writing to R15.
1207 if (p.As == AXORL || p.As == AXORQ) && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
1208 // These look like uses of R15, but aren't, so we must detect these
1209 // before the use check below.
1212 if addrMentionsR15(&p.From) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3()) {
1213 // use before overwrite
1216 if p.As == AMOVL || p.As == AMOVQ || p.As == APOPQ {
1218 // TODO: MOVB might be ok if we only ever use R15B.
// addrUsesGlobal reports whether a references a non-local external
// (global) symbol. (excerpt: lines 1224-1226 elided — likely a nil
// guard for a; confirm against the full source)
1223 func addrUsesGlobal(a *obj.Addr) bool {
1227 return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
// progUsesGlobal reports whether p accesses a global variable in a way
// that (under -dynlink) clobbers R15; CALL/TEXT/FUNCDATA/RET/JMP are
// excluded, as explained in the comments below.
// NOTE(review): interior lines (the early returns and closing brace) are
// elided in this excerpt.
1229 func progUsesGlobal(p *obj.Prog) bool {
1230 if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
1231 // These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
1232 // or R15 would be dead at them anyway.
1236 // The GOT entry is placed directly in the destination register; R15 is not used.
1239 return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
// errorCheck rejects -dynlink code that reads R15 while its value is junk:
// a worklist flood fill starts at every global-access instruction (which
// clobbers R15) and follows fallthrough and branch targets, diagnosing any
// use of R15 reached before an instruction that fully overwrites it.
// NOTE(review): interior lines are elided in this excerpt (e.g. the early
// return for !mentionsR15, Mark updates, and the final cleanup loop body).
1242 func errorCheck(ctxt *obj.Link, s *obj.LSym) {
1243 // When dynamic linking, R15 is used to access globals. Reject code that
1244 // uses R15 after a global variable access.
1245 if !ctxt.Flag_dynlink {
1249 // Flood fill all the instructions where R15's value is junk.
1250 // If there are any uses of R15 in that set, report an error.
1251 var work []*obj.Prog
1252 var mentionsR15 bool
1253 for p := s.Func().Text; p != nil; p = p.Link {
1254 if progUsesGlobal(p) {
1255 work = append(work, p)
1258 if progMentionsR15(p) {
1264 p := work[len(work)-1]
1265 work = work[:len(work)-1]
1266 if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
1268 work = append(work, q)
1270 if p.As == obj.AJMP || p.As == obj.ARET {
1271 continue // no fallthrough
1273 if progMentionsR15(p) {
1274 if progOverwritesR15(p) {
1275 // R15 is overwritten by this instruction. Its value is not junk any more.
1278 pos := ctxt.PosTable.Pos(p.Pos)
1279 ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
1280 break // only report one error
1282 if q := p.Link; q != nil && q.Mark&markBit == 0 {
1284 work = append(work, q)
1290 for p := s.Func().Text; p != nil; p = p.Link {
1295 var unaryDst = map[obj.As]bool{
// Linkamd64 describes the AMD64 target for the obj package, wiring this
// file's errorCheck and preprocess passes into the architecture table.
// NOTE(review): several struct fields are elided in this excerpt.
1380 var Linkamd64 = obj.LinkArch{
1381 Arch: sys.ArchAMD64,
1383 ErrorCheck: errorCheck,
1384 Preprocess: preprocess,
1388 DWARFRegisters: AMD64DWARFRegisters,
1391 var Link386 = obj.LinkArch{
1394 Preprocess: preprocess,
1398 DWARFRegisters: X86DWARFRegisters,