1 // Derived from Inferno utils/6l/l.h and related files.
2 // http://code.google.com/p/inferno-os/source/browse/utils/6l/l.h
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 // Portions Copyright © 1997-1999 Vita Nuova Limited
7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 // Portions Copyright © 2004,2006 Bruce Ellis
9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 // Portions Copyright © 2009 The Go Authors. All rights reserved.
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
33 import "encoding/binary"
35 // An Addr is an argument to an instruction.
36 // The general forms and their encodings are:
38 // sym±offset(symkind)(reg)(index*scale)
39 // Memory reference at address &sym(symkind) + offset + reg + index*scale.
40 // Any of sym(symkind), ±offset, (reg), (index*scale), and *scale can be omitted.
41 // If (reg) and *scale are both omitted, the resulting expression (index) is parsed as (reg).
42 // To force a parsing as index*scale, write (index*1).
45 // name = symkind (NAME_AUTO, ...) or 0 (NAME_NONE)
49 // index = index (REG_*)
50 // scale = scale (1, 2, 4, 8)
53 // Effective address of memory reference <mem>, defined above.
54 // Encoding: same as memory reference, but type = TYPE_ADDR.
57 // This is a special case of $<mem>, in which only ±offset is present.
58 // It has a separate type for easy recognition.
61 // offset = ±integer value
64 // Indirect reference through memory reference <mem>, defined above.
65 // Only used on x86 for CALL/JMP *sym(SB), which calls/jumps to a function
66 // pointer stored in the data word sym(SB), not a function named sym(SB).
67 // Encoding: same as above, but type = TYPE_INDIR.
71 // On machines with actual SB registers, $*$<mem> forced the
72 // instruction encoding to use a full 32-bit constant, never a
73 // reference relative to SB.
75 // $<floating point literal>
76 // Floating point constant value.
79 // val = floating point value
81 // $<string literal, up to 8 chars>
82 // String literal value (raw bytes used for DATA instruction).
88 // Any register: integer, floating point, control, segment, and so on.
89 // If looking for specific register kind, must check type and reg value range.
97 // val = Prog* reference OR ELSE offset = target pc (branch takes priority)
100 // Final argument to TEXT, specifying local frame size x and argument size y.
101 // In this form, x and y are integer literals only, not arbitrary expressions.
102 // This avoids parsing ambiguities due to the use of - as a separator.
103 // The ± are optional.
104 // If the final argument to TEXT omits the -±y, the encoding should still
105 // use TYPE_TEXTSIZE (not TYPE_CONST), with u.argsize = ArgsSizeUnknown.
107 // type = TYPE_TEXTSIZE
111 // reg<<shift, reg>>shift, reg->shift, reg@>shift
112 // Shifted register value, for ARM.
113 // In this form, reg must be a register and shift can be a register or an integer constant.
116 // offset = (reg&15) | shifttype<<5 | count
117 // shifttype = 0, 1, 2, 3 for <<, >>, ->, @>
118 // count = (reg&15)<<8 | 1<<4 for a register shift count, (n&31)<<7 for an integer constant.
121 // A destination register pair. When used as the last argument of an instruction,
122 // this form makes clear that both registers are destinations.
124 // type = TYPE_REGREG
125 // reg = first register
126 // offset = second register
128 // [reg, reg, reg-reg]
129 // Register list for ARM.
131 // type = TYPE_REGLIST
132 // offset = bit mask of registers in list; R0 is low bit.
135 // Register pair for ARM.
139 // Register pair for PPC64.
142 // reg = first register
143 // index = second register
150 Scale int16 // Sometimes holds a register.
160 // for TYPE_SCONST, a string
161 // for TYPE_FCONST, a float64
162 // for TYPE_BRANCH, a *Prog (optional)
163 // for TYPE_TEXTSIZE, an int32 (optional)
166 Node interface{} // for use by compiler
175 // A reference to name@GOT(SB) is a reference to the entry in the global offset
185 TYPE_BRANCH = 5 + iota
200 // TODO(rsc): Describe prog.
201 // TODO(rsc): Describe TEXT/GLOBL flag in from3, DATA width in from3.
206 From3 *Addr // optional
211 Rel *Prog // Source of forward jumps on x86; pcrel on arm
217 RegTo2 int16 // 2nd register output operand
218 Mark uint16 // bitmask of arch-specific items
224 Isize uint8 // size of the instruction in bytes (x86 only)
230 // From3Type returns From3.Type, or TYPE_NONE when From3 is nil.
231 func (p *Prog) From3Type() int16 {
238 // From3Offset returns From3.Offset, or 0 when From3 is nil.
239 func (p *Prog) From3Offset() int64 {
243 return p.From3.Offset
246 // ProgInfo holds information about the instruction for use
247 // by clients such as the compiler. The exact meaning of this
248 // data is up to the client and is not interpreted by the cmd/internal/obj/... packages.
249 type ProgInfo struct {
250 _ struct{} // to prevent unkeyed literals. Trailing zero-sized field will take space.
251 Flags uint32 // flag bits
252 Reguse uint64 // registers implicitly used by this instruction
253 Regset uint64 // registers implicitly set by this instruction
254 Regindex uint64 // registers used by addressing mode
258 // These are the portable opcodes, common to all architectures.
259 // Each architecture defines many more arch-specific opcodes,
260 // with values starting at A_ARCHSPECIFIC.
261 // Each architecture adds an offset to this so each machine has
262 // distinct space for its instructions. The offset is a power of
263 // two so it can be masked to return to origin zero.
264 // See the definitions of ABase386 etc.
289 // An LSym is the sort of symbol that is written to an object file.
300 // Local means make the symbol local even when compiling Go code to reference Go
301 // symbols in other shared libraries, as in this mode symbols are global by
302 // default. "local" here means in the sense of the dynamic linker, i.e. not
303 // visible outside of the module (shared library or executable) that contains its
304 // definition. (When not compiling to support Go shared libraries, all symbols are
305 // local in this sense unless there is a cgo_export_* directive).
347 // Types STYPE-SFUNCTAB above are written to the .rodata section by default.
348 // When linking a shared object, some conceptually "read only" types need to
349 // be written to by relocations and putting them in a section called
350 // ".rodata" interacts poorly with the system linkers. The GNU linkers
351 // support this situation by arranging for sections of the name
352 // ".data.rel.ro.XXX" to be mprotected read only by the dynamic linker after
353 // relocations have been applied, so when the Go linker is creating a shared
354 // object it checks all objects of the above types and bumps any object that
355 // has a relocation to it to the corresponding type below, which are then
356 // written to sections with appropriate magic names.
394 SCONTAINER = 1 << 10 // has a sub-symbol
408 // R_ADDRPOWER relocates a pair of "D-form" instructions (instructions with 16-bit
409 // immediates in the low half of the instruction word), usually addis followed by
410 // another add or a load, inserting the "high adjusted" 16 bits of the address of
411 // the referenced symbol into the immediate field of the first instruction and the
412 // low 16 bits into that of the second instruction.
414 // R_ADDRARM64 relocates an adrp, add pair to compute the address of the
415 // referenced symbol.
417 // R_ADDRMIPS (only used on mips64) resolves to a 32-bit external address,
418 // by loading the address into a register with two instructions (lui, ori).
426 // R_CALLMIPS (only used on mips64) resolves to non-PC-relative target address
427 // of a CALL (JAL) instruction, by encoding the address into the instruction.
431 // R_TLS_LE, used on 386, amd64, and ARM, resolves to the offset of the
432 // thread-local symbol from the thread local base and is used to implement the
433 // "local exec" model for tls access (r.Sym is not set on intel platforms but is
434 // set to a TLS symbol -- runtime.tlsg -- in the linker when externally linking).
436 // R_TLS_IE, used on 386, amd64, and ARM, resolves to the PC-relative offset to a GOT
437 // slot containing the offset of the thread-local symbol from the thread local
438 // base and is used to implement the "initial exec" model for tls access (r.Sym
439 // is not set on intel platforms but is set to a TLS symbol -- runtime.tlsg -- in
440 // the linker when externally linking).
447 // R_USETYPE resolves to an *rtype, but no relocation is created. The
448 // linker uses this as a signal that the pointed-to type information
449 // should be linked into the final binary, even if there are no other
450 // direct references. (This is used for types reachable by reflection.)
454 // R_JMPMIPS (only used on mips64) resolves to non-PC-relative target address
455 // of a JMP instruction, by encoding the address into the instruction.
456 // The stack nosplit check ignores this since it is not a function call.
459 // Platform dependent relocations. Architectures with fixed width instructions
460 // have the inherent issue that a 32-bit (or 64-bit!) displacement cannot be
461 // stuffed into a 32-bit instruction, so an address needs to be spread across
462 // several instructions, and in turn this requires a sequence of relocations, each
463 // updating a part of an instruction. This leads to relocation codes that are
464 // inherently processor specific.
468 // Set a MOV[NZ] immediate field to bits [15:0] of the offset from the thread
469 // local base to the thread local variable defined by the referenced (thread
470 // local) symbol. Error if the offset does not fit into 16 bits.
473 // Relocates an ADRP; LD64 instruction sequence to load the offset between
474 // the thread local base and the thread local variable defined by the
475 // referenced (thread local) symbol from the GOT.
478 // R_ARM64_GOTPCREL relocates an adrp, ld64 pair to compute the address of the GOT
479 // slot of the referenced symbol.
484 // R_POWER_TLS_LE is used to implement the "local exec" model for tls
485 // access. It resolves to the offset of the thread-local symbol from the
486 // thread pointer (R13) and inserts this value into the low 16 bits of an
490 // R_POWER_TLS_IE is used to implement the "initial exec" model for tls access. It
491 // relocates a D-form, DS-form instruction sequence like R_ADDRPOWER_DS. It
492 // inserts the offset of the GOT slot for the thread-local symbol from the TOC (the
493 // GOT slot is filled by the dynamic linker with the offset of the thread-local
494 // symbol from the thread pointer (R13)).
497 // R_POWER_TLS marks an X-form instruction such as "MOVD 0(R13)(R31*1), g" as
498 // accessing a particular thread-local symbol. It does not affect code generation
499 // but is used by the system linker when relaxing "initial exec" model code to
500 // "local exec" model code.
503 // R_ADDRPOWER_DS is similar to R_ADDRPOWER above, but assumes the second
504 // instruction is a "DS-form" instruction, which has an immediate field occupying
505 // bits [15:2] of the instruction word. Bits [15:2] of the address of the
506 // relocated symbol are inserted into this field; it is an error if the last two
507 // bits of the address are not 0.
510 // R_ADDRPOWER_GOT relocates a D-form, DS-form instruction sequence like
511 // R_ADDRPOWER_DS but inserts the offset of the GOT slot for the referenced symbol
512 // from the TOC rather than the symbol's address.
515 // R_ADDRPOWER_PCREL relocates two D-form instructions like R_ADDRPOWER, but
516 // inserts the displacement from the place being relocated to the address of the
517 // relocated symbol instead of just its address.
520 // R_ADDRPOWER_TOCREL relocates two D-form instructions like R_ADDRPOWER, but
521 // inserts the offset from the TOC to the address of the relocated symbol
522 // rather than the symbol's address.
525 // R_ADDRPOWER_TOCREL_DS relocates a D-form, DS-form instruction sequence like
526 // R_ADDRPOWER_DS but inserts the offset from the TOC to the address of the
527 // relocated symbol rather than the symbol's address.
528 R_ADDRPOWER_TOCREL_DS
550 // for(pciterinit(ctxt, &it, &pcd); !it.done; pciternext(&it)) { it.value holds in [it.pc, it.nextpc) }
562 // symbol version, incremented each time a file is loaded.
563 // version==1 is reserved for savehist.
568 // Link holds the context for writing object code from a compiler
569 // to be linker input or for reading that input into the linker.
586 Enforce_data_order int32
587 Hash map[SymVer]*LSym
613 DiagFunc func(string, ...interface{})
621 // state for writing objects
632 func (ctxt *Link) Diag(format string, args ...interface{}) {
634 ctxt.DiagFunc(format, args...)
637 // The smallest possible offset from the hardware stack pointer to a local
638 // variable on the stack. Architectures that use a link register save its value
639 // on the stack in the function prologue and so always have a pointer between
640 // the hardware stack pointer and the local variable area.
641 func (ctxt *Link) FixedFrameSize() int64 {
642 switch ctxt.Arch.Thechar {
646 // PIC code on ppc64le requires 32 bytes of stack, and it's easier to
647 // just use that much stack always on ppc64x.
648 return int64(4 * ctxt.Arch.Ptrsize)
650 return int64(ctxt.Arch.Ptrsize)
656 Version int // TODO: make int16 to match LSym.Version?
659 // LinkArch is the definition of a single architecture.
660 type LinkArch struct {
661 ByteOrder binary.ByteOrder
664 Preprocess func(*Link, *LSym)
665 Assemble func(*Link, *LSym)
666 Follow func(*Link, *LSym)
667 Progedit func(*Link, *Prog)
668 UnaryDst map[int]bool // Instruction takes one operand, a destination.
674 /* executable header types */
698 * start a new Prog list.
700 func Linknewplist(ctxt *Link) *Plist {
702 if ctxt.Plist == nil {