"cmd/internal/objabi"
"encoding/binary"
"fmt"
+ "internal/buildcfg"
"log"
"math"
"math/bits"
r0iszero = 1
)
+const (
+ // R bit option in prefixed load/store/add D-form operations
+ PFX_R_ABS = 0 // Offset is absolute
+ PFX_R_PCREL = 1 // Offset is relative to PC, RA should be 0
+)
+
+const (
+ // The preferred hardware nop instruction.
+ NOP = 0x60000000
+)
+
type Optab struct {
as obj.As // Opcode
a1 uint8 // p.From argument (obj.Addr). p is of type obj.Prog.
//
// Likewise, each slice of optab is dynamically sorted using the ocmp Sort interface
// to arrange entries to minimize text size of each opcode.
-var optab = []Optab{
+//
+// optab is the sorted result of combining optabBase, optabGen, and prefixableOptab.
+var optab []Optab
+
+var optabBase = []Optab{
{as: obj.ATEXT, a1: C_LOREG, a6: C_TEXTSIZE, type_: 0, size: 0},
{as: obj.ATEXT, a1: C_LOREG, a3: C_LCON, a6: C_TEXTSIZE, type_: 0, size: 0},
{as: obj.ATEXT, a1: C_ADDR, a6: C_TEXTSIZE, type_: 0, size: 0},
{as: AADD, a1: C_SCON, a6: C_REG, type_: 4, size: 4},
{as: AADD, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 4, size: 4},
{as: AADD, a1: C_ADDCON, a6: C_REG, type_: 4, size: 4},
- {as: AADD, a1: C_UCON, a2: C_REG, a6: C_REG, type_: 20, size: 4},
- {as: AADD, a1: C_UCON, a6: C_REG, type_: 20, size: 4},
{as: AADD, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 22, size: 8},
{as: AADD, a1: C_ANDCON, a6: C_REG, type_: 22, size: 8},
- {as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12},
- {as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12},
{as: AADDIS, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 20, size: 4},
{as: AADDIS, a1: C_ADDCON, a6: C_REG, type_: 20, size: 4},
{as: AADDC, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4},
{as: AANDCC, a1: C_REG, a6: C_REG, type_: 6, size: 4},
{as: AANDCC, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4},
{as: AANDCC, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4},
- {as: AANDCC, a1: C_UCON, a6: C_REG, type_: 59, size: 4},
- {as: AANDCC, a1: C_UCON, a2: C_REG, a6: C_REG, type_: 59, size: 4},
{as: AANDCC, a1: C_ADDCON, a6: C_REG, type_: 23, size: 8},
{as: AANDCC, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 23, size: 8},
{as: AANDCC, a1: C_LCON, a6: C_REG, type_: 23, size: 12},
{as: AANDCC, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 23, size: 12},
- {as: AANDISCC, a1: C_ANDCON, a6: C_REG, type_: 59, size: 4},
- {as: AANDISCC, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 59, size: 4},
+ {as: AANDISCC, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4},
+ {as: AANDISCC, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4},
{as: AMULLW, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4},
{as: AMULLW, a1: C_REG, a6: C_REG, type_: 2, size: 4},
{as: AMULLW, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 4, size: 4},
{as: AOR, a1: C_REG, a6: C_REG, type_: 6, size: 4},
{as: AOR, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4},
{as: AOR, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4},
- {as: AOR, a1: C_UCON, a6: C_REG, type_: 59, size: 4},
- {as: AOR, a1: C_UCON, a2: C_REG, a6: C_REG, type_: 59, size: 4},
{as: AOR, a1: C_ADDCON, a6: C_REG, type_: 23, size: 8},
{as: AOR, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 23, size: 8},
{as: AOR, a1: C_LCON, a6: C_REG, type_: 23, size: 12},
{as: AOR, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 23, size: 12},
- {as: AORIS, a1: C_ANDCON, a6: C_REG, type_: 59, size: 4},
- {as: AORIS, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 59, size: 4},
+ {as: AORIS, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4},
+ {as: AORIS, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4},
{as: ADIVW, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4}, /* op r1[,r2],r3 */
{as: ADIVW, a1: C_REG, a6: C_REG, type_: 2, size: 4},
{as: ASUB, a1: C_REG, a2: C_REG, a6: C_REG, type_: 10, size: 4}, /* op r2[,r1],r3 */
{as: ASRAD, a1: C_REG, a2: C_REG, a6: C_REG, type_: 6, size: 4},
{as: ASRAD, a1: C_SCON, a2: C_REG, a6: C_REG, type_: 56, size: 4},
{as: ASRAD, a1: C_SCON, a6: C_REG, type_: 56, size: 4},
- {as: ARLWMI, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 62, size: 4},
- {as: ARLWMI, a1: C_SCON, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 102, size: 4},
- {as: ARLWMI, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 63, size: 4},
- {as: ARLWMI, a1: C_REG, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 103, size: 4},
+ {as: ARLWNM, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 63, size: 4},
+ {as: ARLWNM, a1: C_SCON, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 63, size: 4},
+ {as: ARLWNM, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 63, size: 4},
+ {as: ARLWNM, a1: C_REG, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 63, size: 4},
{as: ACLRLSLWI, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 62, size: 4},
{as: ARLDMI, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 30, size: 4},
{as: ARLDC, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 29, size: 4},
+ {as: ARLDC, a1: C_REG, a3: C_U8CON, a4: C_U8CON, a6: C_REG, type_: 9, size: 4},
{as: ARLDCL, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 29, size: 4},
{as: ARLDCL, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 14, size: 4},
{as: ARLDICL, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 14, size: 4},
{as: AMOVHBR, a1: C_REG, a6: C_XOREG, type_: 44, size: 4},
{as: AMOVHBR, a1: C_XOREG, a6: C_REG, type_: 45, size: 4},
- {as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12},
- {as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12},
{as: AMOVB, a1: C_SOREG, a6: C_REG, type_: 8, size: 8},
{as: AMOVB, a1: C_XOREG, a6: C_REG, type_: 109, size: 8},
- {as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
{as: AMOVB, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
- {as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
{as: AMOVB, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
{as: AMOVB, a1: C_REG, a6: C_REG, type_: 13, size: 4},
- {as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
- {as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
{as: AMOVBZ, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
{as: AMOVBZ, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
- {as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
{as: AMOVBZ, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
- {as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
{as: AMOVBZ, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
{as: AMOVBZ, a1: C_REG, a6: C_REG, type_: 13, size: 4},
{as: AMOVD, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4},
{as: AMOVD, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4},
- {as: AMOVD, a1: C_UCON, a6: C_REG, type_: 3, size: 4},
- {as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8},
{as: AMOVD, a1: C_SACON, a6: C_REG, type_: 3, size: 4},
- {as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8},
- {as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
{as: AMOVD, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
{as: AMOVD, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
{as: AMOVD, a1: C_SOREG, a6: C_SPR, type_: 107, size: 8},
- {as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
- {as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8},
- {as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12},
{as: AMOVD, a1: C_SPR, a6: C_REG, type_: 66, size: 4},
- {as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
{as: AMOVD, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
{as: AMOVD, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
{as: AMOVD, a1: C_SPR, a6: C_SOREG, type_: 106, size: 8},
- {as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
{as: AMOVD, a1: C_REG, a6: C_SPR, type_: 66, size: 4},
{as: AMOVD, a1: C_REG, a6: C_REG, type_: 13, size: 4},
{as: AMOVW, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4},
{as: AMOVW, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4},
- {as: AMOVW, a1: C_UCON, a6: C_REG, type_: 3, size: 4},
- {as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8},
{as: AMOVW, a1: C_SACON, a6: C_REG, type_: 3, size: 4},
- {as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8},
- {as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
{as: AMOVW, a1: C_CREG, a6: C_REG, type_: 68, size: 4},
{as: AMOVW, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
- {as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
{as: AMOVW, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
{as: AMOVW, a1: C_SPR, a6: C_REG, type_: 66, size: 4},
- {as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
{as: AMOVW, a1: C_REG, a6: C_CREG, type_: 69, size: 4},
{as: AMOVW, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
- {as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
{as: AMOVW, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
{as: AMOVW, a1: C_REG, a6: C_SPR, type_: 66, size: 4},
{as: AMOVW, a1: C_REG, a6: C_REG, type_: 13, size: 4},
{as: AFMOVD, a1: C_ADDCON, a6: C_FREG, type_: 24, size: 8},
{as: AFMOVD, a1: C_SOREG, a6: C_FREG, type_: 8, size: 4},
{as: AFMOVD, a1: C_XOREG, a6: C_FREG, type_: 109, size: 4},
- {as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8},
{as: AFMOVD, a1: C_ZCON, a6: C_FREG, type_: 24, size: 4},
- {as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8},
{as: AFMOVD, a1: C_FREG, a6: C_FREG, type_: 33, size: 4},
{as: AFMOVD, a1: C_FREG, a6: C_SOREG, type_: 7, size: 4},
{as: AFMOVD, a1: C_FREG, a6: C_XOREG, type_: 108, size: 4},
- {as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8},
- {as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8},
{as: AFMOVSX, a1: C_XOREG, a6: C_FREG, type_: 45, size: 4},
{as: AFMOVSX, a1: C_FREG, a6: C_XOREG, type_: 44, size: 4},
{as: AADDEX, a1: C_REG, a2: C_REG, a3: C_SCON, a6: C_REG, type_: 94, size: 4}, /* add extended using alternate carry, z23-form */
{as: ACRAND, a1: C_CRBIT, a2: C_CRBIT, a6: C_CRBIT, type_: 2, size: 4}, /* logical ops for condition register bits xl-form */
+ /* Misc ISA 3.0 instructions */
+ {as: ASETB, a1: C_CREG, a6: C_REG, type_: 110, size: 4},
+ {as: AVCLZLSBB, a1: C_VREG, a6: C_REG, type_: 85, size: 4},
+
/* Vector instructions */
/* Vector load */
{as: obj.APCALIGN, a1: C_LCON, type_: 0, size: 0}, // align code
}
+// These are opcodes above which may generate different sequences depending on whether prefix opcode support
+// is available
+type PrefixableOptab struct {
+ Optab
+ minGOPPC64 int // Minimum GOPPC64 required to support this.
+ pfxsize int8 // Instruction sequence size when prefixed opcodes are used
+}
+
+// The prefixable optab entry contains the pseudo-opcodes which generate relocations, or may generate
+// a more efficient sequence of instructions if a prefixed version exists (ex. paddi instead of oris/ori/add).
+//
+// This table is meant to transform all sequences which might be TOC-relative into an equivalent PC-relative
+// sequence. It also encompasses several transformations which do not involve relocations, those could be
+// separated and applied to AIX and other non-ELF targets. Likewise, the prefixed forms do not have encoding
+// restrictions on the offset, so they are also used for static binary to allow better code generation. e.x
+//
+// MOVD something-byte-aligned(Rx), Ry
+// MOVD 3(Rx), Ry
+//
+// is allowed when the prefixed forms are used.
+//
+// This requires an ISA 3.1 compatible cpu (e.g Power10), and when linking externally an ELFv2 1.5 compliant.
+var prefixableOptab = []PrefixableOptab{
+ {Optab: Optab{as: AMOVD, a1: C_S34CON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12}, minGOPPC64: 10, pfxsize: 12},
+ {Optab: Optab{as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+ {Optab: Optab{as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+ {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12}, minGOPPC64: 10, pfxsize: 12},
+ {Optab: Optab{as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12}, minGOPPC64: 10, pfxsize: 12},
+ {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+ {Optab: Optab{as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+ {Optab: Optab{as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+ {Optab: Optab{as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AADD, a1: C_S34CON, a2: C_REG, a6: C_REG, type_: 22, size: 20}, minGOPPC64: 10, pfxsize: 8},
+ {Optab: Optab{as: AADD, a1: C_S34CON, a6: C_REG, type_: 22, size: 20}, minGOPPC64: 10, pfxsize: 8},
+}
+
var oprange [ALAST & obj.AMask][]Optab
var xcmp [C_NCLASS][C_NCLASS]bool
-// padding bytes to add to align code as requested
+var pfxEnabled = false // ISA 3.1 prefixed instructions are supported.
+var buildOpCfg = "" // Save the os/cpu/arch tuple used to configure the assembler in buildop
+
+// padding bytes to add to align code as requested.
func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
- // For 16 and 32 byte alignment, there is a tradeoff
- // between aligning the code and adding too many NOPs.
switch a {
- case 8:
- if pc&7 != 0 {
- return 4
- }
- case 16:
- // Align to 16 bytes if possible but add at
- // most 2 NOPs.
- switch pc & 15 {
- case 4, 12:
- return 4
- case 8:
- return 8
- }
- case 32:
- // Align to 32 bytes if possible but add at
- // most 3 NOPs.
- switch pc & 31 {
- case 4, 20:
- return 12
- case 8, 24:
- return 8
- case 12, 28:
- return 4
- }
- // When 32 byte alignment is requested on Linux,
- // promote the function's alignment to 32. On AIX
- // the function alignment is not changed which might
- // result in 16 byte alignment but that is still fine.
- // TODO: alignment on AIX
- if ctxt.Headtype != objabi.Haix && cursym.Func().Align < 32 {
- cursym.Func().Align = 32
+ case 8, 16, 32, 64:
+ // By default function alignment is 16. If an alignment > 16 is
+ // requested then the function alignment must also be promoted.
+ // The function alignment is not promoted on AIX at this time.
+ // TODO: Investigate AIX function alignment.
+ if ctxt.Headtype != objabi.Haix && cursym.Func().Align < int32(a) {
+ cursym.Func().Align = int32(a)
+ }
+ if pc&(a-1) != 0 {
+ return int(a - (pc & (a - 1)))
}
default:
ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a)
return 0
}
-// Get the implied register of a operand which doesn't specify one. These show up
+// Get the implied register of an operand which doesn't specify one. These show up
// in handwritten asm like "MOVD R5, foosymbol" where a base register is not supplied,
// or "MOVD R5, foo+10(SP) or pseudo-register is used. The other common case is when
// generating constants in register like "MOVD $constant, Rx".
// lay out the code, emitting code and data relocations.
bp := c.cursym.P
- nop := LOP_IRR(OP_ORI, REGZERO, REGZERO, 0)
var i int32
for p := c.cursym.Func().Text.Link; p != nil; p = p.Link {
c.pc = p.Pc
if v > 0 {
// Same padding instruction for all
for i = 0; i < int32(v/4); i++ {
- c.ctxt.Arch.ByteOrder.PutUint32(bp, nop)
+ c.ctxt.Arch.ByteOrder.PutUint32(bp, NOP)
bp = bp[4:]
}
}
} else {
if p.Mark&PFX_X64B != 0 {
- c.ctxt.Arch.ByteOrder.PutUint32(bp, nop)
+ c.ctxt.Arch.ByteOrder.PutUint32(bp, NOP)
bp = bp[4:]
}
o.asmout(&c, p, o, &out)
case sbits <= 16:
return C_U16CON
case sbits <= 31:
- // Special case, a positive int32 value which is a multiple of 2^16
- if c.instoffset&0xFFFF == 0 {
- return C_U3216CON
- }
return C_U32CON
case sbits <= 32:
return C_U32CON
case sbits <= 15:
return C_S16CON
case sbits <= 31:
- // Special case, a negative int32 value which is a multiple of 2^16
- if c.instoffset&0xFFFF == 0 {
- return C_S3216CON
- }
return C_S32CON
case sbits <= 33:
return C_S34CON
}
case obj.TYPE_BRANCH:
- if a.Sym != nil && c.ctxt.Flag_dynlink {
+ if a.Sym != nil && c.ctxt.Flag_dynlink && !pfxEnabled {
return C_LBRAPIC
}
return C_SBRA
case C_S16CON:
return cmp(C_U15CON, b)
case C_32CON:
- return cmp(C_S16CON, b) || cmp(C_U16CON, b) || cmp(C_32S16CON, b)
+ return cmp(C_S16CON, b) || cmp(C_U16CON, b)
case C_S34CON:
return cmp(C_32CON, b)
case C_64CON:
return cmp(C_S34CON, b)
- case C_32S16CON:
- return cmp(C_ZCON, b)
-
case C_LACON:
return cmp(C_SACON, b)
oprange[a&obj.AMask] = oprange[b0]
}
+// Determine if the build configuration requires a TOC pointer.
+// It is assumed this always called after buildop.
+func NeedTOCpointer(ctxt *obj.Link) bool {
+ return !pfxEnabled && ctxt.Flag_shared
+}
+
// Build the opcode table
func buildop(ctxt *obj.Link) {
- if oprange[AANDN&obj.AMask] != nil {
- // Already initialized; stop now.
+ // Limit PC-relative prefix instruction usage to supported and tested targets.
+ pfxEnabled = buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
+ cfg := fmt.Sprintf("power%d/%s/%s", buildcfg.GOPPC64, buildcfg.GOARCH, buildcfg.GOOS)
+ if cfg == buildOpCfg {
+ // Already initialized to correct OS/cpu; stop now.
// This happens in the cmd/asm tests,
// each of which re-initializes the arch.
return
}
+ buildOpCfg = cfg
+
+ // Configure the optab entries which may generate prefix opcodes.
+ prefixOptab := make([]Optab, 0, len(prefixableOptab))
+ for _, entry := range prefixableOptab {
+ entry := entry
+ if pfxEnabled && buildcfg.GOPPC64 >= entry.minGOPPC64 {
+ // Enable prefix opcode generation and resize.
+ entry.ispfx = true
+ entry.size = entry.pfxsize
+ }
+ prefixOptab = append(prefixOptab, entry.Optab)
+
+ }
for i := 0; i < C_NCLASS; i++ {
for n := 0; n < C_NCLASS; n++ {
}
}
}
+
+ // Append the generated entries, sort, and fill out oprange.
+ optab = make([]Optab, 0, len(optabBase)+len(optabGen)+len(prefixOptab))
+ optab = append(optab, optabBase...)
+ optab = append(optab, optabGen...)
+ optab = append(optab, prefixOptab...)
+ sort.Slice(optab, optabLess)
+
for i := range optab {
// Use the legacy assembler function if none provided.
if optab[i].asmout == nil {
optab[i].asmout = asmout
}
}
- // Append the generated entries, sort, and fill out oprange.
- optab = append(optab, optabGen...)
- sort.Slice(optab, optabLess)
+
for i := 0; i < len(optab); {
r := optab[i].as
r0 := r & obj.AMask
opset(APTESYNC, r0)
opset(ATLBSYNC, r0)
- case ARLWMI:
- opset(ARLWMICC, r0)
- opset(ARLWNM, r0)
+ case ARLWNM:
opset(ARLWNMCC, r0)
+ opset(ARLWMI, r0)
+ opset(ARLWMICC, r0)
case ARLDMI:
opset(ARLDMICC, r0)
case AMOVW: /* load/store/move word with sign extension; move 32-bit literals */
opset(AMOVWZ, r0) /* Same as above, but zero extended */
+ case AVCLZLSBB:
+ opset(AVCTZLSBB, r0)
+
case AADD,
AADDIS,
AANDCC, /* and. Rb,Rs,Ra; andi. $uimm,Rs,Ra */
AMTVSRDD,
APNOP,
AISEL,
+ ASETB,
obj.ANOP,
obj.ATEXT,
obj.AUNDEF,
return o<<26 | xo<<2 | rc&1
}
-/* the order is dest, a/s, b/imm for both arithmetic and logical operations */
+/* the order is dest, a/s, b/imm for both arithmetic and logical operations. */
func AOP_RRR(op uint32, d uint32, a uint32, b uint32) uint32 {
return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11
}
return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | (mb&31)<<6 | (me&31)<<1
}
-func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 {
- return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5
-}
-
func AOP_EXTSWSLI(op uint32, a uint32, s uint32, sh uint32) uint32 {
return op | (a&31)<<21 | (s&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1
}
return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6
}
+/* MD-form 2-register, 2 6-bit immediate operands */
+func AOP_MD(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 {
+ return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5
+}
+
+/* MDS-form 3-register, 1 6-bit immediate operands. rsh argument is a register. */
+func AOP_MDS(op, to, from, rsh, m uint32) uint32 {
+ return AOP_MD(op, to, from, rsh&31, m)
+}
+
+func AOP_PFX_00_8LS(r, ie uint32) uint32 {
+ return 1<<26 | 0<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF)
+}
+func AOP_PFX_10_MLS(r, ie uint32) uint32 {
+ return 1<<26 | 2<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF)
+}
+
const (
/* each rhs is OPVCC(_, _, _, _) */
OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0
OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0
OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0
OP_EXTSWSLI = 31<<26 | 445<<2
+ OP_SETB = 31<<26 | 128<<1
)
+func pfxadd(rt, ra int16, r uint32, imm32 int64) (uint32, uint32) {
+ return AOP_PFX_10_MLS(r, uint32(imm32>>16)), AOP_IRR(14<<26, uint32(rt), uint32(ra), uint32(imm32))
+}
+
+func pfxload(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) {
+ switch a {
+ case AMOVH:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(42<<26, uint32(reg), uint32(base), 0)
+ case AMOVW:
+ return AOP_PFX_00_8LS(r, 0), AOP_IRR(41<<26, uint32(reg), uint32(base), 0)
+ case AMOVD:
+ return AOP_PFX_00_8LS(r, 0), AOP_IRR(57<<26, uint32(reg), uint32(base), 0)
+ case AMOVBZ, AMOVB:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(34<<26, uint32(reg), uint32(base), 0)
+ case AMOVHZ:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(40<<26, uint32(reg), uint32(base), 0)
+ case AMOVWZ:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(32<<26, uint32(reg), uint32(base), 0)
+ case AFMOVS:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(48<<26, uint32(reg), uint32(base), 0)
+ case AFMOVD:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(50<<26, uint32(reg), uint32(base), 0)
+ }
+ log.Fatalf("Error no pfxload for %v\n", a)
+ return 0, 0
+}
+
+func pfxstore(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) {
+ switch a {
+ case AMOVD:
+ return AOP_PFX_00_8LS(r, 0), AOP_IRR(61<<26, uint32(reg), uint32(base), 0)
+ case AMOVBZ, AMOVB:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(38<<26, uint32(reg), uint32(base), 0)
+ case AMOVHZ, AMOVH:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(44<<26, uint32(reg), uint32(base), 0)
+ case AMOVWZ, AMOVW:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(36<<26, uint32(reg), uint32(base), 0)
+ case AFMOVS:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(52<<26, uint32(reg), uint32(base), 0)
+ case AFMOVD:
+ return AOP_PFX_10_MLS(r, 0), AOP_IRR(54<<26, uint32(reg), uint32(base), 0)
+ }
+ log.Fatalf("Error no pfxstore for %v\n", a)
+ return 0, 0
+}
+
func oclass(a *obj.Addr) int {
return int(a.Class) - 1
}
// Encode instructions and create relocation for accessing s+d according to the
// instruction op with source or destination (as appropriate) register reg.
-func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32) {
+func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32, rel *obj.Reloc) {
if c.ctxt.Headtype == objabi.Haix {
// Every symbol access must be made via a TOC anchor.
c.ctxt.Diag("symbolAccess called for %s", s.Name)
o1 = AOP_IRR(OP_ADDIS, uint32(reg), base, 0)
o2 = AOP_IRR(op, uint32(reg), uint32(reg), 0)
}
- rel := obj.Addrel(c.cursym)
+ rel = obj.Addrel(c.cursym)
rel.Off = int32(c.pc)
rel.Siz = 8
rel.Sym = s
return
}
-/*
- * 32-bit masks
- */
-func getmask(m []byte, v uint32) bool {
- m[1] = 0
- m[0] = m[1]
- if v != ^uint32(0) && v&(1<<31) != 0 && v&1 != 0 { /* MB > ME */
- if getmask(m, ^v) {
- i := int(m[0])
- m[0] = m[1] + 1
- m[1] = byte(i - 1)
- return true
- }
-
- return false
- }
-
- for i := 0; i < 32; i++ {
- if v&(1<<uint(31-i)) != 0 {
- m[0] = byte(i)
- for {
- m[1] = byte(i)
- i++
- if i >= 32 || v&(1<<uint(31-i)) == 0 {
- break
- }
- }
-
- for ; i < 32; i++ {
- if v&(1<<uint(31-i)) != 0 {
- return false
- }
- }
- return true
- }
+// Determine the mask begin (mb) and mask end (me) values
+// for a valid word rotate mask. A valid 32 bit mask is of
+// the form 1+0*1+ or 0*1+0*.
+//
+// Note, me is inclusive.
+func decodeMask32(mask uint32) (mb, me uint32, valid bool) {
+ mb = uint32(bits.LeadingZeros32(mask))
+ me = uint32(32 - bits.TrailingZeros32(mask))
+ mbn := uint32(bits.LeadingZeros32(^mask))
+ men := uint32(32 - bits.TrailingZeros32(^mask))
+ // Check for a wrapping mask (e.g bits at 0 and 31)
+ if mb == 0 && me == 32 {
+ // swap the inverted values
+ mb, me = men, mbn
}
- return false
-}
-
-func (c *ctxt9) maskgen(p *obj.Prog, m []byte, v uint32) {
- if !getmask(m, v) {
- c.ctxt.Diag("cannot generate mask #%x\n%v", v, p)
- }
+ // Validate mask is of the binary form 1+0*1+ or 0*1+0*
+ // Isolate rightmost 1 (if none 0) and add.
+ v := mask
+ vp := (v & -v) + v
+ // Likewise, check for the wrapping (inverted) case.
+ vn := ^v
+ vpn := (vn & -vn) + vn
+ return mb, (me - 1) & 31, (v&vp == 0 || vn&vpn == 0) && v != 0
}
-/*
- * 64-bit masks (rldic etc)
- */
-func getmask64(m []byte, v uint64) bool {
- m[1] = 0
- m[0] = m[1]
- for i := 0; i < 64; i++ {
- if v&(uint64(1)<<uint(63-i)) != 0 {
- m[0] = byte(i)
- for {
- m[1] = byte(i)
- i++
- if i >= 64 || v&(uint64(1)<<uint(63-i)) == 0 {
- break
- }
- }
-
- for ; i < 64; i++ {
- if v&(uint64(1)<<uint(63-i)) != 0 {
- return false
- }
- }
- return true
- }
- }
-
- return false
+// Decompose a mask of contiguous bits into a begin (mb) and
+// end (me) value.
+//
+// 64b mask values cannot wrap on any valid PPC64 instruction.
+// Only masks of the form 0*1+0* are valid.
+//
+// Note, me is inclusive.
+func decodeMask64(mask int64) (mb, me uint32, valid bool) {
+ m := uint64(mask)
+ mb = uint32(bits.LeadingZeros64(m))
+ me = uint32(64 - bits.TrailingZeros64(m))
+ valid = ((m&-m)+m)&m == 0 && m != 0
+ return mb, (me - 1) & 63, valid
}
-func (c *ctxt9) maskgen64(p *obj.Prog, m []byte, v uint64) {
- if !getmask64(m, v) {
- c.ctxt.Diag("cannot generate mask #%x\n%v", v, p)
+// Load the lower 16 bits of a constant into register r.
+func loadl16(r int, d int64) uint32 {
+ v := uint16(d)
+ if v == 0 {
+ // Avoid generating "ori r,r,0", r != 0. Instead, generate the architectually preferred nop.
+ // For example, "ori r31,r31,0" is a special execution serializing nop on Power10 called "exser".
+ return NOP
}
+ return LOP_IRR(OP_ORI, uint32(r), uint32(r), uint32(v))
}
+// Load the upper 16 bits of a 32b constant into register r.
func loadu32(r int, d int64) uint32 {
v := int32(d >> 16)
if isuint32(uint64(d)) {
c.ctxt.Diag("literal operation on R0\n%v", p)
}
a := OP_ADDI
- if o.a1 == C_UCON {
- if d&0xffff != 0 {
- log.Fatalf("invalid handling of %v", p)
- }
- // For UCON operands the value is right shifted 16, using ADDIS if the
- // value should be signed, ORIS if unsigned.
- v >>= 16
- if r == REGZERO && isuint32(uint64(d)) {
- o1 = LOP_IRR(OP_ORIS, uint32(p.To.Reg), REGZERO, uint32(v))
- break
- }
-
- a = OP_ADDIS
- } else if int64(int16(d)) != d {
+ if int64(int16(d)) != d {
// Operand is 16 bit value with sign bit set
if o.a1 == C_ANDCON {
// Needs unsigned 16 bit so use ORI
// AROTL and AROTLW are extended mnemonics, which map to RLDCL and RLWNM.
switch p.As {
case AROTL:
- o1 = AOP_RLDIC(OP_RLDCL, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), uint32(0))
+ o1 = AOP_MD(OP_RLDCL, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), uint32(0))
case AROTLW:
o1 = OP_RLW(OP_RLWNM, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), 0, 31)
default:
// Sign extend MOVB operations. This is ignored for other cases (o.size == 4).
o2 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
+ case 9: /* RLDC Ra, $sh, $mb, Rb */
+ sh := uint32(p.RestArgs[0].Addr.Offset) & 0x3F
+ mb := uint32(p.RestArgs[1].Addr.Offset) & 0x3F
+ o1 = AOP_RRR(c.opirr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), (uint32(sh) & 0x1F))
+ o1 |= (sh & 0x20) >> 4 // sh[5] is placed in bit 1.
+ o1 |= (mb & 0x1F) << 6 // mb[0:4] is placed in bits 6-10.
+ o1 |= (mb & 0x20) // mb[5] is placed in bit 5
+
case 10: /* sub Ra,[Rb],Rd => subf Rd,Ra,Rb */
r := int(p.Reg)
rel.Add = int64(v)
rel.Type = objabi.R_CALLPOWER
}
- o2 = 0x60000000 // nop, sometimes overwritten by ld r2, 24(r1) when dynamic linking
+ o2 = NOP // nop, sometimes overwritten by ld r2, 24(r1) when dynamic linking
case 13: /* mov[bhwd]{z,} r,r */
// This needs to handle "MOV* $0, Rx". This shows up because $0 also
}
case 14: /* rldc[lr] Rb,Rs,$mask,Ra -- left, right give different masks */
- r := int(p.Reg)
+ r := uint32(p.Reg)
if r == 0 {
- r = int(p.To.Reg)
+ r = uint32(p.To.Reg)
}
d := c.vregoff(p.GetFrom3())
- var a int
switch p.As {
// These opcodes expect a mask operand that has to be converted into the
// appropriate operand. The way these were defined, not all valid masks are possible.
// Left here for compatibility in case they were used or generated.
case ARLDCL, ARLDCLCC:
- var mask [2]uint8
- c.maskgen64(p, mask[:], uint64(d))
-
- a = int(mask[0]) /* MB */
- if mask[1] != 63 {
+ mb, me, valid := decodeMask64(d)
+ if me != 63 || !valid {
c.ctxt.Diag("invalid mask for rotate: %x (end != bit 63)\n%v", uint64(d), p)
}
- o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
- o1 |= (uint32(a) & 31) << 6
- if a&0x20 != 0 {
- o1 |= 1 << 5 /* mb[5] is top bit */
- }
+ o1 = AOP_MDS(c.oprrr(p.As), uint32(p.To.Reg), r, uint32(p.From.Reg), mb)
case ARLDCR, ARLDCRCC:
- var mask [2]uint8
- c.maskgen64(p, mask[:], uint64(d))
-
- a = int(mask[1]) /* ME */
- if mask[0] != 0 {
- c.ctxt.Diag("invalid mask for rotate: %x %x (start != 0)\n%v", uint64(d), mask[0], p)
- }
- o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
- o1 |= (uint32(a) & 31) << 6
- if a&0x20 != 0 {
- o1 |= 1 << 5 /* mb[5] is top bit */
+ mb, me, valid := decodeMask64(d)
+ if mb != 0 || !valid {
+ c.ctxt.Diag("invalid mask for rotate: %x (start != 0)\n%v", uint64(d), p)
}
+ o1 = AOP_MDS(c.oprrr(p.As), uint32(p.To.Reg), r, uint32(p.From.Reg), me)
// These opcodes use a shift count like the ppc64 asm, no mask conversion done
case ARLDICR, ARLDICRCC:
- me := int(d)
+ me := uint32(d)
sh := c.regoff(&p.From)
if me < 0 || me > 63 || sh > 63 {
c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh, p)
}
- o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me))
+ o1 = AOP_MD(c.oprrr(p.As), uint32(p.To.Reg), r, uint32(sh), me)
case ARLDICL, ARLDICLCC, ARLDIC, ARLDICCC:
- mb := int(d)
+ mb := uint32(d)
sh := c.regoff(&p.From)
if mb < 0 || mb > 63 || sh > 63 {
c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh, p)
}
- o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb))
+ o1 = AOP_MD(c.oprrr(p.As), uint32(p.To.Reg), r, uint32(sh), mb)
case ACLRLSLDI:
// This is an extended mnemonic defined in the ISA section C.8.1
if n > b || b > 63 {
c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b, p)
}
- o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n))
+ o1 = AOP_MD(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n))
default:
c.ctxt.Diag("unexpected op in rldc case\n%v", p)
- a = 0
}
case 17, /* bc bo,bi,lbra (same for now) */
case 19: /* mov $lcon,r ==> cau+or */
d := c.vregoff(&p.From)
- o1 = loadu32(int(p.To.Reg), d)
- o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d)))
+ if o.ispfx {
+ o1, o2 = pfxadd(p.To.Reg, REG_R0, PFX_R_ABS, d)
+ } else {
+ o1 = loadu32(int(p.To.Reg), d)
+ o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d)))
+ }
case 20: /* add $ucon,,r | addis $addcon,r,r */
v := c.regoff(&p.From)
if r == 0 {
r = int(p.To.Reg)
}
- if p.As == AADD && (r0iszero == 0 /*TypeKind(100016)*/ && p.Reg == 0 || r0iszero != 0 /*TypeKind(100016)*/ && p.To.Reg == 0) {
- c.ctxt.Diag("literal operation on R0\n%v", p)
- }
- if p.As == AADDIS {
- o1 = AOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
- } else {
- o1 = AOP_IRR(c.opirr(AADDIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
- }
+ o1 = AOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
- case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add */
+ case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add, add $s34con,r1 ==> addis+ori+slw+ori+add */
if p.To.Reg == REGTMP || p.Reg == REGTMP {
c.ctxt.Diag("can't synthesize large constant\n%v", p)
}
if p.From.Sym != nil {
c.ctxt.Diag("%v is not supported", p)
}
- // If operand is ANDCON, generate 2 instructions using
- // ORI for unsigned value; with LCON 3 instructions.
- if o.size == 8 {
- o1 = LOP_IRR(OP_ORI, REGTMP, REGZERO, uint32(int32(d)))
- o2 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
+ if o.ispfx {
+ o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, d)
+ } else if o.size == 8 {
+ o1 = LOP_IRR(OP_ORI, REGTMP, REGZERO, uint32(int32(d))) // tmp = uint16(d)
+ o2 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) // to = tmp + from
+ } else if o.size == 12 {
+ // Note, o1 is ADDIS if d is negative, ORIS otherwise.
+ o1 = loadu32(REGTMP, d) // tmp = d & 0xFFFF0000
+ o2 = loadl16(REGTMP, d) // tmp |= d & 0xFFFF
+ o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) // to = from + tmp
} else {
- o1 = loadu32(REGTMP, d)
- o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d)))
- o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
+ // For backwards compatibility with GOPPC64 < 10, generate 34b constants in register.
+ o1 = LOP_IRR(OP_ADDIS, REGZERO, REGTMP, uint32(d>>32)) // tmp = sign_extend((d>>32)&0xFFFF0000)
+ o2 = loadl16(REGTMP, int64(d>>16)) // tmp |= (d>>16)&0xFFFF
+ o3 = AOP_MD(OP_RLDICR, REGTMP, REGTMP, 16, 63-16) // tmp <<= 16
+ o4 = loadl16(REGTMP, int64(uint16(d))) // tmp |= d&0xFFFF
+ o5 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
}
case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */
o2 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
} else {
o1 = loadu32(REGTMP, d)
- o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d)))
+ o2 = loadl16(REGTMP, d)
o3 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
}
if p.From.Sym != nil {
o1 = AOP_EXTSWSLI(OP_EXTSWSLI, uint32(r), uint32(p.To.Reg), uint32(v))
} else {
- o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a))
+ o1 = AOP_MD(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a))
}
if p.As == ASLDCC || p.As == ASRDCC || p.As == AEXTSWSLICC {
o1 |= 1 // Set the condition code bit
case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */
v := c.vregoff(&p.From)
r := int(p.From.Reg)
+ var rel *obj.Reloc
switch p.From.Name {
case obj.NAME_EXTERN, obj.NAME_STATIC:
// Load a 32 bit constant, or relocation depending on if a symbol is attached
- o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true)
+ o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true)
default:
if r == 0 {
r = c.getimpliedreg(&p.From, p)
o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
}
+ if o.ispfx {
+ if rel == nil {
+ o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, v)
+ } else {
+ o1, o2 = pfxadd(int16(p.To.Reg), REG_R0, PFX_R_PCREL, 0)
+ rel.Type = objabi.R_ADDRPOWER_PCREL34
+ }
+ }
+
case 27: /* subc ra,$simm,rd => subfic rd,ra,$simm */
v := c.regoff(p.GetFrom3())
if p.To.Reg == REGTMP || p.From.Reg == REGTMP {
c.ctxt.Diag("can't synthesize large constant\n%v", p)
}
- v := c.regoff(p.GetFrom3())
+ v := c.vregoff(p.GetFrom3())
o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, uint32(v)>>16)
- o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(v))
+ o2 = loadl16(REGTMP, v)
o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), REGTMP)
if p.From.Sym != nil {
c.ctxt.Diag("%v is not supported", p)
}
case 29: /* rldic[lr]? $sh,s,$mask,a -- left, right, plain give different masks */
- v := c.regoff(&p.From)
-
+ sh := uint32(c.regoff(&p.From))
d := c.vregoff(p.GetFrom3())
- var mask [2]uint8
- c.maskgen64(p, mask[:], uint64(d))
- var a int
+ mb, me, valid := decodeMask64(d)
+ var a uint32
switch p.As {
case ARLDC, ARLDCCC:
- a = int(mask[0]) /* MB */
- if int32(mask[1]) != (63 - v) {
- c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), mask[1], v, p)
+ a = mb
+ if me != (63-sh) || !valid {
+ c.ctxt.Diag("invalid mask for shift: %016x (mb=%d,me=%d) (shift %d)\n%v", uint64(d), mb, me, sh, p)
}
case ARLDCL, ARLDCLCC:
- a = int(mask[0]) /* MB */
- if mask[1] != 63 {
- c.ctxt.Diag("invalid mask for shift: %x %s (shift %d)\n%v", uint64(d), mask[1], v, p)
+ a = mb
+ if mb != 63 || !valid {
+ c.ctxt.Diag("invalid mask for shift: %016x (mb=%d,me=%d) (shift %d)\n%v", uint64(d), mb, me, sh, p)
}
case ARLDCR, ARLDCRCC:
- a = int(mask[1]) /* ME */
- if mask[0] != 0 {
- c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), mask[0], v, p)
+ a = me
+ if mb != 0 || !valid {
+ c.ctxt.Diag("invalid mask for shift: %016x (mb=%d,me=%d) (shift %d)\n%v", uint64(d), mb, me, sh, p)
}
default:
c.ctxt.Diag("unexpected op in rldic case\n%v", p)
- a = 0
- }
-
- o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), (uint32(v) & 0x1F))
- o1 |= (uint32(a) & 31) << 6
- if v&0x20 != 0 {
- o1 |= 1 << 1
- }
- if a&0x20 != 0 {
- o1 |= 1 << 5 /* mb[5] is top bit */
}
+ o1 = AOP_MD(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), sh, a)
case 30: /* rldimi $sh,s,$mask,a */
- v := c.regoff(&p.From)
-
+ sh := uint32(c.regoff(&p.From))
d := c.vregoff(p.GetFrom3())
// Original opcodes had mask operands which had to be converted to a shift count as expected by
// the ppc64 asm.
switch p.As {
case ARLDMI, ARLDMICC:
- var mask [2]uint8
- c.maskgen64(p, mask[:], uint64(d))
- if int32(mask[1]) != (63 - v) {
- c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), mask[1], v, p)
- }
- o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), (uint32(v) & 0x1F))
- o1 |= (uint32(mask[0]) & 31) << 6
- if v&0x20 != 0 {
- o1 |= 1 << 1
- }
- if mask[0]&0x20 != 0 {
- o1 |= 1 << 5 /* mb[5] is top bit */
+ mb, me, valid := decodeMask64(d)
+ if me != (63-sh) || !valid {
+ c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), me, sh, p)
}
+ o1 = AOP_MD(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), sh, mb)
// Opcodes with shift count operands.
case ARLDIMI, ARLDIMICC:
- o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), (uint32(v) & 0x1F))
- o1 |= (uint32(d) & 31) << 6
- if d&0x20 != 0 {
- o1 |= 1 << 5
- }
- if v&0x20 != 0 {
- o1 |= 1 << 1
- }
+ o1 = AOP_MD(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), sh, uint32(d))
}
case 31: /* dword */
r = c.getimpliedreg(&p.To, p)
}
// Offsets in DS form stores must be a multiple of 4
- inst := c.opstore(p.As)
- if c.opform(inst) == DS_FORM && v&0x3 != 0 {
- log.Fatalf("invalid offset for DS form load/store %v", p)
+ if o.ispfx {
+ o1, o2 = pfxstore(p.As, p.From.Reg, int16(r), PFX_R_ABS)
+ o1 |= uint32((v >> 16) & 0x3FFFF)
+ o2 |= uint32(v & 0xFFFF)
+ } else {
+ inst := c.opstore(p.As)
+ if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+ log.Fatalf("invalid offset for DS form load/store %v", p)
+ }
+ o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
+ o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v))
}
- o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
- o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v))
case 36: /* mov b/bz/h/hz lext/lauto/lreg,r ==> lbz+extsb/lbz/lha/lhz etc */
v := c.regoff(&p.From)
if r == 0 {
r = c.getimpliedreg(&p.From, p)
}
- o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v)))
- o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
+
+ if o.ispfx {
+ o1, o2 = pfxload(p.As, p.To.Reg, int16(r), PFX_R_ABS)
+ o1 |= uint32((v >> 16) & 0x3FFFF)
+ o2 |= uint32(v & 0xFFFF)
+ } else {
+ if o.a6 == C_REG {
+ // Reuse the base register when loading a GPR (C_REG) to avoid
+ // using REGTMP (R31) when possible.
+ o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v)))
+ o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
+ } else {
+ o1 = AOP_IRR(OP_ADDIS, uint32(REGTMP), uint32(r), uint32(high16adjusted(v)))
+ o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(REGTMP), uint32(v))
+ }
+ }
// Sign extend MOVB if needed
o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
}
o1 = LOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
- case 59: /* or/xor/and $ucon,,r | oris/xoris/andis $addcon,r,r */
- v := c.regoff(&p.From)
-
- r := int(p.Reg)
- if r == 0 {
- r = int(p.To.Reg)
- }
- switch p.As {
- case AOR:
- o1 = LOP_IRR(c.opirr(AORIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16) /* oris, xoris, andis. */
- case AXOR:
- o1 = LOP_IRR(c.opirr(AXORIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
- case AANDCC:
- o1 = LOP_IRR(c.opirr(AANDISCC), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
- default:
- o1 = LOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
- }
-
case 60: /* tw to,a,b */
r := int(c.regoff(&p.From) & 31)
v := c.regoff(&p.To)
o1 = AOP_IRR(c.opirr(p.As), uint32(r), uint32(p.Reg), uint32(v))
- case 62: /* rlwmi $sh,s,$mask,a */
+ case 62: /* clrlslwi $sh,s,$mask,a */
v := c.regoff(&p.From)
- switch p.As {
- case ACLRLSLWI:
- n := c.regoff(p.GetFrom3())
- // This is an extended mnemonic described in the ISA C.8.2
- // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n
- // It maps onto rlwinm which is directly generated here.
- if n > v || v >= 32 {
- c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p)
+ n := c.regoff(p.GetFrom3())
+ // This is an extended mnemonic described in the ISA C.8.2
+ // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n
+ // It maps onto rlwinm which is directly generated here.
+ if n > v || v >= 32 {
+ c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p)
+ }
+
+ o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n))
+
+ case 63: /* rlwimi/rlwnm/rlwinm [$sh,b],s,[$mask or mb,me],a*/
+ var mb, me uint32
+ if len(p.RestArgs) == 1 { // Mask needs decomposed into mb and me.
+ var valid bool
+ // Note, optab rules ensure $mask is a 32b constant.
+ mb, me, valid = decodeMask32(uint32(p.RestArgs[0].Addr.Offset))
+ if !valid {
+ c.ctxt.Diag("cannot generate mask #%x\n%v", uint64(p.RestArgs[0].Addr.Offset), p)
}
-
- o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n))
- default:
- var mask [2]uint8
- c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3())))
- o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v))
- o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1
+ } else { // Otherwise, mask is already passed as mb and me in RestArgs.
+ mb, me = uint32(p.RestArgs[0].Addr.Offset), uint32(p.RestArgs[1].Addr.Offset)
+ }
+ if p.From.Type == obj.TYPE_CONST {
+ o1 = OP_RLW(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Offset), mb, me)
+ } else {
+ o1 = OP_RLW(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), mb, me)
}
-
- case 63: /* rlwmi b,s,$mask,a */
- var mask [2]uint8
- c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3())))
- o1 = AOP_RRR(c.oprrr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg))
- o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1
case 64: /* mtfsf fr[, $m] {,fpcsr} */
var v int32
/* relocation operations */
case 74:
+ var rel *obj.Reloc
v := c.vregoff(&p.To)
// Offsets in DS form stores must be a multiple of 4
inst := c.opstore(p.As)
- if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+
+ // Can't reuse base for store instructions.
+ o1, o2, rel = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false)
+
+ // Rewrite as a prefixed store if supported.
+ if o.ispfx {
+ o1, o2 = pfxstore(p.As, p.From.Reg, REG_R0, PFX_R_PCREL)
+ rel.Type = objabi.R_ADDRPOWER_PCREL34
+ } else if c.opform(inst) == DS_FORM && v&0x3 != 0 {
log.Fatalf("invalid offset for DS form load/store %v", p)
}
- // Can't reuse base for store instructions.
- o1, o2 = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false)
case 75: // 32 bit offset symbol loads (got/toc/addr)
+ var rel *obj.Reloc
v := p.From.Offset
// Offsets in DS form loads must be a multiple of 4
inst := c.opload(p.As)
- if c.opform(inst) == DS_FORM && v&0x3 != 0 {
- log.Fatalf("invalid offset for DS form load/store %v", p)
- }
switch p.From.Name {
case obj.NAME_GOTREF, obj.NAME_TOCREF:
if v != 0 {
}
o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
o2 = AOP_IRR(inst, uint32(p.To.Reg), uint32(p.To.Reg), 0)
- rel := obj.Addrel(c.cursym)
+ rel = obj.Addrel(c.cursym)
rel.Off = int32(c.pc)
rel.Siz = 8
rel.Sym = p.From.Sym
rel.Type = objabi.R_ADDRPOWER_TOCREL_DS
}
default:
- reuseBaseReg := p.As != AFMOVD && p.As != AFMOVS
- // Reuse To.Reg as base register if not FP move.
- o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg)
+ reuseBaseReg := o.a6 == C_REG
+ // Reuse To.Reg as base register if it is a GPR.
+ o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg)
+ }
+
+ // Convert to prefixed forms if supported.
+ if o.ispfx {
+ switch rel.Type {
+ case objabi.R_ADDRPOWER, objabi.R_ADDRPOWER_DS,
+ objabi.R_ADDRPOWER_TOCREL, objabi.R_ADDRPOWER_TOCREL_DS:
+ o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+ rel.Type = objabi.R_ADDRPOWER_PCREL34
+ case objabi.R_POWER_TLS_IE:
+ o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+ rel.Type = objabi.R_POWER_TLS_IE_PCREL34
+ case objabi.R_ADDRPOWER_GOT:
+ o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+ rel.Type = objabi.R_ADDRPOWER_GOT_PCREL34
+ default:
+ // We've failed to convert a TOC-relative relocation to a PC-relative one.
+ log.Fatalf("Unable convert TOC-relative relocation %v to PC-relative", rel.Type)
+ }
+ } else if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+ log.Fatalf("invalid offset for DS form load/store %v", p)
}
o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
if p.From.Offset != 0 {
c.ctxt.Diag("invalid offset against tls var %v", p)
}
- o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0)
- o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0)
rel := obj.Addrel(c.cursym)
rel.Off = int32(c.pc)
rel.Siz = 8
rel.Sym = p.From.Sym
- rel.Type = objabi.R_POWER_TLS_LE
+ if !o.ispfx {
+ o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0)
+ o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0)
+ rel.Type = objabi.R_POWER_TLS_LE
+ } else {
+ o1, o2 = pfxadd(p.To.Reg, REG_R13, PFX_R_ABS, 0)
+ rel.Type = objabi.R_POWER_TLS_LE_TPREL34
+ }
case 80:
if p.From.Offset != 0 {
c.ctxt.Diag("invalid offset against tls var %v", p)
}
- o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
- o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0)
- o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13)
rel := obj.Addrel(c.cursym)
rel.Off = int32(c.pc)
rel.Siz = 8
rel.Sym = p.From.Sym
rel.Type = objabi.R_POWER_TLS_IE
+ if !o.ispfx {
+ o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
+ o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0)
+ } else {
+ o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+ rel.Type = objabi.R_POWER_TLS_IE_PCREL34
+ }
+ o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13)
rel = obj.Addrel(c.cursym)
rel.Off = int32(c.pc) + 8
rel.Siz = 4
case 101:
o1 = AOP_XX2(c.oprrr(p.As), uint32(p.To.Reg), uint32(0), uint32(p.From.Reg))
- case 102: /* RLWMI $sh,rs,$mb,$me,rt (M-form opcode)*/
- mb := uint32(c.regoff(&p.RestArgs[0].Addr))
- me := uint32(c.regoff(&p.RestArgs[1].Addr))
- sh := uint32(c.regoff(&p.From))
- o1 = OP_RLW(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), sh, mb, me)
-
- case 103: /* RLWMI rb,rs,$mb,$me,rt (M-form opcode)*/
- mb := uint32(c.regoff(&p.RestArgs[0].Addr))
- me := uint32(c.regoff(&p.RestArgs[1].Addr))
- o1 = OP_RLW(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), mb, me)
-
case 104: /* VSX mtvsr* instructions, XX1-form RA,RB,XT */
o1 = AOP_XX1(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
o1 = AOP_RRR(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(r))
// Sign extend MOVB operations. This is ignored for other cases (o.size == 4).
o2 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
+
+ case 110: /* SETB creg, rt */
+ bfa := uint32(p.From.Reg) << 2
+ rt := uint32(p.To.Reg)
+ o1 = LOP_RRR(OP_SETB, bfa, rt, 0)
}
out[0] = o1
case AHRFID:
return OPVCC(19, 274, 0, 0)
- case ARLWMI:
- return OPVCC(20, 0, 0, 0)
- case ARLWMICC:
- return OPVCC(20, 0, 0, 1)
case ARLWNM:
return OPVCC(23, 0, 0, 0)
case ARLWNMCC:
case AVCLZD:
return OPVX(4, 1986, 0, 0) /* vclzd - v2.07 */
+ case AVCLZLSBB:
+ return OPVX(4, 1538, 0, 0) /* vclzlsbb - v3.0 */
+ case AVCTZLSBB:
+ return OPVX(4, 1538, 0, 0) | 1<<16 /* vctzlsbb - v3.0 */
+
case AVPOPCNTB:
return OPVX(4, 1795, 0, 0) /* vpopcntb - v2.07 */
case AVPOPCNTH: