cmd/internal/obj/ppc64: remove C_UCON optab matching class

[gostls13.git] / src / cmd / internal / obj / ppc64 / asm9.go
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go

index 051663601a8e814cf3a463b4c10f596bd77f04ae..9524f66ca455f93791bc758e1031a29b610e8684 100644 (file)
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -34,8 +34,10 @@ import (
         "cmd/internal/objabi"
         "encoding/binary"
         "fmt"
+       "internal/buildcfg"
         "log"
         "math"
+       "math/bits"
         "sort"
  )
  
@@ -54,12 +56,18 @@ type ctxt9 struct {
  // Instruction layout.
  
  const (
-       funcAlign     = 16
-       funcAlignMask = funcAlign - 1
+       r0iszero = 1
  )
  
  const (
-       r0iszero = 1
+       // R bit option in prefixed load/store/add D-form operations
+       PFX_R_ABS   = 0 // Offset is absolute
+       PFX_R_PCREL = 1 // Offset is relative to PC, RA should be 0
+)
+
+const (
+       // The preferred hardware nop instruction.
+       NOP = 0x60000000
  )
  
  type Optab struct {
@@ -72,6 +80,14 @@ type Optab struct {
         a6    uint8  // p.To (obj.Addr)
         type_ int8   // cases in asmout below. E.g., 44 = st r,(ra+rb); 45 = ld (ra+rb), r
         size  int8   // Text space in bytes to lay operation
+
+       // A prefixed instruction is generated by this opcode. It must not be placed
+       // so that it crosses a 64B PC address boundary. Opcodes should not translate
+       // to more than one prefixed instruction. The prefixed instruction should be
+       // written first (e.g., when Optab.size > 8).
+       ispfx bool
+
+       asmout func(*ctxt9, *obj.Prog, *Optab, *[5]uint32)
  }
  
  // optab contains an array to be sliced of accepted operand combinations for an
@@ -88,7 +104,11 @@ type Optab struct {
  //
  // Likewise, each slice of optab is dynamically sorted using the ocmp Sort interface
  // to arrange entries to minimize text size of each opcode.
-var optab = []Optab{
+//
+// optab is the sorted result of combining optabBase, optabGen, and prefixableOptab.
+var optab []Optab
+
+var optabBase = []Optab{
         {as: obj.ATEXT, a1: C_LOREG, a6: C_TEXTSIZE, type_: 0, size: 0},
         {as: obj.ATEXT, a1: C_LOREG, a3: C_LCON, a6: C_TEXTSIZE, type_: 0, size: 0},
         {as: obj.ATEXT, a1: C_ADDR, a6: C_TEXTSIZE, type_: 0, size: 0},
@@ -100,12 +120,8 @@ var optab = []Optab{
         {as: AADD, a1: C_SCON, a6: C_REG, type_: 4, size: 4},
         {as: AADD, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 4, size: 4},
         {as: AADD, a1: C_ADDCON, a6: C_REG, type_: 4, size: 4},
-       {as: AADD, a1: C_UCON, a2: C_REG, a6: C_REG, type_: 20, size: 4},
-       {as: AADD, a1: C_UCON, a6: C_REG, type_: 20, size: 4},
         {as: AADD, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 22, size: 8},
         {as: AADD, a1: C_ANDCON, a6: C_REG, type_: 22, size: 8},
-       {as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12},
-       {as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12},
         {as: AADDIS, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 20, size: 4},
         {as: AADDIS, a1: C_ADDCON, a6: C_REG, type_: 20, size: 4},
         {as: AADDC, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4},
@@ -120,14 +136,12 @@ var optab = []Optab{
         {as: AANDCC, a1: C_REG, a6: C_REG, type_: 6, size: 4},
         {as: AANDCC, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4},
         {as: AANDCC, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4},
-       {as: AANDCC, a1: C_UCON, a6: C_REG, type_: 59, size: 4},
-       {as: AANDCC, a1: C_UCON, a2: C_REG, a6: C_REG, type_: 59, size: 4},
         {as: AANDCC, a1: C_ADDCON, a6: C_REG, type_: 23, size: 8},
         {as: AANDCC, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 23, size: 8},
         {as: AANDCC, a1: C_LCON, a6: C_REG, type_: 23, size: 12},
         {as: AANDCC, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 23, size: 12},
-       {as: AANDISCC, a1: C_ANDCON, a6: C_REG, type_: 59, size: 4},
-       {as: AANDISCC, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 59, size: 4},
+       {as: AANDISCC, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4},
+       {as: AANDISCC, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4},
         {as: AMULLW, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4},
         {as: AMULLW, a1: C_REG, a6: C_REG, type_: 2, size: 4},
         {as: AMULLW, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 4, size: 4},
@@ -144,14 +158,12 @@ var optab = []Optab{
         {as: AOR, a1: C_REG, a6: C_REG, type_: 6, size: 4},
         {as: AOR, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4},
         {as: AOR, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4},
-       {as: AOR, a1: C_UCON, a6: C_REG, type_: 59, size: 4},
-       {as: AOR, a1: C_UCON, a2: C_REG, a6: C_REG, type_: 59, size: 4},
         {as: AOR, a1: C_ADDCON, a6: C_REG, type_: 23, size: 8},
         {as: AOR, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 23, size: 8},
         {as: AOR, a1: C_LCON, a6: C_REG, type_: 23, size: 12},
         {as: AOR, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 23, size: 12},
-       {as: AORIS, a1: C_ANDCON, a6: C_REG, type_: 59, size: 4},
-       {as: AORIS, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 59, size: 4},
+       {as: AORIS, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4},
+       {as: AORIS, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4},
         {as: ADIVW, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4}, /* op r1[,r2],r3 */
         {as: ADIVW, a1: C_REG, a6: C_REG, type_: 2, size: 4},
         {as: ASUB, a1: C_REG, a2: C_REG, a6: C_REG, type_: 10, size: 4}, /* op r2[,r1],r3 */
@@ -174,13 +186,14 @@ var optab = []Optab{
         {as: ASRAD, a1: C_REG, a2: C_REG, a6: C_REG, type_: 6, size: 4},
         {as: ASRAD, a1: C_SCON, a2: C_REG, a6: C_REG, type_: 56, size: 4},
         {as: ASRAD, a1: C_SCON, a6: C_REG, type_: 56, size: 4},
-       {as: ARLWMI, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 62, size: 4},
-       {as: ARLWMI, a1: C_SCON, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 102, size: 4},
-       {as: ARLWMI, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 63, size: 4},
-       {as: ARLWMI, a1: C_REG, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 103, size: 4},
+       {as: ARLWNM, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 63, size: 4},
+       {as: ARLWNM, a1: C_SCON, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 63, size: 4},
+       {as: ARLWNM, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 63, size: 4},
+       {as: ARLWNM, a1: C_REG, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 63, size: 4},
         {as: ACLRLSLWI, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 62, size: 4},
         {as: ARLDMI, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 30, size: 4},
         {as: ARLDC, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 29, size: 4},
+       {as: ARLDC, a1: C_REG, a3: C_U8CON, a4: C_U8CON, a6: C_REG, type_: 9, size: 4},
         {as: ARLDCL, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 29, size: 4},
         {as: ARLDCL, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 14, size: 4},
         {as: ARLDICL, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 14, size: 4},
@@ -195,80 +208,69 @@ var optab = []Optab{
         {as: AFMUL, a1: C_FREG, a2: C_FREG, a6: C_FREG, type_: 32, size: 4},
  
         {as: AMOVBU, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-       {as: AMOVBU, a1: C_SOREG, a6: C_REG, type_: 9, size: 8},
+       {as: AMOVBU, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
+       {as: AMOVBU, a1: C_SOREG, a6: C_REG, type_: 8, size: 8},
+       {as: AMOVBU, a1: C_XOREG, a6: C_REG, type_: 109, size: 8},
  
         {as: AMOVBZU, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
+       {as: AMOVBZU, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
         {as: AMOVBZU, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
+       {as: AMOVBZU, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
  
-       {as: AMOVHBR, a1: C_REG, a6: C_ZOREG, type_: 44, size: 4},
-       {as: AMOVHBR, a1: C_ZOREG, a6: C_REG, type_: 45, size: 4},
+       {as: AMOVHBR, a1: C_REG, a6: C_XOREG, type_: 44, size: 4},
+       {as: AMOVHBR, a1: C_XOREG, a6: C_REG, type_: 45, size: 4},
  
-       {as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 76, size: 12},
-       {as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 37, size: 12},
-       {as: AMOVB, a1: C_SOREG, a6: C_REG, type_: 9, size: 8},
-       {as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
+       {as: AMOVB, a1: C_SOREG, a6: C_REG, type_: 8, size: 8},
+       {as: AMOVB, a1: C_XOREG, a6: C_REG, type_: 109, size: 8},
         {as: AMOVB, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-       {as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
+       {as: AMOVB, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
         {as: AMOVB, a1: C_REG, a6: C_REG, type_: 13, size: 4},
  
-       {as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
-       {as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
         {as: AMOVBZ, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
-       {as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
+       {as: AMOVBZ, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
         {as: AMOVBZ, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-       {as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
+       {as: AMOVBZ, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
         {as: AMOVBZ, a1: C_REG, a6: C_REG, type_: 13, size: 4},
  
         {as: AMOVD, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4},
         {as: AMOVD, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVD, a1: C_UCON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8},
         {as: AMOVD, a1: C_SACON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8},
-       {as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
         {as: AMOVD, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
-       {as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
-       {as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8},
-       {as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12},
+       {as: AMOVD, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
+       {as: AMOVD, a1: C_SOREG, a6: C_SPR, type_: 107, size: 8},
         {as: AMOVD, a1: C_SPR, a6: C_REG, type_: 66, size: 4},
-       {as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
         {as: AMOVD, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-       {as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
+       {as: AMOVD, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
+       {as: AMOVD, a1: C_SPR, a6: C_SOREG, type_: 106, size: 8},
         {as: AMOVD, a1: C_REG, a6: C_SPR, type_: 66, size: 4},
         {as: AMOVD, a1: C_REG, a6: C_REG, type_: 13, size: 4},
  
         {as: AMOVW, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4},
         {as: AMOVW, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVW, a1: C_UCON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8},
         {as: AMOVW, a1: C_SACON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8},
-       {as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
         {as: AMOVW, a1: C_CREG, a6: C_REG, type_: 68, size: 4},
         {as: AMOVW, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
-       {as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
+       {as: AMOVW, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
         {as: AMOVW, a1: C_SPR, a6: C_REG, type_: 66, size: 4},
-       {as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
         {as: AMOVW, a1: C_REG, a6: C_CREG, type_: 69, size: 4},
         {as: AMOVW, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-       {as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
+       {as: AMOVW, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
         {as: AMOVW, a1: C_REG, a6: C_SPR, type_: 66, size: 4},
         {as: AMOVW, a1: C_REG, a6: C_REG, type_: 13, size: 4},
  
         {as: AFMOVD, a1: C_ADDCON, a6: C_FREG, type_: 24, size: 8},
         {as: AFMOVD, a1: C_SOREG, a6: C_FREG, type_: 8, size: 4},
-       {as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8},
+       {as: AFMOVD, a1: C_XOREG, a6: C_FREG, type_: 109, size: 4},
         {as: AFMOVD, a1: C_ZCON, a6: C_FREG, type_: 24, size: 4},
-       {as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8},
         {as: AFMOVD, a1: C_FREG, a6: C_FREG, type_: 33, size: 4},
         {as: AFMOVD, a1: C_FREG, a6: C_SOREG, type_: 7, size: 4},
-       {as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8},
-       {as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8},
+       {as: AFMOVD, a1: C_FREG, a6: C_XOREG, type_: 108, size: 4},
  
-       {as: AFMOVSX, a1: C_ZOREG, a6: C_FREG, type_: 45, size: 4},
-       {as: AFMOVSX, a1: C_FREG, a6: C_ZOREG, type_: 44, size: 4},
+       {as: AFMOVSX, a1: C_XOREG, a6: C_FREG, type_: 45, size: 4},
+       {as: AFMOVSX, a1: C_FREG, a6: C_XOREG, type_: 44, size: 4},
  
         {as: AFMOVSZ, a1: C_ZOREG, a6: C_FREG, type_: 45, size: 4},
+       {as: AFMOVSZ, a1: C_XOREG, a6: C_FREG, type_: 45, size: 4},
  
         {as: AMOVFL, a1: C_CREG, a6: C_CREG, type_: 67, size: 4},
         {as: AMOVFL, a1: C_FPSCR, a6: C_CREG, type_: 73, size: 4},
@@ -284,30 +286,25 @@ var optab = []Optab{
         {as: ASYSCALL, a1: C_SCON, type_: 77, size: 12},
         {as: ABEQ, a6: C_SBRA, type_: 16, size: 4},
         {as: ABEQ, a1: C_CREG, a6: C_SBRA, type_: 16, size: 4},
-       {as: ABR, a6: C_LBRA, type_: 11, size: 4},
-       {as: ABR, a6: C_LBRAPIC, type_: 11, size: 8},
-       {as: ABC, a1: C_SCON, a2: C_REG, a6: C_SBRA, type_: 16, size: 4},
-       {as: ABC, a1: C_SCON, a2: C_REG, a6: C_LBRA, type_: 17, size: 4},
-       {as: ABR, a6: C_LR, type_: 18, size: 4},
-       {as: ABR, a3: C_SCON, a6: C_LR, type_: 18, size: 4},
-       {as: ABR, a6: C_CTR, type_: 18, size: 4},
-       {as: ABR, a1: C_REG, a6: C_CTR, type_: 18, size: 4},
-       {as: ABR, a6: C_ZOREG, type_: 15, size: 8},
-       {as: ABC, a2: C_REG, a6: C_LR, type_: 18, size: 4},
-       {as: ABC, a2: C_REG, a6: C_CTR, type_: 18, size: 4},
-       {as: ABC, a1: C_SCON, a2: C_REG, a6: C_LR, type_: 18, size: 4},
-       {as: ABC, a1: C_SCON, a2: C_REG, a6: C_CTR, type_: 18, size: 4},
-       {as: ABC, a6: C_ZOREG, type_: 15, size: 8},
+       {as: ABR, a6: C_LBRA, type_: 11, size: 4},                                    // b label
+       {as: ABR, a6: C_LBRAPIC, type_: 11, size: 8},                                 // b label; nop
+       {as: ABR, a6: C_LR, type_: 18, size: 4},                                      // blr
+       {as: ABR, a6: C_CTR, type_: 18, size: 4},                                     // bctr
+       {as: ABC, a1: C_SCON, a2: C_CRBIT, a6: C_SBRA, type_: 16, size: 4},           // bc bo, bi, label
+       {as: ABC, a1: C_SCON, a2: C_CRBIT, a6: C_LBRA, type_: 17, size: 4},           // bc bo, bi, label
+       {as: ABC, a1: C_SCON, a2: C_CRBIT, a6: C_LR, type_: 18, size: 4},             // bclr bo, bi
+       {as: ABC, a1: C_SCON, a2: C_CRBIT, a3: C_SCON, a6: C_LR, type_: 18, size: 4}, // bclr bo, bi, bh
+       {as: ABC, a1: C_SCON, a2: C_CRBIT, a6: C_CTR, type_: 18, size: 4},            // bcctr bo, bi
+       {as: ABDNZ, a6: C_SBRA, type_: 16, size: 4},
         {as: ASYNC, type_: 46, size: 4},
         {as: AWORD, a1: C_LCON, type_: 40, size: 4},
-       {as: ADWORD, a1: C_LCON, type_: 31, size: 8},
-       {as: ADWORD, a1: C_DCON, type_: 31, size: 8},
+       {as: ADWORD, a1: C_64CON, type_: 31, size: 8},
         {as: ADWORD, a1: C_LACON, type_: 31, size: 8},
         {as: AADDME, a1: C_REG, a6: C_REG, type_: 47, size: 4},
         {as: AEXTSB, a1: C_REG, a6: C_REG, type_: 48, size: 4},
         {as: AEXTSB, a6: C_REG, type_: 48, size: 4},
-       {as: AISEL, a1: C_LCON, a2: C_REG, a3: C_REG, a6: C_REG, type_: 84, size: 4},
-       {as: AISEL, a1: C_ZCON, a2: C_REG, a3: C_REG, a6: C_REG, type_: 84, size: 4},
+       {as: AISEL, a1: C_U5CON, a2: C_REG, a3: C_REG, a6: C_REG, type_: 84, size: 4},
+       {as: AISEL, a1: C_CRBIT, a2: C_REG, a3: C_REG, a6: C_REG, type_: 84, size: 4},
         {as: ANEG, a1: C_REG, a6: C_REG, type_: 47, size: 4},
         {as: ANEG, a6: C_REG, type_: 47, size: 4},
         {as: AREM, a1: C_REG, a6: C_REG, type_: 50, size: 12},
@@ -326,18 +323,21 @@ var optab = []Optab{
         {as: AFTSQRT, a1: C_FREG, a6: C_SCON, type_: 93, size: 4},                     /* floating test for sw square root, x-form */
         {as: ACOPY, a1: C_REG, a6: C_REG, type_: 92, size: 4},                         /* copy/paste facility, x-form */
         {as: ADARN, a1: C_SCON, a6: C_REG, type_: 92, size: 4},                        /* deliver random number, x-form */
-       {as: ALDMX, a1: C_SOREG, a6: C_REG, type_: 45, size: 4},                       /* load doubleword monitored, x-form */
         {as: AMADDHD, a1: C_REG, a2: C_REG, a3: C_REG, a6: C_REG, type_: 83, size: 4}, /* multiply-add high/low doubleword, va-form */
         {as: AADDEX, a1: C_REG, a2: C_REG, a3: C_SCON, a6: C_REG, type_: 94, size: 4}, /* add extended using alternate carry, z23-form */
-       {as: ACRAND, a1: C_CREG, a6: C_CREG, type_: 2, size: 4},                       /* logical ops for condition registers xl-form */
+       {as: ACRAND, a1: C_CRBIT, a2: C_CRBIT, a6: C_CRBIT, type_: 2, size: 4},        /* logical ops for condition register bits xl-form */
+
+       /* Misc ISA 3.0 instructions */
+       {as: ASETB, a1: C_CREG, a6: C_REG, type_: 110, size: 4},
+       {as: AVCLZLSBB, a1: C_VREG, a6: C_REG, type_: 85, size: 4},
  
         /* Vector instructions */
  
         /* Vector load */
-       {as: ALV, a1: C_SOREG, a6: C_VREG, type_: 45, size: 4}, /* vector load, x-form */
+       {as: ALVEBX, a1: C_XOREG, a6: C_VREG, type_: 45, size: 4}, /* vector load, x-form */
  
         /* Vector store */
-       {as: ASTV, a1: C_VREG, a6: C_SOREG, type_: 44, size: 4}, /* vector store, x-form */
+       {as: ASTVEBX, a1: C_VREG, a6: C_XOREG, type_: 44, size: 4}, /* vector store, x-form */
  
         /* Vector logical */
         {as: AVAND, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector and, vx-form */
@@ -406,37 +406,35 @@ var optab = []Optab{
         {as: AVSHASIGMA, a1: C_ANDCON, a2: C_VREG, a3: C_ANDCON, a6: C_VREG, type_: 82, size: 4}, /* vector SHA sigma, vx-form */
  
         /* VSX vector load */
-       {as: ALXVD2X, a1: C_SOREG, a6: C_VSREG, type_: 87, size: 4},        /* vsx vector load, xx1-form */
+       {as: ALXVD2X, a1: C_XOREG, a6: C_VSREG, type_: 87, size: 4},        /* vsx vector load, xx1-form */
         {as: ALXV, a1: C_SOREG, a6: C_VSREG, type_: 96, size: 4},           /* vsx vector load, dq-form */
         {as: ALXVL, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 98, size: 4}, /* vsx vector load length */
  
         /* VSX vector store */
-       {as: ASTXVD2X, a1: C_VSREG, a6: C_SOREG, type_: 86, size: 4},        /* vsx vector store, xx1-form */
+       {as: ASTXVD2X, a1: C_VSREG, a6: C_XOREG, type_: 86, size: 4},        /* vsx vector store, xx1-form */
         {as: ASTXV, a1: C_VSREG, a6: C_SOREG, type_: 97, size: 4},           /* vsx vector store, dq-form */
         {as: ASTXVL, a1: C_VSREG, a2: C_REG, a6: C_REG, type_: 99, size: 4}, /* vsx vector store with length x-form */
  
         /* VSX scalar load */
-       {as: ALXSDX, a1: C_SOREG, a6: C_VSREG, type_: 87, size: 4}, /* vsx scalar load, xx1-form */
+       {as: ALXSDX, a1: C_XOREG, a6: C_VSREG, type_: 87, size: 4}, /* vsx scalar load, xx1-form */
  
         /* VSX scalar store */
-       {as: ASTXSDX, a1: C_VSREG, a6: C_SOREG, type_: 86, size: 4}, /* vsx scalar store, xx1-form */
+       {as: ASTXSDX, a1: C_VSREG, a6: C_XOREG, type_: 86, size: 4}, /* vsx scalar store, xx1-form */
  
         /* VSX scalar as integer load */
-       {as: ALXSIWAX, a1: C_SOREG, a6: C_VSREG, type_: 87, size: 4}, /* vsx scalar as integer load, xx1-form */
+       {as: ALXSIWAX, a1: C_XOREG, a6: C_VSREG, type_: 87, size: 4}, /* vsx scalar as integer load, xx1-form */
  
         /* VSX scalar store as integer */
-       {as: ASTXSIWX, a1: C_VSREG, a6: C_SOREG, type_: 86, size: 4}, /* vsx scalar as integer store, xx1-form */
+       {as: ASTXSIWX, a1: C_VSREG, a6: C_XOREG, type_: 86, size: 4}, /* vsx scalar as integer store, xx1-form */
  
         /* VSX move from VSR */
-       {as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4}, /* vsx move from vsr, xx1-form */
+       {as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4},
         {as: AMFVSRD, a1: C_FREG, a6: C_REG, type_: 88, size: 4},
-       {as: AMFVSRD, a1: C_VREG, a6: C_REG, type_: 88, size: 4},
  
         /* VSX move to VSR */
-       {as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 88, size: 4}, /* vsx move to vsr, xx1-form */
-       {as: AMTVSRD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 88, size: 4},
-       {as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 88, size: 4},
-       {as: AMTVSRD, a1: C_REG, a6: C_VREG, type_: 88, size: 4},
+       {as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 104, size: 4},
+       {as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4},
+       {as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4},
  
         /* VSX logical */
         {as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */
@@ -480,36 +478,36 @@ var optab = []Optab{
         {as: AXVCVSXDDP, a1: C_VSREG, a6: C_VSREG, type_: 89, size: 4}, /* vsx vector integer-fp conversion, xx2-form */
  
         {as: ACMP, a1: C_REG, a6: C_REG, type_: 70, size: 4},
-       {as: ACMP, a1: C_REG, a2: C_REG, a6: C_REG, type_: 70, size: 4},
+       {as: ACMP, a1: C_REG, a2: C_CREG, a6: C_REG, type_: 70, size: 4},
         {as: ACMP, a1: C_REG, a6: C_ADDCON, type_: 71, size: 4},
-       {as: ACMP, a1: C_REG, a2: C_REG, a6: C_ADDCON, type_: 71, size: 4},
+       {as: ACMP, a1: C_REG, a2: C_CREG, a6: C_ADDCON, type_: 71, size: 4},
         {as: ACMPU, a1: C_REG, a6: C_REG, type_: 70, size: 4},
-       {as: ACMPU, a1: C_REG, a2: C_REG, a6: C_REG, type_: 70, size: 4},
+       {as: ACMPU, a1: C_REG, a2: C_CREG, a6: C_REG, type_: 70, size: 4},
         {as: ACMPU, a1: C_REG, a6: C_ANDCON, type_: 71, size: 4},
-       {as: ACMPU, a1: C_REG, a2: C_REG, a6: C_ANDCON, type_: 71, size: 4},
+       {as: ACMPU, a1: C_REG, a2: C_CREG, a6: C_ANDCON, type_: 71, size: 4},
         {as: AFCMPO, a1: C_FREG, a6: C_FREG, type_: 70, size: 4},
-       {as: AFCMPO, a1: C_FREG, a2: C_REG, a6: C_FREG, type_: 70, size: 4},
+       {as: AFCMPO, a1: C_FREG, a2: C_CREG, a6: C_FREG, type_: 70, size: 4},
         {as: ATW, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 60, size: 4},
         {as: ATW, a1: C_LCON, a2: C_REG, a6: C_ADDCON, type_: 61, size: 4},
-       {as: ADCBF, a1: C_ZOREG, type_: 43, size: 4},
         {as: ADCBF, a1: C_SOREG, type_: 43, size: 4},
-       {as: ADCBF, a1: C_ZOREG, a2: C_REG, a6: C_SCON, type_: 43, size: 4},
+       {as: ADCBF, a1: C_XOREG, type_: 43, size: 4},
+       {as: ADCBF, a1: C_XOREG, a2: C_REG, a6: C_SCON, type_: 43, size: 4},
         {as: ADCBF, a1: C_SOREG, a6: C_SCON, type_: 43, size: 4},
-       {as: AECOWX, a1: C_REG, a2: C_REG, a6: C_ZOREG, type_: 44, size: 4},
-       {as: AECIWX, a1: C_ZOREG, a2: C_REG, a6: C_REG, type_: 45, size: 4},
-       {as: AECOWX, a1: C_REG, a6: C_ZOREG, type_: 44, size: 4},
-       {as: AECIWX, a1: C_ZOREG, a6: C_REG, type_: 45, size: 4},
-       {as: ALDAR, a1: C_ZOREG, a6: C_REG, type_: 45, size: 4},
-       {as: ALDAR, a1: C_ZOREG, a3: C_ANDCON, a6: C_REG, type_: 45, size: 4},
+       {as: ADCBF, a1: C_XOREG, a6: C_SCON, type_: 43, size: 4},
+       {as: ASTDCCC, a1: C_REG, a2: C_REG, a6: C_XOREG, type_: 44, size: 4},
+       {as: ASTDCCC, a1: C_REG, a6: C_XOREG, type_: 44, size: 4},
+       {as: ALDAR, a1: C_XOREG, a6: C_REG, type_: 45, size: 4},
+       {as: ALDAR, a1: C_XOREG, a3: C_ANDCON, a6: C_REG, type_: 45, size: 4},
         {as: AEIEIO, type_: 46, size: 4},
         {as: ATLBIE, a1: C_REG, type_: 49, size: 4},
         {as: ATLBIE, a1: C_SCON, a6: C_REG, type_: 49, size: 4},
         {as: ASLBMFEE, a1: C_REG, a6: C_REG, type_: 55, size: 4},
         {as: ASLBMTE, a1: C_REG, a6: C_REG, type_: 55, size: 4},
-       {as: ASTSW, a1: C_REG, a6: C_ZOREG, type_: 44, size: 4},
+       {as: ASTSW, a1: C_REG, a6: C_XOREG, type_: 44, size: 4},
         {as: ASTSW, a1: C_REG, a3: C_LCON, a6: C_ZOREG, type_: 41, size: 4},
-       {as: ALSW, a1: C_ZOREG, a6: C_REG, type_: 45, size: 4},
+       {as: ALSW, a1: C_XOREG, a6: C_REG, type_: 45, size: 4},
         {as: ALSW, a1: C_ZOREG, a3: C_LCON, a6: C_REG, type_: 42, size: 4},
+
         {as: obj.AUNDEF, type_: 78, size: 4},
         {as: obj.APCDATA, a1: C_LCON, a6: C_LCON, type_: 0, size: 0},
         {as: obj.AFUNCDATA, a1: C_SCON, a6: C_ADDR, type_: 0, size: 0},
@@ -520,50 +518,88 @@ var optab = []Optab{
         {as: obj.ADUFFZERO, a6: C_LBRA, type_: 11, size: 4}, // same as ABR/ABL
         {as: obj.ADUFFCOPY, a6: C_LBRA, type_: 11, size: 4}, // same as ABR/ABL
         {as: obj.APCALIGN, a1: C_LCON, type_: 0, size: 0},   // align code
+}
  
-       {as: obj.AXXX, type_: 0, size: 4},
+// PrefixableOptab describes opcodes from the table above which may generate different sequences
+// depending on whether prefix opcode support is available.
+type PrefixableOptab struct {
+       Optab
+       minGOPPC64 int  // Minimum GOPPC64 required to support this.
+       pfxsize    int8 // Instruction sequence size when prefixed opcodes are used
+}
+
+// The prefixable optab entry contains the pseudo-opcodes which generate relocations, or may generate
+// a more efficient sequence of instructions if a prefixed version exists (ex. paddi instead of oris/ori/add).
+//
+// This table is meant to transform all sequences which might be TOC-relative into an equivalent PC-relative
+// sequence. It also encompasses several transformations which do not involve relocations, those could be
+// separated and applied to AIX and other non-ELF targets. Likewise, the prefixed forms do not have encoding
+// restrictions on the offset, so they are also used for static binaries to allow better code generation, e.g.
+//
+//     MOVD something-byte-aligned(Rx), Ry
+//     MOVD 3(Rx), Ry
+//
+// is allowed when the prefixed forms are used.
+//
+// This requires an ISA 3.1 compatible CPU (e.g., Power10) and, when linking externally, an ELFv2 1.5 compliant linker.
+var prefixableOptab = []PrefixableOptab{
+       {Optab: Optab{as: AMOVD, a1: C_S34CON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12}, minGOPPC64: 10, pfxsize: 12},
+       {Optab: Optab{as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12}, minGOPPC64: 10, pfxsize: 12},
+       {Optab: Optab{as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12}, minGOPPC64: 10, pfxsize: 12},
+       {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AADD, a1: C_S34CON, a2: C_REG, a6: C_REG, type_: 22, size: 20}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AADD, a1: C_S34CON, a6: C_REG, type_: 22, size: 20}, minGOPPC64: 10, pfxsize: 8},
  }
  
  var oprange [ALAST & obj.AMask][]Optab
  
  var xcmp [C_NCLASS][C_NCLASS]bool
  
-// padding bytes to add to align code as requested
+var pfxEnabled = false // ISA 3.1 prefixed instructions are supported.
+var buildOpCfg = ""    // Save the os/cpu/arch tuple used to configure the assembler in buildop
+
+// padding bytes to add to align code as requested.
  func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
-       // For 16 and 32 byte alignment, there is a tradeoff
-       // between aligning the code and adding too many NOPs.
         switch a {
-       case 8:
-               if pc&7 != 0 {
-                       return 4
-               }
-       case 16:
-               // Align to 16 bytes if possible but add at
-               // most 2 NOPs.
-               switch pc & 15 {
-               case 4, 12:
-                       return 4
-               case 8:
-                       return 8
-               }
-       case 32:
-               // Align to 32 bytes if possible but add at
-               // most 3 NOPs.
-               switch pc & 31 {
-               case 4, 20:
-                       return 12
-               case 8, 24:
-                       return 8
-               case 12, 28:
-                       return 4
-               }
-               // When 32 byte alignment is requested on Linux,
-               // promote the function's alignment to 32. On AIX
-               // the function alignment is not changed which might
-               // result in 16 byte alignment but that is still fine.
-               // TODO: alignment on AIX
-               if ctxt.Headtype != objabi.Haix && cursym.Func().Align < 32 {
-                       cursym.Func().Align = 32
+       case 8, 16, 32, 64:
+               // By default function alignment is 16. If an alignment > 16 is
+               // requested then the function alignment must also be promoted.
+               // The function alignment is not promoted on AIX at this time.
+               // TODO: Investigate AIX function alignment.
+               if ctxt.Headtype != objabi.Haix && cursym.Func().Align < int32(a) {
+                       cursym.Func().Align = int32(a)
+               }
+               if pc&(a-1) != 0 {
+                       return int(a - (pc & (a - 1)))
                 }
         default:
                 ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a)
@@ -571,17 +607,19 @@ func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
         return 0
  }
  
-// Get the implied register of a operand which doesn't specify one.  These show up
+// Get the implied register of an operand which doesn't specify one.  These show up
  // in handwritten asm like "MOVD R5, foosymbol" where a base register is not supplied,
  // or "MOVD R5, foo+10(SP) or pseudo-register is used.  The other common case is when
  // generating constants in register like "MOVD $constant, Rx".
  func (c *ctxt9) getimpliedreg(a *obj.Addr, p *obj.Prog) int {
-       switch oclass(a) {
-       case C_ADDCON, C_ANDCON, C_UCON, C_LCON, C_SCON, C_ZCON:
+       class := oclass(a)
+       if class >= C_ZCON && class <= C_64CON {
                 return REGZERO
+       }
+       switch class {
         case C_SACON, C_LACON:
                 return REGSP
-       case C_LOREG, C_SOREG, C_ZOREG:
+       case C_LOREG, C_SOREG, C_ZOREG, C_XOREG:
                 switch a.Name {
                 case obj.NAME_EXTERN, obj.NAME_STATIC:
                         return REGSB
@@ -642,9 +680,12 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
  
         var otxt int64
         var q *obj.Prog
+       var out [5]uint32
+       var falign int32 // Track increased alignment requirements for prefix.
         for bflag != 0 {
                 bflag = 0
                 pc = 0
+               falign = 0 // Note, linker bumps function symbols to funcAlign.
                 for p = c.cursym.Func().Text.Link; p != nil; p = p.Link {
                         p.Pc = pc
                         o = c.oplook(p)
@@ -653,22 +694,74 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                         if (o.type_ == 16 || o.type_ == 17) && p.To.Target() != nil {
                                 otxt = p.To.Target().Pc - pc
                                 if otxt < -(1<<15)+10 || otxt >= (1<<15)-10 {
-                                       q = c.newprog()
-                                       q.Link = p.Link
-                                       p.Link = q
-                                       q.As = ABR
-                                       q.To.Type = obj.TYPE_BRANCH
-                                       q.To.SetTarget(p.To.Target())
-                                       p.To.SetTarget(q)
-                                       q = c.newprog()
-                                       q.Link = p.Link
-                                       p.Link = q
-                                       q.As = ABR
-                                       q.To.Type = obj.TYPE_BRANCH
-                                       q.To.SetTarget(q.Link.Link)
-
-                                       //addnop(p->link);
-                                       //addnop(p);
+                                       // Assemble the instruction with a target not too far to figure out BI and BO fields.
+                                       // If only the CTR or BI (the CR bit) are tested, the conditional branch can be inverted,
+                                       // and only one extra branch is needed to reach the target.
+                                       tgt := p.To.Target()
+                                       p.To.SetTarget(p.Link)
+                                       o.asmout(&c, p, o, &out)
+                                       p.To.SetTarget(tgt)
+
+                                       bo := int64(out[0]>>21) & 31
+                                       bi := int16((out[0] >> 16) & 31)
+                                       invertible := false
+
+                                       if bo&0x14 == 0x14 {
+                                               // A conditional branch that is unconditionally taken. This cannot be inverted.
+                                       } else if bo&0x10 == 0x10 {
+                                               // A branch based on the value of CTR. Invert the CTR comparison against zero bit.
+                                               bo ^= 0x2
+                                               invertible = true
+                                       } else if bo&0x04 == 0x04 {
+                                               // A branch based on CR bit. Invert the BI comparison bit.
+                                               bo ^= 0x8
+                                               invertible = true
+                                       }
+
+                                       if invertible {
+                                               // Rewrite
+                                               //     BC bo,...,far_away_target
+                                               //     NEXT_INSN
+                                               // to:
+                                               //     BC invert(bo),next_insn
+                                               //     JMP far_away_target
+                                               //   next_insn:
+                                               //     NEXT_INSN
+                                               p.As = ABC
+                                               p.From = obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: bo}
+                                               q = c.newprog()
+                                               q.As = ABR
+                                               q.To.Type = obj.TYPE_BRANCH
+                                               q.To.SetTarget(p.To.Target())
+                                               q.Link = p.Link
+                                               p.To.SetTarget(p.Link)
+                                               p.Link = q
+                                               p.Reg = REG_CRBIT0 + bi
+                                       } else {
+                                               // Rewrite
+                                               //     BC ...,far_away_target
+                                               //     NEXT_INSN
+                                               // to
+                                               //     BC ...,tmp
+                                               //     JMP next_insn
+                                               //   tmp:
+                                               //     JMP far_away_target
+                                               //   next_insn:
+                                               //     NEXT_INSN
+                                               q = c.newprog()
+                                               q.Link = p.Link
+                                               p.Link = q
+                                               q.As = ABR
+                                               q.To.Type = obj.TYPE_BRANCH
+                                               q.To.SetTarget(p.To.Target())
+                                               p.To.SetTarget(q)
+                                               q = c.newprog()
+                                               q.Link = p.Link
+                                               p.Link = q
+                                               q.As = ABR
+                                               q.To.Type = obj.TYPE_BRANCH
+                                               q.To.SetTarget(q.Link.Link)
+                                       }
                                         bflag = 1
                                 }
                         }
@@ -686,27 +779,56 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                 }
                         }
  
+                       // Prefixed instructions cannot be placed across a 64B boundary.
+                       // Mark and adjust the PC of those which do. A nop will be
+                       // inserted during final assembly.
+                       if o.ispfx {
+                               mark := p.Mark &^ PFX_X64B
+                               if pc&63 == 60 {
+                                       p.Pc += 4
+                                       m += 4
+                                       mark |= PFX_X64B
+                               }
+
+                               // Marks may be adjusted if a too-far conditional branch is
+                               // fixed up above. Likewise, inserting a NOP may cause a
+                               // branch target to become too far away.  We need to run
+                               // another iteration and verify no additional changes
+                               // are needed.
+                               if mark != p.Mark {
+                                       bflag = 1
+                                       p.Mark = mark
+                               }
+
+                               // Check for 16 or 32B crossing of this prefixed insn.
+                               // These do not require padding, but do require increasing
+                               // the function alignment to prevent them from potentially
+                               // crossing a 64B boundary when the linker assigns the final
+                               // PC.
+                               switch p.Pc & 31 {
+                               case 28: // 32B crossing
+                                       falign = 64
+                               case 12: // 16B crossing
+                                       if falign < 64 {
+                                               falign = 32
+                                       }
+                               }
+                       }
+
                         pc += int64(m)
                 }
  
                 c.cursym.Size = pc
         }
  
-       if r := pc & funcAlignMask; r != 0 {
-               pc += funcAlign - r
-       }
-
         c.cursym.Size = pc
-
-       /*
-        * lay out the code, emitting code and data relocations.
-        */
-
+       c.cursym.Func().Align = falign
         c.cursym.Grow(c.cursym.Size)
  
+       // lay out the code, emitting code and data relocations.
+
         bp := c.cursym.P
         var i int32
-       var out [6]uint32
         for p := c.cursym.Func().Text.Link; p != nil; p = p.Link {
                 c.pc = p.Pc
                 o = c.oplook(p)
@@ -715,18 +837,21 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                 }
                 // asmout is not set up to add large amounts of padding
                 if o.type_ == 0 && p.As == obj.APCALIGN {
-                       pad := LOP_RRR(OP_OR, REGZERO, REGZERO, REGZERO)
                         aln := c.vregoff(&p.From)
                         v := addpad(p.Pc, aln, c.ctxt, c.cursym)
                         if v > 0 {
                                 // Same padding instruction for all
                                 for i = 0; i < int32(v/4); i++ {
-                                       c.ctxt.Arch.ByteOrder.PutUint32(bp, pad)
+                                       c.ctxt.Arch.ByteOrder.PutUint32(bp, NOP)
                                         bp = bp[4:]
                                 }
                         }
                 } else {
-                       c.asmout(p, o, out[:])
+                       if p.Mark&PFX_X64B != 0 {
+                               c.ctxt.Arch.ByteOrder.PutUint32(bp, NOP)
+                               bp = bp[4:]
+                       }
+                       o.asmout(&c, p, o, &out)
                         for i = 0; i < int32(o.size/4); i++ {
                                 c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
                                 bp = bp[4:]
@@ -743,51 +868,64 @@ func isuint32(v uint64) bool {
         return uint64(uint32(v)) == v
  }
  
+func (c *ctxt9) aclassreg(reg int16) int {
+       if REG_R0 <= reg && reg <= REG_R31 {
+               return C_REGP + int(reg&1)
+       }
+       if REG_F0 <= reg && reg <= REG_F31 {
+               return C_FREGP + int(reg&1)
+       }
+       if REG_V0 <= reg && reg <= REG_V31 {
+               return C_VREG
+       }
+       if REG_VS0 <= reg && reg <= REG_VS63 {
+               return C_VSREGP + int(reg&1)
+       }
+       if REG_CR0 <= reg && reg <= REG_CR7 || reg == REG_CR {
+               return C_CREG
+       }
+       if REG_CR0LT <= reg && reg <= REG_CR7SO {
+               return C_CRBIT
+       }
+       if REG_SPR0 <= reg && reg <= REG_SPR0+1023 {
+               switch reg {
+               case REG_LR:
+                       return C_LR
+
+               case REG_XER:
+                       return C_XER
+
+               case REG_CTR:
+                       return C_CTR
+               }
+
+               return C_SPR
+       }
+       if REG_A0 <= reg && reg <= REG_A7 {
+               return C_AREG
+       }
+       if reg == REG_FPSCR {
+               return C_FPSCR
+       }
+       return C_GOK
+}
+
  func (c *ctxt9) aclass(a *obj.Addr) int {
         switch a.Type {
         case obj.TYPE_NONE:
                 return C_NONE
  
         case obj.TYPE_REG:
-               if REG_R0 <= a.Reg && a.Reg <= REG_R31 {
-                       return C_REG
-               }
-               if REG_F0 <= a.Reg && a.Reg <= REG_F31 {
-                       return C_FREG
-               }
-               if REG_V0 <= a.Reg && a.Reg <= REG_V31 {
-                       return C_VREG
-               }
-               if REG_VS0 <= a.Reg && a.Reg <= REG_VS63 {
-                       return C_VSREG
-               }
-               if REG_CR0 <= a.Reg && a.Reg <= REG_CR7 || a.Reg == REG_CR {
-                       return C_CREG
-               }
-               if REG_SPR0 <= a.Reg && a.Reg <= REG_SPR0+1023 {
-                       switch a.Reg {
-                       case REG_LR:
-                               return C_LR
+               return c.aclassreg(a.Reg)
  
-                       case REG_XER:
-                               return C_XER
+       case obj.TYPE_MEM:
+               if a.Index != 0 {
+                       if a.Name != obj.NAME_NONE || a.Offset != 0 {
+                               c.ctxt.Logf("Unexpected Instruction operand index %d offset %d class %d \n", a.Index, a.Offset, a.Class)
  
-                       case REG_CTR:
-                               return C_CTR
                         }
-
-                       return C_SPR
-               }
-
-               if REG_DCR0 <= a.Reg && a.Reg <= REG_DCR0+1023 {
-                       return C_SPR
+                       return C_XOREG
                 }
-               if a.Reg == REG_FPSCR {
-                       return C_FPSCR
-               }
-               return C_GOK
-
-       case obj.TYPE_MEM:
                 switch a.Name {
                 case obj.NAME_GOTREF, obj.NAME_TOCREF:
                         return C_ADDR
@@ -810,13 +948,14 @@ func (c *ctxt9) aclass(a *obj.Addr) int {
  
                 case obj.NAME_AUTO:
                         c.instoffset = int64(c.autosize) + a.Offset
+
                         if c.instoffset >= -BIG && c.instoffset < BIG {
                                 return C_SOREG
                         }
                         return C_LOREG
  
                 case obj.NAME_PARAM:
-                       c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.FixedFrameSize()
+                       c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize
                         if c.instoffset >= -BIG && c.instoffset < BIG {
                                 return C_SOREG
                         }
@@ -824,13 +963,13 @@ func (c *ctxt9) aclass(a *obj.Addr) int {
  
                 case obj.NAME_NONE:
                         c.instoffset = a.Offset
-                       if c.instoffset == 0 {
+                       if a.Offset == 0 && a.Index == 0 {
                                 return C_ZOREG
-                       }
-                       if c.instoffset >= -BIG && c.instoffset < BIG {
+                       } else if c.instoffset >= -BIG && c.instoffset < BIG {
                                 return C_SOREG
+                       } else {
+                               return C_LOREG
                         }
-                       return C_LOREG
                 }
  
                 return C_GOK
@@ -856,7 +995,7 @@ func (c *ctxt9) aclass(a *obj.Addr) int {
                 case obj.NAME_NONE:
                         c.instoffset = a.Offset
                         if a.Reg != 0 {
-                               if -BIG <= c.instoffset && c.instoffset <= BIG {
+                               if -BIG <= c.instoffset && c.instoffset < BIG {
                                         return C_SACON
                                 }
                                 if isint32(c.instoffset) {
@@ -882,7 +1021,7 @@ func (c *ctxt9) aclass(a *obj.Addr) int {
                         return C_LACON
  
                 case obj.NAME_PARAM:
-                       c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.FixedFrameSize()
+                       c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize
                         if c.instoffset >= -BIG && c.instoffset < BIG {
                                 return C_SACON
                         }
@@ -893,37 +1032,41 @@ func (c *ctxt9) aclass(a *obj.Addr) int {
                 }
  
                 if c.instoffset >= 0 {
-                       if c.instoffset == 0 {
-                               return C_ZCON
-                       }
-                       if c.instoffset <= 0x7fff {
-                               return C_SCON
-                       }
-                       if c.instoffset <= 0xffff {
-                               return C_ANDCON
-                       }
-                       if c.instoffset&0xffff == 0 && isuint32(uint64(c.instoffset)) { /* && (instoffset & (1<<31)) == 0) */
-                               return C_UCON
+                       sbits := bits.Len64(uint64(c.instoffset))
+                       switch {
+                       case sbits <= 5:
+                               return C_ZCON + sbits
+                       case sbits <= 8:
+                               return C_U8CON
+                       case sbits <= 15:
+                               return C_U15CON
+                       case sbits <= 16:
+                               return C_U16CON
+                       case sbits <= 31:
+                               return C_U32CON
+                       case sbits <= 32:
+                               return C_U32CON
+                       case sbits <= 33:
+                               return C_S34CON
+                       default:
+                               return C_64CON
                         }
-                       if isint32(c.instoffset) || isuint32(uint64(c.instoffset)) {
-                               return C_LCON
+               } else {
+                       sbits := bits.Len64(uint64(^c.instoffset))
+                       switch {
+                       case sbits <= 15:
+                               return C_S16CON
+                       case sbits <= 31:
+                               return C_S32CON
+                       case sbits <= 33:
+                               return C_S34CON
+                       default:
+                               return C_64CON
                         }
-                       return C_DCON
-               }
-
-               if c.instoffset >= -0x8000 {
-                       return C_ADDCON
-               }
-               if c.instoffset&0xffff == 0 && isint32(c.instoffset) {
-                       return C_UCON
                 }
-               if isint32(c.instoffset) {
-                       return C_LCON
-               }
-               return C_DCON
  
         case obj.TYPE_BRANCH:
-               if a.Sym != nil && c.ctxt.Flag_dynlink {
+               if a.Sym != nil && c.ctxt.Flag_dynlink && !pfxEnabled {
                         return C_LBRAPIC
                 }
                 return C_SBRA
@@ -970,27 +1113,20 @@ func (c *ctxt9) oplook(p *obj.Prog) *Optab {
  
         a2 := C_NONE
         if p.Reg != 0 {
-               if REG_R0 <= p.Reg && p.Reg <= REG_R31 {
-                       a2 = C_REG
-               } else if REG_V0 <= p.Reg && p.Reg <= REG_V31 {
-                       a2 = C_VREG
-               } else if REG_VS0 <= p.Reg && p.Reg <= REG_VS63 {
-                       a2 = C_VSREG
-               } else if REG_F0 <= p.Reg && p.Reg <= REG_F31 {
-                       a2 = C_FREG
-               }
+               a2 = c.aclassreg(p.Reg)
         }
  
         // c.ctxt.Logf("oplook %v %d %d %d %d\n", p, a1, a2, a3, a4, a5, a6)
         ops := oprange[p.As&obj.AMask]
         c1 := &xcmp[a1]
+       c2 := &xcmp[a2]
         c3 := &xcmp[a3]
         c4 := &xcmp[a4]
         c5 := &xcmp[a5]
         c6 := &xcmp[a6]
         for i := range ops {
                 op := &ops[i]
-               if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] {
+               if c1[op.a1] && c2[op.a2] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] {
                         p.Optab = uint16(cap(optab) - cap(ops) + i + 1)
                         return op
                 }
@@ -1004,65 +1140,72 @@ func (c *ctxt9) oplook(p *obj.Prog) *Optab {
         return &ops[0]
  }
  
+// Compare two operand types (ex C_REG, or C_SCON)
+// and return true if b is compatible with a.
+//
+// Argument comparison isn't symmetric, so care must be taken.
+// a is the argument type as found in optab, b is the argument as
+// fitted by aclass.
  func cmp(a int, b int) bool {
         if a == b {
                 return true
         }
         switch a {
-       case C_LCON:
-               if b == C_ZCON || b == C_SCON || b == C_UCON || b == C_ADDCON || b == C_ANDCON {
-                       return true
-               }
-
-       case C_ADDCON:
-               if b == C_ZCON || b == C_SCON {
-                       return true
-               }
-
-       case C_ANDCON:
-               if b == C_ZCON || b == C_SCON {
-                       return true
-               }
  
         case C_SPR:
                 if b == C_LR || b == C_XER || b == C_CTR {
                         return true
                 }
  
-       case C_UCON:
-               if b == C_ZCON {
-                       return true
-               }
-
-       case C_SCON:
-               if b == C_ZCON {
-                       return true
-               }
+       case C_U1CON:
+               return cmp(C_ZCON, b)
+       case C_U2CON:
+               return cmp(C_U1CON, b)
+       case C_U3CON:
+               return cmp(C_U2CON, b)
+       case C_U4CON:
+               return cmp(C_U3CON, b)
+       case C_U5CON:
+               return cmp(C_U4CON, b)
+       case C_U8CON:
+               return cmp(C_U5CON, b)
+       case C_U15CON:
+               return cmp(C_U8CON, b)
+       case C_U16CON:
+               return cmp(C_U15CON, b)
+
+       case C_S16CON:
+               return cmp(C_U15CON, b)
+       case C_32CON:
+               return cmp(C_S16CON, b) || cmp(C_U16CON, b)
+       case C_S34CON:
+               return cmp(C_32CON, b)
+       case C_64CON:
+               return cmp(C_S34CON, b)
  
         case C_LACON:
-               if b == C_SACON {
-                       return true
-               }
+               return cmp(C_SACON, b)
  
         case C_LBRA:
-               if b == C_SBRA {
-                       return true
-               }
+               return cmp(C_SBRA, b)
  
         case C_SOREG:
-               if b == C_ZOREG {
-                       return true
-               }
+               return cmp(C_ZOREG, b)
  
         case C_LOREG:
-               if b == C_SOREG || b == C_ZOREG {
-                       return true
-               }
+               return cmp(C_SOREG, b)
+
+       case C_XOREG:
+               return cmp(C_REG, b) || cmp(C_ZOREG, b)
  
+       // An even/odd register input always matches the regular register types.
         case C_REG:
-               if b == C_ZCON {
-                       return r0iszero != 0 /*TypeKind(100016)*/
-               }
+               return cmp(C_REGP, b) || (b == C_ZCON && r0iszero != 0)
+       case C_FREG:
+               return cmp(C_FREGP, b)
+       case C_VSREG:
+               /* Allow any VR argument as a VSR operand. */
+               return cmp(C_VSREGP, b) || cmp(C_VREG, b)
  
         case C_ANY:
                 return true
@@ -1071,22 +1214,12 @@ func cmp(a int, b int) bool {
         return false
  }
  
-type ocmp []Optab
-
-func (x ocmp) Len() int {
-       return len(x)
-}
-
-func (x ocmp) Swap(i, j int) {
-       x[i], x[j] = x[j], x[i]
-}
-
  // Used when sorting the optab. Sorting is
  // done in a way so that the best choice of
  // opcode/operand combination is considered first.
-func (x ocmp) Less(i, j int) bool {
-       p1 := &x[i]
-       p2 := &x[j]
+func optabLess(i, j int) bool {
+       p1 := &optab[i]
+       p2 := &optab[j]
         n := int(p1.as) - int(p2.as)
         // same opcode
         if n != 0 {
@@ -1134,41 +1267,75 @@ func opset(a, b0 obj.As) {
         oprange[a&obj.AMask] = oprange[b0]
  }
  
+// Determine if the build configuration requires a TOC pointer.
+// It is assumed this is always called after buildop.
+func NeedTOCpointer(ctxt *obj.Link) bool {
+       return !pfxEnabled && ctxt.Flag_shared
+}
+
  // Build the opcode table
  func buildop(ctxt *obj.Link) {
-       if oprange[AANDN&obj.AMask] != nil {
-               // Already initialized; stop now.
+       // Limit PC-relative prefix instruction usage to supported and tested targets.
+       pfxEnabled = buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
+       cfg := fmt.Sprintf("power%d/%s/%s", buildcfg.GOPPC64, buildcfg.GOARCH, buildcfg.GOOS)
+       if cfg == buildOpCfg {
+               // Already initialized to correct OS/cpu; stop now.
                 // This happens in the cmd/asm tests,
                 // each of which re-initializes the arch.
                 return
         }
+       buildOpCfg = cfg
+
+       // Configure the optab entries which may generate prefix opcodes.
+       prefixOptab := make([]Optab, 0, len(prefixableOptab))
+       for _, entry := range prefixableOptab {
+               entry := entry
+               if pfxEnabled && buildcfg.GOPPC64 >= entry.minGOPPC64 {
+                       // Enable prefix opcode generation and resize.
+                       entry.ispfx = true
+                       entry.size = entry.pfxsize
+               }
+               prefixOptab = append(prefixOptab, entry.Optab)
  
-       var n int
+       }
  
         for i := 0; i < C_NCLASS; i++ {
-               for n = 0; n < C_NCLASS; n++ {
+               for n := 0; n < C_NCLASS; n++ {
                         if cmp(n, i) {
                                 xcmp[i][n] = true
                         }
                 }
         }
-       for n = 0; optab[n].as != obj.AXXX; n++ {
+
+       // Append the generated entries, sort, and fill out oprange.
+       optab = make([]Optab, 0, len(optabBase)+len(optabGen)+len(prefixOptab))
+       optab = append(optab, optabBase...)
+       optab = append(optab, optabGen...)
+       optab = append(optab, prefixOptab...)
+       sort.Slice(optab, optabLess)
+
+       for i := range optab {
+               // Use the legacy assembler function if none provided.
+               if optab[i].asmout == nil {
+                       optab[i].asmout = asmout
+               }
         }
-       sort.Sort(ocmp(optab[:n]))
-       for i := 0; i < n; i++ {
+
+       for i := 0; i < len(optab); {
                 r := optab[i].as
                 r0 := r & obj.AMask
                 start := i
-               for optab[i].as == r {
+               for i < len(optab) && optab[i].as == r {
                         i++
                 }
                 oprange[r0] = optab[start:i]
-               i--
  
                 switch r {
                 default:
-                       ctxt.Diag("unknown op in build: %v", r)
-                       log.Fatalf("instruction missing from switch in asm9.go:buildop: %v", r)
+                       if !opsetGen(r) {
+                               ctxt.Diag("unknown op in build: %v", r)
+                               log.Fatalf("instruction missing from switch in asm9.go:buildop: %v", r)
+                       }
  
                 case ADCBF: /* unary indexed: op (b+a); op (b) */
                         opset(ADCBI, r0)
@@ -1179,11 +1346,10 @@ func buildop(ctxt *obj.Link) {
                         opset(ADCBZ, r0)
                         opset(AICBI, r0)
  
-               case AECOWX: /* indexed store: op s,(b+a); op s,(b) */
+               case ASTDCCC: /* indexed store: op s,(b+a); op s,(b) */
                         opset(ASTWCCC, r0)
                         opset(ASTHCCC, r0)
                         opset(ASTBCCC, r0)
-                       opset(ASTDCCC, r0)
  
                 case AREM: /* macro */
                         opset(AREM, r0)
@@ -1283,8 +1449,7 @@ func buildop(ctxt *obj.Link) {
                         opset(AMOVDU, r0)
                         opset(AMOVMW, r0)
  
-               case ALV: /* lvebx, lvehx, lvewx, lvx, lvxl, lvsl, lvsr */
-                       opset(ALVEBX, r0)
+               case ALVEBX: /* lvebx, lvehx, lvewx, lvx, lvxl, lvsl, lvsr */
                         opset(ALVEHX, r0)
                         opset(ALVEWX, r0)
                         opset(ALVX, r0)
@@ -1292,8 +1457,7 @@ func buildop(ctxt *obj.Link) {
                         opset(ALVSL, r0)
                         opset(ALVSR, r0)
  
-               case ASTV: /* stvebx, stvehx, stvewx, stvx, stvxl */
-                       opset(ASTVEBX, r0)
+               case ASTVEBX: /* stvebx, stvehx, stvewx, stvx, stvxl */
                         opset(ASTVEHX, r0)
                         opset(ASTVEWX, r0)
                         opset(ASTVX, r0)
@@ -1542,7 +1706,6 @@ func buildop(ctxt *obj.Link) {
                         opset(AMTVRD, r0)
                         opset(AMTVSRWA, r0)
                         opset(AMTVSRWZ, r0)
-                       opset(AMTVSRDD, r0)
                         opset(AMTVSRWS, r0)
  
                 case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */
@@ -1667,6 +1830,9 @@ func buildop(ctxt *obj.Link) {
                 case ABC:
                         opset(ABCL, r0)
  
+               case ABDNZ:
+                       opset(ABDZ, r0)
+
                 case AEXTSB: /* op Rs, Ra */
                         opset(AEXTSBCC, r0)
  
@@ -1761,9 +1927,6 @@ func buildop(ctxt *obj.Link) {
                 case AFCMPO:
                         opset(AFCMPU, r0)
  
-               case AISEL:
-                       opset(AISEL, r0)
-
                 case AMTFSB0:
                         opset(AMTFSB0CC, r0)
                         opset(AMTFSB1, r0)
@@ -1822,10 +1985,10 @@ func buildop(ctxt *obj.Link) {
                         opset(APTESYNC, r0)
                         opset(ATLBSYNC, r0)
  
-               case ARLWMI:
-                       opset(ARLWMICC, r0)
-                       opset(ARLWNM, r0)
+               case ARLWNM:
                         opset(ARLWNMCC, r0)
+                       opset(ARLWMI, r0)
+                       opset(ARLWMICC, r0)
  
                 case ARLDMI:
                         opset(ARLDMICC, r0)
@@ -1901,6 +2064,9 @@ func buildop(ctxt *obj.Link) {
                 case AMOVW: /* load/store/move word with sign extension; move 32-bit literals  */
                         opset(AMOVWZ, r0) /* Same as above, but zero extended */
  
+               case AVCLZLSBB:
+                       opset(AVCTZLSBB, r0)
+
                 case AADD,
                         AADDIS,
                         AANDCC, /* and. Rb,Rs,Ra; andi. $uimm,Rs,Ra */
@@ -1919,12 +2085,14 @@ func buildop(ctxt *obj.Link) {
                         AWORD,
                         ADWORD,
                         ADARN,
-                       ALDMX,
                         AVMSUMUDM,
                         AADDEX,
                         ACMPEQB,
-                       AECIWX,
                         ACLRLSLWI,
+                       AMTVSRDD,
+                       APNOP,
+                       AISEL,
+                       ASETB,
                         obj.ANOP,
                         obj.ATEXT,
                         obj.AUNDEF,
@@ -1983,7 +2151,7 @@ func OPMD(o, xo, rc uint32) uint32 {
         return o<<26 | xo<<2 | rc&1
  }
  
-/* the order is dest, a/s, b/imm for both arithmetic and logical operations */
+/* the order is dest, a/s, b/imm for both arithmetic and logical operations. */
  func AOP_RRR(op uint32, d uint32, a uint32, b uint32) uint32 {
         return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11
  }
@@ -2023,50 +2191,32 @@ func AOP_IR(op uint32, d uint32, simm uint32) uint32 {
  }
  
  /* XX1-form 3-register operands, 1 VSR operand */
-func AOP_XX1(op uint32, d uint32, a uint32, b uint32) uint32 {
-       /* For the XX-form encodings, we need the VSX register number to be exactly */
-       /* between 0-63, so we can properly set the rightmost bits. */
-       r := d - REG_VS0
+func AOP_XX1(op uint32, r uint32, a uint32, b uint32) uint32 {
         return op | (r&31)<<21 | (a&31)<<16 | (b&31)<<11 | (r&32)>>5
  }
  
  /* XX2-form 3-register operands, 2 VSR operands */
-func AOP_XX2(op uint32, d uint32, a uint32, b uint32) uint32 {
-       xt := d - REG_VS0
-       xb := b - REG_VS0
+func AOP_XX2(op uint32, xt uint32, a uint32, xb uint32) uint32 {
         return op | (xt&31)<<21 | (a&3)<<16 | (xb&31)<<11 | (xb&32)>>4 | (xt&32)>>5
  }
  
  /* XX3-form 3 VSR operands */
-func AOP_XX3(op uint32, d uint32, a uint32, b uint32) uint32 {
-       xt := d - REG_VS0
-       xa := a - REG_VS0
-       xb := b - REG_VS0
+func AOP_XX3(op uint32, xt uint32, xa uint32, xb uint32) uint32 {
         return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
  }
  
  /* XX3-form 3 VSR operands + immediate */
-func AOP_XX3I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
-       xt := d - REG_VS0
-       xa := a - REG_VS0
-       xb := b - REG_VS0
+func AOP_XX3I(op uint32, xt uint32, xa uint32, xb uint32, c uint32) uint32 {
         return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (c&3)<<8 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
  }
  
  /* XX4-form, 4 VSR operands */
-func AOP_XX4(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
-       xt := d - REG_VS0
-       xa := a - REG_VS0
-       xb := b - REG_VS0
-       xc := c - REG_VS0
+func AOP_XX4(op uint32, xt uint32, xa uint32, xb uint32, xc uint32) uint32 {
         return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xc&31)<<6 | (xc&32)>>2 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
  }
  
  /* DQ-form, VSR register, register + offset operands */
-func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 {
-       /* For the DQ-form encodings, we need the VSX register number to be exactly */
-       /* between 0-63, so we can properly set the SX bit. */
-       r := d - REG_VS0
+func AOP_DQ(op uint32, xt uint32, a uint32, b uint32) uint32 {
         /* The EA for this instruction form is (RA) + DQ << 4, where DQ is a 12-bit signed integer. */
         /* In order to match the output of the GNU objdump (and make the usage in Go asm easier), the */
         /* instruction is called using the sign extended value (i.e. a valid offset would be -32752 or 32752, */
@@ -2074,7 +2224,7 @@ func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 {
         /* bits 0 to 3 in 'dq' need to be zero, otherwise this will generate an illegal instruction. */
         /* If in doubt how this instruction form is encoded, refer to ISA 3.0b, pages 492 and 507. */
         dq := b >> 4
-       return op | (r&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (r&32)>>2
+       return op | (xt&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (xt&32)>>2
  }
  
  /* Z23-form, 3-register operands + CY field */
@@ -2111,10 +2261,6 @@ func OP_RLW(op uint32, a uint32, s uint32, sh uint32, mb uint32, me uint32) uint
         return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | (mb&31)<<6 | (me&31)<<1
  }
  
-func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 {
-       return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5
-}
-
  func AOP_EXTSWSLI(op uint32, a uint32, s uint32, sh uint32) uint32 {
         return op | (a&31)<<21 | (s&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1
  }
@@ -2123,6 +2269,23 @@ func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 {
         return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6
  }
  
+/* MD-form 2-register, 2 6-bit immediate operands */
+func AOP_MD(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 {
+       return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5
+}
+
+/* MDS-form 3-register, 1 6-bit immediate operand. rsh argument is a register. */
+func AOP_MDS(op, to, from, rsh, m uint32) uint32 {
+       return AOP_MD(op, to, from, rsh&31, m)
+}
+
+func AOP_PFX_00_8LS(r, ie uint32) uint32 {
+       return 1<<26 | 0<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF)
+}
+func AOP_PFX_10_MLS(r, ie uint32) uint32 {
+       return 1<<26 | 2<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF)
+}
+
  const (
         /* each rhs is OPVCC(_, _, _, _) */
         OP_ADD      = 31<<26 | 266<<1 | 0<<10 | 0
@@ -2160,8 +2323,55 @@ const (
         OP_RLDICL   = 30<<26 | 0<<1 | 0<<10 | 0
         OP_RLDCL    = 30<<26 | 8<<1 | 0<<10 | 0
         OP_EXTSWSLI = 31<<26 | 445<<2
+       OP_SETB     = 31<<26 | 128<<1
  )
  
+func pfxadd(rt, ra int16, r uint32, imm32 int64) (uint32, uint32) {
+       return AOP_PFX_10_MLS(r, uint32(imm32>>16)), AOP_IRR(14<<26, uint32(rt), uint32(ra), uint32(imm32))
+}
+
+func pfxload(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) {
+       switch a {
+       case AMOVH:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(42<<26, uint32(reg), uint32(base), 0)
+       case AMOVW:
+               return AOP_PFX_00_8LS(r, 0), AOP_IRR(41<<26, uint32(reg), uint32(base), 0)
+       case AMOVD:
+               return AOP_PFX_00_8LS(r, 0), AOP_IRR(57<<26, uint32(reg), uint32(base), 0)
+       case AMOVBZ, AMOVB:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(34<<26, uint32(reg), uint32(base), 0)
+       case AMOVHZ:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(40<<26, uint32(reg), uint32(base), 0)
+       case AMOVWZ:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(32<<26, uint32(reg), uint32(base), 0)
+       case AFMOVS:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(48<<26, uint32(reg), uint32(base), 0)
+       case AFMOVD:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(50<<26, uint32(reg), uint32(base), 0)
+       }
+       log.Fatalf("Error no pfxload for %v\n", a)
+       return 0, 0
+}
+
+func pfxstore(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) {
+       switch a {
+       case AMOVD:
+               return AOP_PFX_00_8LS(r, 0), AOP_IRR(61<<26, uint32(reg), uint32(base), 0)
+       case AMOVBZ, AMOVB:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(38<<26, uint32(reg), uint32(base), 0)
+       case AMOVHZ, AMOVH:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(44<<26, uint32(reg), uint32(base), 0)
+       case AMOVWZ, AMOVW:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(36<<26, uint32(reg), uint32(base), 0)
+       case AFMOVS:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(52<<26, uint32(reg), uint32(base), 0)
+       case AFMOVD:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(54<<26, uint32(reg), uint32(base), 0)
+       }
+       log.Fatalf("Error no pfxstore for %v\n", a)
+       return 0, 0
+}
+
  func oclass(a *obj.Addr) int {
         return int(a.Class) - 1
  }
@@ -2220,7 +2430,7 @@ func (c *ctxt9) opform(insn uint32) int {
  
  // Encode instructions and create relocation for accessing s+d according to the
  // instruction op with source or destination (as appropriate) register reg.
-func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32) (o1, o2 uint32) {
+func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32, rel *obj.Reloc) {
         if c.ctxt.Headtype == objabi.Haix {
                 // Every symbol access must be made via a TOC anchor.
                 c.ctxt.Diag("symbolAccess called for %s", s.Name)
@@ -2232,9 +2442,16 @@ func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32) (o1, o2
         } else {
                 base = REG_R0
         }
-       o1 = AOP_IRR(OP_ADDIS, REGTMP, base, 0)
-       o2 = AOP_IRR(op, uint32(reg), REGTMP, 0)
-       rel := obj.Addrel(c.cursym)
+       // If reg can be reused when computing the symbol address,
+       // use it instead of REGTMP.
+       if !reuse {
+               o1 = AOP_IRR(OP_ADDIS, REGTMP, base, 0)
+               o2 = AOP_IRR(op, uint32(reg), REGTMP, 0)
+       } else {
+               o1 = AOP_IRR(OP_ADDIS, uint32(reg), base, 0)
+               o2 = AOP_IRR(op, uint32(reg), uint32(reg), 0)
+       }
+       rel = obj.Addrel(c.cursym)
         rel.Off = int32(c.pc)
         rel.Siz = 8
         rel.Sym = s
@@ -2258,87 +2475,59 @@ func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32) (o1, o2
         return
  }
  
-/*
- * 32-bit masks
- */
-func getmask(m []byte, v uint32) bool {
-       m[1] = 0
-       m[0] = m[1]
-       if v != ^uint32(0) && v&(1<<31) != 0 && v&1 != 0 { /* MB > ME */
-               if getmask(m, ^v) {
-                       i := int(m[0])
-                       m[0] = m[1] + 1
-                       m[1] = byte(i - 1)
-                       return true
-               }
-
-               return false
-       }
-
-       for i := 0; i < 32; i++ {
-               if v&(1<<uint(31-i)) != 0 {
-                       m[0] = byte(i)
-                       for {
-                               m[1] = byte(i)
-                               i++
-                               if i >= 32 || v&(1<<uint(31-i)) == 0 {
-                                       break
-                               }
-                       }
-
-                       for ; i < 32; i++ {
-                               if v&(1<<uint(31-i)) != 0 {
-                                       return false
-                               }
-                       }
-                       return true
-               }
+// Determine the mask begin (mb) and mask end (me) values
+// for a valid word rotate mask. A valid 32 bit mask is of
+// the form 1+0*1+ or 0*1+0*.
+//
+// Note, me is inclusive.
+func decodeMask32(mask uint32) (mb, me uint32, valid bool) {
+       mb = uint32(bits.LeadingZeros32(mask))
+       me = uint32(32 - bits.TrailingZeros32(mask))
+       mbn := uint32(bits.LeadingZeros32(^mask))
+       men := uint32(32 - bits.TrailingZeros32(^mask))
+       // Check for a wrapping mask (e.g. bits at 0 and 31)
+       if mb == 0 && me == 32 {
+               // swap the inverted values
+               mb, me = men, mbn
         }
  
-       return false
-}
-
-func (c *ctxt9) maskgen(p *obj.Prog, m []byte, v uint32) {
-       if !getmask(m, v) {
-               c.ctxt.Diag("cannot generate mask #%x\n%v", v, p)
-       }
+       // Validate mask is of the binary form 1+0*1+ or 0*1+0*
+       // Isolate rightmost 1 (if none 0) and add.
+       v := mask
+       vp := (v & -v) + v
+       // Likewise, check for the wrapping (inverted) case.
+       vn := ^v
+       vpn := (vn & -vn) + vn
+       return mb, (me - 1) & 31, (v&vp == 0 || vn&vpn == 0) && v != 0
  }
  
-/*
- * 64-bit masks (rldic etc)
- */
-func getmask64(m []byte, v uint64) bool {
-       m[1] = 0
-       m[0] = m[1]
-       for i := 0; i < 64; i++ {
-               if v&(uint64(1)<<uint(63-i)) != 0 {
-                       m[0] = byte(i)
-                       for {
-                               m[1] = byte(i)
-                               i++
-                               if i >= 64 || v&(uint64(1)<<uint(63-i)) == 0 {
-                                       break
-                               }
-                       }
-
-                       for ; i < 64; i++ {
-                               if v&(uint64(1)<<uint(63-i)) != 0 {
-                                       return false
-                               }
-                       }
-                       return true
-               }
-       }
-
-       return false
+// Decompose a mask of contiguous bits into a begin (mb) and
+// end (me) value.
+//
+// 64b mask values cannot wrap on any valid PPC64 instruction.
+// Only masks of the form 0*1+0* are valid.
+//
+// Note, me is inclusive.
+func decodeMask64(mask int64) (mb, me uint32, valid bool) {
+       m := uint64(mask)
+       mb = uint32(bits.LeadingZeros64(m))
+       me = uint32(64 - bits.TrailingZeros64(m))
+       valid = ((m&-m)+m)&m == 0 && m != 0
+       return mb, (me - 1) & 63, valid
  }
  
-func (c *ctxt9) maskgen64(p *obj.Prog, m []byte, v uint64) {
-       if !getmask64(m, v) {
-               c.ctxt.Diag("cannot generate mask #%x\n%v", v, p)
+// Load the lower 16 bits of a constant into register r.
+func loadl16(r int, d int64) uint32 {
+       v := uint16(d)
+       if v == 0 {
+               // Avoid generating "ori r,r,0", r != 0. Instead, generate the architecturally preferred nop.
+               // For example, "ori r31,r31,0" is a special execution serializing nop on Power10 called "exser".
+               return NOP
         }
+       return LOP_IRR(OP_ORI, uint32(r), uint32(r), uint32(v))
  }
  
+// Load the upper 16 bits of a 32b constant into register r.
  func loadu32(r int, d int64) uint32 {
         v := int32(d >> 16)
         if isuint32(uint64(d)) {
@@ -2354,7 +2543,7 @@ func high16adjusted(d int32) uint16 {
         return uint16(d >> 16)
  }
  
-func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
+func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
         o1 := uint32(0)
         o2 := uint32(0)
         o3 := uint32(0)
@@ -2390,20 +2579,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                         c.ctxt.Diag("literal operation on R0\n%v", p)
                 }
                 a := OP_ADDI
-               if o.a1 == C_UCON {
-                       if d&0xffff != 0 {
-                               log.Fatalf("invalid handling of %v", p)
-                       }
-                       // For UCON operands the value is right shifted 16, using ADDIS if the
-                       // value should be signed, ORIS if unsigned.
-                       v >>= 16
-                       if r == REGZERO && isuint32(uint64(d)) {
-                               o1 = LOP_IRR(OP_ORIS, uint32(p.To.Reg), REGZERO, uint32(v))
-                               break
-                       }
-
-                       a = OP_ADDIS
-               } else if int64(int16(d)) != d {
+               if int64(int16(d)) != d {
                         // Operand is 16 bit value with sign bit set
                         if o.a1 == C_ANDCON {
                                 // Needs unsigned 16 bit so use ORI
@@ -2446,11 +2622,17 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 // AROTL and AROTLW are extended mnemonics, which map to RLDCL and RLWNM.
                 switch p.As {
                 case AROTL:
-                       o1 = AOP_RLDIC(OP_RLDCL, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), uint32(0))
+                       o1 = AOP_MD(OP_RLDCL, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), uint32(0))
                 case AROTLW:
                         o1 = OP_RLW(OP_RLWNM, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), 0, 31)
                 default:
-                       o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
+                       if p.As == AOR && p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 {
+                               // Compile "OR $0, Rx, Ry" into ori. If Rx == Ry == 0, this is the preferred
+                               // hardware no-op. This happens because $0 matches C_REG before C_ZCON.
+                               o1 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(r), 0)
+                       } else {
+                               o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
+                       }
                 }
  
         case 7: /* mov r, soreg ==> stw o(r) */
@@ -2460,64 +2642,44 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                         r = c.getimpliedreg(&p.To, p)
                 }
                 v := c.regoff(&p.To)
-               if p.To.Type == obj.TYPE_MEM && p.To.Index != 0 {
-                       if v != 0 {
-                               c.ctxt.Diag("illegal indexed instruction\n%v", p)
-                       }
-                       o1 = AOP_RRR(c.opstorex(p.As), uint32(p.From.Reg), uint32(p.To.Index), uint32(r))
-               } else {
-                       if int32(int16(v)) != v {
-                               log.Fatalf("mishandled instruction %v", p)
-                       }
-                       // Offsets in DS form stores must be a multiple of 4
-                       inst := c.opstore(p.As)
-                       if c.opform(inst) == DS_FORM && v&0x3 != 0 {
-                               log.Fatalf("invalid offset for DS form load/store %v", p)
-                       }
-                       o1 = AOP_IRR(inst, uint32(p.From.Reg), uint32(r), uint32(v))
-               }
-
-       case 8: /* mov soreg, r ==> lbz/lhz/lwz o(r) */
-               r := int(p.From.Reg)
-
-               if r == 0 {
-                       r = c.getimpliedreg(&p.From, p)
+               if int32(int16(v)) != v {
+                       log.Fatalf("mishandled instruction %v", p)
                 }
-               v := c.regoff(&p.From)
-               if p.From.Type == obj.TYPE_MEM && p.From.Index != 0 {
-                       if v != 0 {
-                               c.ctxt.Diag("illegal indexed instruction\n%v", p)
-                       }
-                       o1 = AOP_RRR(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(r))
-               } else {
-                       if int32(int16(v)) != v {
-                               log.Fatalf("mishandled instruction %v", p)
-                       }
-                       // Offsets in DS form loads must be a multiple of 4
-                       inst := c.opload(p.As)
-                       if c.opform(inst) == DS_FORM && v&0x3 != 0 {
-                               log.Fatalf("invalid offset for DS form load/store %v", p)
-                       }
-                       o1 = AOP_IRR(inst, uint32(p.To.Reg), uint32(r), uint32(v))
+               // Offsets in DS form stores must be a multiple of 4
+               inst := c.opstore(p.As)
+               if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+                       log.Fatalf("invalid offset for DS form load/store %v", p)
                 }
+               o1 = AOP_IRR(inst, uint32(p.From.Reg), uint32(r), uint32(v))
  
-       case 9: /* movb soreg, r ==> lbz o(r),r2; extsb r2,r2 */
+       case 8: /* mov soreg, r ==> lbz/lhz/lwz o(r), lbz o(r) + extsb r,r */
                 r := int(p.From.Reg)
  
                 if r == 0 {
                         r = c.getimpliedreg(&p.From, p)
                 }
                 v := c.regoff(&p.From)
-               if p.From.Type == obj.TYPE_MEM && p.From.Index != 0 {
-                       if v != 0 {
-                               c.ctxt.Diag("illegal indexed instruction\n%v", p)
-                       }
-                       o1 = AOP_RRR(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(r))
-               } else {
-                       o1 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
+               if int32(int16(v)) != v {
+                       log.Fatalf("mishandled instruction %v", p)
                 }
+               // Offsets in DS form loads must be a multiple of 4
+               inst := c.opload(p.As)
+               if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+                       log.Fatalf("invalid offset for DS form load/store %v", p)
+               }
+               o1 = AOP_IRR(inst, uint32(p.To.Reg), uint32(r), uint32(v))
+
+               // Sign extend MOVB operations. This is ignored for other cases (o.size == 4).
                 o2 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
  
+       case 9: /* RLDC Ra, $sh, $mb, Rb */
+               sh := uint32(p.RestArgs[0].Addr.Offset) & 0x3F
+               mb := uint32(p.RestArgs[1].Addr.Offset) & 0x3F
+               o1 = AOP_RRR(c.opirr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), (uint32(sh) & 0x1F))
+               o1 |= (sh & 0x20) >> 4 // sh[5] is placed in bit 1.
+               o1 |= (mb & 0x1F) << 6 // mb[0:4] is placed in bits 6-10.
+               o1 |= (mb & 0x20)      // mb[5] is placed in bit 5
+
         case 10: /* sub Ra,[Rb],Rd => subf Rd,Ra,Rb */
                 r := int(p.Reg)
  
@@ -2556,7 +2718,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                         rel.Add = int64(v)
                         rel.Type = objabi.R_CALLPOWER
                 }
-               o2 = 0x60000000 // nop, sometimes overwritten by ld r2, 24(r1) when dynamic linking
+               o2 = NOP // nop, sometimes overwritten by ld r2, 24(r1) when dynamic linking
  
         case 13: /* mov[bhwd]{z,} r,r */
                 // This needs to handle "MOV* $0, Rx".  This shows up because $0 also
@@ -2590,62 +2752,47 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 }
  
         case 14: /* rldc[lr] Rb,Rs,$mask,Ra -- left, right give different masks */
-               r := int(p.Reg)
+               r := uint32(p.Reg)
  
                 if r == 0 {
-                       r = int(p.To.Reg)
+                       r = uint32(p.To.Reg)
                 }
                 d := c.vregoff(p.GetFrom3())
-               var a int
                 switch p.As {
  
                 // These opcodes expect a mask operand that has to be converted into the
                 // appropriate operand.  The way these were defined, not all valid masks are possible.
                 // Left here for compatibility in case they were used or generated.
                 case ARLDCL, ARLDCLCC:
-                       var mask [2]uint8
-                       c.maskgen64(p, mask[:], uint64(d))
-
-                       a = int(mask[0]) /* MB */
-                       if mask[1] != 63 {
+                       mb, me, valid := decodeMask64(d)
+                       if me != 63 || !valid {
                                 c.ctxt.Diag("invalid mask for rotate: %x (end != bit 63)\n%v", uint64(d), p)
                         }
-                       o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
-                       o1 |= (uint32(a) & 31) << 6
-                       if a&0x20 != 0 {
-                               o1 |= 1 << 5 /* mb[5] is top bit */
-                       }
+                       o1 = AOP_MDS(c.oprrr(p.As), uint32(p.To.Reg), r, uint32(p.From.Reg), mb)
  
                 case ARLDCR, ARLDCRCC:
-                       var mask [2]uint8
-                       c.maskgen64(p, mask[:], uint64(d))
-
-                       a = int(mask[1]) /* ME */
-                       if mask[0] != 0 {
-                               c.ctxt.Diag("invalid mask for rotate: %x %x (start != 0)\n%v", uint64(d), mask[0], p)
-                       }
-                       o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
-                       o1 |= (uint32(a) & 31) << 6
-                       if a&0x20 != 0 {
-                               o1 |= 1 << 5 /* mb[5] is top bit */
+                       mb, me, valid := decodeMask64(d)
+                       if mb != 0 || !valid {
+                               c.ctxt.Diag("invalid mask for rotate: %x (start != 0)\n%v", uint64(d), p)
                         }
+                       o1 = AOP_MDS(c.oprrr(p.As), uint32(p.To.Reg), r, uint32(p.From.Reg), me)
  
                 // These opcodes use a shift count like the ppc64 asm, no mask conversion done
                 case ARLDICR, ARLDICRCC:
-                       me := int(d)
+                       me := uint32(d)
                         sh := c.regoff(&p.From)
                         if me < 0 || me > 63 || sh > 63 {
                                 c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh, p)
                         }
-                       o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me))
+                       o1 = AOP_MD(c.oprrr(p.As), uint32(p.To.Reg), r, uint32(sh), me)
  
                 case ARLDICL, ARLDICLCC, ARLDIC, ARLDICCC:
-                       mb := int(d)
+                       mb := uint32(d)
                         sh := c.regoff(&p.From)
                         if mb < 0 || mb > 63 || sh > 63 {
                                 c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh, p)
                         }
-                       o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb))
+                       o1 = AOP_MD(c.oprrr(p.As), uint32(p.To.Reg), r, uint32(sh), mb)
  
                 case ACLRLSLDI:
                         // This is an extended mnemonic defined in the ISA section C.8.1
@@ -2657,11 +2804,10 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                         if n > b || b > 63 {
                                 c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b, p)
                         }
-                       o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n))
+                       o1 = AOP_MD(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n))
  
                 default:
                         c.ctxt.Diag("unexpected op in rldc case\n%v", p)
-                       a = 0
                 }
  
         case 17, /* bc bo,bi,lbra (same for now) */
@@ -2712,20 +2858,6 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 }
                 o1 = OP_BC(c.opirr(p.As), uint32(a), uint32(r), uint32(v), 0)
  
-       case 15: /* br/bl (r) => mov r,lr; br/bl (lr) */
-               var v int32
-               if p.As == ABC || p.As == ABCL {
-                       v = c.regoff(&p.To) & 31
-               } else {
-                       v = 20 /* unconditional */
-               }
-               o1 = AOP_RRR(OP_MTSPR, uint32(p.To.Reg), 0, 0) | (REG_LR&0x1f)<<16 | ((REG_LR>>5)&0x1f)<<11
-               o2 = OPVCC(19, 16, 0, 0)
-               if p.As == ABL || p.As == ABCL {
-                       o2 |= 1
-               }
-               o2 = OP_BCR(o2, uint32(v), uint32(p.To.Index))
-
         case 18: /* br/bl (lr/ctr); bc/bcl bo,bi,(lr/ctr) */
                 var v int32
                 var bh uint32 = 0
@@ -2766,8 +2898,12 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
  
         case 19: /* mov $lcon,r ==> cau+or */
                 d := c.vregoff(&p.From)
-               o1 = loadu32(int(p.To.Reg), d)
-               o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d)))
+               if o.ispfx {
+                       o1, o2 = pfxadd(p.To.Reg, REG_R0, PFX_R_ABS, d)
+               } else {
+                       o1 = loadu32(int(p.To.Reg), d)
+                       o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d)))
+               }
  
         case 20: /* add $ucon,,r | addis $addcon,r,r */
                 v := c.regoff(&p.From)
@@ -2776,16 +2912,9 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 if r == 0 {
                         r = int(p.To.Reg)
                 }
-               if p.As == AADD && (r0iszero == 0 /*TypeKind(100016)*/ && p.Reg == 0 || r0iszero != 0 /*TypeKind(100016)*/ && p.To.Reg == 0) {
-                       c.ctxt.Diag("literal operation on R0\n%v", p)
-               }
-               if p.As == AADDIS {
-                       o1 = AOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
-               } else {
-                       o1 = AOP_IRR(c.opirr(AADDIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
-               }
+               o1 = AOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
  
-       case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add */
+       case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add, add $s34con,r1 ==> addis+ori+slw+ori+add */
                 if p.To.Reg == REGTMP || p.Reg == REGTMP {
                         c.ctxt.Diag("can't synthesize large constant\n%v", p)
                 }
@@ -2797,15 +2926,23 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 if p.From.Sym != nil {
                         c.ctxt.Diag("%v is not supported", p)
                 }
-               // If operand is ANDCON, generate 2 instructions using
-               // ORI for unsigned value; with LCON 3 instructions.
-               if o.size == 8 {
-                       o1 = LOP_IRR(OP_ORI, REGTMP, REGZERO, uint32(int32(d)))
-                       o2 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
+               if o.ispfx {
+                       o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, d)
+               } else if o.size == 8 {
+                       o1 = LOP_IRR(OP_ORI, REGTMP, REGZERO, uint32(int32(d)))          // tmp = uint16(d)
+                       o2 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) // to = tmp + from
+               } else if o.size == 12 {
+                       // Note, o1 is ADDIS if d is negative, ORIS otherwise.
+                       o1 = loadu32(REGTMP, d)                                          // tmp = d & 0xFFFF0000
+                       o2 = loadl16(REGTMP, d)                                          // tmp |= d & 0xFFFF
+                       o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) // to = from + tmp
                 } else {
-                       o1 = loadu32(REGTMP, d)
-                       o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d)))
-                       o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
+                       // For backwards compatibility with GOPPC64 < 10, generate 34b constants in register.
+                       o1 = LOP_IRR(OP_ADDIS, REGZERO, REGTMP, uint32(d>>32)) // tmp = sign_extend((d>>16)&0xFFFF0000)
+                       o2 = loadl16(REGTMP, int64(d>>16))                     // tmp |= (d>>16)&0xFFFF
+                       o3 = AOP_MD(OP_RLDICR, REGTMP, REGTMP, 16, 63-16)      // tmp <<= 16
+                       o4 = loadl16(REGTMP, int64(uint16(d)))                 // tmp |= d&0xFFFF
+                       o5 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
                 }
  
         case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */
@@ -2825,7 +2962,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                         o2 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
                 } else {
                         o1 = loadu32(REGTMP, d)
-                       o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d)))
+                       o2 = loadl16(REGTMP, d)
                         o3 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
                 }
                 if p.From.Sym != nil {
@@ -2866,7 +3003,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 case AROTL:
                         a = int(0)
                         op = OP_RLDICL
-               case AEXTSWSLI:
+               case AEXTSWSLI, AEXTSWSLICC:
                         a = int(v)
                 default:
                         c.ctxt.Diag("unexpected op in sldi case\n%v", p)
@@ -2878,7 +3015,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                         o1 = AOP_EXTSWSLI(OP_EXTSWSLI, uint32(r), uint32(p.To.Reg), uint32(v))
  
                 } else {
-                       o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a))
+                       o1 = AOP_MD(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a))
                 }
                 if p.As == ASLDCC || p.As == ASRDCC || p.As == AEXTSWSLICC {
                         o1 |= 1 // Set the condition code bit
@@ -2887,18 +3024,28 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
         case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */
                 v := c.vregoff(&p.From)
                 r := int(p.From.Reg)
+               var rel *obj.Reloc
  
                 switch p.From.Name {
                 case obj.NAME_EXTERN, obj.NAME_STATIC:
                         // Load a 32 bit constant, or relocation depending on if a symbol is attached
-                       o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI)
+                       o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true)
                 default:
                         if r == 0 {
                                 r = c.getimpliedreg(&p.From, p)
                         }
                         // Add a 32 bit offset to a register.
-                       o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(int32(v))))
-                       o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), REGTMP, uint32(v))
+                       o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(int32(v))))
+                       o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
+               }
+
+               if o.ispfx {
+                       if rel == nil {
+                               o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, v)
+                       } else {
+                               o1, o2 = pfxadd(int16(p.To.Reg), REG_R0, PFX_R_PCREL, 0)
+                               rel.Type = objabi.R_ADDRPOWER_PCREL34
+                       }
                 }
  
         case 27: /* subc ra,$simm,rd => subfic rd,ra,$simm */
@@ -2911,87 +3058,60 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 if p.To.Reg == REGTMP || p.From.Reg == REGTMP {
                         c.ctxt.Diag("can't synthesize large constant\n%v", p)
                 }
-               v := c.regoff(p.GetFrom3())
+               v := c.vregoff(p.GetFrom3())
                 o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, uint32(v)>>16)
-               o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(v))
+               o2 = loadl16(REGTMP, v)
                 o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), REGTMP)
                 if p.From.Sym != nil {
                         c.ctxt.Diag("%v is not supported", p)
                 }
  
         case 29: /* rldic[lr]? $sh,s,$mask,a -- left, right, plain give different masks */
-               v := c.regoff(&p.From)
-
+               sh := uint32(c.regoff(&p.From))
                 d := c.vregoff(p.GetFrom3())
-               var mask [2]uint8
-               c.maskgen64(p, mask[:], uint64(d))
-               var a int
+               mb, me, valid := decodeMask64(d)
+               var a uint32
                 switch p.As {
                 case ARLDC, ARLDCCC:
-                       a = int(mask[0]) /* MB */
-                       if int32(mask[1]) != (63 - v) {
-                               c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), mask[1], v, p)
+                       a = mb
+                       if me != (63-sh) || !valid {
+                               c.ctxt.Diag("invalid mask for shift: %016x (mb=%d,me=%d) (shift %d)\n%v", uint64(d), mb, me, sh, p)
                         }
  
                 case ARLDCL, ARLDCLCC:
-                       a = int(mask[0]) /* MB */
-                       if mask[1] != 63 {
-                               c.ctxt.Diag("invalid mask for shift: %x %s (shift %d)\n%v", uint64(d), mask[1], v, p)
+                       a = mb
+                       if mb != 63 || !valid {
+                               c.ctxt.Diag("invalid mask for shift: %016x (mb=%d,me=%d) (shift %d)\n%v", uint64(d), mb, me, sh, p)
                         }
  
                 case ARLDCR, ARLDCRCC:
-                       a = int(mask[1]) /* ME */
-                       if mask[0] != 0 {
-                               c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), mask[0], v, p)
+                       a = me
+                       if mb != 0 || !valid {
+                               c.ctxt.Diag("invalid mask for shift: %016x (mb=%d,me=%d) (shift %d)\n%v", uint64(d), mb, me, sh, p)
                         }
  
                 default:
                         c.ctxt.Diag("unexpected op in rldic case\n%v", p)
-                       a = 0
-               }
-
-               o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), (uint32(v) & 0x1F))
-               o1 |= (uint32(a) & 31) << 6
-               if v&0x20 != 0 {
-                       o1 |= 1 << 1
-               }
-               if a&0x20 != 0 {
-                       o1 |= 1 << 5 /* mb[5] is top bit */
                 }
+               o1 = AOP_MD(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), sh, a)
  
         case 30: /* rldimi $sh,s,$mask,a */
-               v := c.regoff(&p.From)
-
+               sh := uint32(c.regoff(&p.From))
                 d := c.vregoff(p.GetFrom3())
  
                 // Original opcodes had mask operands which had to be converted to a shift count as expected by
                 // the ppc64 asm.
                 switch p.As {
                 case ARLDMI, ARLDMICC:
-                       var mask [2]uint8
-                       c.maskgen64(p, mask[:], uint64(d))
-                       if int32(mask[1]) != (63 - v) {
-                               c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), mask[1], v, p)
-                       }
-                       o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), (uint32(v) & 0x1F))
-                       o1 |= (uint32(mask[0]) & 31) << 6
-                       if v&0x20 != 0 {
-                               o1 |= 1 << 1
-                       }
-                       if mask[0]&0x20 != 0 {
-                               o1 |= 1 << 5 /* mb[5] is top bit */
+                       mb, me, valid := decodeMask64(d)
+                       if me != (63-sh) || !valid {
+                               c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), me, sh, p)
                         }
+                       o1 = AOP_MD(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), sh, mb)
  
                 // Opcodes with shift count operands.
                 case ARLDIMI, ARLDIMICC:
-                       o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), (uint32(v) & 0x1F))
-                       o1 |= (uint32(d) & 31) << 6
-                       if d&0x20 != 0 {
-                               o1 |= 1 << 5
-                       }
-                       if v&0x20 != 0 {
-                               o1 |= 1 << 1
-                       }
+                       o1 = AOP_MD(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), sh, uint32(d))
                 }
  
         case 31: /* dword */
@@ -3043,41 +3163,60 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                         r = c.getimpliedreg(&p.To, p)
                 }
                 // Offsets in DS form stores must be a multiple of 4
-               inst := c.opstore(p.As)
-               if c.opform(inst) == DS_FORM && v&0x3 != 0 {
-                       log.Fatalf("invalid offset for DS form load/store %v", p)
+               if o.ispfx {
+                       o1, o2 = pfxstore(p.As, p.From.Reg, int16(r), PFX_R_ABS)
+                       o1 |= uint32((v >> 16) & 0x3FFFF)
+                       o2 |= uint32(v & 0xFFFF)
+               } else {
+                       inst := c.opstore(p.As)
+                       if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+                               log.Fatalf("invalid offset for DS form load/store %v", p)
+                       }
+                       o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
+                       o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v))
                 }
-               o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
-               o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v))
  
-       case 36: /* mov bz/h/hz lext/lauto/lreg,r ==> lbz/lha/lhz etc */
+       case 36: /* mov b/bz/h/hz lext/lauto/lreg,r ==> lbz+extsb/lbz/lha/lhz etc */
                 v := c.regoff(&p.From)
  
                 r := int(p.From.Reg)
                 if r == 0 {
                         r = c.getimpliedreg(&p.From, p)
                 }
-               o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
-               o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), REGTMP, uint32(v))
  
-       case 37: /* movb lext/lauto/lreg,r ==> lbz o(reg),r; extsb r */
-               v := c.regoff(&p.From)
-
-               r := int(p.From.Reg)
-               if r == 0 {
-                       r = c.getimpliedreg(&p.From, p)
+               if o.ispfx {
+                       o1, o2 = pfxload(p.As, p.To.Reg, int16(r), PFX_R_ABS)
+                       o1 |= uint32((v >> 16) & 0x3FFFF)
+                       o2 |= uint32(v & 0xFFFF)
+               } else {
+                       if o.a6 == C_REG {
+                               // Reuse the base register when loading a GPR (C_REG) to avoid
+                               // using REGTMP (R31) when possible.
+                               o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v)))
+                               o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
+                       } else {
+                               o1 = AOP_IRR(OP_ADDIS, uint32(REGTMP), uint32(r), uint32(high16adjusted(v)))
+                               o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(REGTMP), uint32(v))
+                       }
                 }
-               o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
-               o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), REGTMP, uint32(v))
+
+               // Sign extend MOVB if needed
                 o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
  
         case 40: /* word */
                 o1 = uint32(c.regoff(&p.From))
  
         case 41: /* stswi */
+               if p.To.Type == obj.TYPE_MEM && p.To.Index == 0 && p.To.Offset != 0 {
+                       c.ctxt.Diag("Invalid addressing mode used in index type instruction: %v", p.As)
+               }
+
                 o1 = AOP_RRR(c.opirr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), 0) | (uint32(c.regoff(p.GetFrom3()))&0x7F)<<11
  
         case 42: /* lswi */
+               if p.From.Type == obj.TYPE_MEM && p.From.Index == 0 && p.From.Offset != 0 {
+                       c.ctxt.Diag("Invalid addressing mode used in index type instruction: %v", p.As)
+               }
                 o1 = AOP_RRR(c.opirr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), 0) | (uint32(c.regoff(p.GetFrom3()))&0x7F)<<11
  
         case 43: /* data cache instructions: op (Ra+[Rb]), [th|l] */
@@ -3247,24 +3386,6 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 }
                 o1 = LOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
  
-       case 59: /* or/xor/and $ucon,,r | oris/xoris/andis $addcon,r,r */
-               v := c.regoff(&p.From)
-
-               r := int(p.Reg)
-               if r == 0 {
-                       r = int(p.To.Reg)
-               }
-               switch p.As {
-               case AOR:
-                       o1 = LOP_IRR(c.opirr(AORIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16) /* oris, xoris, andis. */
-               case AXOR:
-                       o1 = LOP_IRR(c.opirr(AXORIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
-               case AANDCC:
-                       o1 = LOP_IRR(c.opirr(AANDISCC), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
-               default:
-                       o1 = LOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
-               }
-
         case 60: /* tw to,a,b */
                 r := int(c.regoff(&p.From) & 31)
  
@@ -3276,31 +3397,35 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 v := c.regoff(&p.To)
                 o1 = AOP_IRR(c.opirr(p.As), uint32(r), uint32(p.Reg), uint32(v))
  
-       case 62: /* rlwmi $sh,s,$mask,a */
+       case 62: /* clrlslwi $sh,s,$mask,a */
                 v := c.regoff(&p.From)
-               switch p.As {
-               case ACLRLSLWI:
-                       n := c.regoff(p.GetFrom3())
-                       // This is an extended mnemonic described in the ISA C.8.2
-                       // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n
-                       // It maps onto rlwinm which is directly generated here.
-                       if n > v || v >= 32 {
-                               c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p)
+               n := c.regoff(p.GetFrom3())
+               // This is an extended mnemonic described in the ISA C.8.2
+               // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n
+               // It maps onto rlwinm which is directly generated here.
+               if n > v || v >= 32 {
+                       c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p)
+               }
+
+               o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n))
+
+       case 63: /* rlwimi/rlwnm/rlwinm [$sh,b],s,[$mask or mb,me],a*/
+               var mb, me uint32
+               if len(p.RestArgs) == 1 { // Mask needs to be decomposed into mb and me.
+                       var valid bool
+                       // Note, optab rules ensure $mask is a 32b constant.
+                       mb, me, valid = decodeMask32(uint32(p.RestArgs[0].Addr.Offset))
+                       if !valid {
+                               c.ctxt.Diag("cannot generate mask #%x\n%v", uint64(p.RestArgs[0].Addr.Offset), p)
                         }
-
-                       o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n))
-               default:
-                       var mask [2]uint8
-                       c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3())))
-                       o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v))
-                       o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1
+               } else { // Otherwise, mask is already passed as mb and me in RestArgs.
+                       mb, me = uint32(p.RestArgs[0].Addr.Offset), uint32(p.RestArgs[1].Addr.Offset)
+               }
+               if p.From.Type == obj.TYPE_CONST {
+                       o1 = OP_RLW(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Offset), mb, me)
+               } else {
+                       o1 = OP_RLW(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), mb, me)
                 }
-
-       case 63: /* rlwmi b,s,$mask,a */
-               var mask [2]uint8
-               c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3())))
-               o1 = AOP_RRR(c.oprrr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg))
-               o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1
  
         case 64: /* mtfsf fr[, $m] {,fpcsr} */
                 var v int32
@@ -3317,56 +3442,46 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 }
                 o1 = OP_MTFSFI | (uint32(p.To.Reg)&15)<<23 | (uint32(c.regoff(&p.From))&31)<<12
  
-       case 66: /* mov spr,r1; mov r1,spr, also dcr */
+       case 66: /* mov spr,r1; mov r1,spr */
                 var r int
                 var v int32
                 if REG_R0 <= p.From.Reg && p.From.Reg <= REG_R31 {
                         r = int(p.From.Reg)
                         v = int32(p.To.Reg)
-                       if REG_DCR0 <= v && v <= REG_DCR0+1023 {
-                               o1 = OPVCC(31, 451, 0, 0) /* mtdcr */
-                       } else {
-                               o1 = OPVCC(31, 467, 0, 0) /* mtspr */
-                       }
+                       o1 = OPVCC(31, 467, 0, 0) /* mtspr */
                 } else {
                         r = int(p.To.Reg)
                         v = int32(p.From.Reg)
-                       if REG_DCR0 <= v && v <= REG_DCR0+1023 {
-                               o1 = OPVCC(31, 323, 0, 0) /* mfdcr */
-                       } else {
-                               o1 = OPVCC(31, 339, 0, 0) /* mfspr */
-                       }
+                       o1 = OPVCC(31, 339, 0, 0) /* mfspr */
                 }
  
                 o1 = AOP_RRR(o1, uint32(r), 0, 0) | (uint32(v)&0x1f)<<16 | ((uint32(v)>>5)&0x1f)<<11
  
         case 67: /* mcrf crfD,crfS */
-               if p.From.Type != obj.TYPE_REG || p.From.Reg < REG_CR0 || REG_CR7 < p.From.Reg || p.To.Type != obj.TYPE_REG || p.To.Reg < REG_CR0 || REG_CR7 < p.To.Reg {
-                       c.ctxt.Diag("illegal CR field number\n%v", p)
+               if p.From.Reg == REG_CR || p.To.Reg == REG_CR {
+                       c.ctxt.Diag("CR argument must be a conditional register field (CR0-CR7)\n%v", p)
                 }
                 o1 = AOP_RRR(OP_MCRF, ((uint32(p.To.Reg) & 7) << 2), ((uint32(p.From.Reg) & 7) << 2), 0)
  
         case 68: /* mfcr rD; mfocrf CRM,rD */
-               if p.From.Type == obj.TYPE_REG && REG_CR0 <= p.From.Reg && p.From.Reg <= REG_CR7 {
-                       v := int32(1 << uint(7-(p.To.Reg&7)))                                 /* CR(n) */
-                       o1 = AOP_RRR(OP_MFCR, uint32(p.To.Reg), 0, 0) | 1<<20 | uint32(v)<<12 /* new form, mfocrf */
-               } else {
-                       o1 = AOP_RRR(OP_MFCR, uint32(p.To.Reg), 0, 0) /* old form, whole register */
+               o1 = AOP_RRR(OP_MFCR, uint32(p.To.Reg), 0, 0) /* old form, whole register */
+               if p.From.Reg != REG_CR {
+                       v := uint32(1) << uint(7-(p.From.Reg&7)) /* CR(n) */
+                       o1 |= 1<<20 | v<<12                      /* new form, mfocrf */
                 }
  
-       case 69: /* mtcrf CRM,rS */
-               var v int32
-               if p.From3Type() != obj.TYPE_NONE {
-                       if p.To.Reg != 0 {
-                               c.ctxt.Diag("can't use both mask and CR(n)\n%v", p)
-                       }
-                       v = c.regoff(p.GetFrom3()) & 0xff
-               } else {
-                       if p.To.Reg == 0 {
-                               v = 0xff /* CR */
-                       } else {
-                               v = 1 << uint(7-(p.To.Reg&7)) /* CR(n) */
-                       }
+       case 69: /* mtcrf CRM,rS, mtocrf CRx,rS */
+               var v uint32
+               if p.To.Reg == REG_CR {
+                       v = 0xff
+               } else if p.To.Offset != 0 { // MOVFL gpr, constant
+                       v = uint32(p.To.Offset)
+               } else { // p.To.Reg == REG_CRx
+                       v = 1 << uint(7-(p.To.Reg&7))
+               }
+               // Use mtocrf form if only one CR field moved.
+               if bits.OnesCount32(v) == 1 {
+                       v |= 1 << 8
                 }
  
                 o1 = AOP_RRR(OP_MTCRF, uint32(p.From.Reg), 0, 0) | uint32(v)<<12
@@ -3420,22 +3535,28 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
  
         /* relocation operations */
         case 74:
+               var rel *obj.Reloc
                 v := c.vregoff(&p.To)
                 // Offsets in DS form stores must be a multiple of 4
                 inst := c.opstore(p.As)
-               if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+
+               // Can't reuse base for store instructions.
+               o1, o2, rel = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false)
+
+               // Rewrite as a prefixed store if supported.
+               if o.ispfx {
+                       o1, o2 = pfxstore(p.As, p.From.Reg, REG_R0, PFX_R_PCREL)
+                       rel.Type = objabi.R_ADDRPOWER_PCREL34
+               } else if c.opform(inst) == DS_FORM && v&0x3 != 0 {
                         log.Fatalf("invalid offset for DS form load/store %v", p)
                 }
-               o1, o2 = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst)
  
         case 75: // 32 bit offset symbol loads (got/toc/addr)
+               var rel *obj.Reloc
                 v := p.From.Offset
  
                 // Offsets in DS form loads must be a multiple of 4
                 inst := c.opload(p.As)
-               if c.opform(inst) == DS_FORM && v&0x3 != 0 {
-                       log.Fatalf("invalid offset for DS form load/store %v", p)
-               }
                 switch p.From.Name {
                 case obj.NAME_GOTREF, obj.NAME_TOCREF:
                         if v != 0 {
@@ -3443,7 +3564,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                         }
                         o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
                         o2 = AOP_IRR(inst, uint32(p.To.Reg), uint32(p.To.Reg), 0)
-                       rel := obj.Addrel(c.cursym)
+                       rel = obj.Addrel(c.cursym)
                         rel.Off = int32(c.pc)
                         rel.Siz = 8
                         rel.Sym = p.From.Sym
@@ -3454,43 +3575,68 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                                 rel.Type = objabi.R_ADDRPOWER_TOCREL_DS
                         }
                 default:
-                       o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst)
-               }
-
-       case 76:
-               v := c.vregoff(&p.From)
-               // Offsets in DS form loads must be a multiple of 4
-               inst := c.opload(p.As)
-               if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+                       reuseBaseReg := o.a6 == C_REG
+                       // Reuse To.Reg as base register if it is a GPR.
+                       o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg)
+               }
+
+               // Convert to prefixed forms if supported.
+               if o.ispfx {
+                       switch rel.Type {
+                       case objabi.R_ADDRPOWER, objabi.R_ADDRPOWER_DS,
+                               objabi.R_ADDRPOWER_TOCREL, objabi.R_ADDRPOWER_TOCREL_DS:
+                               o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+                               rel.Type = objabi.R_ADDRPOWER_PCREL34
+                       case objabi.R_POWER_TLS_IE:
+                               o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+                               rel.Type = objabi.R_POWER_TLS_IE_PCREL34
+                       case objabi.R_ADDRPOWER_GOT:
+                               o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+                               rel.Type = objabi.R_ADDRPOWER_GOT_PCREL34
+                       default:
+                               // We've failed to convert a TOC-relative relocation to a PC-relative one.
+                               log.Fatalf("Unable convert TOC-relative relocation %v to PC-relative", rel.Type)
+                       }
+               } else if c.opform(inst) == DS_FORM && v&0x3 != 0 {
                         log.Fatalf("invalid offset for DS form load/store %v", p)
                 }
-               o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst)
+
                 o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
  
         case 79:
                 if p.From.Offset != 0 {
                         c.ctxt.Diag("invalid offset against tls var %v", p)
                 }
-               o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0)
-               o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0)
                 rel := obj.Addrel(c.cursym)
                 rel.Off = int32(c.pc)
                 rel.Siz = 8
                 rel.Sym = p.From.Sym
-               rel.Type = objabi.R_POWER_TLS_LE
+               if !o.ispfx {
+                       o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0)
+                       o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0)
+                       rel.Type = objabi.R_POWER_TLS_LE
+               } else {
+                       o1, o2 = pfxadd(p.To.Reg, REG_R13, PFX_R_ABS, 0)
+                       rel.Type = objabi.R_POWER_TLS_LE_TPREL34
+               }
  
         case 80:
                 if p.From.Offset != 0 {
                         c.ctxt.Diag("invalid offset against tls var %v", p)
                 }
-               o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
-               o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0)
-               o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13)
                 rel := obj.Addrel(c.cursym)
                 rel.Off = int32(c.pc)
                 rel.Siz = 8
                 rel.Sym = p.From.Sym
                 rel.Type = objabi.R_POWER_TLS_IE
+               if !o.ispfx {
+                       o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
+                       o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0)
+               } else {
+                       o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+                       rel.Type = objabi.R_POWER_TLS_IE_PCREL34
+               }
+               o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13)
                 rel = obj.Addrel(c.cursym)
                 rel.Off = int32(c.pc) + 8
                 rel.Siz = 4
@@ -3535,6 +3681,10 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
  
         case 84: // ISEL BC,RA,RB,RT -> isel rt,ra,rb,bc
                 bc := c.vregoff(&p.From)
+               if o.a1 == C_CRBIT {
+                       // CR bit is encoded as a register, not a constant.
+                       bc = int64(p.From.Reg)
+               }
  
                 // rt = To.Reg, ra = p.Reg, rb = p.From3.Reg
                 o1 = AOP_ISEL(OP_ISEL, uint32(p.To.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg), uint32(bc))
@@ -3554,33 +3704,8 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 /* 3-register operand order: (RB)(RA*1), XT */
                 o1 = AOP_XX1(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(p.From.Reg))
  
-       case 88: /* VSX instructions, XX1-form */
-               /* reg reg none OR reg reg reg */
-               /* 3-register operand order: RA, RB, XT */
-               /* 2-register operand order: XS, RA or RA, XT */
-               xt := int32(p.To.Reg)
-               xs := int32(p.From.Reg)
-               /* We need to treat the special case of extended mnemonics that may have a FREG/VREG as an argument */
-               if REG_V0 <= xt && xt <= REG_V31 {
-                       /* Convert V0-V31 to VS32-VS63 */
-                       xt = xt + 64
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
-               } else if REG_F0 <= xt && xt <= REG_F31 {
-                       /* Convert F0-F31 to VS0-VS31 */
-                       xt = xt + 64
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
-               } else if REG_VS0 <= xt && xt <= REG_VS63 {
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
-               } else if REG_V0 <= xs && xs <= REG_V31 {
-                       /* Likewise for XS */
-                       xs = xs + 64
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
-               } else if REG_F0 <= xs && xs <= REG_F31 {
-                       xs = xs + 64
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
-               } else if REG_VS0 <= xs && xs <= REG_VS63 {
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
-               }
+       case 88: /* VSX mfvsr* instructions, XX1-form XS,RA */
+               o1 = AOP_XX1(c.oprrr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
  
         case 89: /* VSX instructions, XX2-form */
                 /* reg none reg OR reg imm reg */
@@ -3701,16 +3826,48 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
         case 101:
                 o1 = AOP_XX2(c.oprrr(p.As), uint32(p.To.Reg), uint32(0), uint32(p.From.Reg))
  
-       case 102: /* RLWMI $sh,rs,$mb,$me,rt (M-form opcode)*/
-               mb := uint32(c.regoff(&p.RestArgs[0].Addr))
-               me := uint32(c.regoff(&p.RestArgs[1].Addr))
-               sh := uint32(c.regoff(&p.From))
-               o1 = OP_RLW(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), sh, mb, me)
+       case 104: /* VSX mtvsr* instructions, XX1-form RA,RB,XT */
+               o1 = AOP_XX1(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
+
+       case 106: /* MOVD spr, soreg */
+               v := int32(p.From.Reg)
+               o1 = OPVCC(31, 339, 0, 0) /* mfspr */
+               o1 = AOP_RRR(o1, uint32(REGTMP), 0, 0) | (uint32(v)&0x1f)<<16 | ((uint32(v)>>5)&0x1f)<<11
+               so := c.regoff(&p.To)
+               o2 = AOP_IRR(c.opstore(AMOVD), uint32(REGTMP), uint32(p.To.Reg), uint32(so))
+               if so&0x3 != 0 {
+                       log.Fatalf("invalid offset for DS form load/store %v", p)
+               }
+               if p.To.Reg == REGTMP {
+                       log.Fatalf("SPR move to memory will clobber R31 %v", p)
+               }
+
+       case 107: /* MOVD soreg, spr */
+               v := int32(p.From.Reg)
+               so := c.regoff(&p.From)
+               o1 = AOP_IRR(c.opload(AMOVD), uint32(REGTMP), uint32(v), uint32(so))
+               o2 = OPVCC(31, 467, 0, 0) /* mtspr */
+               v = int32(p.To.Reg)
+               o2 = AOP_RRR(o2, uint32(REGTMP), 0, 0) | (uint32(v)&0x1f)<<16 | ((uint32(v)>>5)&0x1f)<<11
+               if so&0x3 != 0 {
+                       log.Fatalf("invalid offset for DS form load/store %v", p)
+               }
+
+       case 108: /* mov r, xoreg ==> stwx rx,ry */
+               r := int(p.To.Reg)
+               o1 = AOP_RRR(c.opstorex(p.As), uint32(p.From.Reg), uint32(p.To.Index), uint32(r))
+
+       case 109: /* mov xoreg, r ==> lbzx/lhzx/lwzx rx,ry, lbzx rx,ry + extsb r,r */
+               r := int(p.From.Reg)
  
-       case 103: /* RLWMI rb,rs,$mb,$me,rt (M-form opcode)*/
-               mb := uint32(c.regoff(&p.RestArgs[0].Addr))
-               me := uint32(c.regoff(&p.RestArgs[1].Addr))
-               o1 = OP_RLW(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), mb, me)
+               o1 = AOP_RRR(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(r))
+               // Sign extend MOVB operations. This is ignored for other cases (o.size == 4).
+               o2 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
+
+       case 110: /* SETB creg, rt */
+               bfa := uint32(p.From.Reg) << 2
+               rt := uint32(p.To.Reg)
+               o1 = LOP_RRR(OP_SETB, bfa, rt, 0)
         }
  
         out[0] = o1
@@ -4183,10 +4340,6 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
         case AHRFID:
                 return OPVCC(19, 274, 0, 0)
  
-       case ARLWMI:
-               return OPVCC(20, 0, 0, 0)
-       case ARLWMICC:
-               return OPVCC(20, 0, 0, 1)
         case ARLWNM:
                 return OPVCC(23, 0, 0, 0)
         case ARLWNMCC:
@@ -4514,6 +4667,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
         case AVCLZD:
                 return OPVX(4, 1986, 0, 0) /* vclzd - v2.07 */
  
+       case AVCLZLSBB:
+               return OPVX(4, 1538, 0, 0) /* vclzlsbb - v3.0 */
+       case AVCTZLSBB:
+               return OPVX(4, 1538, 0, 0) | 1<<16 /* vctzlsbb - v3.0 */
+
         case AVPOPCNTB:
                 return OPVX(4, 1795, 0, 0) /* vpopcntb - v2.07 */
         case AVPOPCNTH:
@@ -4822,21 +4980,25 @@ func (c *ctxt9) opirr(a obj.As) uint32 {
                 return OPVCC(16, 0, 0, 0) | 1
  
         case ABEQ:
-               return AOP_RRR(16<<26, 12, 2, 0)
+               return AOP_RRR(16<<26, BO_BCR, BI_EQ, 0)
         case ABGE:
-               return AOP_RRR(16<<26, 4, 0, 0)
+               return AOP_RRR(16<<26, BO_NOTBCR, BI_LT, 0)
         case ABGT:
-               return AOP_RRR(16<<26, 12, 1, 0)
+               return AOP_RRR(16<<26, BO_BCR, BI_GT, 0)
         case ABLE:
-               return AOP_RRR(16<<26, 4, 1, 0)
+               return AOP_RRR(16<<26, BO_NOTBCR, BI_GT, 0)
         case ABLT:
-               return AOP_RRR(16<<26, 12, 0, 0)
+               return AOP_RRR(16<<26, BO_BCR, BI_LT, 0)
         case ABNE:
-               return AOP_RRR(16<<26, 4, 2, 0)
+               return AOP_RRR(16<<26, BO_NOTBCR, BI_EQ, 0)
         case ABVC:
-               return AOP_RRR(16<<26, 4, 3, 0) // apparently unordered-clear
+               return AOP_RRR(16<<26, BO_NOTBCR, BI_FU, 0)
         case ABVS:
-               return AOP_RRR(16<<26, 12, 3, 0) // apparently unordered-set
+               return AOP_RRR(16<<26, BO_BCR, BI_FU, 0)
+       case ABDZ:
+               return AOP_RRR(16<<26, BO_NOTBCTR, 0, 0)
+       case ABDNZ:
+               return AOP_RRR(16<<26, BO_BCTR, 0, 0)
  
         case ACMP:
                 return OPVCC(11, 0, 0, 0) | 1<<21 /* L=1 */
@@ -5054,8 +5216,6 @@ func (c *ctxt9) oploadx(a obj.As) uint32 {
                 return OPVCC(31, 279, 0, 0) /* lhzx */
         case AMOVHZU:
                 return OPVCC(31, 311, 0, 0) /* lhzux */
-       case AECIWX:
-               return OPVCC(31, 310, 0, 0) /* eciwx */
         case ALBAR:
                 return OPVCC(31, 52, 0, 0) /* lbarx */
         case ALHAR:
@@ -5070,8 +5230,6 @@ func (c *ctxt9) oploadx(a obj.As) uint32 {
                 return OPVCC(31, 21, 0, 0) /* ldx */
         case AMOVDU:
                 return OPVCC(31, 53, 0, 0) /* ldux */
-       case ALDMX:
-               return OPVCC(31, 309, 0, 0) /* ldmx */
  
         /* Vector (VMX/Altivec) instructions */
         case ALVEBX:
@@ -5216,8 +5374,6 @@ func (c *ctxt9) opstorex(a obj.As) uint32 {
                 return OPVCC(31, 150, 0, 1) /* stwcx. */
         case ASTDCCC:
                 return OPVCC(31, 214, 0, 1) /* stdcx. */
-       case AECOWX:
-               return OPVCC(31, 438, 0, 0) /* ecowx */
         case AMOVD:
                 return OPVCC(31, 149, 0, 0) /* stdx */
         case AMOVDU: