src/cmd/compile/internal/ssa/_gen/PPC64Ops.go

   1 // Copyright 2016 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package main
   6
   7 import "strings"
   8
   9 // Notes:
  10 //  - Less-than-64-bit integer types live in the low portion of registers.
  11 //    The upper portion is junk.
  12 //  - Boolean types are zero or 1; stored in a byte, with upper bytes of the register containing junk.
  13 //  - *const instructions may use a constant larger than the instruction can encode.
  14 //    In this case the assembler expands it to multiple instructions and uses the
  15 //    tmp register (R31).
  16
  17 var regNamesPPC64 = []string{ // register names; a name's index here is the bit position init uses for it in register masks
  18         "R0", // REGZERO, not used, but simplifies counting in regalloc
  19         "SP", // REGSP
  20         "SB", // REGSB
  21         "R3",
  22         "R4",
  23         "R5",
  24         "R6",
  25         "R7",
  26         "R8",
  27         "R9",
  28         "R10",
  29         "R11", // REGCTXT for closures
  30         "R12", // init's callptr mask is this register
  31         "R13", // REGTLS; left out of init's gp mask
  32         "R14",
  33         "R15",
  34         "R16",
  35         "R17",
  36         "R18",
  37         "R19",
  38         "R20",
  39         "R21",
  40         "R22",
  41         "R23",
  42         "R24",
  43         "R25",
  44         "R26",
  45         "R27",
  46         "R28",
  47         "R29",
  48         "g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
  49         "R31", // REGTMP, used by the assembler when it expands oversized constants; left out of init's gp mask
  50         // Floating point registers.
  51         "F0",
  52         "F1",
  53         "F2",
  54         "F3",
  55         "F4",
  56         "F5",
  57         "F6",
  58         "F7",
  59         "F8",
  60         "F9",
  61         "F10",
  62         "F11",
  63         "F12",
  64         "F13",
  65         "F14",
  66         "F15",
  67         "F16",
  68         "F17",
  69         "F18",
  70         "F19",
  71         "F20",
  72         "F21",
  73         "F22",
  74         "F23",
  75         "F24",
  76         "F25",
  77         "F26",
  78         "F27",
  79         "F28",
  80         "F29",
  81         "F30",
  82         // "F31" is deliberately omitted: the allocator is limited to 64 entries (init panics beyond that), so this FPR is sacrificed to make room for XER.
  83
  84         "XER", // carry (CA) register for the ops that consume or generate the carry
  85
  86         // If you add registers, update asyncPreempt in runtime.
  87
  88         // "CR0",
  89         // "CR1",
  90         // "CR2",
  91         // "CR3",
  92         // "CR4",
  93         // "CR5",
  94         // "CR6",
  95         // "CR7",
  96
  97         // "CR",
  98         // "LR",
  99         // "CTR",
 100 }
 101
 102 func init() {
 103         // Make map from reg names to reg integers.
 104         if len(regNamesPPC64) > 64 {
 105                 panic("too many registers")
 106         }
 107         num := map[string]int{}
 108         for i, name := range regNamesPPC64 {
 109                 num[name] = i
 110         }
 111         buildReg := func(s string) regMask {
 112                 m := regMask(0)
 113                 for _, r := range strings.Split(s, " ") {
 114                         if n, ok := num[r]; ok {
 115                                 m |= regMask(1) << uint(n)
 116                                 continue
 117                         }
 118                         panic("register " + r + " not found")
 119                 }
 120                 return m
 121         }
 122
 123         var (
 124                 gp  = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
 125                 fp  = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30")
 126                 sp  = buildReg("SP")
 127                 sb  = buildReg("SB")
 128                 gr  = buildReg("g")
 129                 xer = buildReg("XER")
 130                 // cr  = buildReg("CR")
 131                 // ctr = buildReg("CTR")
 132                 // lr  = buildReg("LR")
 133                 tmp     = buildReg("R31")
 134                 ctxt    = buildReg("R11")
 135                 callptr = buildReg("R12")
 136                 // tls = buildReg("R13")
 137                 gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
 138                 gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
 139                 xergp       = regInfo{inputs: []regMask{xer}, outputs: []regMask{gp}, clobbers: xer}
 140                 gp11cxer    = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
 141                 gp11xer     = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp, xer}}
 142                 gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
 143                 gp21a0      = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
 144                 gp21cxer    = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
 145                 gp21xer     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, xer}, clobbers: xer}
 146                 gp2xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
 147                 gp31        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
 148                 gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
 149                 gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
 150                 crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
 151                 crgp11      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
 152                 crgp21      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
 153                 gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
 154                 gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
 155                 prefreg     = regInfo{inputs: []regMask{gp | sp | sb}}
 156                 gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
 157                 gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
 158                 gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
 159                 gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
 160                 gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
 161                 fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
 162                 fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
 163                 fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
 164                 gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
 165                 fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
 166                 fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
 167                 fp2cr       = regInfo{inputs: []regMask{fp, fp}}
 168                 fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
 169                 fploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}}
 170                 fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
 171                 fpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
 172                 callerSave  = regMask(gp | fp | gr | xer)
 173                 r3          = buildReg("R3")
 174                 r4          = buildReg("R4")
 175                 r5          = buildReg("R5")
 176                 r6          = buildReg("R6")
 177         )
 178         ops := []opData{
 179                 {name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},        // arg0 + arg1
 180                 {name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"},        // arg0 + auxInt
 181                 {name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},      // arg0+arg1
 182                 {name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true},    // arg0+arg1
 183                 {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                           // arg0-arg1
 184                 {name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"}, // auxInt - arg0 (carry is ignored)
 185                 {name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                         // arg0-arg1
 186                 {name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                       // arg0-arg1
 187
 188                 {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
 189                 {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
 190                 {name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
 191                 {name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
 192                 {name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"},                  // (arg0*arg1)+arg2 (signed 64-bit)
 193
 194                 {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
 195                 {name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
 196                 {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
 197                 {name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
 198
 199                 {name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
 200                 {name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
 201
 202                 {name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
 203                 {name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
 204                 {name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
 205                 {name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
 206
 207                 {name: "SRAD", argLength: 2, reg: gp21cxer, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!)
 208                 {name: "SRAW", argLength: 2, reg: gp21cxer, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width
 209                 {name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},       // unsigned arg0 >> (arg1&127), 64 bit width
 210                 {name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},       // unsigned arg0 >> (arg1&63), 32 bit width
 211                 {name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},       // arg0 << (arg1&127), 64 bit width
 212                 {name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},       // arg0 << (arg1&63), 32 bit width
 213
 214                 {name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
 215                 {name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
 216                 // The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA.
 217                 // The constant shift values are packed into the aux int32.
 218                 {name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, //
 219                 {name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, //
 220
 221                 // Operations which consume or generate the CA (xer)
 222                 {name: "ADDC", argLength: 2, reg: gp21xer, asm: "ADDC", commutative: true, typ: "(UInt64, UInt64)"},    // arg0 + arg1 -> out, CA
 223                 {name: "SUBC", argLength: 2, reg: gp21xer, asm: "SUBC", typ: "(UInt64, UInt64)"},                       // arg0 - arg1 -> out, CA
 224                 {name: "ADDCconst", argLength: 1, reg: gp11xer, asm: "ADDC", typ: "(UInt64, UInt64)", aux: "Int64"},    // arg0 + imm16 -> out, CA
 225                 {name: "SUBCconst", argLength: 1, reg: gp11xer, asm: "SUBC", typ: "(UInt64, UInt64)", aux: "Int64"},    // imm16 - arg0 -> out, CA
 226                 {name: "ADDE", argLength: 3, reg: gp2xer1xer, asm: "ADDE", typ: "(UInt64, UInt64)", commutative: true}, // arg0 + arg1 + CA (arg2) -> out, CA
 227                 {name: "SUBE", argLength: 3, reg: gp2xer1xer, asm: "SUBE", typ: "(UInt64, UInt64)"},                    // arg0 - arg1 - CA (arg2) -> out, CA
 228                 {name: "ADDZEzero", argLength: 1, reg: xergp, asm: "ADDZE", typ: "UInt64"},                             // CA (arg0) + $0 -> out
 229                 {name: "SUBZEzero", argLength: 1, reg: xergp, asm: "SUBZE", typ: "UInt64"},                             // $0 - CA (arg0) -> out
 230
 231                 {name: "SRADconst", argLength: 1, reg: gp11cxer, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
 232                 {name: "SRAWconst", argLength: 1, reg: gp11cxer, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
 233                 {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
 234                 {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
 235                 {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width
 236                 {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width
 237
 238                 {name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
 239                 {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
 240                 {name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
 241
 242                 {name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"},                      // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
 243                 {name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"},                       // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
 244                 {name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true}, // "rlwimi" similar aux encoding as above
 245                 {name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int64"},                     // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63.
 246                 {name: "RLDICR", argLength: 1, reg: gp11, asm: "RLDICR", aux: "Int64"},                     // Likewise, but only ME and SH are valid. MB is always 0.
 247
 248                 {name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
 249                 {name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
 250
 251                 {name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
 252                 {name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
 253
 254                 {name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
 255                 {name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
 256                 {name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
 257
 258                 {name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
 259                 {name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
 260
 261                 {name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
 262                 {name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
 263                 {name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
 264                 {name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
 265
 266                 {name: "MODUD", argLength: 2, reg: gp21, asm: "MODUD", typ: "UInt64"}, // arg0 % arg1 (unsigned 64-bit)
 267                 {name: "MODSD", argLength: 2, reg: gp21, asm: "MODSD", typ: "Int64"},  // arg0 % arg1 (signed 64-bit)
 268                 {name: "MODUW", argLength: 2, reg: gp21, asm: "MODUW", typ: "UInt32"}, // arg0 % arg1 (unsigned 32-bit)
 269                 {name: "MODSW", argLength: 2, reg: gp21, asm: "MODSW", typ: "Int32"},  // arg0 % arg1 (signed 32-bit)
 270                 // MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
 271
 272                 // Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
 273                 {name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
 274                 {name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
 275                 {name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
 276                 {name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
 277                 {name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
 278
 279                 // Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
 280                 // Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
 281                 // data instead and use FMOVDload and FMOVDstore instead (this will also dodge endianness issues).
 282                 // There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
 283                 // the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
 284
 285                 {name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
 286                 {name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
 287
 288                 {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                                               // arg0&arg1
 289                 {name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                                                // arg0&^arg1
 290                 {name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, clobberFlags: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
 291                 {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                                                 // arg0|arg1
 292                 {name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                                                  // arg0|^arg1
 293                 {name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"},     // arg0|arg1 sets CC
 294                 {name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                                               // ^(arg0|arg1)
 295                 {name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},                                 // arg0^arg1
 296                 {name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"},   // arg0^arg1 sets CC
 297                 {name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},                                 // arg0^^arg1
 298                 {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                                                  // -arg0 (integer)
 299                 {name: "BRD", argLength: 1, reg: gp11, asm: "BRD"},                                                                  // reversebytes64(arg0)
 300                 {name: "BRW", argLength: 1, reg: gp11, asm: "BRW"},                                                                  // reversebytes32(arg0)
 301                 {name: "BRH", argLength: 1, reg: gp11, asm: "BRH"},                                                                  // reversebytes16(arg0)
 302                 {name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                                                // -arg0 (floating point)
 303                 {name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                                              // sqrt(arg0) (floating point)
 304                 {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                                            // sqrt(arg0) (floating point, single precision)
 305                 {name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                                              // floor(arg0), float64
 306                 {name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                                               // ceil(arg0), float64
 307                 {name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                                              // trunc(arg0), float64
 308                 {name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                                              // round(arg0), float64
 309                 {name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                                                // abs(arg0), float64
 310                 {name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                                              // -abs(arg0), float64
 311                 {name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                                            // copysign arg0 -> arg1, float64
 312
 313                 {name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},   // arg0|aux
 314                 {name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux
 315                 {name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", clobberFlags: true, typ: "(Int,Flags)"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always.
 316
 317                 {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},   // sign extend int8 to int64
 318                 {name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
 319                 {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},   // sign extend int16 to int64
 320                 {name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
 321                 {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},   // sign extend int32 to int64
 322                 {name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
 323
 324                 // Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register.
 325                 {name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte zero extend
 326                 {name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // load 2 bytes sign extend
 327                 {name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend
 328                 {name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // load 4 bytes sign extend
 329                 {name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend
 330                 {name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},    // load 8 bytes
 331
 332                 // Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
 333                 // The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
 334                 // In these cases the index register field is set to 0 and the full address is in the base register.
 335                 {name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", typ: "UInt64", faultOnNilArg0: true}, // load 8 bytes reverse order
 336                 {name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", typ: "UInt32", faultOnNilArg0: true}, // load 4 bytes zero extend reverse order
 337                 {name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", typ: "UInt16", faultOnNilArg0: true}, // load 2 bytes zero extend reverse order
 338
 339                 // In these cases an index register is used in addition to a base register
 340                 // Loads from memory location arg[0] + arg[1].
 341                 {name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", typ: "UInt8"},  // zero extend uint8 to uint64
 342                 {name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", typ: "Int16"},    // sign extend int16 to int64
 343                 {name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", typ: "UInt16"}, // zero extend uint16 to uint64
 344                 {name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", typ: "Int32"},    // sign extend int32 to int64
 345                 {name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", typ: "UInt32"}, // zero extend uint32 to uint64
 346                 {name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", typ: "Int64"},
 347                 {name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", typ: "Int16"}, // sign extend int16 to int64
 348                 {name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", typ: "Int32"}, // sign extend int32 to int64
 349                 {name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", typ: "Int64"},
 350                 {name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", typ: "Float64"},
 351                 {name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", typ: "Float32"},
 352
 353                 // Prefetch instruction
 354                 // Do prefetch of address generated with arg0 and arg1 with option aux. arg0=addr,arg1=memory, aux=option.
 355                 {name: "DCBT", argLength: 2, aux: "Int64", reg: prefreg, asm: "DCBT", hasSideEffects: true},
 356
 357                 // Store bytes in the reverse endian order of the arch into arg0.
 358                 // These are indexed stores with no offset field in the instruction so the auxint fields are not used.
 359                 {name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes reverse order
 360                 {name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes reverse order
 361                 {name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes reverse order
 362
 363                 // Floating point loads from arg0+aux+auxint
 364                 {name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
 365                 {name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float
 366
 367                 // Store bytes in the endian order of the arch into arg0+aux+auxint
 368                 {name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte
 369                 {name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes
 370                 {name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes
 371                 {name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes
 372
 373                 // Store floating point value into arg0+aux+auxint
 374                 {name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double float
 375                 {name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float
 376
 377                 // Stores using index and base registers
 378                 // Stores to arg[0] + arg[1]
 379                 {name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", typ: "Mem"},     // store byte
 380                 {name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", typ: "Mem"},     // store half word
 381                 {name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", typ: "Mem"},     // store word
 382                 {name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", typ: "Mem"},     // store double word
 383                 {name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", typ: "Mem"},   // store double float
 384                 {name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", typ: "Mem"},   // store single float
 385                 {name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", typ: "Mem"}, // store half word reversed byte using index reg
 386                 {name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", typ: "Mem"}, // store word reversed byte using index reg
 387                 {name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", typ: "Mem"}, // store double word reversed byte using index reg
 388
 389                 // The following ops store 0 into arg0+aux+auxint arg1=mem
 390                 {name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte
 391                 {name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes
 392                 {name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes
 393                 {name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes
 394
 395                 {name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP
 396
 397                 {name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
 398                 {name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
 399                 {name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
 400                 {name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
 401
 402                 {name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
 403                 {name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
 404                 {name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
 405                 {name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
 406                 {name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
 407                 {name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
 408                 {name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
 409                 {name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
 410
 411                 // ISEL  arg2 ? arg0 : arg1
 412                 // ISELZ arg1 ? arg0 : $0
 413                 // auxInt values 0=LT 1=GT 2=EQ 3=SO (summary overflow/unordered) 4=GE 5=LE 6=NE 7=NSO (not summary overflow/not unordered)
 414                 // Note, auxInt^4 inverts the comparison condition. For example, LT^4 becomes GE, and "ISEL [a] x y z" is equivalent to "ISEL [a^4] y x z".
 415                 {name: "ISEL", argLength: 3, reg: crgp21, asm: "ISEL", aux: "Int32", typ: "Int32"},
 416                 {name: "ISELZ", argLength: 2, reg: crgp11, asm: "ISEL", aux: "Int32"},
 417
 418                 // SETBC auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 1 : 0
 419                 {name: "SETBC", argLength: 1, reg: crgp, asm: "SETBC", aux: "Int32", typ: "Int32"},
 420                 // SETBCR auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 0 : 1
 421                 {name: "SETBCR", argLength: 1, reg: crgp, asm: "SETBCR", aux: "Int32", typ: "Int32"},
 422
 423                 // pseudo-ops
 424                 {name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
 425                 {name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
 426                 {name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
 427                 {name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
 428                 {name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
 429                 {name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
 430                 {name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
 431                 {name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
 432                 {name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
 433                 {name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise; PPC >= === !< which is wrong for NaN
 434
 435                 // Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
 436                 // and sorts it to the very beginning of the block to prevent other
 437                 // use of the closure pointer.
 438                 {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true},
 439
 440                 // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem.
 441                 {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true},
 442
 443                 // LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
 444                 // I.e., if f calls g and g "calls" getcallerpc,
 445                 // the result should be the PC within f that g will return to.
 446                 // See runtime/stubs.go for a more detailed discussion.
 447                 {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
 448
 449                 // arg0=ptr, arg1=mem, returns void.  Faults if ptr is nil.
 450                 {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
 451                 // Round ops to block fused-multiply-add extraction.
 452                 {name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
 453                 {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
 454
 455                 {name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                       // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
 456                 {name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                         // tail call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
 457                 {name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
 458                 {name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},            // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
 459
 460                 // large or unaligned zeroing
 461                 // arg0 = address of memory to zero (in R20, changed as side effect; shown as R3 in the sketch below)
 462                 // returns mem
 463                 //
 464                 // a loop is generated when there is more than one iteration
 465                 // needed to clear 4 doublewords
 466                 //
 467                 //      XXLXOR  VS32,VS32,VS32
 468                 //      MOVD    $len/32,R31
 469                 //      MOVD    R31,CTR
 470                 //      MOVD    $16,R31
 471                 //      loop:
 472                 //      STXVD2X VS32,(R0)(R3)
 473                 //      STXVD2X VS32,(R31)(R3)
 474                 //      ADD     R3,32
 475                 //      BC      loop
 476
 477                 // remaining doubleword clears generated as needed
 478                 //      MOVD    R0,(R3)
 479                 //      MOVD    R0,8(R3)
 480                 //      MOVD    R0,16(R3)
 481                 //      MOVD    R0,24(R3)
 482
 483                 // one or more of these to clear remainder < 8 bytes
 484                 //      MOVW    R0,n1(R3)
 485                 //      MOVH    R0,n2(R3)
 486                 //      MOVB    R0,n3(R3)
 487                 {
 488                         name:      "LoweredZero",
 489                         aux:       "Int64",
 490                         argLength: 2,
 491                         reg: regInfo{
 492                                 inputs:   []regMask{buildReg("R20")},
 493                                 clobbers: buildReg("R20"),
 494                         },
 495                         clobberFlags:   true,
 496                         typ:            "Mem",
 497                         faultOnNilArg0: true,
 498                         unsafePoint:    true,
 499                 },
 500                 {
 501                         name:      "LoweredZeroShort",
 502                         aux:       "Int64",
 503                         argLength: 2,
 504                         reg: regInfo{
 505                                 inputs: []regMask{gp}},
 506                         typ:            "Mem",
 507                         faultOnNilArg0: true,
 508                         unsafePoint:    true,
 509                 },
 510                 {
 511                         name:      "LoweredQuadZeroShort",
 512                         aux:       "Int64",
 513                         argLength: 2,
 514                         reg: regInfo{
 515                                 inputs: []regMask{gp},
 516                         },
 517                         typ:            "Mem",
 518                         faultOnNilArg0: true,
 519                         unsafePoint:    true,
 520                 },
 521                 {
 522                         name:      "LoweredQuadZero",
 523                         aux:       "Int64",
 524                         argLength: 2,
 525                         reg: regInfo{
 526                                 inputs:   []regMask{buildReg("R20")},
 527                                 clobbers: buildReg("R20"),
 528                         },
 529                         clobberFlags:   true,
 530                         typ:            "Mem",
 531                         faultOnNilArg0: true,
 532                         unsafePoint:    true,
 533                 },
 534
 535                 // R31 is temp register
 536                 // Loop code:
 537                 //      MOVD len/32,R31         set up loop ctr
 538                 //      MOVD R31,CTR
 539                 //      MOVD $16,R31            index register
 540                 // loop:
 541                 //      LXVD2X (R0)(R4),VS32
 542                 //      LXVD2X (R31)(R4),VS33
 543                 //      ADD  R4,$32          increment src
 544                 //      STXVD2X VS32,(R0)(R3)
 545                 //      STXVD2X VS33,(R31)(R3)
 546                 //      ADD  R3,$32          increment dst
 547                 //      BC 16,0,loop         branch ctr
 548                 // For this purpose, VS32 and VS33 are treated as
 549                 // scratch registers. Since regalloc does not
 550                 // track vector registers, even if it could be marked
 551                 // as clobbered it would have no effect.
 552                 // TODO: If vector registers are managed by regalloc
 553                 // mark these as clobbered.
 554                 //
 555                 // Bytes not moved by this loop are moved
 556                 // with a combination of the following instructions,
 557                 // starting with the largest sizes and generating as
 558                 // many as needed, using the appropriate offset value.
 559                 //      MOVD  n(R4),R14
 560                 //      MOVD  R14,n(R3)
 561                 //      MOVW  n1(R4),R14
 562                 //      MOVW  R14,n1(R3)
 563                 //      MOVH  n2(R4),R14
 564                 //      MOVH  R14,n2(R3)
 565                 //      MOVB  n3(R4),R14
 566                 //      MOVB  R14,n3(R3)
 567
 568                 {
 569                         name:      "LoweredMove",
 570                         aux:       "Int64",
 571                         argLength: 3,
 572                         reg: regInfo{
 573                                 inputs:   []regMask{buildReg("R20"), buildReg("R21")},
 574                                 clobbers: buildReg("R20 R21"),
 575                         },
 576                         clobberFlags:   true,
 577                         typ:            "Mem",
 578                         faultOnNilArg0: true,
 579                         faultOnNilArg1: true,
 580                         unsafePoint:    true,
 581                 },
 582                 {
 583                         name:      "LoweredMoveShort",
 584                         aux:       "Int64",
 585                         argLength: 3,
 586                         reg: regInfo{
 587                                 inputs: []regMask{gp, gp},
 588                         },
 589                         typ:            "Mem",
 590                         faultOnNilArg0: true,
 591                         faultOnNilArg1: true,
 592                         unsafePoint:    true,
 593                 },
 594
 595                 // The following is similar to the LoweredMove, but uses
 596                 // LXV instead of LXVD2X, which does not require an index
 597                 // register and will do 4 in a loop instead of only 2.
 598                 {
 599                         name:      "LoweredQuadMove",
 600                         aux:       "Int64",
 601                         argLength: 3,
 602                         reg: regInfo{
 603                                 inputs:   []regMask{buildReg("R20"), buildReg("R21")},
 604                                 clobbers: buildReg("R20 R21"),
 605                         },
 606                         clobberFlags:   true,
 607                         typ:            "Mem",
 608                         faultOnNilArg0: true,
 609                         faultOnNilArg1: true,
 610                         unsafePoint:    true,
 611                 },
 612
 613                 {
 614                         name:      "LoweredQuadMoveShort",
 615                         aux:       "Int64",
 616                         argLength: 3,
 617                         reg: regInfo{
 618                                 inputs: []regMask{gp, gp},
 619                         },
 620                         typ:            "Mem",
 621                         faultOnNilArg0: true,
 622                         faultOnNilArg1: true,
 623                         unsafePoint:    true,
 624                 },
 625
 626                 {name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
 627                 {name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
 628                 {name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
 629
 630                 {name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, typ: "UInt8", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
 631                 {name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
 632                 {name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
 633                 {name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
 634
 635                 // atomic add32, 64
 636                 // LWSYNC
 637                 // LDAR         (Rarg0), Rout
 638                 // ADD          Rarg1, Rout
 639                 // STDCCC       Rout, (Rarg0)
 640                 // BNE          -3(PC)
 641                 // return new sum
 642                 {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 643                 {name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 644
 645                 // atomic exchange32, 64
 646                 // LWSYNC
 647                 // LDAR         (Rarg0), Rout
 648                 // STDCCC       Rarg1, (Rarg0)
 649                 // BNE          -2(PC)
 650                 // ISYNC
 651                 // return old val
 652                 {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 653                 {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 654
 655                 // atomic compare and swap.
 656                 // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
 657                 // if *arg0 == arg1 {
 658                 //   *arg0 = arg2
 659                 //   return (true, memory)
 660                 // } else {
 661                 //   return (false, memory)
 662                 // }
 663                 // SYNC
 664                 // LDAR         (Rarg0), Rtmp
 665                 // CMP          Rarg1, Rtmp
 666                 // BNE          3(PC)
 667                 // STDCCC       Rarg2, (Rarg0)
 668                 // BNE          -4(PC)
 669                 // CBNZ         Rtmp, -4(PC)
 670                 // CSET         EQ, Rout
 671                 {name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 672                 {name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 673
 674                 // atomic 8/32 and/or.
 675                 // *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
 676                 // LBAR/LWAT    (Rarg0), Rtmp
 677                 // AND/OR       Rarg1, Rtmp
 678                 // STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
 679                 // BNE          Rtmp, -3(PC)
 680                 {name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
 681                 {name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
 682                 {name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
 683                 {name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
 684
 685                 // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
 686                 // It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and R20 and R21,
 687                 // but may clobber anything else, including R31 (REGTMP).
 688                 // Returns a pointer to a write barrier buffer in R29.
 689                 {name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"},
 690
 691                 {name: "LoweredPubBarrier", argLength: 1, asm: "LWSYNC", hasSideEffects: true}, // Do data barrier. arg0=memory
 692                 // There are three of these functions so that they can have three different register inputs.
 693                 // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
 694                 // default registers to match so we don't need to copy registers around unnecessarily.
 695                 {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r6}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
 696                 {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r5}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
 697                 {name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
 698
 699                 // (InvertFlags (CMP a b)) == (CMP b a)
 700                 // So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
 701                 // then we do (LessThan (InvertFlags (CMP b a))) instead.
 702                 // Rewrites will convert this to (GreaterThan (CMP b a)).
 703                 // InvertFlags is a pseudo-op which can't appear in assembly output.
 704                 {name: "InvertFlags", argLength: 1}, // reverse direction of arg0
 705
 706                 // Constant flag values. For any comparison, there are 3 possible
 707                 // outcomes: either the three from the signed total order (<,==,>)
 708                 // or the three from the unsigned total order, depending on which
 709                 // comparison operation was used (CMP or CMPU -- PPC is different from
 710                 // the other architectures, which have a single comparison producing
 711                 // both signed and unsigned comparison results.)
 712
 713                 // These ops are for temporary use by rewrite rules. They
 714                 // cannot appear in the generated assembly.
 715                 {name: "FlagEQ"}, // equal
 716                 {name: "FlagLT"}, // signed < or unsigned <
 717                 {name: "FlagGT"}, // signed > or unsigned >
 718         }
 719
 720         blocks := []blockData{
 721                 {name: "EQ", controls: 1},
 722                 {name: "NE", controls: 1},
 723                 {name: "LT", controls: 1},
 724                 {name: "LE", controls: 1},
 725                 {name: "GT", controls: 1},
 726                 {name: "GE", controls: 1},
 727                 {name: "FLT", controls: 1},
 728                 {name: "FLE", controls: 1},
 729                 {name: "FGT", controls: 1},
 730                 {name: "FGE", controls: 1},
 731         }
 732
 733         archs = append(archs, arch{
 734                 name:               "PPC64",
 735                 pkg:                "cmd/internal/obj/ppc64",
 736                 genfile:            "../../ppc64/ssa.go",
 737                 ops:                ops,
 738                 blocks:             blocks,
 739                 regnames:           regNamesPPC64,
 740                 ParamIntRegNames:   "R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17",
 741                 ParamFloatRegNames: "F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12",
 742                 gpregmask:          gp,
 743                 fpregmask:          fp,
 744                 specialregmask:     xer,
 745                 framepointerreg:    -1,
 746                 linkreg:            -1, // not used
 747         })
 748 }