]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
cmd/compile,cmd/asm: fix ppc64 usage of BI argument of BC opcode
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/objw"
12         "cmd/compile/internal/ssa"
13         "cmd/compile/internal/ssagen"
14         "cmd/compile/internal/types"
15         "cmd/internal/obj"
16         "cmd/internal/obj/ppc64"
17         "internal/buildcfg"
18         "math"
19         "strings"
20 )
21
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24         //      flive := b.FlagsLiveAtEnd
25         //      if b.Control != nil && b.Control.Type.IsFlags() {
26         //              flive = true
27         //      }
28         //      for i := len(b.Values) - 1; i >= 0; i-- {
29         //              v := b.Values[i]
30         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
31         //                      // The "mark" is any non-nil Aux value.
32         //                      v.Aux = v
33         //              }
34         //              if v.Type.IsFlags() {
35         //                      flive = false
36         //              }
37         //              for _, a := range v.Args {
38         //                      if a.Type.IsFlags() {
39         //                              flive = true
40         //                      }
41         //              }
42         //      }
43 }
44
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
47         if t.IsFloat() {
48                 switch t.Size() {
49                 case 4:
50                         return ppc64.AFMOVS
51                 case 8:
52                         return ppc64.AFMOVD
53                 }
54         } else {
55                 switch t.Size() {
56                 case 1:
57                         if t.IsSigned() {
58                                 return ppc64.AMOVB
59                         } else {
60                                 return ppc64.AMOVBZ
61                         }
62                 case 2:
63                         if t.IsSigned() {
64                                 return ppc64.AMOVH
65                         } else {
66                                 return ppc64.AMOVHZ
67                         }
68                 case 4:
69                         if t.IsSigned() {
70                                 return ppc64.AMOVW
71                         } else {
72                                 return ppc64.AMOVWZ
73                         }
74                 case 8:
75                         return ppc64.AMOVD
76                 }
77         }
78         panic("bad load type")
79 }
80
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
83         if t.IsFloat() {
84                 switch t.Size() {
85                 case 4:
86                         return ppc64.AFMOVS
87                 case 8:
88                         return ppc64.AFMOVD
89                 }
90         } else {
91                 switch t.Size() {
92                 case 1:
93                         return ppc64.AMOVB
94                 case 2:
95                         return ppc64.AMOVH
96                 case 4:
97                         return ppc64.AMOVW
98                 case 8:
99                         return ppc64.AMOVD
100                 }
101         }
102         panic("bad store type")
103 }
104
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
106         switch v.Op {
107         case ssa.OpCopy:
108                 t := v.Type
109                 if t.IsMemory() {
110                         return
111                 }
112                 x := v.Args[0].Reg()
113                 y := v.Reg()
114                 if x != y {
115                         rt := obj.TYPE_REG
116                         op := ppc64.AMOVD
117
118                         if t.IsFloat() {
119                                 op = ppc64.AFMOVD
120                         }
121                         p := s.Prog(op)
122                         p.From.Type = rt
123                         p.From.Reg = x
124                         p.To.Type = rt
125                         p.To.Reg = y
126                 }
127
128         case ssa.OpPPC64LoweredMuluhilo:
129                 // MULHDU       Rarg1, Rarg0, Reg0
130                 // MULLD        Rarg1, Rarg0, Reg1
131                 r0 := v.Args[0].Reg()
132                 r1 := v.Args[1].Reg()
133                 p := s.Prog(ppc64.AMULHDU)
134                 p.From.Type = obj.TYPE_REG
135                 p.From.Reg = r1
136                 p.Reg = r0
137                 p.To.Type = obj.TYPE_REG
138                 p.To.Reg = v.Reg0()
139                 p1 := s.Prog(ppc64.AMULLD)
140                 p1.From.Type = obj.TYPE_REG
141                 p1.From.Reg = r1
142                 p1.Reg = r0
143                 p1.To.Type = obj.TYPE_REG
144                 p1.To.Reg = v.Reg1()
145
146         case ssa.OpPPC64LoweredAdd64Carry:
147                 // ADDC         Rarg2, -1, Rtmp
148                 // ADDE         Rarg1, Rarg0, Reg0
149                 // ADDZE        Rzero, Reg1
150                 r0 := v.Args[0].Reg()
151                 r1 := v.Args[1].Reg()
152                 r2 := v.Args[2].Reg()
153                 p := s.Prog(ppc64.AADDC)
154                 p.From.Type = obj.TYPE_CONST
155                 p.From.Offset = -1
156                 p.Reg = r2
157                 p.To.Type = obj.TYPE_REG
158                 p.To.Reg = ppc64.REGTMP
159                 p1 := s.Prog(ppc64.AADDE)
160                 p1.From.Type = obj.TYPE_REG
161                 p1.From.Reg = r1
162                 p1.Reg = r0
163                 p1.To.Type = obj.TYPE_REG
164                 p1.To.Reg = v.Reg0()
165                 p2 := s.Prog(ppc64.AADDZE)
166                 p2.From.Type = obj.TYPE_REG
167                 p2.From.Reg = ppc64.REGZERO
168                 p2.To.Type = obj.TYPE_REG
169                 p2.To.Reg = v.Reg1()
170
171         case ssa.OpPPC64LoweredAtomicAnd8,
172                 ssa.OpPPC64LoweredAtomicAnd32,
173                 ssa.OpPPC64LoweredAtomicOr8,
174                 ssa.OpPPC64LoweredAtomicOr32:
175                 // LWSYNC
176                 // LBAR/LWAR    (Rarg0), Rtmp
177                 // AND/OR       Rarg1, Rtmp
178                 // STBCCC/STWCCC Rtmp, (Rarg0)
179                 // BNE          -3(PC)
180                 ld := ppc64.ALBAR
181                 st := ppc64.ASTBCCC
182                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
183                         ld = ppc64.ALWAR
184                         st = ppc64.ASTWCCC
185                 }
186                 r0 := v.Args[0].Reg()
187                 r1 := v.Args[1].Reg()
188                 // LWSYNC - Assuming shared data not write-through-required nor
189                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
190                 plwsync := s.Prog(ppc64.ALWSYNC)
191                 plwsync.To.Type = obj.TYPE_NONE
192                 // LBAR or LWAR
193                 p := s.Prog(ld)
194                 p.From.Type = obj.TYPE_MEM
195                 p.From.Reg = r0
196                 p.To.Type = obj.TYPE_REG
197                 p.To.Reg = ppc64.REGTMP
198                 // AND/OR reg1,out
199                 p1 := s.Prog(v.Op.Asm())
200                 p1.From.Type = obj.TYPE_REG
201                 p1.From.Reg = r1
202                 p1.To.Type = obj.TYPE_REG
203                 p1.To.Reg = ppc64.REGTMP
204                 // STBCCC or STWCCC
205                 p2 := s.Prog(st)
206                 p2.From.Type = obj.TYPE_REG
207                 p2.From.Reg = ppc64.REGTMP
208                 p2.To.Type = obj.TYPE_MEM
209                 p2.To.Reg = r0
210                 p2.RegTo2 = ppc64.REGTMP
211                 // BNE retry
212                 p3 := s.Prog(ppc64.ABNE)
213                 p3.To.Type = obj.TYPE_BRANCH
214                 p3.To.SetTarget(p)
215
216         case ssa.OpPPC64LoweredAtomicAdd32,
217                 ssa.OpPPC64LoweredAtomicAdd64:
218                 // LWSYNC
219                 // LDAR/LWAR    (Rarg0), Rout
220                 // ADD          Rarg1, Rout
221                 // STDCCC/STWCCC Rout, (Rarg0)
222                 // BNE         -3(PC)
223                 // MOVW         Rout,Rout (if Add32)
224                 ld := ppc64.ALDAR
225                 st := ppc64.ASTDCCC
226                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
227                         ld = ppc64.ALWAR
228                         st = ppc64.ASTWCCC
229                 }
230                 r0 := v.Args[0].Reg()
231                 r1 := v.Args[1].Reg()
232                 out := v.Reg0()
233                 // LWSYNC - Assuming shared data not write-through-required nor
234                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
235                 plwsync := s.Prog(ppc64.ALWSYNC)
236                 plwsync.To.Type = obj.TYPE_NONE
237                 // LDAR or LWAR
238                 p := s.Prog(ld)
239                 p.From.Type = obj.TYPE_MEM
240                 p.From.Reg = r0
241                 p.To.Type = obj.TYPE_REG
242                 p.To.Reg = out
243                 // ADD reg1,out
244                 p1 := s.Prog(ppc64.AADD)
245                 p1.From.Type = obj.TYPE_REG
246                 p1.From.Reg = r1
247                 p1.To.Reg = out
248                 p1.To.Type = obj.TYPE_REG
249                 // STDCCC or STWCCC
250                 p3 := s.Prog(st)
251                 p3.From.Type = obj.TYPE_REG
252                 p3.From.Reg = out
253                 p3.To.Type = obj.TYPE_MEM
254                 p3.To.Reg = r0
255                 // BNE retry
256                 p4 := s.Prog(ppc64.ABNE)
257                 p4.To.Type = obj.TYPE_BRANCH
258                 p4.To.SetTarget(p)
259
260                 // Ensure a 32 bit result
261                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
262                         p5 := s.Prog(ppc64.AMOVWZ)
263                         p5.To.Type = obj.TYPE_REG
264                         p5.To.Reg = out
265                         p5.From.Type = obj.TYPE_REG
266                         p5.From.Reg = out
267                 }
268
269         case ssa.OpPPC64LoweredAtomicExchange32,
270                 ssa.OpPPC64LoweredAtomicExchange64:
271                 // LWSYNC
272                 // LDAR/LWAR    (Rarg0), Rout
273                 // STDCCC/STWCCC Rout, (Rarg0)
274                 // BNE         -2(PC)
275                 // ISYNC
276                 ld := ppc64.ALDAR
277                 st := ppc64.ASTDCCC
278                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
279                         ld = ppc64.ALWAR
280                         st = ppc64.ASTWCCC
281                 }
282                 r0 := v.Args[0].Reg()
283                 r1 := v.Args[1].Reg()
284                 out := v.Reg0()
285                 // LWSYNC - Assuming shared data not write-through-required nor
286                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
287                 plwsync := s.Prog(ppc64.ALWSYNC)
288                 plwsync.To.Type = obj.TYPE_NONE
289                 // LDAR or LWAR
290                 p := s.Prog(ld)
291                 p.From.Type = obj.TYPE_MEM
292                 p.From.Reg = r0
293                 p.To.Type = obj.TYPE_REG
294                 p.To.Reg = out
295                 // STDCCC or STWCCC
296                 p1 := s.Prog(st)
297                 p1.From.Type = obj.TYPE_REG
298                 p1.From.Reg = r1
299                 p1.To.Type = obj.TYPE_MEM
300                 p1.To.Reg = r0
301                 // BNE retry
302                 p2 := s.Prog(ppc64.ABNE)
303                 p2.To.Type = obj.TYPE_BRANCH
304                 p2.To.SetTarget(p)
305                 // ISYNC
306                 pisync := s.Prog(ppc64.AISYNC)
307                 pisync.To.Type = obj.TYPE_NONE
308
309         case ssa.OpPPC64LoweredAtomicLoad8,
310                 ssa.OpPPC64LoweredAtomicLoad32,
311                 ssa.OpPPC64LoweredAtomicLoad64,
312                 ssa.OpPPC64LoweredAtomicLoadPtr:
313                 // SYNC
314                 // MOVB/MOVD/MOVW (Rarg0), Rout
315                 // CMP Rout,Rout
316                 // BNE 1(PC)
317                 // ISYNC
318                 ld := ppc64.AMOVD
319                 cmp := ppc64.ACMP
320                 switch v.Op {
321                 case ssa.OpPPC64LoweredAtomicLoad8:
322                         ld = ppc64.AMOVBZ
323                 case ssa.OpPPC64LoweredAtomicLoad32:
324                         ld = ppc64.AMOVWZ
325                         cmp = ppc64.ACMPW
326                 }
327                 arg0 := v.Args[0].Reg()
328                 out := v.Reg0()
329                 // SYNC when AuxInt == 1; otherwise, load-acquire
330                 if v.AuxInt == 1 {
331                         psync := s.Prog(ppc64.ASYNC)
332                         psync.To.Type = obj.TYPE_NONE
333                 }
334                 // Load
335                 p := s.Prog(ld)
336                 p.From.Type = obj.TYPE_MEM
337                 p.From.Reg = arg0
338                 p.To.Type = obj.TYPE_REG
339                 p.To.Reg = out
340                 // CMP
341                 p1 := s.Prog(cmp)
342                 p1.From.Type = obj.TYPE_REG
343                 p1.From.Reg = out
344                 p1.To.Type = obj.TYPE_REG
345                 p1.To.Reg = out
346                 // BNE
347                 p2 := s.Prog(ppc64.ABNE)
348                 p2.To.Type = obj.TYPE_BRANCH
349                 // ISYNC
350                 pisync := s.Prog(ppc64.AISYNC)
351                 pisync.To.Type = obj.TYPE_NONE
352                 p2.To.SetTarget(pisync)
353
354         case ssa.OpPPC64LoweredAtomicStore8,
355                 ssa.OpPPC64LoweredAtomicStore32,
356                 ssa.OpPPC64LoweredAtomicStore64:
357                 // SYNC or LWSYNC
358                 // MOVB/MOVW/MOVD arg1,(arg0)
359                 st := ppc64.AMOVD
360                 switch v.Op {
361                 case ssa.OpPPC64LoweredAtomicStore8:
362                         st = ppc64.AMOVB
363                 case ssa.OpPPC64LoweredAtomicStore32:
364                         st = ppc64.AMOVW
365                 }
366                 arg0 := v.Args[0].Reg()
367                 arg1 := v.Args[1].Reg()
368                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
369                 // SYNC
370                 syncOp := ppc64.ASYNC
371                 if v.AuxInt == 0 {
372                         syncOp = ppc64.ALWSYNC
373                 }
374                 psync := s.Prog(syncOp)
375                 psync.To.Type = obj.TYPE_NONE
376                 // Store
377                 p := s.Prog(st)
378                 p.To.Type = obj.TYPE_MEM
379                 p.To.Reg = arg0
380                 p.From.Type = obj.TYPE_REG
381                 p.From.Reg = arg1
382
383         case ssa.OpPPC64LoweredAtomicCas64,
384                 ssa.OpPPC64LoweredAtomicCas32:
385                 // LWSYNC
386                 // loop:
387                 // LDAR        (Rarg0), MutexHint, Rtmp
388                 // CMP         Rarg1, Rtmp
389                 // BNE         fail
390                 // STDCCC      Rarg2, (Rarg0)
391                 // BNE         loop
392                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
393                 // MOVD        $1, Rout
394                 // BR          end
395                 // fail:
396                 // MOVD        $0, Rout
397                 // end:
398                 ld := ppc64.ALDAR
399                 st := ppc64.ASTDCCC
400                 cmp := ppc64.ACMP
401                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
402                         ld = ppc64.ALWAR
403                         st = ppc64.ASTWCCC
404                         cmp = ppc64.ACMPW
405                 }
406                 r0 := v.Args[0].Reg()
407                 r1 := v.Args[1].Reg()
408                 r2 := v.Args[2].Reg()
409                 out := v.Reg0()
410                 // LWSYNC - Assuming shared data not write-through-required nor
411                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
412                 plwsync1 := s.Prog(ppc64.ALWSYNC)
413                 plwsync1.To.Type = obj.TYPE_NONE
414                 // LDAR or LWAR
415                 p := s.Prog(ld)
416                 p.From.Type = obj.TYPE_MEM
417                 p.From.Reg = r0
418                 p.To.Type = obj.TYPE_REG
419                 p.To.Reg = ppc64.REGTMP
420                 // If it is a Compare-and-Swap-Release operation, set the EH field with
421                 // the release hint.
422                 if v.AuxInt == 0 {
423                         p.SetFrom3Const(0)
424                 }
425                 // CMP reg1,reg2
426                 p1 := s.Prog(cmp)
427                 p1.From.Type = obj.TYPE_REG
428                 p1.From.Reg = r1
429                 p1.To.Reg = ppc64.REGTMP
430                 p1.To.Type = obj.TYPE_REG
431                 // BNE cas_fail
432                 p2 := s.Prog(ppc64.ABNE)
433                 p2.To.Type = obj.TYPE_BRANCH
434                 // STDCCC or STWCCC
435                 p3 := s.Prog(st)
436                 p3.From.Type = obj.TYPE_REG
437                 p3.From.Reg = r2
438                 p3.To.Type = obj.TYPE_MEM
439                 p3.To.Reg = r0
440                 // BNE retry
441                 p4 := s.Prog(ppc64.ABNE)
442                 p4.To.Type = obj.TYPE_BRANCH
443                 p4.To.SetTarget(p)
444                 // LWSYNC - Assuming shared data not write-through-required nor
445                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
446                 // If the operation is a CAS-Release, then synchronization is not necessary.
447                 if v.AuxInt != 0 {
448                         plwsync2 := s.Prog(ppc64.ALWSYNC)
449                         plwsync2.To.Type = obj.TYPE_NONE
450                 }
451                 // return true
452                 p5 := s.Prog(ppc64.AMOVD)
453                 p5.From.Type = obj.TYPE_CONST
454                 p5.From.Offset = 1
455                 p5.To.Type = obj.TYPE_REG
456                 p5.To.Reg = out
457                 // BR done
458                 p6 := s.Prog(obj.AJMP)
459                 p6.To.Type = obj.TYPE_BRANCH
460                 // return false
461                 p7 := s.Prog(ppc64.AMOVD)
462                 p7.From.Type = obj.TYPE_CONST
463                 p7.From.Offset = 0
464                 p7.To.Type = obj.TYPE_REG
465                 p7.To.Reg = out
466                 p2.To.SetTarget(p7)
467                 // done (label)
468                 p8 := s.Prog(obj.ANOP)
469                 p6.To.SetTarget(p8)
470
471         case ssa.OpPPC64LoweredPubBarrier:
472                 // LWSYNC
473                 s.Prog(v.Op.Asm())
474
475         case ssa.OpPPC64LoweredGetClosurePtr:
476                 // Closure pointer is R11 (already)
477                 ssagen.CheckLoweredGetClosurePtr(v)
478
479         case ssa.OpPPC64LoweredGetCallerSP:
480                 // caller's SP is FixedFrameSize below the address of the first arg
481                 p := s.Prog(ppc64.AMOVD)
482                 p.From.Type = obj.TYPE_ADDR
483                 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
484                 p.From.Name = obj.NAME_PARAM
485                 p.To.Type = obj.TYPE_REG
486                 p.To.Reg = v.Reg()
487
488         case ssa.OpPPC64LoweredGetCallerPC:
489                 p := s.Prog(obj.AGETCALLERPC)
490                 p.To.Type = obj.TYPE_REG
491                 p.To.Reg = v.Reg()
492
493         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
494                 // input is already rounded
495
496         case ssa.OpLoadReg:
497                 loadOp := loadByType(v.Type)
498                 p := s.Prog(loadOp)
499                 ssagen.AddrAuto(&p.From, v.Args[0])
500                 p.To.Type = obj.TYPE_REG
501                 p.To.Reg = v.Reg()
502
503         case ssa.OpStoreReg:
504                 storeOp := storeByType(v.Type)
505                 p := s.Prog(storeOp)
506                 p.From.Type = obj.TYPE_REG
507                 p.From.Reg = v.Args[0].Reg()
508                 ssagen.AddrAuto(&p.To, v)
509
510         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
511                 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
512                 // The loop only runs once.
513                 for _, a := range v.Block.Func.RegArgs {
514                         // Pass the spill/unspill information along to the assembler, offset by size of
515                         // the saved LR slot.
516                         addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
517                         s.FuncInfo().AddSpill(
518                                 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
519                 }
520                 v.Block.Func.RegArgs = nil
521
522                 ssagen.CheckArgReg(v)
523
524         case ssa.OpPPC64DIVD:
525                 // For now,
526                 //
527                 // cmp arg1, -1
528                 // be  ahead
529                 // v = arg0 / arg1
530                 // b over
531                 // ahead: v = - arg0
532                 // over: nop
533                 r := v.Reg()
534                 r0 := v.Args[0].Reg()
535                 r1 := v.Args[1].Reg()
536
537                 p := s.Prog(ppc64.ACMP)
538                 p.From.Type = obj.TYPE_REG
539                 p.From.Reg = r1
540                 p.To.Type = obj.TYPE_CONST
541                 p.To.Offset = -1
542
543                 pbahead := s.Prog(ppc64.ABEQ)
544                 pbahead.To.Type = obj.TYPE_BRANCH
545
546                 p = s.Prog(v.Op.Asm())
547                 p.From.Type = obj.TYPE_REG
548                 p.From.Reg = r1
549                 p.Reg = r0
550                 p.To.Type = obj.TYPE_REG
551                 p.To.Reg = r
552
553                 pbover := s.Prog(obj.AJMP)
554                 pbover.To.Type = obj.TYPE_BRANCH
555
556                 p = s.Prog(ppc64.ANEG)
557                 p.To.Type = obj.TYPE_REG
558                 p.To.Reg = r
559                 p.From.Type = obj.TYPE_REG
560                 p.From.Reg = r0
561                 pbahead.To.SetTarget(p)
562
563                 p = s.Prog(obj.ANOP)
564                 pbover.To.SetTarget(p)
565
566         case ssa.OpPPC64DIVW:
567                 // word-width version of above
568                 r := v.Reg()
569                 r0 := v.Args[0].Reg()
570                 r1 := v.Args[1].Reg()
571
572                 p := s.Prog(ppc64.ACMPW)
573                 p.From.Type = obj.TYPE_REG
574                 p.From.Reg = r1
575                 p.To.Type = obj.TYPE_CONST
576                 p.To.Offset = -1
577
578                 pbahead := s.Prog(ppc64.ABEQ)
579                 pbahead.To.Type = obj.TYPE_BRANCH
580
581                 p = s.Prog(v.Op.Asm())
582                 p.From.Type = obj.TYPE_REG
583                 p.From.Reg = r1
584                 p.Reg = r0
585                 p.To.Type = obj.TYPE_REG
586                 p.To.Reg = r
587
588                 pbover := s.Prog(obj.AJMP)
589                 pbover.To.Type = obj.TYPE_BRANCH
590
591                 p = s.Prog(ppc64.ANEG)
592                 p.To.Type = obj.TYPE_REG
593                 p.To.Reg = r
594                 p.From.Type = obj.TYPE_REG
595                 p.From.Reg = r0
596                 pbahead.To.SetTarget(p)
597
598                 p = s.Prog(obj.ANOP)
599                 pbover.To.SetTarget(p)
600
601         case ssa.OpPPC64CLRLSLWI:
602                 r := v.Reg()
603                 r1 := v.Args[0].Reg()
604                 shifts := v.AuxInt
605                 p := s.Prog(v.Op.Asm())
606                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
607                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
608                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
609                 p.Reg = r1
610                 p.To.Type = obj.TYPE_REG
611                 p.To.Reg = r
612
613         case ssa.OpPPC64CLRLSLDI:
614                 r := v.Reg()
615                 r1 := v.Args[0].Reg()
616                 shifts := v.AuxInt
617                 p := s.Prog(v.Op.Asm())
618                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
619                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
620                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
621                 p.Reg = r1
622                 p.To.Type = obj.TYPE_REG
623                 p.To.Reg = r
624
625                 // Mask has been set as sh
626         case ssa.OpPPC64RLDICL:
627                 r := v.Reg()
628                 r1 := v.Args[0].Reg()
629                 shifts := v.AuxInt
630                 p := s.Prog(v.Op.Asm())
631                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
632                 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
633                 p.Reg = r1
634                 p.To.Type = obj.TYPE_REG
635                 p.To.Reg = r
636
637         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
638                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
639                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
640                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
641                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
642                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
643                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
644                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
645                 r := v.Reg()
646                 r1 := v.Args[0].Reg()
647                 r2 := v.Args[1].Reg()
648                 p := s.Prog(v.Op.Asm())
649                 p.From.Type = obj.TYPE_REG
650                 p.From.Reg = r2
651                 p.Reg = r1
652                 p.To.Type = obj.TYPE_REG
653                 p.To.Reg = r
654
655         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
656                 r1 := v.Args[0].Reg()
657                 r2 := v.Args[1].Reg()
658                 p := s.Prog(v.Op.Asm())
659                 p.From.Type = obj.TYPE_REG
660                 p.From.Reg = r2
661                 p.Reg = r1
662                 p.To.Type = obj.TYPE_REG
663                 p.To.Reg = ppc64.REGTMP // result is not needed
664
665         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
666                 p := s.Prog(v.Op.Asm())
667                 p.From.Type = obj.TYPE_CONST
668                 p.From.Offset = v.AuxInt
669                 p.Reg = v.Args[0].Reg()
670                 p.To.Type = obj.TYPE_REG
671                 p.To.Reg = v.Reg()
672
673                 // Auxint holds encoded rotate + mask
674         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
675                 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
676                 p := s.Prog(v.Op.Asm())
677                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
678                 p.Reg = v.Args[0].Reg()
679                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
680                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
681
682                 // Auxint holds mask
683         case ssa.OpPPC64RLWNM:
684                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
685                 p := s.Prog(v.Op.Asm())
686                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
687                 p.Reg = v.Args[0].Reg()
688                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
689                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
690
691         case ssa.OpPPC64MADDLD:
692                 r := v.Reg()
693                 r1 := v.Args[0].Reg()
694                 r2 := v.Args[1].Reg()
695                 r3 := v.Args[2].Reg()
696                 // r = r1*r2 ± r3
697                 p := s.Prog(v.Op.Asm())
698                 p.From.Type = obj.TYPE_REG
699                 p.From.Reg = r1
700                 p.Reg = r2
701                 p.SetFrom3Reg(r3)
702                 p.To.Type = obj.TYPE_REG
703                 p.To.Reg = r
704
705         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
706                 r := v.Reg()
707                 r1 := v.Args[0].Reg()
708                 r2 := v.Args[1].Reg()
709                 r3 := v.Args[2].Reg()
710                 // r = r1*r2 ± r3
711                 p := s.Prog(v.Op.Asm())
712                 p.From.Type = obj.TYPE_REG
713                 p.From.Reg = r1
714                 p.Reg = r3
715                 p.SetFrom3Reg(r2)
716                 p.To.Type = obj.TYPE_REG
717                 p.To.Reg = r
718
719         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
720                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
721                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
722                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
723                 r := v.Reg()
724                 p := s.Prog(v.Op.Asm())
725                 p.To.Type = obj.TYPE_REG
726                 p.To.Reg = r
727                 p.From.Type = obj.TYPE_REG
728                 p.From.Reg = v.Args[0].Reg()
729
730         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
731                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
732                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
733                 p := s.Prog(v.Op.Asm())
734                 p.Reg = v.Args[0].Reg()
735                 p.From.Type = obj.TYPE_CONST
736                 p.From.Offset = v.AuxInt
737                 p.To.Type = obj.TYPE_REG
738                 p.To.Reg = v.Reg()
739
740         case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
741                 r := v.Reg0() // CA is the first, implied argument.
742                 r1 := v.Args[0].Reg()
743                 r2 := v.Args[1].Reg()
744                 p := s.Prog(v.Op.Asm())
745                 p.From.Type = obj.TYPE_REG
746                 p.From.Reg = r2
747                 p.Reg = r1
748                 p.To.Type = obj.TYPE_REG
749                 p.To.Reg = r
750
751         case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
752                 p := s.Prog(v.Op.Asm())
753                 p.From.Type = obj.TYPE_REG
754                 p.From.Reg = ppc64.REG_R0
755                 p.To.Type = obj.TYPE_REG
756                 p.To.Reg = v.Reg()
757
758         case ssa.OpPPC64ADDCconst:
759                 p := s.Prog(v.Op.Asm())
760                 p.Reg = v.Args[0].Reg()
761                 p.From.Type = obj.TYPE_CONST
762                 p.From.Offset = v.AuxInt
763                 p.To.Type = obj.TYPE_REG
764                 // Output is a pair, the second is the CA, which is implied.
765                 p.To.Reg = v.Reg0()
766
767         case ssa.OpPPC64SUBCconst:
768                 p := s.Prog(v.Op.Asm())
769                 p.SetFrom3Const(v.AuxInt)
770                 p.From.Type = obj.TYPE_REG
771                 p.From.Reg = v.Args[0].Reg()
772                 p.To.Type = obj.TYPE_REG
773                 p.To.Reg = v.Reg0()
774
775         case ssa.OpPPC64SUBFCconst:
776                 p := s.Prog(v.Op.Asm())
777                 p.SetFrom3Const(v.AuxInt)
778                 p.From.Type = obj.TYPE_REG
779                 p.From.Reg = v.Args[0].Reg()
780                 p.To.Type = obj.TYPE_REG
781                 p.To.Reg = v.Reg()
782
783         case ssa.OpPPC64ANDCCconst:
784                 p := s.Prog(v.Op.Asm())
785                 p.Reg = v.Args[0].Reg()
786                 p.From.Type = obj.TYPE_CONST
787                 p.From.Offset = v.AuxInt
788                 p.To.Type = obj.TYPE_REG
789                 p.To.Reg = ppc64.REGTMP // discard result
790
791         case ssa.OpPPC64MOVDaddr:
792                 switch v.Aux.(type) {
793                 default:
794                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
795                 case nil:
796                         // If aux offset and aux int are both 0, and the same
797                         // input and output regs are used, no instruction
798                         // needs to be generated, since it would just be
799                         // addi rx, rx, 0.
800                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
801                                 p := s.Prog(ppc64.AMOVD)
802                                 p.From.Type = obj.TYPE_ADDR
803                                 p.From.Reg = v.Args[0].Reg()
804                                 p.From.Offset = v.AuxInt
805                                 p.To.Type = obj.TYPE_REG
806                                 p.To.Reg = v.Reg()
807                         }
808
809                 case *obj.LSym, ir.Node:
810                         p := s.Prog(ppc64.AMOVD)
811                         p.From.Type = obj.TYPE_ADDR
812                         p.From.Reg = v.Args[0].Reg()
813                         p.To.Type = obj.TYPE_REG
814                         p.To.Reg = v.Reg()
815                         ssagen.AddAux(&p.From, v)
816
817                 }
818
819         case ssa.OpPPC64MOVDconst:
820                 p := s.Prog(v.Op.Asm())
821                 p.From.Type = obj.TYPE_CONST
822                 p.From.Offset = v.AuxInt
823                 p.To.Type = obj.TYPE_REG
824                 p.To.Reg = v.Reg()
825
826         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
827                 p := s.Prog(v.Op.Asm())
828                 p.From.Type = obj.TYPE_FCONST
829                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
830                 p.To.Type = obj.TYPE_REG
831                 p.To.Reg = v.Reg()
832
833         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
834                 p := s.Prog(v.Op.Asm())
835                 p.From.Type = obj.TYPE_REG
836                 p.From.Reg = v.Args[0].Reg()
837                 p.To.Type = obj.TYPE_REG
838                 p.To.Reg = v.Args[1].Reg()
839
840         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
841                 p := s.Prog(v.Op.Asm())
842                 p.From.Type = obj.TYPE_REG
843                 p.From.Reg = v.Args[0].Reg()
844                 p.To.Type = obj.TYPE_CONST
845                 p.To.Offset = v.AuxInt
846
847         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
848                 // Shift in register to required size
849                 p := s.Prog(v.Op.Asm())
850                 p.From.Type = obj.TYPE_REG
851                 p.From.Reg = v.Args[0].Reg()
852                 p.To.Reg = v.Reg()
853                 p.To.Type = obj.TYPE_REG
854
855         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
856
857                 // MOVDload and MOVWload are DS form instructions that are restricted to
858                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
859                 // then the address of the symbol to be loaded is computed (base + offset)
860                 // and used as the new base register and the offset field in the instruction
861                 // can be set to zero.
862
863                 // This same problem can happen with gostrings since the final offset is not
864                 // known yet, but could be unaligned after the relocation is resolved.
865                 // So gostrings are handled the same way.
866
867                 // This allows the MOVDload and MOVWload to be generated in more cases and
868                 // eliminates some offset and alignment checking in the rules file.
869
870                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
871                 ssagen.AddAux(&fromAddr, v)
872
873                 genAddr := false
874
875                 switch fromAddr.Name {
876                 case obj.NAME_EXTERN, obj.NAME_STATIC:
877                         // Special case for a rule that combines the bytes of a gostring.
878                         // The v alignment might seem OK, but we don't want to load it
879                         // using an offset because relocation comes later.
880                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
881                 default:
882                         genAddr = fromAddr.Offset%4 != 0
883                 }
884                 if genAddr {
885                         // Load full address into the temp register.
886                         p := s.Prog(ppc64.AMOVD)
887                         p.From.Type = obj.TYPE_ADDR
888                         p.From.Reg = v.Args[0].Reg()
889                         ssagen.AddAux(&p.From, v)
890                         // Load target using temp as base register
891                         // and offset zero. Setting NAME_NONE
892                         // prevents any extra offsets from being
893                         // added.
894                         p.To.Type = obj.TYPE_REG
895                         p.To.Reg = ppc64.REGTMP
896                         fromAddr.Reg = ppc64.REGTMP
897                         // Clear the offset field and other
898                         // information that might be used
899                         // by the assembler to add to the
900                         // final offset value.
901                         fromAddr.Offset = 0
902                         fromAddr.Name = obj.NAME_NONE
903                         fromAddr.Sym = nil
904                 }
905                 p := s.Prog(v.Op.Asm())
906                 p.From = fromAddr
907                 p.To.Type = obj.TYPE_REG
908                 p.To.Reg = v.Reg()
909                 break
910
911         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
912                 p := s.Prog(v.Op.Asm())
913                 p.From.Type = obj.TYPE_MEM
914                 p.From.Reg = v.Args[0].Reg()
915                 ssagen.AddAux(&p.From, v)
916                 p.To.Type = obj.TYPE_REG
917                 p.To.Reg = v.Reg()
918
919         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
920                 p := s.Prog(v.Op.Asm())
921                 p.From.Type = obj.TYPE_MEM
922                 p.From.Reg = v.Args[0].Reg()
923                 p.To.Type = obj.TYPE_REG
924                 p.To.Reg = v.Reg()
925
926         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
927                 p := s.Prog(v.Op.Asm())
928                 p.To.Type = obj.TYPE_MEM
929                 p.To.Reg = v.Args[0].Reg()
930                 p.From.Type = obj.TYPE_REG
931                 p.From.Reg = v.Args[1].Reg()
932
933         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
934                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
935                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
936                 p := s.Prog(v.Op.Asm())
937                 p.From.Type = obj.TYPE_MEM
938                 p.From.Reg = v.Args[0].Reg()
939                 p.From.Index = v.Args[1].Reg()
940                 p.To.Type = obj.TYPE_REG
941                 p.To.Reg = v.Reg()
942
943         case ssa.OpPPC64DCBT:
944                 p := s.Prog(v.Op.Asm())
945                 p.From.Type = obj.TYPE_MEM
946                 p.From.Reg = v.Args[0].Reg()
947                 p.To.Type = obj.TYPE_CONST
948                 p.To.Offset = v.AuxInt
949
950         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
951                 p := s.Prog(v.Op.Asm())
952                 p.From.Type = obj.TYPE_REG
953                 p.From.Reg = ppc64.REGZERO
954                 p.To.Type = obj.TYPE_MEM
955                 p.To.Reg = v.Args[0].Reg()
956                 ssagen.AddAux(&p.To, v)
957
958         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
959
960                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
961                 // to offset values that are a multiple of 4. If the offset field is not a
962                 // multiple of 4, then the full address of the store target is computed (base +
963                 // offset) and used as the new base register and the offset in the instruction
964                 // is set to 0.
965
966                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
967                 // and prevents checking of the offset value and alignment in the rules.
968
969                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
970                 ssagen.AddAux(&toAddr, v)
971
972                 if toAddr.Offset%4 != 0 {
973                         p := s.Prog(ppc64.AMOVD)
974                         p.From.Type = obj.TYPE_ADDR
975                         p.From.Reg = v.Args[0].Reg()
976                         ssagen.AddAux(&p.From, v)
977                         p.To.Type = obj.TYPE_REG
978                         p.To.Reg = ppc64.REGTMP
979                         toAddr.Reg = ppc64.REGTMP
980                         // Clear the offset field and other
981                         // information that might be used
982                         // by the assembler to add to the
983                         // final offset value.
984                         toAddr.Offset = 0
985                         toAddr.Name = obj.NAME_NONE
986                         toAddr.Sym = nil
987                 }
988                 p := s.Prog(v.Op.Asm())
989                 p.To = toAddr
990                 p.From.Type = obj.TYPE_REG
991                 if v.Op == ssa.OpPPC64MOVDstorezero {
992                         p.From.Reg = ppc64.REGZERO
993                 } else {
994                         p.From.Reg = v.Args[1].Reg()
995                 }
996
997         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
998                 p := s.Prog(v.Op.Asm())
999                 p.From.Type = obj.TYPE_REG
1000                 p.From.Reg = v.Args[1].Reg()
1001                 p.To.Type = obj.TYPE_MEM
1002                 p.To.Reg = v.Args[0].Reg()
1003                 ssagen.AddAux(&p.To, v)
1004
1005         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
1006                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
1007                 ssa.OpPPC64MOVHBRstoreidx:
1008                 p := s.Prog(v.Op.Asm())
1009                 p.From.Type = obj.TYPE_REG
1010                 p.From.Reg = v.Args[2].Reg()
1011                 p.To.Index = v.Args[1].Reg()
1012                 p.To.Type = obj.TYPE_MEM
1013                 p.To.Reg = v.Args[0].Reg()
1014
1015         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
1016                 // ISEL, ISELB
1017                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
1018                 // ISEL only accepts 0, 1, 2 condition values but the others can be
1019                 // achieved by swapping operand order.
1020                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
1021                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
1022                 // ISELB is used when a boolean result is needed, returning 0 or 1
1023                 p := s.Prog(ppc64.AISEL)
1024                 p.To.Type = obj.TYPE_REG
1025                 p.To.Reg = v.Reg()
1026                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
1027                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
1028                 if v.Op == ssa.OpPPC64ISEL {
1029                         r.Reg = v.Args[1].Reg()
1030                 }
1031                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
1032                 if v.AuxInt > 3 {
1033                         p.Reg = r.Reg
1034                         p.SetFrom3Reg(v.Args[0].Reg())
1035                 } else {
1036                         p.Reg = v.Args[0].Reg()
1037                         p.SetFrom3(r)
1038                 }
1039                 p.From.Type = obj.TYPE_CONST
1040                 p.From.Offset = v.AuxInt & 3
1041
1042         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
1043                 // The LoweredQuad code generation
1044                 // generates STXV instructions on
1045                 // power9. The Short variation is used
1046                 // if no loop is generated.
1047
1048                 // sizes >= 64 generate a loop as follows:
1049
1050                 // Set up loop counter in CTR, used by BC
1051                 // XXLXOR clears VS32
1052                 //       XXLXOR VS32,VS32,VS32
1053                 //       MOVD len/64,REG_TMP
1054                 //       MOVD REG_TMP,CTR
1055                 //       loop:
1056                 //       STXV VS32,0(R20)
1057                 //       STXV VS32,16(R20)
1058                 //       STXV VS32,32(R20)
1059                 //       STXV VS32,48(R20)
1060                 //       ADD  $64,R20
1061                 //       BC   16, 0, loop
1062
1063                 // Number of loop iterations (each iteration zeroes 64 bytes)
1064                 ctr := v.AuxInt / 64
1065
1066                 // Remainder bytes
1067                 rem := v.AuxInt % 64
1068
1069                 // Only generate a loop if there is more
1070                 // than 1 iteration.
1071                 if ctr > 1 {
1072                         // Set up VS32 (V0) to hold 0s
1073                         p := s.Prog(ppc64.AXXLXOR)
1074                         p.From.Type = obj.TYPE_REG
1075                         p.From.Reg = ppc64.REG_VS32
1076                         p.To.Type = obj.TYPE_REG
1077                         p.To.Reg = ppc64.REG_VS32
1078                         p.Reg = ppc64.REG_VS32
1079
1080                         // Set up CTR loop counter
1081                         p = s.Prog(ppc64.AMOVD)
1082                         p.From.Type = obj.TYPE_CONST
1083                         p.From.Offset = ctr
1084                         p.To.Type = obj.TYPE_REG
1085                         p.To.Reg = ppc64.REGTMP
1086
1087                         p = s.Prog(ppc64.AMOVD)
1088                         p.From.Type = obj.TYPE_REG
1089                         p.From.Reg = ppc64.REGTMP
1090                         p.To.Type = obj.TYPE_REG
1091                         p.To.Reg = ppc64.REG_CTR
1092
1093                         // Don't generate padding for
1094                         // loops with few iterations.
1095                         if ctr > 3 {
1096                                 p = s.Prog(obj.APCALIGN)
1097                                 p.From.Type = obj.TYPE_CONST
1098                                 p.From.Offset = 16
1099                         }
1100
1101                         // generate 4 STXVs to zero 64 bytes
1102                         var top *obj.Prog
1103
1104                         p = s.Prog(ppc64.ASTXV)
1105                         p.From.Type = obj.TYPE_REG
1106                         p.From.Reg = ppc64.REG_VS32
1107                         p.To.Type = obj.TYPE_MEM
1108                         p.To.Reg = v.Args[0].Reg()
1109
1110                         //  Save the top of loop
1111                         if top == nil {
1112                                 top = p
1113                         }
1114                         p = s.Prog(ppc64.ASTXV)
1115                         p.From.Type = obj.TYPE_REG
1116                         p.From.Reg = ppc64.REG_VS32
1117                         p.To.Type = obj.TYPE_MEM
1118                         p.To.Reg = v.Args[0].Reg()
1119                         p.To.Offset = 16
1120
1121                         p = s.Prog(ppc64.ASTXV)
1122                         p.From.Type = obj.TYPE_REG
1123                         p.From.Reg = ppc64.REG_VS32
1124                         p.To.Type = obj.TYPE_MEM
1125                         p.To.Reg = v.Args[0].Reg()
1126                         p.To.Offset = 32
1127
1128                         p = s.Prog(ppc64.ASTXV)
1129                         p.From.Type = obj.TYPE_REG
1130                         p.From.Reg = ppc64.REG_VS32
1131                         p.To.Type = obj.TYPE_MEM
1132                         p.To.Reg = v.Args[0].Reg()
1133                         p.To.Offset = 48
1134
1135                         // Increment address for the
1136                         // 64 bytes just zeroed.
1137                         p = s.Prog(ppc64.AADD)
1138                         p.Reg = v.Args[0].Reg()
1139                         p.From.Type = obj.TYPE_CONST
1140                         p.From.Offset = 64
1141                         p.To.Type = obj.TYPE_REG
1142                         p.To.Reg = v.Args[0].Reg()
1143
1144                         // Branch back to top of loop
1145                         // based on CTR
1146                         // BC with BO_BCTR generates bdnz
1147                         p = s.Prog(ppc64.ABC)
1148                         p.From.Type = obj.TYPE_CONST
1149                         p.From.Offset = ppc64.BO_BCTR
1150                         p.Reg = ppc64.REG_CR0LT
1151                         p.To.Type = obj.TYPE_BRANCH
1152                         p.To.SetTarget(top)
1153                 }
1154                 // When ctr == 1 the loop was not generated but
1155                 // there are at least 64 bytes to clear, so add
1156                 // that to the remainder to generate the code
1157                 // to clear those doublewords
1158                 if ctr == 1 {
1159                         rem += 64
1160                 }
1161
1162                 // Clear the remainder starting at offset zero
1163                 offset := int64(0)
1164
1165                 if rem >= 16 && ctr <= 1 {
1166                         // If the XXLXOR hasn't already been
1167                         // generated, do it here to initialize
1168                         // VS32 (V0) to 0.
1169                         p := s.Prog(ppc64.AXXLXOR)
1170                         p.From.Type = obj.TYPE_REG
1171                         p.From.Reg = ppc64.REG_VS32
1172                         p.To.Type = obj.TYPE_REG
1173                         p.To.Reg = ppc64.REG_VS32
1174                         p.Reg = ppc64.REG_VS32
1175                 }
1176                 // Generate STXV for 32 or 64
1177                 // bytes.
1178                 for rem >= 32 {
1179                         p := s.Prog(ppc64.ASTXV)
1180                         p.From.Type = obj.TYPE_REG
1181                         p.From.Reg = ppc64.REG_VS32
1182                         p.To.Type = obj.TYPE_MEM
1183                         p.To.Reg = v.Args[0].Reg()
1184                         p.To.Offset = offset
1185
1186                         p = s.Prog(ppc64.ASTXV)
1187                         p.From.Type = obj.TYPE_REG
1188                         p.From.Reg = ppc64.REG_VS32
1189                         p.To.Type = obj.TYPE_MEM
1190                         p.To.Reg = v.Args[0].Reg()
1191                         p.To.Offset = offset + 16
1192                         offset += 32
1193                         rem -= 32
1194                 }
1195                 // Generate 16 bytes
1196                 if rem >= 16 {
1197                         p := s.Prog(ppc64.ASTXV)
1198                         p.From.Type = obj.TYPE_REG
1199                         p.From.Reg = ppc64.REG_VS32
1200                         p.To.Type = obj.TYPE_MEM
1201                         p.To.Reg = v.Args[0].Reg()
1202                         p.To.Offset = offset
1203                         offset += 16
1204                         rem -= 16
1205                 }
1206
1207                 // first clear as many doublewords as possible
1208                 // then clear remaining sizes as available
1209                 for rem > 0 {
1210                         op, size := ppc64.AMOVB, int64(1)
1211                         switch {
1212                         case rem >= 8:
1213                                 op, size = ppc64.AMOVD, 8
1214                         case rem >= 4:
1215                                 op, size = ppc64.AMOVW, 4
1216                         case rem >= 2:
1217                                 op, size = ppc64.AMOVH, 2
1218                         }
1219                         p := s.Prog(op)
1220                         p.From.Type = obj.TYPE_REG
1221                         p.From.Reg = ppc64.REG_R0
1222                         p.To.Type = obj.TYPE_MEM
1223                         p.To.Reg = v.Args[0].Reg()
1224                         p.To.Offset = offset
1225                         rem -= size
1226                         offset += size
1227                 }
1228
1229         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1230
1231                 // Unaligned data doesn't hurt performance
1232                 // for these instructions on power8.
1233
1234                 // For sizes >= 64 generate a loop as follows:
1235
1236                 // Set up loop counter in CTR, used by BC
1237                 //       XXLXOR VS32,VS32,VS32
1238                 //       MOVD len/32,REG_TMP
1239                 //       MOVD REG_TMP,CTR
1240                 //       MOVD $16,REG_TMP
1241                 //       loop:
1242                 //       STXVD2X VS32,(R0)(R20)
1243                 //       STXVD2X VS32,(R31)(R20)
1244                 //       ADD  $32,R20
1245                 //       BC   16, 0, loop
1246                 //
1247                 // any remainder is done as described below
1248
1249                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1250                 // then handle the remainder
1251                 //      MOVD R0,(R20)
1252                 //      MOVD R0,8(R20)
1253                 // .... etc.
1254                 //
1255                 // the remainder bytes are cleared using one or more
1256                 // of the following instructions with the appropriate
1257                 // offsets depending which instructions are needed
1258                 //
1259                 //      MOVW R0,n1(R20) 4 bytes
1260                 //      MOVH R0,n2(R20) 2 bytes
1261                 //      MOVB R0,n3(R20) 1 byte
1262                 //
1263                 // 7 bytes: MOVW, MOVH, MOVB
1264                 // 6 bytes: MOVW, MOVH
1265                 // 5 bytes: MOVW, MOVB
1266                 // 3 bytes: MOVH, MOVB
1267
1268                 // each loop iteration does 32 bytes
1269                 ctr := v.AuxInt / 32
1270
1271                 // remainder bytes
1272                 rem := v.AuxInt % 32
1273
1274                 // only generate a loop if there is more
1275                 // than 1 iteration.
1276                 if ctr > 1 {
1277                         // Set up VS32 (V0) to hold 0s
1278                         p := s.Prog(ppc64.AXXLXOR)
1279                         p.From.Type = obj.TYPE_REG
1280                         p.From.Reg = ppc64.REG_VS32
1281                         p.To.Type = obj.TYPE_REG
1282                         p.To.Reg = ppc64.REG_VS32
1283                         p.Reg = ppc64.REG_VS32
1284
1285                         // Set up CTR loop counter
1286                         p = s.Prog(ppc64.AMOVD)
1287                         p.From.Type = obj.TYPE_CONST
1288                         p.From.Offset = ctr
1289                         p.To.Type = obj.TYPE_REG
1290                         p.To.Reg = ppc64.REGTMP
1291
1292                         p = s.Prog(ppc64.AMOVD)
1293                         p.From.Type = obj.TYPE_REG
1294                         p.From.Reg = ppc64.REGTMP
1295                         p.To.Type = obj.TYPE_REG
1296                         p.To.Reg = ppc64.REG_CTR
1297
1298                         // Set up R31 to hold index value 16
1299                         p = s.Prog(ppc64.AMOVD)
1300                         p.From.Type = obj.TYPE_CONST
1301                         p.From.Offset = 16
1302                         p.To.Type = obj.TYPE_REG
1303                         p.To.Reg = ppc64.REGTMP
1304
1305                         // Don't add padding for alignment
1306                         // with few loop iterations.
1307                         if ctr > 3 {
1308                                 p = s.Prog(obj.APCALIGN)
1309                                 p.From.Type = obj.TYPE_CONST
1310                                 p.From.Offset = 16
1311                         }
1312
1313                         // generate 2 STXVD2Xs, each storing 16 bytes (32 bytes per iteration)
1314                         // when this is a loop then the top must be saved
1315                         var top *obj.Prog
1316                         // This is the top of loop
1317
1318                         p = s.Prog(ppc64.ASTXVD2X)
1319                         p.From.Type = obj.TYPE_REG
1320                         p.From.Reg = ppc64.REG_VS32
1321                         p.To.Type = obj.TYPE_MEM
1322                         p.To.Reg = v.Args[0].Reg()
1323                         p.To.Index = ppc64.REGZERO
1324                         // Save the top of loop
1325                         if top == nil {
1326                                 top = p
1327                         }
1328                         p = s.Prog(ppc64.ASTXVD2X)
1329                         p.From.Type = obj.TYPE_REG
1330                         p.From.Reg = ppc64.REG_VS32
1331                         p.To.Type = obj.TYPE_MEM
1332                         p.To.Reg = v.Args[0].Reg()
1333                         p.To.Index = ppc64.REGTMP
1334
1335                         // Increment address for the
1336                         // 4 doublewords just zeroed.
1337                         p = s.Prog(ppc64.AADD)
1338                         p.Reg = v.Args[0].Reg()
1339                         p.From.Type = obj.TYPE_CONST
1340                         p.From.Offset = 32
1341                         p.To.Type = obj.TYPE_REG
1342                         p.To.Reg = v.Args[0].Reg()
1343
1344                         // Branch back to top of loop
1345                         // based on CTR
1346                         // BC with BO_BCTR generates bdnz
1347                         p = s.Prog(ppc64.ABC)
1348                         p.From.Type = obj.TYPE_CONST
1349                         p.From.Offset = ppc64.BO_BCTR
1350                         p.Reg = ppc64.REG_CR0LT
1351                         p.To.Type = obj.TYPE_BRANCH
1352                         p.To.SetTarget(top)
1353                 }
1354
1355                 // when ctr == 1 the loop was not generated but
1356                 // there are at least 32 bytes to clear, so add
1357                 // that to the remainder to generate the code
1358                 // to clear those doublewords
1359                 if ctr == 1 {
1360                         rem += 32
1361                 }
1362
1363                 // clear the remainder starting at offset zero
1364                 offset := int64(0)
1365
1366                 // first clear as many doublewords as possible
1367                 // then clear remaining sizes as available
1368                 for rem > 0 {
1369                         op, size := ppc64.AMOVB, int64(1)
1370                         switch {
1371                         case rem >= 8:
1372                                 op, size = ppc64.AMOVD, 8
1373                         case rem >= 4:
1374                                 op, size = ppc64.AMOVW, 4
1375                         case rem >= 2:
1376                                 op, size = ppc64.AMOVH, 2
1377                         }
1378                         p := s.Prog(op)
1379                         p.From.Type = obj.TYPE_REG
1380                         p.From.Reg = ppc64.REG_R0
1381                         p.To.Type = obj.TYPE_MEM
1382                         p.To.Reg = v.Args[0].Reg()
1383                         p.To.Offset = offset
1384                         rem -= size
1385                         offset += size
1386                 }
1387
1388         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1389
1390                 bytesPerLoop := int64(32)
1391                 // This will be used when moving more
1392                 // than 8 bytes.  Moves start with
1393                 // as many 8 byte moves as possible, then
1394                 // 4, 2, or 1 byte(s) as remaining.  This will
1395                 // work and be efficient for power8 or later.
1396                 // If there are 64 or more bytes, then a
1397                 // loop is generated to move 32 bytes and
1398                 // update the src and dst addresses on each
1399                 // iteration. When < 64 bytes, the appropriate
1400                 // number of moves are generated based on the
1401                 // size.
1402                 // When moving >= 64 bytes a loop is used
1403                 //      MOVD len/32,REG_TMP
1404                 //      MOVD REG_TMP,CTR
1405                 //      MOVD $16,REG_TMP
1406                 // top:
1407                 //      LXVD2X (R0)(R21),VS32
1408                 //      LXVD2X (R31)(R21),VS33
1409                 //      ADD $32,R21
1410                 //      STXVD2X VS32,(R0)(R20)
1411                 //      STXVD2X VS33,(R31)(R20)
1412                 //      ADD $32,R20
1413                 //      BC 16,0,top
1414                 // Bytes not moved by this loop are moved
1415                 // with a combination of the following instructions,
1416                 // starting with the largest sizes and generating as
1417                 // many as needed, using the appropriate offset value.
1418                 //      MOVD  n(R21),R31
1419                 //      MOVD  R31,n(R20)
1420                 //      MOVW  n1(R21),R31
1421                 //      MOVW  R31,n1(R20)
1422                 //      MOVH  n2(R21),R31
1423                 //      MOVH  R31,n2(R20)
1424                 //      MOVB  n3(R21),R31
1425                 //      MOVB  R31,n3(R20)
1426
1427                 // Each loop iteration moves 32 bytes
1428                 ctr := v.AuxInt / bytesPerLoop
1429
1430                 // Remainder after the loop
1431                 rem := v.AuxInt % bytesPerLoop
1432
1433                 dstReg := v.Args[0].Reg()
1434                 srcReg := v.Args[1].Reg()
1435
1436                 // The set of registers used here, must match the clobbered reg list
1437                 // in PPC64Ops.go.
1438                 offset := int64(0)
1439
1440                 // top of the loop
1441                 var top *obj.Prog
1442                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1443                 if ctr > 1 {
1444                         // Set up the CTR
1445                         p := s.Prog(ppc64.AMOVD)
1446                         p.From.Type = obj.TYPE_CONST
1447                         p.From.Offset = ctr
1448                         p.To.Type = obj.TYPE_REG
1449                         p.To.Reg = ppc64.REGTMP
1450
1451                         p = s.Prog(ppc64.AMOVD)
1452                         p.From.Type = obj.TYPE_REG
1453                         p.From.Reg = ppc64.REGTMP
1454                         p.To.Type = obj.TYPE_REG
1455                         p.To.Reg = ppc64.REG_CTR
1456
1457                         // Use REGTMP as index reg
1458                         p = s.Prog(ppc64.AMOVD)
1459                         p.From.Type = obj.TYPE_CONST
1460                         p.From.Offset = 16
1461                         p.To.Type = obj.TYPE_REG
1462                         p.To.Reg = ppc64.REGTMP
1463
1464                         // Don't add padding for
1465                         // alignment with small iteration
1466                         // counts.
1467                         if ctr > 3 {
1468                                 p = s.Prog(obj.APCALIGN)
1469                                 p.From.Type = obj.TYPE_CONST
1470                                 p.From.Offset = 16
1471                         }
1472
1473                         // Generate 16 byte loads and stores.
1474                         // Use temp register for index (16)
1475                         // on the second one.
1476
1477                         p = s.Prog(ppc64.ALXVD2X)
1478                         p.From.Type = obj.TYPE_MEM
1479                         p.From.Reg = srcReg
1480                         p.From.Index = ppc64.REGZERO
1481                         p.To.Type = obj.TYPE_REG
1482                         p.To.Reg = ppc64.REG_VS32
1483                         if top == nil {
1484                                 top = p
1485                         }
1486                         p = s.Prog(ppc64.ALXVD2X)
1487                         p.From.Type = obj.TYPE_MEM
1488                         p.From.Reg = srcReg
1489                         p.From.Index = ppc64.REGTMP
1490                         p.To.Type = obj.TYPE_REG
1491                         p.To.Reg = ppc64.REG_VS33
1492
1493                         // increment the src reg for next iteration
1494                         p = s.Prog(ppc64.AADD)
1495                         p.Reg = srcReg
1496                         p.From.Type = obj.TYPE_CONST
1497                         p.From.Offset = bytesPerLoop
1498                         p.To.Type = obj.TYPE_REG
1499                         p.To.Reg = srcReg
1500
1501                         // generate 16 byte stores
1502                         p = s.Prog(ppc64.ASTXVD2X)
1503                         p.From.Type = obj.TYPE_REG
1504                         p.From.Reg = ppc64.REG_VS32
1505                         p.To.Type = obj.TYPE_MEM
1506                         p.To.Reg = dstReg
1507                         p.To.Index = ppc64.REGZERO
1508
1509                         p = s.Prog(ppc64.ASTXVD2X)
1510                         p.From.Type = obj.TYPE_REG
1511                         p.From.Reg = ppc64.REG_VS33
1512                         p.To.Type = obj.TYPE_MEM
1513                         p.To.Reg = dstReg
1514                         p.To.Index = ppc64.REGTMP
1515
1516                         // increment the dst reg for next iteration
1517                         p = s.Prog(ppc64.AADD)
1518                         p.Reg = dstReg
1519                         p.From.Type = obj.TYPE_CONST
1520                         p.From.Offset = bytesPerLoop
1521                         p.To.Type = obj.TYPE_REG
1522                         p.To.Reg = dstReg
1523
1524                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1525                         // to loop top.
1526                         p = s.Prog(ppc64.ABC)
1527                         p.From.Type = obj.TYPE_CONST
1528                         p.From.Offset = ppc64.BO_BCTR
1529                         p.Reg = ppc64.REG_CR0LT
1530                         p.To.Type = obj.TYPE_BRANCH
1531                         p.To.SetTarget(top)
1532
1533                         // srcReg and dstReg were incremented in the loop, so
1534                         // later instructions start with offset 0.
1535                         offset = int64(0)
1536                 }
1537
1538                 // No loop was generated for one iteration, so
1539                 // add 32 bytes to the remainder to move those bytes.
1540                 if ctr == 1 {
1541                         rem += bytesPerLoop
1542                 }
1543
1544                 if rem >= 16 {
1545                         // Generate 16 byte loads and stores.
1546                         // Use temp register for index (value 16)
1547                         // on the second one.
1548                         p := s.Prog(ppc64.ALXVD2X)
1549                         p.From.Type = obj.TYPE_MEM
1550                         p.From.Reg = srcReg
1551                         p.From.Index = ppc64.REGZERO
1552                         p.To.Type = obj.TYPE_REG
1553                         p.To.Reg = ppc64.REG_VS32
1554
1555                         p = s.Prog(ppc64.ASTXVD2X)
1556                         p.From.Type = obj.TYPE_REG
1557                         p.From.Reg = ppc64.REG_VS32
1558                         p.To.Type = obj.TYPE_MEM
1559                         p.To.Reg = dstReg
1560                         p.To.Index = ppc64.REGZERO
1561
1562                         offset = 16
1563                         rem -= 16
1564
1565                         if rem >= 16 {
1566                                 // Use REGTMP as index reg
1567                                 p := s.Prog(ppc64.AMOVD)
1568                                 p.From.Type = obj.TYPE_CONST
1569                                 p.From.Offset = 16
1570                                 p.To.Type = obj.TYPE_REG
1571                                 p.To.Reg = ppc64.REGTMP
1572
1573                                 p = s.Prog(ppc64.ALXVD2X)
1574                                 p.From.Type = obj.TYPE_MEM
1575                                 p.From.Reg = srcReg
1576                                 p.From.Index = ppc64.REGTMP
1577                                 p.To.Type = obj.TYPE_REG
1578                                 p.To.Reg = ppc64.REG_VS32
1579
1580                                 p = s.Prog(ppc64.ASTXVD2X)
1581                                 p.From.Type = obj.TYPE_REG
1582                                 p.From.Reg = ppc64.REG_VS32
1583                                 p.To.Type = obj.TYPE_MEM
1584                                 p.To.Reg = dstReg
1585                                 p.To.Index = ppc64.REGTMP
1586
1587                                 offset = 32
1588                                 rem -= 16
1589                         }
1590                 }
1591
1592                 // Generate all the remaining load and store pairs, starting with
1593                 // as many 8 byte moves as possible, then 4, 2, 1.
1594                 for rem > 0 {
1595                         op, size := ppc64.AMOVB, int64(1)
1596                         switch {
1597                         case rem >= 8:
1598                                 op, size = ppc64.AMOVD, 8
1599                         case rem >= 4:
1600                                 op, size = ppc64.AMOVWZ, 4
1601                         case rem >= 2:
1602                                 op, size = ppc64.AMOVH, 2
1603                         }
1604                         // Load
1605                         p := s.Prog(op)
1606                         p.To.Type = obj.TYPE_REG
1607                         p.To.Reg = ppc64.REGTMP
1608                         p.From.Type = obj.TYPE_MEM
1609                         p.From.Reg = srcReg
1610                         p.From.Offset = offset
1611
1612                         // Store
1613                         p = s.Prog(op)
1614                         p.From.Type = obj.TYPE_REG
1615                         p.From.Reg = ppc64.REGTMP
1616                         p.To.Type = obj.TYPE_MEM
1617                         p.To.Reg = dstReg
1618                         p.To.Offset = offset
1619                         rem -= size
1620                         offset += size
1621                 }
1622
1623         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1624                 bytesPerLoop := int64(64)
1625                 // This is used when moving more
1626                 // than 8 bytes on power9.  Moves start with
1627                 // as many 8 byte moves as possible, then
1628                 // 4, 2, or 1 byte(s) as remaining.  This will
1629                 // work and be efficient for power8 or later.
1630                 // If there are 128 or more bytes, then a
1631                 // loop is generated to move 64 bytes and
1632                 // update the src and dst addresses on each
1633                 // iteration. When < 128 bytes, the appropriate
1634                 // number of moves are generated based on the
1635                 // size.
1636                 // When moving >= 128 bytes a loop is used
1637                 //      MOVD len/64,REG_TMP
1638                 //      MOVD REG_TMP,CTR
1639                 // top:
1640                 //      LXV 0(R21),VS32;   LXV 16(R21),VS33
1641                 //      STXV VS32,0(R20);  STXV VS33,16(R20)
1642                 //      LXV 32(R21),VS32;  LXV 48(R21),VS33
1643                 //      STXV VS32,32(R20); STXV VS33,48(R20)
1644                 //      ADD $64,R21
1645                 //      ADD $64,R20
1646                 //      BC 16,0,top
1647                 // Bytes not moved by this loop are moved
1648                 // with a combination of the following instructions,
1649                 // starting with the largest sizes and generating as
1650                 // many as needed, using the appropriate offset value.
1651                 //      MOVD  n(R21),R31
1652                 //      MOVD  R31,n(R20)
1653                 //      MOVW  n1(R21),R31
1654                 //      MOVW  R31,n1(R20)
1655                 //      MOVH  n2(R21),R31
1656                 //      MOVH  R31,n2(R20)
1657                 //      MOVB  n3(R21),R31
1658                 //      MOVB  R31,n3(R20)
1659
1660                 // Each loop iteration moves 64 bytes
1661                 ctr := v.AuxInt / bytesPerLoop
1662
1663                 // Remainder after the loop
1664                 rem := v.AuxInt % bytesPerLoop
1665
1666                 dstReg := v.Args[0].Reg()
1667                 srcReg := v.Args[1].Reg()
1668
1669                 offset := int64(0)
1670
1671                 // top of the loop
1672                 var top *obj.Prog
1673
1674                 // Only generate looping code when loop counter is > 1 for >= 128 bytes
1675                 if ctr > 1 {
1676                         // Set up the CTR
1677                         p := s.Prog(ppc64.AMOVD)
1678                         p.From.Type = obj.TYPE_CONST
1679                         p.From.Offset = ctr
1680                         p.To.Type = obj.TYPE_REG
1681                         p.To.Reg = ppc64.REGTMP
1682
1683                         p = s.Prog(ppc64.AMOVD)
1684                         p.From.Type = obj.TYPE_REG
1685                         p.From.Reg = ppc64.REGTMP
1686                         p.To.Type = obj.TYPE_REG
1687                         p.To.Reg = ppc64.REG_CTR
1688
1689                         p = s.Prog(obj.APCALIGN)
1690                         p.From.Type = obj.TYPE_CONST
1691                         p.From.Offset = 16
1692
1693                         // Generate 16 byte loads and stores.
1694                         p = s.Prog(ppc64.ALXV)
1695                         p.From.Type = obj.TYPE_MEM
1696                         p.From.Reg = srcReg
1697                         p.From.Offset = offset
1698                         p.To.Type = obj.TYPE_REG
1699                         p.To.Reg = ppc64.REG_VS32
1700                         if top == nil {
1701                                 top = p
1702                         }
1703                         p = s.Prog(ppc64.ALXV)
1704                         p.From.Type = obj.TYPE_MEM
1705                         p.From.Reg = srcReg
1706                         p.From.Offset = offset + 16
1707                         p.To.Type = obj.TYPE_REG
1708                         p.To.Reg = ppc64.REG_VS33
1709
1710                         // generate 16 byte stores
1711                         p = s.Prog(ppc64.ASTXV)
1712                         p.From.Type = obj.TYPE_REG
1713                         p.From.Reg = ppc64.REG_VS32
1714                         p.To.Type = obj.TYPE_MEM
1715                         p.To.Reg = dstReg
1716                         p.To.Offset = offset
1717
1718                         p = s.Prog(ppc64.ASTXV)
1719                         p.From.Type = obj.TYPE_REG
1720                         p.From.Reg = ppc64.REG_VS33
1721                         p.To.Type = obj.TYPE_MEM
1722                         p.To.Reg = dstReg
1723                         p.To.Offset = offset + 16
1724
1725                         // Generate 16 byte loads and stores.
1726                         p = s.Prog(ppc64.ALXV)
1727                         p.From.Type = obj.TYPE_MEM
1728                         p.From.Reg = srcReg
1729                         p.From.Offset = offset + 32
1730                         p.To.Type = obj.TYPE_REG
1731                         p.To.Reg = ppc64.REG_VS32
1732
1733                         p = s.Prog(ppc64.ALXV)
1734                         p.From.Type = obj.TYPE_MEM
1735                         p.From.Reg = srcReg
1736                         p.From.Offset = offset + 48
1737                         p.To.Type = obj.TYPE_REG
1738                         p.To.Reg = ppc64.REG_VS33
1739
1740                         // generate 16 byte stores
1741                         p = s.Prog(ppc64.ASTXV)
1742                         p.From.Type = obj.TYPE_REG
1743                         p.From.Reg = ppc64.REG_VS32
1744                         p.To.Type = obj.TYPE_MEM
1745                         p.To.Reg = dstReg
1746                         p.To.Offset = offset + 32
1747
1748                         p = s.Prog(ppc64.ASTXV)
1749                         p.From.Type = obj.TYPE_REG
1750                         p.From.Reg = ppc64.REG_VS33
1751                         p.To.Type = obj.TYPE_MEM
1752                         p.To.Reg = dstReg
1753                         p.To.Offset = offset + 48
1754
1755                         // increment the src reg for next iteration
1756                         p = s.Prog(ppc64.AADD)
1757                         p.Reg = srcReg
1758                         p.From.Type = obj.TYPE_CONST
1759                         p.From.Offset = bytesPerLoop
1760                         p.To.Type = obj.TYPE_REG
1761                         p.To.Reg = srcReg
1762
1763                         // increment the dst reg for next iteration
1764                         p = s.Prog(ppc64.AADD)
1765                         p.Reg = dstReg
1766                         p.From.Type = obj.TYPE_CONST
1767                         p.From.Offset = bytesPerLoop
1768                         p.To.Type = obj.TYPE_REG
1769                         p.To.Reg = dstReg
1770
1771                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1772                         // to loop top.
1773                         p = s.Prog(ppc64.ABC)
1774                         p.From.Type = obj.TYPE_CONST
1775                         p.From.Offset = ppc64.BO_BCTR
1776                         p.Reg = ppc64.REG_CR0LT
1777                         p.To.Type = obj.TYPE_BRANCH
1778                         p.To.SetTarget(top)
1779
1780                         // srcReg and dstReg were incremented in the loop, so
1781                         // later instructions start with offset 0.
1782                         offset = int64(0)
1783                 }
1784
1785                 // No loop was generated for one iteration, so
1786                 // add 64 bytes to the remainder to move those bytes.
1787                 if ctr == 1 {
1788                         rem += bytesPerLoop
1789                 }
1790                 if rem >= 32 {
1791                         p := s.Prog(ppc64.ALXV)
1792                         p.From.Type = obj.TYPE_MEM
1793                         p.From.Reg = srcReg
1794                         p.To.Type = obj.TYPE_REG
1795                         p.To.Reg = ppc64.REG_VS32
1796
1797                         p = s.Prog(ppc64.ALXV)
1798                         p.From.Type = obj.TYPE_MEM
1799                         p.From.Reg = srcReg
1800                         p.From.Offset = 16
1801                         p.To.Type = obj.TYPE_REG
1802                         p.To.Reg = ppc64.REG_VS33
1803
1804                         p = s.Prog(ppc64.ASTXV)
1805                         p.From.Type = obj.TYPE_REG
1806                         p.From.Reg = ppc64.REG_VS32
1807                         p.To.Type = obj.TYPE_MEM
1808                         p.To.Reg = dstReg
1809
1810                         p = s.Prog(ppc64.ASTXV)
1811                         p.From.Type = obj.TYPE_REG
1812                         p.From.Reg = ppc64.REG_VS33
1813                         p.To.Type = obj.TYPE_MEM
1814                         p.To.Reg = dstReg
1815                         p.To.Offset = 16
1816
1817                         offset = 32
1818                         rem -= 32
1819                 }
1820
1821                 if rem >= 16 {
1822                         // Generate 16 byte loads and stores.
1823                         p := s.Prog(ppc64.ALXV)
1824                         p.From.Type = obj.TYPE_MEM
1825                         p.From.Reg = srcReg
1826                         p.From.Offset = offset
1827                         p.To.Type = obj.TYPE_REG
1828                         p.To.Reg = ppc64.REG_VS32
1829
1830                         p = s.Prog(ppc64.ASTXV)
1831                         p.From.Type = obj.TYPE_REG
1832                         p.From.Reg = ppc64.REG_VS32
1833                         p.To.Type = obj.TYPE_MEM
1834                         p.To.Reg = dstReg
1835                         p.To.Offset = offset
1836
1837                         offset += 16
1838                         rem -= 16
1839
1840                         if rem >= 16 {
1841                                 p := s.Prog(ppc64.ALXV)
1842                                 p.From.Type = obj.TYPE_MEM
1843                                 p.From.Reg = srcReg
1844                                 p.From.Offset = offset
1845                                 p.To.Type = obj.TYPE_REG
1846                                 p.To.Reg = ppc64.REG_VS32
1847
1848                                 p = s.Prog(ppc64.ASTXV)
1849                                 p.From.Type = obj.TYPE_REG
1850                                 p.From.Reg = ppc64.REG_VS32
1851                                 p.To.Type = obj.TYPE_MEM
1852                                 p.To.Reg = dstReg
1853                                 p.To.Offset = offset
1854
1855                                 offset += 16
1856                                 rem -= 16
1857                         }
1858                 }
1859                 // Generate all the remaining load and store pairs, starting with
1860                 // as many 8 byte moves as possible, then 4, 2, 1.
1861                 for rem > 0 {
1862                         op, size := ppc64.AMOVB, int64(1)
1863                         switch {
1864                         case rem >= 8:
1865                                 op, size = ppc64.AMOVD, 8
1866                         case rem >= 4:
1867                                 op, size = ppc64.AMOVWZ, 4
1868                         case rem >= 2:
1869                                 op, size = ppc64.AMOVH, 2
1870                         }
1871                         // Load
1872                         p := s.Prog(op)
1873                         p.To.Type = obj.TYPE_REG
1874                         p.To.Reg = ppc64.REGTMP
1875                         p.From.Type = obj.TYPE_MEM
1876                         p.From.Reg = srcReg
1877                         p.From.Offset = offset
1878
1879                         // Store
1880                         p = s.Prog(op)
1881                         p.From.Type = obj.TYPE_REG
1882                         p.From.Reg = ppc64.REGTMP
1883                         p.To.Type = obj.TYPE_MEM
1884                         p.To.Reg = dstReg
1885                         p.To.Offset = offset
1886                         rem -= size
1887                         offset += size
1888                 }
1889
1890         case ssa.OpPPC64CALLstatic:
1891                 s.Call(v)
1892
1893         case ssa.OpPPC64CALLtail:
1894                 s.TailCall(v)
1895
1896         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1897                 p := s.Prog(ppc64.AMOVD)
1898                 p.From.Type = obj.TYPE_REG
1899                 p.From.Reg = v.Args[0].Reg()
1900                 p.To.Type = obj.TYPE_REG
1901                 p.To.Reg = ppc64.REG_LR
1902
1903                 if v.Args[0].Reg() != ppc64.REG_R12 {
1904                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1905                 }
1906
1907                 pp := s.Call(v)
1908
1909                 // Convert the call into a blrl with hint this is not a subroutine return.
1910                 // The full bclrl opcode must be specified when passing a hint.
1911                 pp.As = ppc64.ABCL
1912                 pp.From.Type = obj.TYPE_CONST
1913                 pp.From.Offset = ppc64.BO_ALWAYS
1914                 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1915                 pp.To.Reg = ppc64.REG_LR
1916                 pp.SetFrom3Const(1)
1917
1918                 if base.Ctxt.Flag_shared {
1919                         // When compiling Go into PIC, the function we just
1920                         // called via pointer might have been implemented in
1921                         // a separate module and so overwritten the TOC
1922                         // pointer in R2; reload it.
1923                         q := s.Prog(ppc64.AMOVD)
1924                         q.From.Type = obj.TYPE_MEM
1925                         q.From.Offset = 24
1926                         q.From.Reg = ppc64.REGSP
1927                         q.To.Type = obj.TYPE_REG
1928                         q.To.Reg = ppc64.REG_R2
1929                 }
1930
1931         case ssa.OpPPC64LoweredWB:
1932                 p := s.Prog(obj.ACALL)
1933                 p.To.Type = obj.TYPE_MEM
1934                 p.To.Name = obj.NAME_EXTERN
1935                 p.To.Sym = v.Aux.(*obj.LSym)
1936
1937         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1938                 p := s.Prog(obj.ACALL)
1939                 p.To.Type = obj.TYPE_MEM
1940                 p.To.Name = obj.NAME_EXTERN
1941                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1942                 s.UseArgs(16) // space used in callee args area by assembly stubs
1943
1944         case ssa.OpPPC64LoweredNilCheck:
1945                 if buildcfg.GOOS == "aix" {
1946                         // CMP Rarg0, R0
1947                         // BNE 2(PC)
1948                         // STW R0, 0(R0)
1949                         // NOP (so the BNE has somewhere to land)
1950
1951                         // CMP Rarg0, R0
1952                         p := s.Prog(ppc64.ACMP)
1953                         p.From.Type = obj.TYPE_REG
1954                         p.From.Reg = v.Args[0].Reg()
1955                         p.To.Type = obj.TYPE_REG
1956                         p.To.Reg = ppc64.REG_R0
1957
1958                         // BNE 2(PC)
1959                         p2 := s.Prog(ppc64.ABNE)
1960                         p2.To.Type = obj.TYPE_BRANCH
1961
1962                         // STW R0, 0(R0)
1963                         // Write at 0 is forbidden and will trigger a SIGSEGV
1964                         p = s.Prog(ppc64.AMOVW)
1965                         p.From.Type = obj.TYPE_REG
1966                         p.From.Reg = ppc64.REG_R0
1967                         p.To.Type = obj.TYPE_MEM
1968                         p.To.Reg = ppc64.REG_R0
1969
1970                         // NOP (so the BNE has somewhere to land)
1971                         nop := s.Prog(obj.ANOP)
1972                         p2.To.SetTarget(nop)
1973
1974                 } else {
1975                         // Issue a load which will fault if arg is nil.
1976                         p := s.Prog(ppc64.AMOVBZ)
1977                         p.From.Type = obj.TYPE_MEM
1978                         p.From.Reg = v.Args[0].Reg()
1979                         ssagen.AddAux(&p.From, v)
1980                         p.To.Type = obj.TYPE_REG
1981                         p.To.Reg = ppc64.REGTMP
1982                 }
1983                 if logopt.Enabled() {
1984                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1985                 }
1986                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1987                         base.WarnfAt(v.Pos, "generated nil check")
1988                 }
1989
1990         // These should be resolved by rules and not make it here.
1991         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1992                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1993                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1994                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1995         case ssa.OpPPC64InvertFlags:
1996                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1997         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1998                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1999         case ssa.OpClobber, ssa.OpClobberReg:
2000                 // TODO: implement for clobberdead experiment. Nop is ok for now.
2001         default:
2002                 v.Fatalf("genValue not implemented: %s", v.LongString())
2003         }
2004 }
2005
2006 var blockJump = [...]struct {
2007         asm, invasm     obj.As
2008         asmeq, invasmun bool
2009 }{
2010         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
2011         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
2012
2013         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
2014         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
2015         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
2016         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
2017
2018         // TODO: need to work FP comparisons into block jumps
2019         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
2020         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
2021         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
2022         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
2023 }
2024
2025 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
2026         switch b.Kind {
2027         case ssa.BlockDefer:
2028                 // defer returns in R3:
2029                 // 0 if we should continue executing
2030                 // 1 if we should jump to deferreturn call
2031                 p := s.Prog(ppc64.ACMP)
2032                 p.From.Type = obj.TYPE_REG
2033                 p.From.Reg = ppc64.REG_R3
2034                 p.To.Type = obj.TYPE_REG
2035                 p.To.Reg = ppc64.REG_R0
2036
2037                 p = s.Prog(ppc64.ABNE)
2038                 p.To.Type = obj.TYPE_BRANCH
2039                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
2040                 if b.Succs[0].Block() != next {
2041                         p := s.Prog(obj.AJMP)
2042                         p.To.Type = obj.TYPE_BRANCH
2043                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2044                 }
2045
2046         case ssa.BlockPlain:
2047                 if b.Succs[0].Block() != next {
2048                         p := s.Prog(obj.AJMP)
2049                         p.To.Type = obj.TYPE_BRANCH
2050                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2051                 }
2052         case ssa.BlockExit, ssa.BlockRetJmp:
2053         case ssa.BlockRet:
2054                 s.Prog(obj.ARET)
2055
2056         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2057                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2058                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2059                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2060                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2061                 jmp := blockJump[b.Kind]
2062                 switch next {
2063                 case b.Succs[0].Block():
2064                         s.Br(jmp.invasm, b.Succs[1].Block())
2065                         if jmp.invasmun {
2066                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2067                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2068                         }
2069                 case b.Succs[1].Block():
2070                         s.Br(jmp.asm, b.Succs[0].Block())
2071                         if jmp.asmeq {
2072                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2073                         }
2074                 default:
2075                         if b.Likely != ssa.BranchUnlikely {
2076                                 s.Br(jmp.asm, b.Succs[0].Block())
2077                                 if jmp.asmeq {
2078                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2079                                 }
2080                                 s.Br(obj.AJMP, b.Succs[1].Block())
2081                         } else {
2082                                 s.Br(jmp.invasm, b.Succs[1].Block())
2083                                 if jmp.invasmun {
2084                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2085                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2086                                 }
2087                                 s.Br(obj.AJMP, b.Succs[0].Block())
2088                         }
2089                 }
2090         default:
2091                 b.Fatalf("branch not implemented: %s", b.LongString())
2092         }
2093 }
2094
2095 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2096         p := s.Prog(loadByType(t))
2097         p.From.Type = obj.TYPE_MEM
2098         p.From.Name = obj.NAME_AUTO
2099         p.From.Sym = n.Linksym()
2100         p.From.Offset = n.FrameOffset() + off
2101         p.To.Type = obj.TYPE_REG
2102         p.To.Reg = reg
2103         return p
2104 }
2105
2106 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2107         p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2108         p.To.Name = obj.NAME_PARAM
2109         p.To.Sym = n.Linksym()
2110         p.Pos = p.Pos.WithNotStmt()
2111         return p
2112 }