Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
cmd/compile: leverage cc ops in more cases on ppc64x
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/objw"
12         "cmd/compile/internal/ssa"
13         "cmd/compile/internal/ssagen"
14         "cmd/compile/internal/types"
15         "cmd/internal/obj"
16         "cmd/internal/obj/ppc64"
17         "internal/buildcfg"
18         "math"
19         "strings"
20 )
21
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24         //      flive := b.FlagsLiveAtEnd
25         //      if b.Control != nil && b.Control.Type.IsFlags() {
26         //              flive = true
27         //      }
28         //      for i := len(b.Values) - 1; i >= 0; i-- {
29         //              v := b.Values[i]
30         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
31         //                      // The "mark" is any non-nil Aux value.
32         //                      v.Aux = v
33         //              }
34         //              if v.Type.IsFlags() {
35         //                      flive = false
36         //              }
37         //              for _, a := range v.Args {
38         //                      if a.Type.IsFlags() {
39         //                              flive = true
40         //                      }
41         //              }
42         //      }
43 }
44
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
47         if t.IsFloat() {
48                 switch t.Size() {
49                 case 4:
50                         return ppc64.AFMOVS
51                 case 8:
52                         return ppc64.AFMOVD
53                 }
54         } else {
55                 switch t.Size() {
56                 case 1:
57                         if t.IsSigned() {
58                                 return ppc64.AMOVB
59                         } else {
60                                 return ppc64.AMOVBZ
61                         }
62                 case 2:
63                         if t.IsSigned() {
64                                 return ppc64.AMOVH
65                         } else {
66                                 return ppc64.AMOVHZ
67                         }
68                 case 4:
69                         if t.IsSigned() {
70                                 return ppc64.AMOVW
71                         } else {
72                                 return ppc64.AMOVWZ
73                         }
74                 case 8:
75                         return ppc64.AMOVD
76                 }
77         }
78         panic("bad load type")
79 }
80
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
83         if t.IsFloat() {
84                 switch t.Size() {
85                 case 4:
86                         return ppc64.AFMOVS
87                 case 8:
88                         return ppc64.AFMOVD
89                 }
90         } else {
91                 switch t.Size() {
92                 case 1:
93                         return ppc64.AMOVB
94                 case 2:
95                         return ppc64.AMOVH
96                 case 4:
97                         return ppc64.AMOVW
98                 case 8:
99                         return ppc64.AMOVD
100                 }
101         }
102         panic("bad store type")
103 }
104
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
106         switch v.Op {
107         case ssa.OpCopy:
108                 t := v.Type
109                 if t.IsMemory() {
110                         return
111                 }
112                 x := v.Args[0].Reg()
113                 y := v.Reg()
114                 if x != y {
115                         rt := obj.TYPE_REG
116                         op := ppc64.AMOVD
117
118                         if t.IsFloat() {
119                                 op = ppc64.AFMOVD
120                         }
121                         p := s.Prog(op)
122                         p.From.Type = rt
123                         p.From.Reg = x
124                         p.To.Type = rt
125                         p.To.Reg = y
126                 }
127
128         case ssa.OpPPC64LoweredAtomicAnd8,
129                 ssa.OpPPC64LoweredAtomicAnd32,
130                 ssa.OpPPC64LoweredAtomicOr8,
131                 ssa.OpPPC64LoweredAtomicOr32:
132                 // LWSYNC
133                 // LBAR/LWAR    (Rarg0), Rtmp
134                 // AND/OR       Rarg1, Rtmp
135                 // STBCCC/STWCCC Rtmp, (Rarg0)
136                 // BNE          -3(PC)
137                 ld := ppc64.ALBAR
138                 st := ppc64.ASTBCCC
139                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
140                         ld = ppc64.ALWAR
141                         st = ppc64.ASTWCCC
142                 }
143                 r0 := v.Args[0].Reg()
144                 r1 := v.Args[1].Reg()
145                 // LWSYNC - Assuming shared data not write-through-required nor
146                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
147                 plwsync := s.Prog(ppc64.ALWSYNC)
148                 plwsync.To.Type = obj.TYPE_NONE
149                 // LBAR or LWAR
150                 p := s.Prog(ld)
151                 p.From.Type = obj.TYPE_MEM
152                 p.From.Reg = r0
153                 p.To.Type = obj.TYPE_REG
154                 p.To.Reg = ppc64.REGTMP
155                 // AND/OR reg1,out
156                 p1 := s.Prog(v.Op.Asm())
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.To.Type = obj.TYPE_REG
160                 p1.To.Reg = ppc64.REGTMP
161                 // STBCCC or STWCCC
162                 p2 := s.Prog(st)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGTMP
165                 p2.To.Type = obj.TYPE_MEM
166                 p2.To.Reg = r0
167                 p2.RegTo2 = ppc64.REGTMP
168                 // BNE retry
169                 p3 := s.Prog(ppc64.ABNE)
170                 p3.To.Type = obj.TYPE_BRANCH
171                 p3.To.SetTarget(p)
172
173         case ssa.OpPPC64LoweredAtomicAdd32,
174                 ssa.OpPPC64LoweredAtomicAdd64:
175                 // LWSYNC
176                 // LDAR/LWAR    (Rarg0), Rout
177                 // ADD          Rarg1, Rout
178                 // STDCCC/STWCCC Rout, (Rarg0)
179                 // BNE         -3(PC)
180                 // MOVW         Rout,Rout (if Add32)
181                 ld := ppc64.ALDAR
182                 st := ppc64.ASTDCCC
183                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
184                         ld = ppc64.ALWAR
185                         st = ppc64.ASTWCCC
186                 }
187                 r0 := v.Args[0].Reg()
188                 r1 := v.Args[1].Reg()
189                 out := v.Reg0()
190                 // LWSYNC - Assuming shared data not write-through-required nor
191                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
192                 plwsync := s.Prog(ppc64.ALWSYNC)
193                 plwsync.To.Type = obj.TYPE_NONE
194                 // LDAR or LWAR
195                 p := s.Prog(ld)
196                 p.From.Type = obj.TYPE_MEM
197                 p.From.Reg = r0
198                 p.To.Type = obj.TYPE_REG
199                 p.To.Reg = out
200                 // ADD reg1,out
201                 p1 := s.Prog(ppc64.AADD)
202                 p1.From.Type = obj.TYPE_REG
203                 p1.From.Reg = r1
204                 p1.To.Reg = out
205                 p1.To.Type = obj.TYPE_REG
206                 // STDCCC or STWCCC
207                 p3 := s.Prog(st)
208                 p3.From.Type = obj.TYPE_REG
209                 p3.From.Reg = out
210                 p3.To.Type = obj.TYPE_MEM
211                 p3.To.Reg = r0
212                 // BNE retry
213                 p4 := s.Prog(ppc64.ABNE)
214                 p4.To.Type = obj.TYPE_BRANCH
215                 p4.To.SetTarget(p)
216
217                 // Ensure a 32 bit result
218                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
219                         p5 := s.Prog(ppc64.AMOVWZ)
220                         p5.To.Type = obj.TYPE_REG
221                         p5.To.Reg = out
222                         p5.From.Type = obj.TYPE_REG
223                         p5.From.Reg = out
224                 }
225
226         case ssa.OpPPC64LoweredAtomicExchange32,
227                 ssa.OpPPC64LoweredAtomicExchange64:
228                 // LWSYNC
229                 // LDAR/LWAR    (Rarg0), Rout
230                 // STDCCC/STWCCC Rout, (Rarg0)
231                 // BNE         -2(PC)
232                 // ISYNC
233                 ld := ppc64.ALDAR
234                 st := ppc64.ASTDCCC
235                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
236                         ld = ppc64.ALWAR
237                         st = ppc64.ASTWCCC
238                 }
239                 r0 := v.Args[0].Reg()
240                 r1 := v.Args[1].Reg()
241                 out := v.Reg0()
242                 // LWSYNC - Assuming shared data not write-through-required nor
243                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
244                 plwsync := s.Prog(ppc64.ALWSYNC)
245                 plwsync.To.Type = obj.TYPE_NONE
246                 // LDAR or LWAR
247                 p := s.Prog(ld)
248                 p.From.Type = obj.TYPE_MEM
249                 p.From.Reg = r0
250                 p.To.Type = obj.TYPE_REG
251                 p.To.Reg = out
252                 // STDCCC or STWCCC
253                 p1 := s.Prog(st)
254                 p1.From.Type = obj.TYPE_REG
255                 p1.From.Reg = r1
256                 p1.To.Type = obj.TYPE_MEM
257                 p1.To.Reg = r0
258                 // BNE retry
259                 p2 := s.Prog(ppc64.ABNE)
260                 p2.To.Type = obj.TYPE_BRANCH
261                 p2.To.SetTarget(p)
262                 // ISYNC
263                 pisync := s.Prog(ppc64.AISYNC)
264                 pisync.To.Type = obj.TYPE_NONE
265
266         case ssa.OpPPC64LoweredAtomicLoad8,
267                 ssa.OpPPC64LoweredAtomicLoad32,
268                 ssa.OpPPC64LoweredAtomicLoad64,
269                 ssa.OpPPC64LoweredAtomicLoadPtr:
270                 // SYNC
271                 // MOVB/MOVD/MOVW (Rarg0), Rout
272                 // CMP Rout,Rout
273                 // BNE 1(PC)
274                 // ISYNC
275                 ld := ppc64.AMOVD
276                 cmp := ppc64.ACMP
277                 switch v.Op {
278                 case ssa.OpPPC64LoweredAtomicLoad8:
279                         ld = ppc64.AMOVBZ
280                 case ssa.OpPPC64LoweredAtomicLoad32:
281                         ld = ppc64.AMOVWZ
282                         cmp = ppc64.ACMPW
283                 }
284                 arg0 := v.Args[0].Reg()
285                 out := v.Reg0()
286                 // SYNC when AuxInt == 1; otherwise, load-acquire
287                 if v.AuxInt == 1 {
288                         psync := s.Prog(ppc64.ASYNC)
289                         psync.To.Type = obj.TYPE_NONE
290                 }
291                 // Load
292                 p := s.Prog(ld)
293                 p.From.Type = obj.TYPE_MEM
294                 p.From.Reg = arg0
295                 p.To.Type = obj.TYPE_REG
296                 p.To.Reg = out
297                 // CMP
298                 p1 := s.Prog(cmp)
299                 p1.From.Type = obj.TYPE_REG
300                 p1.From.Reg = out
301                 p1.To.Type = obj.TYPE_REG
302                 p1.To.Reg = out
303                 // BNE
304                 p2 := s.Prog(ppc64.ABNE)
305                 p2.To.Type = obj.TYPE_BRANCH
306                 // ISYNC
307                 pisync := s.Prog(ppc64.AISYNC)
308                 pisync.To.Type = obj.TYPE_NONE
309                 p2.To.SetTarget(pisync)
310
311         case ssa.OpPPC64LoweredAtomicStore8,
312                 ssa.OpPPC64LoweredAtomicStore32,
313                 ssa.OpPPC64LoweredAtomicStore64:
314                 // SYNC or LWSYNC
315                 // MOVB/MOVW/MOVD arg1,(arg0)
316                 st := ppc64.AMOVD
317                 switch v.Op {
318                 case ssa.OpPPC64LoweredAtomicStore8:
319                         st = ppc64.AMOVB
320                 case ssa.OpPPC64LoweredAtomicStore32:
321                         st = ppc64.AMOVW
322                 }
323                 arg0 := v.Args[0].Reg()
324                 arg1 := v.Args[1].Reg()
325                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
326                 // SYNC
327                 syncOp := ppc64.ASYNC
328                 if v.AuxInt == 0 {
329                         syncOp = ppc64.ALWSYNC
330                 }
331                 psync := s.Prog(syncOp)
332                 psync.To.Type = obj.TYPE_NONE
333                 // Store
334                 p := s.Prog(st)
335                 p.To.Type = obj.TYPE_MEM
336                 p.To.Reg = arg0
337                 p.From.Type = obj.TYPE_REG
338                 p.From.Reg = arg1
339
340         case ssa.OpPPC64LoweredAtomicCas64,
341                 ssa.OpPPC64LoweredAtomicCas32:
342                 // MOVD        $0, Rout
343                 // LWSYNC
344                 // loop:
345                 // LDAR        (Rarg0), MutexHint, Rtmp
346                 // CMP         Rarg1, Rtmp
347                 // BNE         end
348                 // STDCCC      Rarg2, (Rarg0)
349                 // BNE         loop
350                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
351                 // MOVD        $1, Rout
352                 // end:
353                 ld := ppc64.ALDAR
354                 st := ppc64.ASTDCCC
355                 cmp := ppc64.ACMP
356                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
357                         ld = ppc64.ALWAR
358                         st = ppc64.ASTWCCC
359                         cmp = ppc64.ACMPW
360                 }
361                 r0 := v.Args[0].Reg()
362                 r1 := v.Args[1].Reg()
363                 r2 := v.Args[2].Reg()
364                 out := v.Reg0()
365                 // Initialize return value to false
366                 p := s.Prog(ppc64.AMOVD)
367                 p.From.Type = obj.TYPE_CONST
368                 p.From.Offset = 0
369                 p.To.Type = obj.TYPE_REG
370                 p.To.Reg = out
371                 // LWSYNC - Assuming shared data not write-through-required nor
372                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
373                 plwsync1 := s.Prog(ppc64.ALWSYNC)
374                 plwsync1.To.Type = obj.TYPE_NONE
375                 // LDAR or LWAR
376                 p0 := s.Prog(ld)
377                 p0.From.Type = obj.TYPE_MEM
378                 p0.From.Reg = r0
379                 p0.To.Type = obj.TYPE_REG
380                 p0.To.Reg = ppc64.REGTMP
381                 // If it is a Compare-and-Swap-Release operation, set the EH field with
382                 // the release hint.
383                 if v.AuxInt == 0 {
384                         p0.SetFrom3Const(0)
385                 }
386                 // CMP reg1,reg2
387                 p1 := s.Prog(cmp)
388                 p1.From.Type = obj.TYPE_REG
389                 p1.From.Reg = r1
390                 p1.To.Reg = ppc64.REGTMP
391                 p1.To.Type = obj.TYPE_REG
392                 // BNE done with return value = false
393                 p2 := s.Prog(ppc64.ABNE)
394                 p2.To.Type = obj.TYPE_BRANCH
395                 // STDCCC or STWCCC
396                 p3 := s.Prog(st)
397                 p3.From.Type = obj.TYPE_REG
398                 p3.From.Reg = r2
399                 p3.To.Type = obj.TYPE_MEM
400                 p3.To.Reg = r0
401                 // BNE retry
402                 p4 := s.Prog(ppc64.ABNE)
403                 p4.To.Type = obj.TYPE_BRANCH
404                 p4.To.SetTarget(p0)
405                 // LWSYNC - Assuming shared data not write-through-required nor
406                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
407                 // If the operation is a CAS-Release, then synchronization is not necessary.
408                 if v.AuxInt != 0 {
409                         plwsync2 := s.Prog(ppc64.ALWSYNC)
410                         plwsync2.To.Type = obj.TYPE_NONE
411                 }
412                 // return value true
413                 p5 := s.Prog(ppc64.AMOVD)
414                 p5.From.Type = obj.TYPE_CONST
415                 p5.From.Offset = 1
416                 p5.To.Type = obj.TYPE_REG
417                 p5.To.Reg = out
418                 // done (label)
419                 p6 := s.Prog(obj.ANOP)
420                 p2.To.SetTarget(p6)
421
422         case ssa.OpPPC64LoweredPubBarrier:
423                 // LWSYNC
424                 s.Prog(v.Op.Asm())
425
426         case ssa.OpPPC64LoweredGetClosurePtr:
427                 // Closure pointer is R11 (already)
428                 ssagen.CheckLoweredGetClosurePtr(v)
429
430         case ssa.OpPPC64LoweredGetCallerSP:
431                 // caller's SP is FixedFrameSize below the address of the first arg
432                 p := s.Prog(ppc64.AMOVD)
433                 p.From.Type = obj.TYPE_ADDR
434                 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
435                 p.From.Name = obj.NAME_PARAM
436                 p.To.Type = obj.TYPE_REG
437                 p.To.Reg = v.Reg()
438
439         case ssa.OpPPC64LoweredGetCallerPC:
440                 p := s.Prog(obj.AGETCALLERPC)
441                 p.To.Type = obj.TYPE_REG
442                 p.To.Reg = v.Reg()
443
444         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
445                 // input is already rounded
446
447         case ssa.OpLoadReg:
448                 loadOp := loadByType(v.Type)
449                 p := s.Prog(loadOp)
450                 ssagen.AddrAuto(&p.From, v.Args[0])
451                 p.To.Type = obj.TYPE_REG
452                 p.To.Reg = v.Reg()
453
454         case ssa.OpStoreReg:
455                 storeOp := storeByType(v.Type)
456                 p := s.Prog(storeOp)
457                 p.From.Type = obj.TYPE_REG
458                 p.From.Reg = v.Args[0].Reg()
459                 ssagen.AddrAuto(&p.To, v)
460
461         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
462                 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
463                 // The loop only runs once.
464                 for _, a := range v.Block.Func.RegArgs {
465                         // Pass the spill/unspill information along to the assembler, offset by size of
466                         // the saved LR slot.
467                         addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
468                         s.FuncInfo().AddSpill(
469                                 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
470                 }
471                 v.Block.Func.RegArgs = nil
472
473                 ssagen.CheckArgReg(v)
474
475         case ssa.OpPPC64DIVD:
476                 // For now,
477                 //
478                 // cmp arg1, -1
479                 // be  ahead
480                 // v = arg0 / arg1
481                 // b over
482                 // ahead: v = - arg0
483                 // over: nop
484                 r := v.Reg()
485                 r0 := v.Args[0].Reg()
486                 r1 := v.Args[1].Reg()
487
488                 p := s.Prog(ppc64.ACMP)
489                 p.From.Type = obj.TYPE_REG
490                 p.From.Reg = r1
491                 p.To.Type = obj.TYPE_CONST
492                 p.To.Offset = -1
493
494                 pbahead := s.Prog(ppc64.ABEQ)
495                 pbahead.To.Type = obj.TYPE_BRANCH
496
497                 p = s.Prog(v.Op.Asm())
498                 p.From.Type = obj.TYPE_REG
499                 p.From.Reg = r1
500                 p.Reg = r0
501                 p.To.Type = obj.TYPE_REG
502                 p.To.Reg = r
503
504                 pbover := s.Prog(obj.AJMP)
505                 pbover.To.Type = obj.TYPE_BRANCH
506
507                 p = s.Prog(ppc64.ANEG)
508                 p.To.Type = obj.TYPE_REG
509                 p.To.Reg = r
510                 p.From.Type = obj.TYPE_REG
511                 p.From.Reg = r0
512                 pbahead.To.SetTarget(p)
513
514                 p = s.Prog(obj.ANOP)
515                 pbover.To.SetTarget(p)
516
517         case ssa.OpPPC64DIVW:
518                 // word-width version of above
519                 r := v.Reg()
520                 r0 := v.Args[0].Reg()
521                 r1 := v.Args[1].Reg()
522
523                 p := s.Prog(ppc64.ACMPW)
524                 p.From.Type = obj.TYPE_REG
525                 p.From.Reg = r1
526                 p.To.Type = obj.TYPE_CONST
527                 p.To.Offset = -1
528
529                 pbahead := s.Prog(ppc64.ABEQ)
530                 pbahead.To.Type = obj.TYPE_BRANCH
531
532                 p = s.Prog(v.Op.Asm())
533                 p.From.Type = obj.TYPE_REG
534                 p.From.Reg = r1
535                 p.Reg = r0
536                 p.To.Type = obj.TYPE_REG
537                 p.To.Reg = r
538
539                 pbover := s.Prog(obj.AJMP)
540                 pbover.To.Type = obj.TYPE_BRANCH
541
542                 p = s.Prog(ppc64.ANEG)
543                 p.To.Type = obj.TYPE_REG
544                 p.To.Reg = r
545                 p.From.Type = obj.TYPE_REG
546                 p.From.Reg = r0
547                 pbahead.To.SetTarget(p)
548
549                 p = s.Prog(obj.ANOP)
550                 pbover.To.SetTarget(p)
551
552         case ssa.OpPPC64CLRLSLWI:
553                 r := v.Reg()
554                 r1 := v.Args[0].Reg()
555                 shifts := v.AuxInt
556                 p := s.Prog(v.Op.Asm())
557                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
558                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
559                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
560                 p.Reg = r1
561                 p.To.Type = obj.TYPE_REG
562                 p.To.Reg = r
563
564         case ssa.OpPPC64CLRLSLDI:
565                 r := v.Reg()
566                 r1 := v.Args[0].Reg()
567                 shifts := v.AuxInt
568                 p := s.Prog(v.Op.Asm())
569                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
570                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
571                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
572                 p.Reg = r1
573                 p.To.Type = obj.TYPE_REG
574                 p.To.Reg = r
575
576                 // Mask has been set as sh
577         case ssa.OpPPC64RLDICL:
578                 r := v.Reg()
579                 r1 := v.Args[0].Reg()
580                 shifts := v.AuxInt
581                 p := s.Prog(v.Op.Asm())
582                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
583                 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
584                 p.Reg = r1
585                 p.To.Type = obj.TYPE_REG
586                 p.To.Reg = r
587
588         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
589                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
590                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
591                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
592                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
593                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
594                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
595                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
596                 r := v.Reg()
597                 r1 := v.Args[0].Reg()
598                 r2 := v.Args[1].Reg()
599                 p := s.Prog(v.Op.Asm())
600                 p.From.Type = obj.TYPE_REG
601                 p.From.Reg = r2
602                 p.Reg = r1
603                 p.To.Type = obj.TYPE_REG
604                 p.To.Reg = r
605
606         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
607                 r1 := v.Args[0].Reg()
608                 r2 := v.Args[1].Reg()
609                 p := s.Prog(v.Op.Asm())
610                 p.From.Type = obj.TYPE_REG
611                 p.From.Reg = r2
612                 p.Reg = r1
613                 p.To.Type = obj.TYPE_REG
614                 p.To.Reg = v.Reg0()
615
616         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
617                 p := s.Prog(v.Op.Asm())
618                 p.From.Type = obj.TYPE_CONST
619                 p.From.Offset = v.AuxInt
620                 p.Reg = v.Args[0].Reg()
621                 p.To.Type = obj.TYPE_REG
622                 p.To.Reg = v.Reg()
623
624                 // Auxint holds encoded rotate + mask
625         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
626                 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
627                 p := s.Prog(v.Op.Asm())
628                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
629                 p.Reg = v.Args[0].Reg()
630                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
631                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
632
633                 // Auxint holds mask
634         case ssa.OpPPC64RLWNM:
635                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
636                 p := s.Prog(v.Op.Asm())
637                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
638                 p.Reg = v.Args[0].Reg()
639                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
640                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
641
642         case ssa.OpPPC64MADDLD:
643                 r := v.Reg()
644                 r1 := v.Args[0].Reg()
645                 r2 := v.Args[1].Reg()
646                 r3 := v.Args[2].Reg()
647                 // r = r1*r2 ± r3
648                 p := s.Prog(v.Op.Asm())
649                 p.From.Type = obj.TYPE_REG
650                 p.From.Reg = r1
651                 p.Reg = r2
652                 p.SetFrom3Reg(r3)
653                 p.To.Type = obj.TYPE_REG
654                 p.To.Reg = r
655
656         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
657                 r := v.Reg()
658                 r1 := v.Args[0].Reg()
659                 r2 := v.Args[1].Reg()
660                 r3 := v.Args[2].Reg()
661                 // r = r1*r2 ± r3
662                 p := s.Prog(v.Op.Asm())
663                 p.From.Type = obj.TYPE_REG
664                 p.From.Reg = r1
665                 p.Reg = r3
666                 p.SetFrom3Reg(r2)
667                 p.To.Type = obj.TYPE_REG
668                 p.To.Reg = r
669
670         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
671                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
672                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
673                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
674                 r := v.Reg()
675                 p := s.Prog(v.Op.Asm())
676                 p.To.Type = obj.TYPE_REG
677                 p.To.Reg = r
678                 p.From.Type = obj.TYPE_REG
679                 p.From.Reg = v.Args[0].Reg()
680
681         case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
682                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
683                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
684                 p := s.Prog(v.Op.Asm())
685                 p.Reg = v.Args[0].Reg()
686                 p.From.Type = obj.TYPE_CONST
687                 p.From.Offset = v.AuxInt
688                 p.To.Type = obj.TYPE_REG
689                 p.To.Reg = v.Reg()
690
691         case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
692                 r := v.Reg0() // CA is the first, implied argument.
693                 r1 := v.Args[0].Reg()
694                 r2 := v.Args[1].Reg()
695                 p := s.Prog(v.Op.Asm())
696                 p.From.Type = obj.TYPE_REG
697                 p.From.Reg = r2
698                 p.Reg = r1
699                 p.To.Type = obj.TYPE_REG
700                 p.To.Reg = r
701
702         case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
703                 p := s.Prog(v.Op.Asm())
704                 p.From.Type = obj.TYPE_REG
705                 p.From.Reg = ppc64.REG_R0
706                 p.To.Type = obj.TYPE_REG
707                 p.To.Reg = v.Reg()
708
709         case ssa.OpPPC64ADDCconst:
710                 p := s.Prog(v.Op.Asm())
711                 p.Reg = v.Args[0].Reg()
712                 p.From.Type = obj.TYPE_CONST
713                 p.From.Offset = v.AuxInt
714                 p.To.Type = obj.TYPE_REG
715                 // Output is a pair, the second is the CA, which is implied.
716                 p.To.Reg = v.Reg0()
717
718         case ssa.OpPPC64SUBCconst:
719                 p := s.Prog(v.Op.Asm())
720                 p.SetFrom3Const(v.AuxInt)
721                 p.From.Type = obj.TYPE_REG
722                 p.From.Reg = v.Args[0].Reg()
723                 p.To.Type = obj.TYPE_REG
724                 p.To.Reg = v.Reg0()
725
726         case ssa.OpPPC64SUBFCconst:
727                 p := s.Prog(v.Op.Asm())
728                 p.SetFrom3Const(v.AuxInt)
729                 p.From.Type = obj.TYPE_REG
730                 p.From.Reg = v.Args[0].Reg()
731                 p.To.Type = obj.TYPE_REG
732                 p.To.Reg = v.Reg()
733
734         case ssa.OpPPC64ANDCCconst:
735                 p := s.Prog(v.Op.Asm())
736                 p.Reg = v.Args[0].Reg()
737                 p.From.Type = obj.TYPE_CONST
738                 p.From.Offset = v.AuxInt
739                 p.To.Type = obj.TYPE_REG
740                 //              p.To.Reg = ppc64.REGTMP // discard result
741                 p.To.Reg = v.Reg0()
742
743         case ssa.OpPPC64MOVDaddr:
744                 switch v.Aux.(type) {
745                 default:
746                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
747                 case nil:
748                         // If aux offset and aux int are both 0, and the same
749                         // input and output regs are used, no instruction
750                         // needs to be generated, since it would just be
751                         // addi rx, rx, 0.
752                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
753                                 p := s.Prog(ppc64.AMOVD)
754                                 p.From.Type = obj.TYPE_ADDR
755                                 p.From.Reg = v.Args[0].Reg()
756                                 p.From.Offset = v.AuxInt
757                                 p.To.Type = obj.TYPE_REG
758                                 p.To.Reg = v.Reg()
759                         }
760
761                 case *obj.LSym, ir.Node:
762                         p := s.Prog(ppc64.AMOVD)
763                         p.From.Type = obj.TYPE_ADDR
764                         p.From.Reg = v.Args[0].Reg()
765                         p.To.Type = obj.TYPE_REG
766                         p.To.Reg = v.Reg()
767                         ssagen.AddAux(&p.From, v)
768
769                 }
770
771         case ssa.OpPPC64MOVDconst:
772                 p := s.Prog(v.Op.Asm())
773                 p.From.Type = obj.TYPE_CONST
774                 p.From.Offset = v.AuxInt
775                 p.To.Type = obj.TYPE_REG
776                 p.To.Reg = v.Reg()
777
778         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
779                 p := s.Prog(v.Op.Asm())
780                 p.From.Type = obj.TYPE_FCONST
781                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
782                 p.To.Type = obj.TYPE_REG
783                 p.To.Reg = v.Reg()
784
785         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
786                 p := s.Prog(v.Op.Asm())
787                 p.From.Type = obj.TYPE_REG
788                 p.From.Reg = v.Args[0].Reg()
789                 p.To.Type = obj.TYPE_REG
790                 p.To.Reg = v.Args[1].Reg()
791
792         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
793                 p := s.Prog(v.Op.Asm())
794                 p.From.Type = obj.TYPE_REG
795                 p.From.Reg = v.Args[0].Reg()
796                 p.To.Type = obj.TYPE_CONST
797                 p.To.Offset = v.AuxInt
798
799         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
800                 // Shift in register to required size
801                 p := s.Prog(v.Op.Asm())
802                 p.From.Type = obj.TYPE_REG
803                 p.From.Reg = v.Args[0].Reg()
804                 p.To.Reg = v.Reg()
805                 p.To.Type = obj.TYPE_REG
806
807         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
808
809                 // MOVDload and MOVWload are DS form instructions that are restricted to
810                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
811                 // then the address of the symbol to be loaded is computed (base + offset)
812                 // and used as the new base register and the offset field in the instruction
813                 // can be set to zero.
814
815                 // This same problem can happen with gostrings since the final offset is not
816                 // known yet, but could be unaligned after the relocation is resolved.
817                 // So gostrings are handled the same way.
818
819                 // This allows the MOVDload and MOVWload to be generated in more cases and
820                 // eliminates some offset and alignment checking in the rules file.
821
822                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
823                 ssagen.AddAux(&fromAddr, v)
824
825                 genAddr := false
826
827                 switch fromAddr.Name {
828                 case obj.NAME_EXTERN, obj.NAME_STATIC:
829                         // Special case for a rule that combines the bytes of gostring.
830                         // The v alignment might seem OK, but we don't want to load it
831                         // using an offset because relocation comes later.
832                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
833                 default:
834                         genAddr = fromAddr.Offset%4 != 0
835                 }
836                 if genAddr {
837                         // Load full address into the temp register.
838                         p := s.Prog(ppc64.AMOVD)
839                         p.From.Type = obj.TYPE_ADDR
840                         p.From.Reg = v.Args[0].Reg()
841                         ssagen.AddAux(&p.From, v)
842                         // Load target using temp as base register
843                         // and offset zero. Setting NAME_NONE
844                         // prevents any extra offsets from being
845                         // added.
846                         p.To.Type = obj.TYPE_REG
847                         p.To.Reg = ppc64.REGTMP
848                         fromAddr.Reg = ppc64.REGTMP
849                         // Clear the offset field and other
850                         // information that might be used
851                         // by the assembler to add to the
852                         // final offset value.
853                         fromAddr.Offset = 0
854                         fromAddr.Name = obj.NAME_NONE
855                         fromAddr.Sym = nil
856                 }
857                 p := s.Prog(v.Op.Asm())
858                 p.From = fromAddr
859                 p.To.Type = obj.TYPE_REG
860                 p.To.Reg = v.Reg()
861
862         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
863                 p := s.Prog(v.Op.Asm())
864                 p.From.Type = obj.TYPE_MEM
865                 p.From.Reg = v.Args[0].Reg()
866                 ssagen.AddAux(&p.From, v)
867                 p.To.Type = obj.TYPE_REG
868                 p.To.Reg = v.Reg()
869
870         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
871                 p := s.Prog(v.Op.Asm())
872                 p.From.Type = obj.TYPE_MEM
873                 p.From.Reg = v.Args[0].Reg()
874                 p.To.Type = obj.TYPE_REG
875                 p.To.Reg = v.Reg()
876
877         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
878                 p := s.Prog(v.Op.Asm())
879                 p.To.Type = obj.TYPE_MEM
880                 p.To.Reg = v.Args[0].Reg()
881                 p.From.Type = obj.TYPE_REG
882                 p.From.Reg = v.Args[1].Reg()
883
884         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
885                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
886                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
887                 p := s.Prog(v.Op.Asm())
888                 p.From.Type = obj.TYPE_MEM
889                 p.From.Reg = v.Args[0].Reg()
890                 p.From.Index = v.Args[1].Reg()
891                 p.To.Type = obj.TYPE_REG
892                 p.To.Reg = v.Reg()
893
894         case ssa.OpPPC64DCBT:
895                 p := s.Prog(v.Op.Asm())
896                 p.From.Type = obj.TYPE_MEM
897                 p.From.Reg = v.Args[0].Reg()
898                 p.To.Type = obj.TYPE_CONST
899                 p.To.Offset = v.AuxInt
900
901         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
902                 p := s.Prog(v.Op.Asm())
903                 p.From.Type = obj.TYPE_REG
904                 p.From.Reg = ppc64.REGZERO
905                 p.To.Type = obj.TYPE_MEM
906                 p.To.Reg = v.Args[0].Reg()
907                 ssagen.AddAux(&p.To, v)
908
909         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
910
911                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
912                 // to offset values that are a multiple of 4. If the offset field is not a
913                 // multiple of 4, then the full address of the store target is computed (base +
914                 // offset) and used as the new base register and the offset in the instruction
915                 // is set to 0.
916
917                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
918                 // and prevents checking of the offset value and alignment in the rules.
919
920                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
921                 ssagen.AddAux(&toAddr, v)
922
923                 if toAddr.Offset%4 != 0 {
924                         p := s.Prog(ppc64.AMOVD)
925                         p.From.Type = obj.TYPE_ADDR
926                         p.From.Reg = v.Args[0].Reg()
927                         ssagen.AddAux(&p.From, v)
928                         p.To.Type = obj.TYPE_REG
929                         p.To.Reg = ppc64.REGTMP
930                         toAddr.Reg = ppc64.REGTMP
931                         // Clear the offset field and other
932                         // information that might be used
933                         // by the assembler to add to the
934                         // final offset value.
935                         toAddr.Offset = 0
936                         toAddr.Name = obj.NAME_NONE
937                         toAddr.Sym = nil
938                 }
939                 p := s.Prog(v.Op.Asm())
940                 p.To = toAddr
941                 p.From.Type = obj.TYPE_REG
942                 if v.Op == ssa.OpPPC64MOVDstorezero {
943                         p.From.Reg = ppc64.REGZERO
944                 } else {
945                         p.From.Reg = v.Args[1].Reg()
946                 }
947
948         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
949                 p := s.Prog(v.Op.Asm())
950                 p.From.Type = obj.TYPE_REG
951                 p.From.Reg = v.Args[1].Reg()
952                 p.To.Type = obj.TYPE_MEM
953                 p.To.Reg = v.Args[0].Reg()
954                 ssagen.AddAux(&p.To, v)
955
956         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
957                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
958                 ssa.OpPPC64MOVHBRstoreidx:
959                 p := s.Prog(v.Op.Asm())
960                 p.From.Type = obj.TYPE_REG
961                 p.From.Reg = v.Args[2].Reg()
962                 p.To.Index = v.Args[1].Reg()
963                 p.To.Type = obj.TYPE_MEM
964                 p.To.Reg = v.Args[0].Reg()
965
966         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
967                 // ISEL, ISELB
968                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
969                 // ISEL only accepts 0, 1, 2 condition values but the others can be
970                 // achieved by swapping operand order.
971                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
972                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
973                 // ISELB is used when a boolean result is needed, returning 0 or 1
974                 p := s.Prog(ppc64.AISEL)
975                 p.To.Type = obj.TYPE_REG
976                 p.To.Reg = v.Reg()
977                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
978                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
979                 if v.Op == ssa.OpPPC64ISEL {
980                         r.Reg = v.Args[1].Reg()
981                 }
982                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
983                 if v.AuxInt > 3 {
984                         p.Reg = r.Reg
985                         p.SetFrom3Reg(v.Args[0].Reg())
986                 } else {
987                         p.Reg = v.Args[0].Reg()
988                         p.SetFrom3(r)
989                 }
990                 p.From.Type = obj.TYPE_CONST
991                 p.From.Offset = v.AuxInt & 3
992
993         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
994                 // The LoweredQuad code generation
995                 // generates STXV instructions on
996                 // power9. The Short variation is used
997                 // if no loop is generated.
998
999                 // sizes >= 64 generate a loop as follows:
1000
1001                 // Set up loop counter in CTR, used by BC
1002                 // XXLXOR clears VS32
1003                 //       XXLXOR VS32,VS32,VS32
1004                 //       MOVD len/64,REG_TMP
1005                 //       MOVD REG_TMP,CTR
1006                 //       loop:
1007                 //       STXV VS32,0(R20)
1008                 //       STXV VS32,16(R20)
1009                 //       STXV VS32,32(R20)
1010                 //       STXV VS32,48(R20)
1011                 //       ADD  $64,R20
1012                 //       BC   16, 0, loop
1013
1014                 // Number of 64-byte loop iterations
1015                 ctr := v.AuxInt / 64
1016
1017                 // Remainder bytes
1018                 rem := v.AuxInt % 64
1019
1020                 // Only generate a loop if there is more
1021                 // than 1 iteration.
1022                 if ctr > 1 {
1023                         // Set up VS32 (V0) to hold 0s
1024                         p := s.Prog(ppc64.AXXLXOR)
1025                         p.From.Type = obj.TYPE_REG
1026                         p.From.Reg = ppc64.REG_VS32
1027                         p.To.Type = obj.TYPE_REG
1028                         p.To.Reg = ppc64.REG_VS32
1029                         p.Reg = ppc64.REG_VS32
1030
1031                         // Set up CTR loop counter
1032                         p = s.Prog(ppc64.AMOVD)
1033                         p.From.Type = obj.TYPE_CONST
1034                         p.From.Offset = ctr
1035                         p.To.Type = obj.TYPE_REG
1036                         p.To.Reg = ppc64.REGTMP
1037
1038                         p = s.Prog(ppc64.AMOVD)
1039                         p.From.Type = obj.TYPE_REG
1040                         p.From.Reg = ppc64.REGTMP
1041                         p.To.Type = obj.TYPE_REG
1042                         p.To.Reg = ppc64.REG_CTR
1043
1044                         // Don't generate padding for
1045                         // loops with few iterations.
1046                         if ctr > 3 {
1047                                 p = s.Prog(obj.APCALIGN)
1048                                 p.From.Type = obj.TYPE_CONST
1049                                 p.From.Offset = 16
1050                         }
1051
1052                         // generate 4 STXVs to zero 64 bytes
1053                         var top *obj.Prog
1054
1055                         p = s.Prog(ppc64.ASTXV)
1056                         p.From.Type = obj.TYPE_REG
1057                         p.From.Reg = ppc64.REG_VS32
1058                         p.To.Type = obj.TYPE_MEM
1059                         p.To.Reg = v.Args[0].Reg()
1060
1061                         //  Save the top of loop
1062                         if top == nil {
1063                                 top = p
1064                         }
1065                         p = s.Prog(ppc64.ASTXV)
1066                         p.From.Type = obj.TYPE_REG
1067                         p.From.Reg = ppc64.REG_VS32
1068                         p.To.Type = obj.TYPE_MEM
1069                         p.To.Reg = v.Args[0].Reg()
1070                         p.To.Offset = 16
1071
1072                         p = s.Prog(ppc64.ASTXV)
1073                         p.From.Type = obj.TYPE_REG
1074                         p.From.Reg = ppc64.REG_VS32
1075                         p.To.Type = obj.TYPE_MEM
1076                         p.To.Reg = v.Args[0].Reg()
1077                         p.To.Offset = 32
1078
1079                         p = s.Prog(ppc64.ASTXV)
1080                         p.From.Type = obj.TYPE_REG
1081                         p.From.Reg = ppc64.REG_VS32
1082                         p.To.Type = obj.TYPE_MEM
1083                         p.To.Reg = v.Args[0].Reg()
1084                         p.To.Offset = 48
1085
1086                         // Increment address for the
1087                         // 64 bytes just zeroed.
1088                         p = s.Prog(ppc64.AADD)
1089                         p.Reg = v.Args[0].Reg()
1090                         p.From.Type = obj.TYPE_CONST
1091                         p.From.Offset = 64
1092                         p.To.Type = obj.TYPE_REG
1093                         p.To.Reg = v.Args[0].Reg()
1094
1095                         // Branch back to top of loop
1096                         // based on CTR
1097                         // BC with BO_BCTR generates bdnz
1098                         p = s.Prog(ppc64.ABC)
1099                         p.From.Type = obj.TYPE_CONST
1100                         p.From.Offset = ppc64.BO_BCTR
1101                         p.Reg = ppc64.REG_CR0LT
1102                         p.To.Type = obj.TYPE_BRANCH
1103                         p.To.SetTarget(top)
1104                 }
1105                 // When ctr == 1 the loop was not generated but
1106                 // there are at least 64 bytes to clear, so add
1107                 // that to the remainder to generate the code
1108                 // to clear those doublewords
1109                 if ctr == 1 {
1110                         rem += 64
1111                 }
1112
1113                 // Clear the remainder starting at offset zero
1114                 offset := int64(0)
1115
1116                 if rem >= 16 && ctr <= 1 {
1117                         // If the XXLXOR hasn't already been
1118                         // generated, do it here to initialize
1119                         // VS32 (V0) to 0.
1120                         p := s.Prog(ppc64.AXXLXOR)
1121                         p.From.Type = obj.TYPE_REG
1122                         p.From.Reg = ppc64.REG_VS32
1123                         p.To.Type = obj.TYPE_REG
1124                         p.To.Reg = ppc64.REG_VS32
1125                         p.Reg = ppc64.REG_VS32
1126                 }
1127                 // Generate STXV for 32 or 64
1128                 // bytes.
1129                 for rem >= 32 {
1130                         p := s.Prog(ppc64.ASTXV)
1131                         p.From.Type = obj.TYPE_REG
1132                         p.From.Reg = ppc64.REG_VS32
1133                         p.To.Type = obj.TYPE_MEM
1134                         p.To.Reg = v.Args[0].Reg()
1135                         p.To.Offset = offset
1136
1137                         p = s.Prog(ppc64.ASTXV)
1138                         p.From.Type = obj.TYPE_REG
1139                         p.From.Reg = ppc64.REG_VS32
1140                         p.To.Type = obj.TYPE_MEM
1141                         p.To.Reg = v.Args[0].Reg()
1142                         p.To.Offset = offset + 16
1143                         offset += 32
1144                         rem -= 32
1145                 }
1146                 // Generate 16 bytes
1147                 if rem >= 16 {
1148                         p := s.Prog(ppc64.ASTXV)
1149                         p.From.Type = obj.TYPE_REG
1150                         p.From.Reg = ppc64.REG_VS32
1151                         p.To.Type = obj.TYPE_MEM
1152                         p.To.Reg = v.Args[0].Reg()
1153                         p.To.Offset = offset
1154                         offset += 16
1155                         rem -= 16
1156                 }
1157
1158                 // first clear as many doublewords as possible
1159                 // then clear remaining sizes as available
1160                 for rem > 0 {
1161                         op, size := ppc64.AMOVB, int64(1)
1162                         switch {
1163                         case rem >= 8:
1164                                 op, size = ppc64.AMOVD, 8
1165                         case rem >= 4:
1166                                 op, size = ppc64.AMOVW, 4
1167                         case rem >= 2:
1168                                 op, size = ppc64.AMOVH, 2
1169                         }
1170                         p := s.Prog(op)
1171                         p.From.Type = obj.TYPE_REG
1172                         p.From.Reg = ppc64.REG_R0
1173                         p.To.Type = obj.TYPE_MEM
1174                         p.To.Reg = v.Args[0].Reg()
1175                         p.To.Offset = offset
1176                         rem -= size
1177                         offset += size
1178                 }
1179
1180         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1181
1182                 // Unaligned data doesn't hurt performance
1183                 // for these instructions on power8.
1184
1185                 // For sizes >= 64 generate a loop as follows:
1186
1187                 // Set up loop counter in CTR, used by BC
1188                 //       XXLXOR VS32,VS32,VS32
1189                 //       MOVD len/32,REG_TMP
1190                 //       MOVD REG_TMP,CTR
1191                 //       MOVD $16,REG_TMP
1192                 //       loop:
1193                 //       STXVD2X VS32,(R0)(R20)
1194                 //       STXVD2X VS32,(R31)(R20)
1195                 //       ADD  $32,R20
1196                 //       BC   16, 0, loop
1197                 //
1198                 // any remainder is done as described below
1199
1200                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1201                 // then handle the remainder
1202                 //      MOVD R0,(R20)
1203                 //      MOVD R0,8(R20)
1204                 // .... etc.
1205                 //
1206                 // the remainder bytes are cleared using one or more
1207                 // of the following instructions with the appropriate
1208                 // offsets depending which instructions are needed
1209                 //
1210                 //      MOVW R0,n1(R20) 4 bytes
1211                 //      MOVH R0,n2(R20) 2 bytes
1212                 //      MOVB R0,n3(R20) 1 byte
1213                 //
1214                 // 7 bytes: MOVW, MOVH, MOVB
1215                 // 6 bytes: MOVW, MOVH
1216                 // 5 bytes: MOVW, MOVB
1217                 // 3 bytes: MOVH, MOVB
1218
1219                 // each loop iteration does 32 bytes
1220                 ctr := v.AuxInt / 32
1221
1222                 // remainder bytes
1223                 rem := v.AuxInt % 32
1224
1225                 // only generate a loop if there is more
1226                 // than 1 iteration.
1227                 if ctr > 1 {
1228                         // Set up VS32 (V0) to hold 0s
1229                         p := s.Prog(ppc64.AXXLXOR)
1230                         p.From.Type = obj.TYPE_REG
1231                         p.From.Reg = ppc64.REG_VS32
1232                         p.To.Type = obj.TYPE_REG
1233                         p.To.Reg = ppc64.REG_VS32
1234                         p.Reg = ppc64.REG_VS32
1235
1236                         // Set up CTR loop counter
1237                         p = s.Prog(ppc64.AMOVD)
1238                         p.From.Type = obj.TYPE_CONST
1239                         p.From.Offset = ctr
1240                         p.To.Type = obj.TYPE_REG
1241                         p.To.Reg = ppc64.REGTMP
1242
1243                         p = s.Prog(ppc64.AMOVD)
1244                         p.From.Type = obj.TYPE_REG
1245                         p.From.Reg = ppc64.REGTMP
1246                         p.To.Type = obj.TYPE_REG
1247                         p.To.Reg = ppc64.REG_CTR
1248
1249                         // Set up R31 to hold index value 16
1250                         p = s.Prog(ppc64.AMOVD)
1251                         p.From.Type = obj.TYPE_CONST
1252                         p.From.Offset = 16
1253                         p.To.Type = obj.TYPE_REG
1254                         p.To.Reg = ppc64.REGTMP
1255
1256                         // Don't add padding for alignment
1257                         // with few loop iterations.
1258                         if ctr > 3 {
1259                                 p = s.Prog(obj.APCALIGN)
1260                                 p.From.Type = obj.TYPE_CONST
1261                                 p.From.Offset = 16
1262                         }
1263
1264                         // generate 2 STXVD2Xs to store 32 bytes (16 bytes each)
1265                         // when this is a loop then the top must be saved
1266                         var top *obj.Prog
1267                         // This is the top of loop
1268
1269                         p = s.Prog(ppc64.ASTXVD2X)
1270                         p.From.Type = obj.TYPE_REG
1271                         p.From.Reg = ppc64.REG_VS32
1272                         p.To.Type = obj.TYPE_MEM
1273                         p.To.Reg = v.Args[0].Reg()
1274                         p.To.Index = ppc64.REGZERO
1275                         // Save the top of loop
1276                         if top == nil {
1277                                 top = p
1278                         }
1279                         p = s.Prog(ppc64.ASTXVD2X)
1280                         p.From.Type = obj.TYPE_REG
1281                         p.From.Reg = ppc64.REG_VS32
1282                         p.To.Type = obj.TYPE_MEM
1283                         p.To.Reg = v.Args[0].Reg()
1284                         p.To.Index = ppc64.REGTMP
1285
1286                         // Increment address for the
1287                         // 4 doublewords just zeroed.
1288                         p = s.Prog(ppc64.AADD)
1289                         p.Reg = v.Args[0].Reg()
1290                         p.From.Type = obj.TYPE_CONST
1291                         p.From.Offset = 32
1292                         p.To.Type = obj.TYPE_REG
1293                         p.To.Reg = v.Args[0].Reg()
1294
1295                         // Branch back to top of loop
1296                         // based on CTR
1297                         // BC with BO_BCTR generates bdnz
1298                         p = s.Prog(ppc64.ABC)
1299                         p.From.Type = obj.TYPE_CONST
1300                         p.From.Offset = ppc64.BO_BCTR
1301                         p.Reg = ppc64.REG_CR0LT
1302                         p.To.Type = obj.TYPE_BRANCH
1303                         p.To.SetTarget(top)
1304                 }
1305
1306                 // when ctr == 1 the loop was not generated but
1307                 // there are at least 32 bytes to clear, so add
1308                 // that to the remainder to generate the code
1309                 // to clear those doublewords
1310                 if ctr == 1 {
1311                         rem += 32
1312                 }
1313
1314                 // clear the remainder starting at offset zero
1315                 offset := int64(0)
1316
1317                 // first clear as many doublewords as possible
1318                 // then clear remaining sizes as available
1319                 for rem > 0 {
1320                         op, size := ppc64.AMOVB, int64(1)
1321                         switch {
1322                         case rem >= 8:
1323                                 op, size = ppc64.AMOVD, 8
1324                         case rem >= 4:
1325                                 op, size = ppc64.AMOVW, 4
1326                         case rem >= 2:
1327                                 op, size = ppc64.AMOVH, 2
1328                         }
1329                         p := s.Prog(op)
1330                         p.From.Type = obj.TYPE_REG
1331                         p.From.Reg = ppc64.REG_R0
1332                         p.To.Type = obj.TYPE_MEM
1333                         p.To.Reg = v.Args[0].Reg()
1334                         p.To.Offset = offset
1335                         rem -= size
1336                         offset += size
1337                 }
1338
1339         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1340
1341                 bytesPerLoop := int64(32)
1342                 // This will be used when moving more
1343                 // than 8 bytes.  Moves start with
1344                 // as many 8 byte moves as possible, then
1345                 // 4, 2, or 1 byte(s) as remaining.  This will
1346                 // work and be efficient for power8 or later.
1347                 // If there are 64 or more bytes, then a
1348                 // loop is generated to move 32 bytes and
1349                 // update the src and dst addresses on each
1350                 // iteration. When < 64 bytes, the appropriate
1351                 // number of moves are generated based on the
1352                 // size.
1353                 // When moving >= 64 bytes a loop is used
1354                 //      MOVD len/32,REG_TMP
1355                 //      MOVD REG_TMP,CTR
1356                 //      MOVD $16,REG_TMP
1357                 // top:
1358                 //      LXVD2X (R0)(R21),VS32
1359                 //      LXVD2X (R31)(R21),VS33
1360                 //      ADD $32,R21
1361                 //      STXVD2X VS32,(R0)(R20)
1362                 //      STXVD2X VS33,(R31)(R20)
1363                 //      ADD $32,R20
1364                 //      BC 16,0,top
1365                 // Bytes not moved by this loop are moved
1366                 // with a combination of the following instructions,
1367                 // starting with the largest sizes and generating as
1368                 // many as needed, using the appropriate offset value.
1369                 //      MOVD  n(R21),R31
1370                 //      MOVD  R31,n(R20)
1371                 //      MOVW  n1(R21),R31
1372                 //      MOVW  R31,n1(R20)
1373                 //      MOVH  n2(R21),R31
1374                 //      MOVH  R31,n2(R20)
1375                 //      MOVB  n3(R21),R31
1376                 //      MOVB  R31,n3(R20)
1377
1378                 // Each loop iteration moves 32 bytes
1379                 ctr := v.AuxInt / bytesPerLoop
1380
1381                 // Remainder after the loop
1382                 rem := v.AuxInt % bytesPerLoop
1383
1384                 dstReg := v.Args[0].Reg()
1385                 srcReg := v.Args[1].Reg()
1386
1387                 // The set of registers used here, must match the clobbered reg list
1388                 // in PPC64Ops.go.
1389                 offset := int64(0)
1390
1391                 // top of the loop
1392                 var top *obj.Prog
1393                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1394                 if ctr > 1 {
1395                         // Set up the CTR
1396                         p := s.Prog(ppc64.AMOVD)
1397                         p.From.Type = obj.TYPE_CONST
1398                         p.From.Offset = ctr
1399                         p.To.Type = obj.TYPE_REG
1400                         p.To.Reg = ppc64.REGTMP
1401
1402                         p = s.Prog(ppc64.AMOVD)
1403                         p.From.Type = obj.TYPE_REG
1404                         p.From.Reg = ppc64.REGTMP
1405                         p.To.Type = obj.TYPE_REG
1406                         p.To.Reg = ppc64.REG_CTR
1407
1408                         // Use REGTMP as index reg
1409                         p = s.Prog(ppc64.AMOVD)
1410                         p.From.Type = obj.TYPE_CONST
1411                         p.From.Offset = 16
1412                         p.To.Type = obj.TYPE_REG
1413                         p.To.Reg = ppc64.REGTMP
1414
1415                         // Don't add padding for
1416                         // alignment with small iteration
1417                         // counts.
1418                         if ctr > 3 {
1419                                 p = s.Prog(obj.APCALIGN)
1420                                 p.From.Type = obj.TYPE_CONST
1421                                 p.From.Offset = 16
1422                         }
1423
1424                         // Generate 16 byte loads and stores.
1425                         // Use temp register for index (16)
1426                         // on the second one.
1427
1428                         p = s.Prog(ppc64.ALXVD2X)
1429                         p.From.Type = obj.TYPE_MEM
1430                         p.From.Reg = srcReg
1431                         p.From.Index = ppc64.REGZERO
1432                         p.To.Type = obj.TYPE_REG
1433                         p.To.Reg = ppc64.REG_VS32
1434                         if top == nil {
1435                                 top = p
1436                         }
1437                         p = s.Prog(ppc64.ALXVD2X)
1438                         p.From.Type = obj.TYPE_MEM
1439                         p.From.Reg = srcReg
1440                         p.From.Index = ppc64.REGTMP
1441                         p.To.Type = obj.TYPE_REG
1442                         p.To.Reg = ppc64.REG_VS33
1443
1444                         // increment the src reg for next iteration
1445                         p = s.Prog(ppc64.AADD)
1446                         p.Reg = srcReg
1447                         p.From.Type = obj.TYPE_CONST
1448                         p.From.Offset = bytesPerLoop
1449                         p.To.Type = obj.TYPE_REG
1450                         p.To.Reg = srcReg
1451
1452                         // generate 16 byte stores
1453                         p = s.Prog(ppc64.ASTXVD2X)
1454                         p.From.Type = obj.TYPE_REG
1455                         p.From.Reg = ppc64.REG_VS32
1456                         p.To.Type = obj.TYPE_MEM
1457                         p.To.Reg = dstReg
1458                         p.To.Index = ppc64.REGZERO
1459
1460                         p = s.Prog(ppc64.ASTXVD2X)
1461                         p.From.Type = obj.TYPE_REG
1462                         p.From.Reg = ppc64.REG_VS33
1463                         p.To.Type = obj.TYPE_MEM
1464                         p.To.Reg = dstReg
1465                         p.To.Index = ppc64.REGTMP
1466
1467                         // increment the dst reg for next iteration
1468                         p = s.Prog(ppc64.AADD)
1469                         p.Reg = dstReg
1470                         p.From.Type = obj.TYPE_CONST
1471                         p.From.Offset = bytesPerLoop
1472                         p.To.Type = obj.TYPE_REG
1473                         p.To.Reg = dstReg
1474
1475                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1476                         // to loop top.
1477                         p = s.Prog(ppc64.ABC)
1478                         p.From.Type = obj.TYPE_CONST
1479                         p.From.Offset = ppc64.BO_BCTR
1480                         p.Reg = ppc64.REG_CR0LT
1481                         p.To.Type = obj.TYPE_BRANCH
1482                         p.To.SetTarget(top)
1483
1484                         // srcReg and dstReg were incremented in the loop, so
1485                         // later instructions start with offset 0.
1486                         offset = int64(0)
1487                 }
1488
1489                 // No loop was generated for one iteration, so
1490                 // add 32 bytes to the remainder to move those bytes.
1491                 if ctr == 1 {
1492                         rem += bytesPerLoop
1493                 }
1494
1495                 if rem >= 16 {
1496                         // Generate 16 byte loads and stores.
1497                         // Use temp register for index (value 16)
1498                         // on the second one.
1499                         p := s.Prog(ppc64.ALXVD2X)
1500                         p.From.Type = obj.TYPE_MEM
1501                         p.From.Reg = srcReg
1502                         p.From.Index = ppc64.REGZERO
1503                         p.To.Type = obj.TYPE_REG
1504                         p.To.Reg = ppc64.REG_VS32
1505
1506                         p = s.Prog(ppc64.ASTXVD2X)
1507                         p.From.Type = obj.TYPE_REG
1508                         p.From.Reg = ppc64.REG_VS32
1509                         p.To.Type = obj.TYPE_MEM
1510                         p.To.Reg = dstReg
1511                         p.To.Index = ppc64.REGZERO
1512
1513                         offset = 16
1514                         rem -= 16
1515
1516                         if rem >= 16 {
1517                                 // Use REGTMP as index reg
1518                                 p := s.Prog(ppc64.AMOVD)
1519                                 p.From.Type = obj.TYPE_CONST
1520                                 p.From.Offset = 16
1521                                 p.To.Type = obj.TYPE_REG
1522                                 p.To.Reg = ppc64.REGTMP
1523
1524                                 p = s.Prog(ppc64.ALXVD2X)
1525                                 p.From.Type = obj.TYPE_MEM
1526                                 p.From.Reg = srcReg
1527                                 p.From.Index = ppc64.REGTMP
1528                                 p.To.Type = obj.TYPE_REG
1529                                 p.To.Reg = ppc64.REG_VS32
1530
1531                                 p = s.Prog(ppc64.ASTXVD2X)
1532                                 p.From.Type = obj.TYPE_REG
1533                                 p.From.Reg = ppc64.REG_VS32
1534                                 p.To.Type = obj.TYPE_MEM
1535                                 p.To.Reg = dstReg
1536                                 p.To.Index = ppc64.REGTMP
1537
1538                                 offset = 32
1539                                 rem -= 16
1540                         }
1541                 }
1542
1543                 // Generate all the remaining load and store pairs, starting with
1544                 // as many 8 byte moves as possible, then 4, 2, 1.
1545                 for rem > 0 {
1546                         op, size := ppc64.AMOVB, int64(1)
1547                         switch {
1548                         case rem >= 8:
1549                                 op, size = ppc64.AMOVD, 8
1550                         case rem >= 4:
1551                                 op, size = ppc64.AMOVWZ, 4
1552                         case rem >= 2:
1553                                 op, size = ppc64.AMOVH, 2
1554                         }
1555                         // Load
1556                         p := s.Prog(op)
1557                         p.To.Type = obj.TYPE_REG
1558                         p.To.Reg = ppc64.REGTMP
1559                         p.From.Type = obj.TYPE_MEM
1560                         p.From.Reg = srcReg
1561                         p.From.Offset = offset
1562
1563                         // Store
1564                         p = s.Prog(op)
1565                         p.From.Type = obj.TYPE_REG
1566                         p.From.Reg = ppc64.REGTMP
1567                         p.To.Type = obj.TYPE_MEM
1568                         p.To.Reg = dstReg
1569                         p.To.Offset = offset
1570                         rem -= size
1571                         offset += size
1572                 }
1573
1574         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1575                 bytesPerLoop := int64(64)
1576                 // This is used when moving more than 8 bytes on power9.
1577                 // Moves start with as many 8 byte moves as possible, then 4, 2, or 1
1578                 // byte(s) as remaining.  This will work and be efficient for power8 or later.
1579                 // If there are 128 or more bytes, then a loop is generated to
1580                 // move 64 bytes and update the src and dst addresses on each
1581                 // iteration. When < 128 bytes, the appropriate number of moves
1582                 // are generated based on the size.
1583                 // When moving >= 128 bytes a loop is used
1584                 //      MOVD len/64,REG_TMP
1585                 //      MOVD REG_TMP,CTR
1586                 // top:
1587                 //      LXV 0(R21),VS32
1588                 //      LXV 16(R21),VS33
1589                 //      STXV VS32,0(R20)
1590                 //      STXV VS33,16(R20)
1591                 //      LXV 32(R21),VS32
1592                 //      LXV 48(R21),VS33
1593                 //      STXV VS32,32(R20)
1594                 //      STXV VS33,48(R20)
1595                 //      ADD $64,R21
1596                 //      ADD $64,R20
1597                 //      BC 16,0,top
1598                 // Bytes not moved by this loop are moved
1599                 // with a combination of the following instructions,
1600                 // starting with the largest sizes and generating as
1601                 // many as needed, using the appropriate offset value.
1602                 //      MOVD  n(R21),R31
1603                 //      MOVD  R31,n(R20)
1604                 //      MOVW  n1(R21),R31
1605                 //      MOVW  R31,n1(R20)
1606                 //      MOVH  n2(R21),R31
1607                 //      MOVH  R31,n2(R20)
1608                 //      MOVB  n3(R21),R31
1609                 //      MOVB  R31,n3(R20)
1610
1611                 // Each loop iteration moves 64 bytes (bytesPerLoop)
1612                 ctr := v.AuxInt / bytesPerLoop
1613
1614                 // Remainder after the loop
1615                 rem := v.AuxInt % bytesPerLoop
1616
1617                 dstReg := v.Args[0].Reg()
1618                 srcReg := v.Args[1].Reg()
1619
1620                 offset := int64(0)
1621
1622                 // top of the loop
1623                 var top *obj.Prog
1624
1625                 // Only generate looping code when loop counter is > 1 for >= 128 bytes
1626                 if ctr > 1 {
1627                         // Set up the CTR
1628                         p := s.Prog(ppc64.AMOVD)
1629                         p.From.Type = obj.TYPE_CONST
1630                         p.From.Offset = ctr
1631                         p.To.Type = obj.TYPE_REG
1632                         p.To.Reg = ppc64.REGTMP
1633
1634                         p = s.Prog(ppc64.AMOVD)
1635                         p.From.Type = obj.TYPE_REG
1636                         p.From.Reg = ppc64.REGTMP
1637                         p.To.Type = obj.TYPE_REG
1638                         p.To.Reg = ppc64.REG_CTR
1639
1640                         p = s.Prog(obj.APCALIGN)
1641                         p.From.Type = obj.TYPE_CONST
1642                         p.From.Offset = 16
1643
1644                         // Generate 16 byte loads and stores.
1645                         p = s.Prog(ppc64.ALXV)
1646                         p.From.Type = obj.TYPE_MEM
1647                         p.From.Reg = srcReg
1648                         p.From.Offset = offset
1649                         p.To.Type = obj.TYPE_REG
1650                         p.To.Reg = ppc64.REG_VS32
1651                         if top == nil {
1652                                 top = p
1653                         }
1654                         p = s.Prog(ppc64.ALXV)
1655                         p.From.Type = obj.TYPE_MEM
1656                         p.From.Reg = srcReg
1657                         p.From.Offset = offset + 16
1658                         p.To.Type = obj.TYPE_REG
1659                         p.To.Reg = ppc64.REG_VS33
1660
1661                         // generate 16 byte stores
1662                         p = s.Prog(ppc64.ASTXV)
1663                         p.From.Type = obj.TYPE_REG
1664                         p.From.Reg = ppc64.REG_VS32
1665                         p.To.Type = obj.TYPE_MEM
1666                         p.To.Reg = dstReg
1667                         p.To.Offset = offset
1668
1669                         p = s.Prog(ppc64.ASTXV)
1670                         p.From.Type = obj.TYPE_REG
1671                         p.From.Reg = ppc64.REG_VS33
1672                         p.To.Type = obj.TYPE_MEM
1673                         p.To.Reg = dstReg
1674                         p.To.Offset = offset + 16
1675
1676                         // Generate 16 byte loads and stores.
1677                         p = s.Prog(ppc64.ALXV)
1678                         p.From.Type = obj.TYPE_MEM
1679                         p.From.Reg = srcReg
1680                         p.From.Offset = offset + 32
1681                         p.To.Type = obj.TYPE_REG
1682                         p.To.Reg = ppc64.REG_VS32
1683
1684                         p = s.Prog(ppc64.ALXV)
1685                         p.From.Type = obj.TYPE_MEM
1686                         p.From.Reg = srcReg
1687                         p.From.Offset = offset + 48
1688                         p.To.Type = obj.TYPE_REG
1689                         p.To.Reg = ppc64.REG_VS33
1690
1691                         // generate 16 byte stores
1692                         p = s.Prog(ppc64.ASTXV)
1693                         p.From.Type = obj.TYPE_REG
1694                         p.From.Reg = ppc64.REG_VS32
1695                         p.To.Type = obj.TYPE_MEM
1696                         p.To.Reg = dstReg
1697                         p.To.Offset = offset + 32
1698
1699                         p = s.Prog(ppc64.ASTXV)
1700                         p.From.Type = obj.TYPE_REG
1701                         p.From.Reg = ppc64.REG_VS33
1702                         p.To.Type = obj.TYPE_MEM
1703                         p.To.Reg = dstReg
1704                         p.To.Offset = offset + 48
1705
1706                         // increment the src reg for next iteration
1707                         p = s.Prog(ppc64.AADD)
1708                         p.Reg = srcReg
1709                         p.From.Type = obj.TYPE_CONST
1710                         p.From.Offset = bytesPerLoop
1711                         p.To.Type = obj.TYPE_REG
1712                         p.To.Reg = srcReg
1713
1714                         // increment the dst reg for next iteration
1715                         p = s.Prog(ppc64.AADD)
1716                         p.Reg = dstReg
1717                         p.From.Type = obj.TYPE_CONST
1718                         p.From.Offset = bytesPerLoop
1719                         p.To.Type = obj.TYPE_REG
1720                         p.To.Reg = dstReg
1721
1722                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1723                         // to loop top.
1724                         p = s.Prog(ppc64.ABC)
1725                         p.From.Type = obj.TYPE_CONST
1726                         p.From.Offset = ppc64.BO_BCTR
1727                         p.Reg = ppc64.REG_CR0LT
1728                         p.To.Type = obj.TYPE_BRANCH
1729                         p.To.SetTarget(top)
1730
1731                         // srcReg and dstReg were incremented in the loop, so
1732                         // later instructions start with offset 0.
1733                         offset = int64(0)
1734                 }
1735
1736                 // No loop was generated for one iteration, so
1737                 // add 64 bytes (bytesPerLoop) to the remainder to move those bytes.
1738                 if ctr == 1 {
1739                         rem += bytesPerLoop
1740                 }
1741                 if rem >= 32 {
1742                         p := s.Prog(ppc64.ALXV)
1743                         p.From.Type = obj.TYPE_MEM
1744                         p.From.Reg = srcReg
1745                         p.To.Type = obj.TYPE_REG
1746                         p.To.Reg = ppc64.REG_VS32
1747
1748                         p = s.Prog(ppc64.ALXV)
1749                         p.From.Type = obj.TYPE_MEM
1750                         p.From.Reg = srcReg
1751                         p.From.Offset = 16
1752                         p.To.Type = obj.TYPE_REG
1753                         p.To.Reg = ppc64.REG_VS33
1754
1755                         p = s.Prog(ppc64.ASTXV)
1756                         p.From.Type = obj.TYPE_REG
1757                         p.From.Reg = ppc64.REG_VS32
1758                         p.To.Type = obj.TYPE_MEM
1759                         p.To.Reg = dstReg
1760
1761                         p = s.Prog(ppc64.ASTXV)
1762                         p.From.Type = obj.TYPE_REG
1763                         p.From.Reg = ppc64.REG_VS33
1764                         p.To.Type = obj.TYPE_MEM
1765                         p.To.Reg = dstReg
1766                         p.To.Offset = 16
1767
1768                         offset = 32
1769                         rem -= 32
1770                 }
1771
1772                 if rem >= 16 {
1773                         // Generate 16 byte loads and stores.
1774                         p := s.Prog(ppc64.ALXV)
1775                         p.From.Type = obj.TYPE_MEM
1776                         p.From.Reg = srcReg
1777                         p.From.Offset = offset
1778                         p.To.Type = obj.TYPE_REG
1779                         p.To.Reg = ppc64.REG_VS32
1780
1781                         p = s.Prog(ppc64.ASTXV)
1782                         p.From.Type = obj.TYPE_REG
1783                         p.From.Reg = ppc64.REG_VS32
1784                         p.To.Type = obj.TYPE_MEM
1785                         p.To.Reg = dstReg
1786                         p.To.Offset = offset
1787
1788                         offset += 16
1789                         rem -= 16
1790
1791                         if rem >= 16 {
1792                                 p := s.Prog(ppc64.ALXV)
1793                                 p.From.Type = obj.TYPE_MEM
1794                                 p.From.Reg = srcReg
1795                                 p.From.Offset = offset
1796                                 p.To.Type = obj.TYPE_REG
1797                                 p.To.Reg = ppc64.REG_VS32
1798
1799                                 p = s.Prog(ppc64.ASTXV)
1800                                 p.From.Type = obj.TYPE_REG
1801                                 p.From.Reg = ppc64.REG_VS32
1802                                 p.To.Type = obj.TYPE_MEM
1803                                 p.To.Reg = dstReg
1804                                 p.To.Offset = offset
1805
1806                                 offset += 16
1807                                 rem -= 16
1808                         }
1809                 }
1810                 // Generate all the remaining load and store pairs, starting with
1811                 // as many 8 byte moves as possible, then 4, 2, 1.
1812                 for rem > 0 {
1813                         op, size := ppc64.AMOVB, int64(1)
1814                         switch {
1815                         case rem >= 8:
1816                                 op, size = ppc64.AMOVD, 8
1817                         case rem >= 4:
1818                                 op, size = ppc64.AMOVWZ, 4
1819                         case rem >= 2:
1820                                 op, size = ppc64.AMOVH, 2
1821                         }
1822                         // Load
1823                         p := s.Prog(op)
1824                         p.To.Type = obj.TYPE_REG
1825                         p.To.Reg = ppc64.REGTMP
1826                         p.From.Type = obj.TYPE_MEM
1827                         p.From.Reg = srcReg
1828                         p.From.Offset = offset
1829
1830                         // Store
1831                         p = s.Prog(op)
1832                         p.From.Type = obj.TYPE_REG
1833                         p.From.Reg = ppc64.REGTMP
1834                         p.To.Type = obj.TYPE_MEM
1835                         p.To.Reg = dstReg
1836                         p.To.Offset = offset
1837                         rem -= size
1838                         offset += size
1839                 }
1840
1841         case ssa.OpPPC64CALLstatic:
1842                 s.Call(v)
1843
1844         case ssa.OpPPC64CALLtail:
1845                 s.TailCall(v)
1846
1847         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1848                 p := s.Prog(ppc64.AMOVD)
1849                 p.From.Type = obj.TYPE_REG
1850                 p.From.Reg = v.Args[0].Reg()
1851                 p.To.Type = obj.TYPE_REG
1852                 p.To.Reg = ppc64.REG_LR
1853
1854                 if v.Args[0].Reg() != ppc64.REG_R12 {
1855                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1856                 }
1857
1858                 pp := s.Call(v)
1859
1860                 // Convert the call into a blrl with hint this is not a subroutine return.
1861                 // The full bclrl opcode must be specified when passing a hint.
1862                 pp.As = ppc64.ABCL
1863                 pp.From.Type = obj.TYPE_CONST
1864                 pp.From.Offset = ppc64.BO_ALWAYS
1865                 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1866                 pp.To.Reg = ppc64.REG_LR
1867                 pp.SetFrom3Const(1)
1868
1869                 if base.Ctxt.Flag_shared {
1870                         // When compiling Go into PIC, the function we just
1871                         // called via pointer might have been implemented in
1872                         // a separate module and so overwritten the TOC
1873                         // pointer in R2; reload it.
1874                         q := s.Prog(ppc64.AMOVD)
1875                         q.From.Type = obj.TYPE_MEM
1876                         q.From.Offset = 24
1877                         q.From.Reg = ppc64.REGSP
1878                         q.To.Type = obj.TYPE_REG
1879                         q.To.Reg = ppc64.REG_R2
1880                 }
1881
1882         case ssa.OpPPC64LoweredWB:
1883                 p := s.Prog(obj.ACALL)
1884                 p.To.Type = obj.TYPE_MEM
1885                 p.To.Name = obj.NAME_EXTERN
1886                 p.To.Sym = v.Aux.(*obj.LSym)
1887
1888         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1889                 p := s.Prog(obj.ACALL)
1890                 p.To.Type = obj.TYPE_MEM
1891                 p.To.Name = obj.NAME_EXTERN
1892                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1893                 s.UseArgs(16) // space used in callee args area by assembly stubs
1894
1895         case ssa.OpPPC64LoweredNilCheck:
1896                 if buildcfg.GOOS == "aix" {
1897                         // CMP Rarg0, R0
1898                         // BNE 2(PC)
1899                         // STW R0, 0(R0)
1900                         // NOP (so the BNE has somewhere to land)
1901
1902                         // CMP Rarg0, R0
1903                         p := s.Prog(ppc64.ACMP)
1904                         p.From.Type = obj.TYPE_REG
1905                         p.From.Reg = v.Args[0].Reg()
1906                         p.To.Type = obj.TYPE_REG
1907                         p.To.Reg = ppc64.REG_R0
1908
1909                         // BNE 2(PC)
1910                         p2 := s.Prog(ppc64.ABNE)
1911                         p2.To.Type = obj.TYPE_BRANCH
1912
1913                         // STW R0, 0(R0)
1914                         // Write at 0 is forbidden and will trigger a SIGSEGV
1915                         p = s.Prog(ppc64.AMOVW)
1916                         p.From.Type = obj.TYPE_REG
1917                         p.From.Reg = ppc64.REG_R0
1918                         p.To.Type = obj.TYPE_MEM
1919                         p.To.Reg = ppc64.REG_R0
1920
1921                         // NOP (so the BNE has somewhere to land)
1922                         nop := s.Prog(obj.ANOP)
1923                         p2.To.SetTarget(nop)
1924
1925                 } else {
1926                         // Issue a load which will fault if arg is nil.
1927                         p := s.Prog(ppc64.AMOVBZ)
1928                         p.From.Type = obj.TYPE_MEM
1929                         p.From.Reg = v.Args[0].Reg()
1930                         ssagen.AddAux(&p.From, v)
1931                         p.To.Type = obj.TYPE_REG
1932                         p.To.Reg = ppc64.REGTMP
1933                 }
1934                 if logopt.Enabled() {
1935                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1936                 }
1937                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1938                         base.WarnfAt(v.Pos, "generated nil check")
1939                 }
1940
1941         // These should be resolved by rules and not make it here.
1942         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1943                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1944                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1945                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1946         case ssa.OpPPC64InvertFlags:
1947                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1948         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1949                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1950         case ssa.OpClobber, ssa.OpClobberReg:
1951                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1952         default:
1953                 v.Fatalf("genValue not implemented: %s", v.LongString())
1954         }
1955 }
1956
1957 var blockJump = [...]struct {
1958         asm, invasm     obj.As
1959         asmeq, invasmun bool
1960 }{
1961         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1962         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1963
1964         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1965         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1966         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1967         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1968
1969         // TODO: need to work FP comparisons into block jumps
1970         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1971         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1972         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1973         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1974 }
1975
1976 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1977         switch b.Kind {
1978         case ssa.BlockDefer:
1979                 // defer returns in R3:
1980                 // 0 if we should continue executing
1981                 // 1 if we should jump to deferreturn call
1982                 p := s.Prog(ppc64.ACMP)
1983                 p.From.Type = obj.TYPE_REG
1984                 p.From.Reg = ppc64.REG_R3
1985                 p.To.Type = obj.TYPE_REG
1986                 p.To.Reg = ppc64.REG_R0
1987
1988                 p = s.Prog(ppc64.ABNE)
1989                 p.To.Type = obj.TYPE_BRANCH
1990                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1991                 if b.Succs[0].Block() != next {
1992                         p := s.Prog(obj.AJMP)
1993                         p.To.Type = obj.TYPE_BRANCH
1994                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1995                 }
1996
1997         case ssa.BlockPlain:
1998                 if b.Succs[0].Block() != next {
1999                         p := s.Prog(obj.AJMP)
2000                         p.To.Type = obj.TYPE_BRANCH
2001                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2002                 }
2003         case ssa.BlockExit, ssa.BlockRetJmp:
2004         case ssa.BlockRet:
2005                 s.Prog(obj.ARET)
2006
2007         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2008                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2009                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2010                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2011                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2012                 jmp := blockJump[b.Kind]
2013                 switch next {
2014                 case b.Succs[0].Block():
2015                         s.Br(jmp.invasm, b.Succs[1].Block())
2016                         if jmp.invasmun {
2017                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2018                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2019                         }
2020                 case b.Succs[1].Block():
2021                         s.Br(jmp.asm, b.Succs[0].Block())
2022                         if jmp.asmeq {
2023                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2024                         }
2025                 default:
2026                         if b.Likely != ssa.BranchUnlikely {
2027                                 s.Br(jmp.asm, b.Succs[0].Block())
2028                                 if jmp.asmeq {
2029                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2030                                 }
2031                                 s.Br(obj.AJMP, b.Succs[1].Block())
2032                         } else {
2033                                 s.Br(jmp.invasm, b.Succs[1].Block())
2034                                 if jmp.invasmun {
2035                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2036                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2037                                 }
2038                                 s.Br(obj.AJMP, b.Succs[0].Block())
2039                         }
2040                 }
2041         default:
2042                 b.Fatalf("branch not implemented: %s", b.LongString())
2043         }
2044 }
2045
2046 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2047         p := s.Prog(loadByType(t))
2048         p.From.Type = obj.TYPE_MEM
2049         p.From.Name = obj.NAME_AUTO
2050         p.From.Sym = n.Linksym()
2051         p.From.Offset = n.FrameOffset() + off
2052         p.To.Type = obj.TYPE_REG
2053         p.To.Reg = reg
2054         return p
2055 }
2056
2057 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2058         p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2059         p.To.Name = obj.NAME_PARAM
2060         p.To.Sym = n.Linksym()
2061         p.Pos = p.Pos.WithNotStmt()
2062         return p
2063 }