Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
cmd/compile: rework PPC64 Mul64uhilo lowering rules
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/objw"
12         "cmd/compile/internal/ssa"
13         "cmd/compile/internal/ssagen"
14         "cmd/compile/internal/types"
15         "cmd/internal/obj"
16         "cmd/internal/obj/ppc64"
17         "internal/buildcfg"
18         "math"
19         "strings"
20 )
21
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24         //      flive := b.FlagsLiveAtEnd
25         //      if b.Control != nil && b.Control.Type.IsFlags() {
26         //              flive = true
27         //      }
28         //      for i := len(b.Values) - 1; i >= 0; i-- {
29         //              v := b.Values[i]
30         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
31         //                      // The "mark" is any non-nil Aux value.
32         //                      v.Aux = v
33         //              }
34         //              if v.Type.IsFlags() {
35         //                      flive = false
36         //              }
37         //              for _, a := range v.Args {
38         //                      if a.Type.IsFlags() {
39         //                              flive = true
40         //                      }
41         //              }
42         //      }
43 }
44
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
47         if t.IsFloat() {
48                 switch t.Size() {
49                 case 4:
50                         return ppc64.AFMOVS
51                 case 8:
52                         return ppc64.AFMOVD
53                 }
54         } else {
55                 switch t.Size() {
56                 case 1:
57                         if t.IsSigned() {
58                                 return ppc64.AMOVB
59                         } else {
60                                 return ppc64.AMOVBZ
61                         }
62                 case 2:
63                         if t.IsSigned() {
64                                 return ppc64.AMOVH
65                         } else {
66                                 return ppc64.AMOVHZ
67                         }
68                 case 4:
69                         if t.IsSigned() {
70                                 return ppc64.AMOVW
71                         } else {
72                                 return ppc64.AMOVWZ
73                         }
74                 case 8:
75                         return ppc64.AMOVD
76                 }
77         }
78         panic("bad load type")
79 }
80
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
83         if t.IsFloat() {
84                 switch t.Size() {
85                 case 4:
86                         return ppc64.AFMOVS
87                 case 8:
88                         return ppc64.AFMOVD
89                 }
90         } else {
91                 switch t.Size() {
92                 case 1:
93                         return ppc64.AMOVB
94                 case 2:
95                         return ppc64.AMOVH
96                 case 4:
97                         return ppc64.AMOVW
98                 case 8:
99                         return ppc64.AMOVD
100                 }
101         }
102         panic("bad store type")
103 }
104
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
106         switch v.Op {
107         case ssa.OpCopy:
108                 t := v.Type
109                 if t.IsMemory() {
110                         return
111                 }
112                 x := v.Args[0].Reg()
113                 y := v.Reg()
114                 if x != y {
115                         rt := obj.TYPE_REG
116                         op := ppc64.AMOVD
117
118                         if t.IsFloat() {
119                                 op = ppc64.AFMOVD
120                         }
121                         p := s.Prog(op)
122                         p.From.Type = rt
123                         p.From.Reg = x
124                         p.To.Type = rt
125                         p.To.Reg = y
126                 }
127
128         case ssa.OpPPC64LoweredAtomicAnd8,
129                 ssa.OpPPC64LoweredAtomicAnd32,
130                 ssa.OpPPC64LoweredAtomicOr8,
131                 ssa.OpPPC64LoweredAtomicOr32:
132                 // LWSYNC
133                 // LBAR/LWAR    (Rarg0), Rtmp
134                 // AND/OR       Rarg1, Rtmp
135                 // STBCCC/STWCCC Rtmp, (Rarg0)
136                 // BNE          -3(PC)
137                 ld := ppc64.ALBAR
138                 st := ppc64.ASTBCCC
139                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
140                         ld = ppc64.ALWAR
141                         st = ppc64.ASTWCCC
142                 }
143                 r0 := v.Args[0].Reg()
144                 r1 := v.Args[1].Reg()
145                 // LWSYNC - Assuming shared data not write-through-required nor
146                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
147                 plwsync := s.Prog(ppc64.ALWSYNC)
148                 plwsync.To.Type = obj.TYPE_NONE
149                 // LBAR or LWAR
150                 p := s.Prog(ld)
151                 p.From.Type = obj.TYPE_MEM
152                 p.From.Reg = r0
153                 p.To.Type = obj.TYPE_REG
154                 p.To.Reg = ppc64.REGTMP
155                 // AND/OR reg1,out
156                 p1 := s.Prog(v.Op.Asm())
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.To.Type = obj.TYPE_REG
160                 p1.To.Reg = ppc64.REGTMP
161                 // STBCCC or STWCCC
162                 p2 := s.Prog(st)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGTMP
165                 p2.To.Type = obj.TYPE_MEM
166                 p2.To.Reg = r0
167                 p2.RegTo2 = ppc64.REGTMP
168                 // BNE retry
169                 p3 := s.Prog(ppc64.ABNE)
170                 p3.To.Type = obj.TYPE_BRANCH
171                 p3.To.SetTarget(p)
172
173         case ssa.OpPPC64LoweredAtomicAdd32,
174                 ssa.OpPPC64LoweredAtomicAdd64:
175                 // LWSYNC
176                 // LDAR/LWAR    (Rarg0), Rout
177                 // ADD          Rarg1, Rout
178                 // STDCCC/STWCCC Rout, (Rarg0)
179                 // BNE         -3(PC)
180                 // MOVWZ        Rout,Rout (if Add32)
181                 ld := ppc64.ALDAR
182                 st := ppc64.ASTDCCC
183                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
184                         ld = ppc64.ALWAR
185                         st = ppc64.ASTWCCC
186                 }
187                 r0 := v.Args[0].Reg()
188                 r1 := v.Args[1].Reg()
189                 out := v.Reg0()
190                 // LWSYNC - Assuming shared data not write-through-required nor
191                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
192                 plwsync := s.Prog(ppc64.ALWSYNC)
193                 plwsync.To.Type = obj.TYPE_NONE
194                 // LDAR or LWAR
195                 p := s.Prog(ld)
196                 p.From.Type = obj.TYPE_MEM
197                 p.From.Reg = r0
198                 p.To.Type = obj.TYPE_REG
199                 p.To.Reg = out
200                 // ADD reg1,out
201                 p1 := s.Prog(ppc64.AADD)
202                 p1.From.Type = obj.TYPE_REG
203                 p1.From.Reg = r1
204                 p1.To.Reg = out
205                 p1.To.Type = obj.TYPE_REG
206                 // STDCCC or STWCCC
207                 p3 := s.Prog(st)
208                 p3.From.Type = obj.TYPE_REG
209                 p3.From.Reg = out
210                 p3.To.Type = obj.TYPE_MEM
211                 p3.To.Reg = r0
212                 // BNE retry
213                 p4 := s.Prog(ppc64.ABNE)
214                 p4.To.Type = obj.TYPE_BRANCH
215                 p4.To.SetTarget(p)
216
217                 // Ensure a 32 bit result
218                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
219                         p5 := s.Prog(ppc64.AMOVWZ)
220                         p5.To.Type = obj.TYPE_REG
221                         p5.To.Reg = out
222                         p5.From.Type = obj.TYPE_REG
223                         p5.From.Reg = out
224                 }
225
226         case ssa.OpPPC64LoweredAtomicExchange32,
227                 ssa.OpPPC64LoweredAtomicExchange64:
228                 // LWSYNC
229                 // LDAR/LWAR    (Rarg0), Rout
230                 // STDCCC/STWCCC Rout, (Rarg0)
231                 // BNE         -2(PC)
232                 // ISYNC
233                 ld := ppc64.ALDAR
234                 st := ppc64.ASTDCCC
235                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
236                         ld = ppc64.ALWAR
237                         st = ppc64.ASTWCCC
238                 }
239                 r0 := v.Args[0].Reg()
240                 r1 := v.Args[1].Reg()
241                 out := v.Reg0()
242                 // LWSYNC - Assuming shared data not write-through-required nor
243                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
244                 plwsync := s.Prog(ppc64.ALWSYNC)
245                 plwsync.To.Type = obj.TYPE_NONE
246                 // LDAR or LWAR
247                 p := s.Prog(ld)
248                 p.From.Type = obj.TYPE_MEM
249                 p.From.Reg = r0
250                 p.To.Type = obj.TYPE_REG
251                 p.To.Reg = out
252                 // STDCCC or STWCCC
253                 p1 := s.Prog(st)
254                 p1.From.Type = obj.TYPE_REG
255                 p1.From.Reg = r1
256                 p1.To.Type = obj.TYPE_MEM
257                 p1.To.Reg = r0
258                 // BNE retry
259                 p2 := s.Prog(ppc64.ABNE)
260                 p2.To.Type = obj.TYPE_BRANCH
261                 p2.To.SetTarget(p)
262                 // ISYNC
263                 pisync := s.Prog(ppc64.AISYNC)
264                 pisync.To.Type = obj.TYPE_NONE
265
266         case ssa.OpPPC64LoweredAtomicLoad8,
267                 ssa.OpPPC64LoweredAtomicLoad32,
268                 ssa.OpPPC64LoweredAtomicLoad64,
269                 ssa.OpPPC64LoweredAtomicLoadPtr:
270                 // SYNC
271                 // MOVB/MOVD/MOVW (Rarg0), Rout
272                 // CMP Rout,Rout
273                 // BNE 1(PC)
274                 // ISYNC
275                 ld := ppc64.AMOVD
276                 cmp := ppc64.ACMP
277                 switch v.Op {
278                 case ssa.OpPPC64LoweredAtomicLoad8:
279                         ld = ppc64.AMOVBZ
280                 case ssa.OpPPC64LoweredAtomicLoad32:
281                         ld = ppc64.AMOVWZ
282                         cmp = ppc64.ACMPW
283                 }
284                 arg0 := v.Args[0].Reg()
285                 out := v.Reg0()
286                 // SYNC when AuxInt == 1; otherwise, load-acquire
287                 if v.AuxInt == 1 {
288                         psync := s.Prog(ppc64.ASYNC)
289                         psync.To.Type = obj.TYPE_NONE
290                 }
291                 // Load
292                 p := s.Prog(ld)
293                 p.From.Type = obj.TYPE_MEM
294                 p.From.Reg = arg0
295                 p.To.Type = obj.TYPE_REG
296                 p.To.Reg = out
297                 // CMP
298                 p1 := s.Prog(cmp)
299                 p1.From.Type = obj.TYPE_REG
300                 p1.From.Reg = out
301                 p1.To.Type = obj.TYPE_REG
302                 p1.To.Reg = out
303                 // BNE
304                 p2 := s.Prog(ppc64.ABNE)
305                 p2.To.Type = obj.TYPE_BRANCH
306                 // ISYNC
307                 pisync := s.Prog(ppc64.AISYNC)
308                 pisync.To.Type = obj.TYPE_NONE
309                 p2.To.SetTarget(pisync)
310
311         case ssa.OpPPC64LoweredAtomicStore8,
312                 ssa.OpPPC64LoweredAtomicStore32,
313                 ssa.OpPPC64LoweredAtomicStore64:
314                 // SYNC or LWSYNC
315                 // MOVB/MOVW/MOVD arg1,(arg0)
316                 st := ppc64.AMOVD
317                 switch v.Op {
318                 case ssa.OpPPC64LoweredAtomicStore8:
319                         st = ppc64.AMOVB
320                 case ssa.OpPPC64LoweredAtomicStore32:
321                         st = ppc64.AMOVW
322                 }
323                 arg0 := v.Args[0].Reg()
324                 arg1 := v.Args[1].Reg()
325                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
326                 // SYNC
327                 syncOp := ppc64.ASYNC
328                 if v.AuxInt == 0 {
329                         syncOp = ppc64.ALWSYNC
330                 }
331                 psync := s.Prog(syncOp)
332                 psync.To.Type = obj.TYPE_NONE
333                 // Store
334                 p := s.Prog(st)
335                 p.To.Type = obj.TYPE_MEM
336                 p.To.Reg = arg0
337                 p.From.Type = obj.TYPE_REG
338                 p.From.Reg = arg1
339
340         case ssa.OpPPC64LoweredAtomicCas64,
341                 ssa.OpPPC64LoweredAtomicCas32:
342                 // MOVD        $0, Rout
343                 // LWSYNC
344                 // loop:
345                 // LDAR        (Rarg0), MutexHint, Rtmp
346                 // CMP         Rarg1, Rtmp
347                 // BNE         end
348                 // STDCCC      Rarg2, (Rarg0)
349                 // BNE         loop
350                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
351                 // MOVD        $1, Rout
352                 // end:
353                 ld := ppc64.ALDAR
354                 st := ppc64.ASTDCCC
355                 cmp := ppc64.ACMP
356                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
357                         ld = ppc64.ALWAR
358                         st = ppc64.ASTWCCC
359                         cmp = ppc64.ACMPW
360                 }
361                 r0 := v.Args[0].Reg()
362                 r1 := v.Args[1].Reg()
363                 r2 := v.Args[2].Reg()
364                 out := v.Reg0()
365                 // Initialize return value to false
366                 p := s.Prog(ppc64.AMOVD)
367                 p.From.Type = obj.TYPE_CONST
368                 p.From.Offset = 0
369                 p.To.Type = obj.TYPE_REG
370                 p.To.Reg = out
371                 // LWSYNC - Assuming shared data not write-through-required nor
372                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
373                 plwsync1 := s.Prog(ppc64.ALWSYNC)
374                 plwsync1.To.Type = obj.TYPE_NONE
375                 // LDAR or LWAR
376                 p0 := s.Prog(ld)
377                 p0.From.Type = obj.TYPE_MEM
378                 p0.From.Reg = r0
379                 p0.To.Type = obj.TYPE_REG
380                 p0.To.Reg = ppc64.REGTMP
381                 // If it is a Compare-and-Swap-Release operation, set the EH field with
382                 // the release hint.
383                 if v.AuxInt == 0 {
384                         p0.SetFrom3Const(0)
385                 }
386                 // CMP reg1,Rtmp
387                 p1 := s.Prog(cmp)
388                 p1.From.Type = obj.TYPE_REG
389                 p1.From.Reg = r1
390                 p1.To.Reg = ppc64.REGTMP
391                 p1.To.Type = obj.TYPE_REG
392                 // BNE done with return value = false
393                 p2 := s.Prog(ppc64.ABNE)
394                 p2.To.Type = obj.TYPE_BRANCH
395                 // STDCCC or STWCCC
396                 p3 := s.Prog(st)
397                 p3.From.Type = obj.TYPE_REG
398                 p3.From.Reg = r2
399                 p3.To.Type = obj.TYPE_MEM
400                 p3.To.Reg = r0
401                 // BNE retry
402                 p4 := s.Prog(ppc64.ABNE)
403                 p4.To.Type = obj.TYPE_BRANCH
404                 p4.To.SetTarget(p0)
405                 // LWSYNC - Assuming shared data not write-through-required nor
406                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
407                 // If the operation is a CAS-Release, then synchronization is not necessary.
408                 if v.AuxInt != 0 {
409                         plwsync2 := s.Prog(ppc64.ALWSYNC)
410                         plwsync2.To.Type = obj.TYPE_NONE
411                 }
412                 // return value true
413                 p5 := s.Prog(ppc64.AMOVD)
414                 p5.From.Type = obj.TYPE_CONST
415                 p5.From.Offset = 1
416                 p5.To.Type = obj.TYPE_REG
417                 p5.To.Reg = out
418                 // done (label)
419                 p6 := s.Prog(obj.ANOP)
420                 p2.To.SetTarget(p6)
421
422         case ssa.OpPPC64LoweredPubBarrier:
423                 // LWSYNC
424                 s.Prog(v.Op.Asm())
425
426         case ssa.OpPPC64LoweredGetClosurePtr:
427                 // Closure pointer is R11 (already)
428                 ssagen.CheckLoweredGetClosurePtr(v)
429
430         case ssa.OpPPC64LoweredGetCallerSP:
431                 // caller's SP is FixedFrameSize below the address of the first arg
432                 p := s.Prog(ppc64.AMOVD)
433                 p.From.Type = obj.TYPE_ADDR
434                 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
435                 p.From.Name = obj.NAME_PARAM
436                 p.To.Type = obj.TYPE_REG
437                 p.To.Reg = v.Reg()
438
439         case ssa.OpPPC64LoweredGetCallerPC:
440                 p := s.Prog(obj.AGETCALLERPC)
441                 p.To.Type = obj.TYPE_REG
442                 p.To.Reg = v.Reg()
443
444         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
445                 // input is already rounded
446
447         case ssa.OpLoadReg:
448                 loadOp := loadByType(v.Type)
449                 p := s.Prog(loadOp)
450                 ssagen.AddrAuto(&p.From, v.Args[0])
451                 p.To.Type = obj.TYPE_REG
452                 p.To.Reg = v.Reg()
453
454         case ssa.OpStoreReg:
455                 storeOp := storeByType(v.Type)
456                 p := s.Prog(storeOp)
457                 p.From.Type = obj.TYPE_REG
458                 p.From.Reg = v.Args[0].Reg()
459                 ssagen.AddrAuto(&p.To, v)
460
461         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
462                 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
463                 // The loop only runs once.
464                 for _, a := range v.Block.Func.RegArgs {
465                         // Pass the spill/unspill information along to the assembler, offset by size of
466                         // the saved LR slot.
467                         addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
468                         s.FuncInfo().AddSpill(
469                                 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
470                 }
471                 v.Block.Func.RegArgs = nil
472
473                 ssagen.CheckArgReg(v)
474
475         case ssa.OpPPC64DIVD:
476                 // For now,
477                 //
478                 // cmp arg1, -1
479                 // be  ahead
480                 // v = arg0 / arg1
481                 // b over
482                 // ahead: v = - arg0
483                 // over: nop
484                 r := v.Reg()
485                 r0 := v.Args[0].Reg()
486                 r1 := v.Args[1].Reg()
487
488                 p := s.Prog(ppc64.ACMP)
489                 p.From.Type = obj.TYPE_REG
490                 p.From.Reg = r1
491                 p.To.Type = obj.TYPE_CONST
492                 p.To.Offset = -1
493
494                 pbahead := s.Prog(ppc64.ABEQ)
495                 pbahead.To.Type = obj.TYPE_BRANCH
496
497                 p = s.Prog(v.Op.Asm())
498                 p.From.Type = obj.TYPE_REG
499                 p.From.Reg = r1
500                 p.Reg = r0
501                 p.To.Type = obj.TYPE_REG
502                 p.To.Reg = r
503
504                 pbover := s.Prog(obj.AJMP)
505                 pbover.To.Type = obj.TYPE_BRANCH
506
507                 p = s.Prog(ppc64.ANEG)
508                 p.To.Type = obj.TYPE_REG
509                 p.To.Reg = r
510                 p.From.Type = obj.TYPE_REG
511                 p.From.Reg = r0
512                 pbahead.To.SetTarget(p)
513
514                 p = s.Prog(obj.ANOP)
515                 pbover.To.SetTarget(p)
516
517         case ssa.OpPPC64DIVW:
518                 // word-width version of above
519                 r := v.Reg()
520                 r0 := v.Args[0].Reg()
521                 r1 := v.Args[1].Reg()
522
523                 p := s.Prog(ppc64.ACMPW)
524                 p.From.Type = obj.TYPE_REG
525                 p.From.Reg = r1
526                 p.To.Type = obj.TYPE_CONST
527                 p.To.Offset = -1
528
529                 pbahead := s.Prog(ppc64.ABEQ)
530                 pbahead.To.Type = obj.TYPE_BRANCH
531
532                 p = s.Prog(v.Op.Asm())
533                 p.From.Type = obj.TYPE_REG
534                 p.From.Reg = r1
535                 p.Reg = r0
536                 p.To.Type = obj.TYPE_REG
537                 p.To.Reg = r
538
539                 pbover := s.Prog(obj.AJMP)
540                 pbover.To.Type = obj.TYPE_BRANCH
541
542                 p = s.Prog(ppc64.ANEG)
543                 p.To.Type = obj.TYPE_REG
544                 p.To.Reg = r
545                 p.From.Type = obj.TYPE_REG
546                 p.From.Reg = r0
547                 pbahead.To.SetTarget(p)
548
549                 p = s.Prog(obj.ANOP)
550                 pbover.To.SetTarget(p)
551
552         case ssa.OpPPC64CLRLSLWI:
553                 r := v.Reg()
554                 r1 := v.Args[0].Reg()
555                 shifts := v.AuxInt
556                 p := s.Prog(v.Op.Asm())
557                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
558                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
559                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
560                 p.Reg = r1
561                 p.To.Type = obj.TYPE_REG
562                 p.To.Reg = r
563
564         case ssa.OpPPC64CLRLSLDI:
565                 r := v.Reg()
566                 r1 := v.Args[0].Reg()
567                 shifts := v.AuxInt
568                 p := s.Prog(v.Op.Asm())
569                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
570                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
571                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
572                 p.Reg = r1
573                 p.To.Type = obj.TYPE_REG
574                 p.To.Reg = r
575
576                 // Mask has been set as sh
577         case ssa.OpPPC64RLDICL:
578                 r := v.Reg()
579                 r1 := v.Args[0].Reg()
580                 shifts := v.AuxInt
581                 p := s.Prog(v.Op.Asm())
582                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
583                 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
584                 p.Reg = r1
585                 p.To.Type = obj.TYPE_REG
586                 p.To.Reg = r
587
588         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
589                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
590                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
591                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
592                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
593                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
594                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
595                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
596                 r := v.Reg()
597                 r1 := v.Args[0].Reg()
598                 r2 := v.Args[1].Reg()
599                 p := s.Prog(v.Op.Asm())
600                 p.From.Type = obj.TYPE_REG
601                 p.From.Reg = r2
602                 p.Reg = r1
603                 p.To.Type = obj.TYPE_REG
604                 p.To.Reg = r
605
606         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
607                 r1 := v.Args[0].Reg()
608                 r2 := v.Args[1].Reg()
609                 p := s.Prog(v.Op.Asm())
610                 p.From.Type = obj.TYPE_REG
611                 p.From.Reg = r2
612                 p.Reg = r1
613                 p.To.Type = obj.TYPE_REG
614                 p.To.Reg = ppc64.REGTMP // result is not needed
615
616         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
617                 p := s.Prog(v.Op.Asm())
618                 p.From.Type = obj.TYPE_CONST
619                 p.From.Offset = v.AuxInt
620                 p.Reg = v.Args[0].Reg()
621                 p.To.Type = obj.TYPE_REG
622                 p.To.Reg = v.Reg()
623
624                 // Auxint holds encoded rotate + mask
625         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
626                 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
627                 p := s.Prog(v.Op.Asm())
628                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
629                 p.Reg = v.Args[0].Reg()
630                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
631                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
632
633                 // Auxint holds mask
634         case ssa.OpPPC64RLWNM:
635                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
636                 p := s.Prog(v.Op.Asm())
637                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
638                 p.Reg = v.Args[0].Reg()
639                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
640                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
641
642         case ssa.OpPPC64MADDLD:
643                 r := v.Reg()
644                 r1 := v.Args[0].Reg()
645                 r2 := v.Args[1].Reg()
646                 r3 := v.Args[2].Reg()
647                 // r = r1*r2 ± r3
648                 p := s.Prog(v.Op.Asm())
649                 p.From.Type = obj.TYPE_REG
650                 p.From.Reg = r1
651                 p.Reg = r2
652                 p.SetFrom3Reg(r3)
653                 p.To.Type = obj.TYPE_REG
654                 p.To.Reg = r
655
656         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
657                 r := v.Reg()
658                 r1 := v.Args[0].Reg()
659                 r2 := v.Args[1].Reg()
660                 r3 := v.Args[2].Reg()
661                 // r = r1*r2 ± r3
662                 p := s.Prog(v.Op.Asm())
663                 p.From.Type = obj.TYPE_REG
664                 p.From.Reg = r1
665                 p.Reg = r3
666                 p.SetFrom3Reg(r2)
667                 p.To.Type = obj.TYPE_REG
668                 p.To.Reg = r
669
670         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
671                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
672                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
673                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
674                 r := v.Reg()
675                 p := s.Prog(v.Op.Asm())
676                 p.To.Type = obj.TYPE_REG
677                 p.To.Reg = r
678                 p.From.Type = obj.TYPE_REG
679                 p.From.Reg = v.Args[0].Reg()
680
681         case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
682                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
683                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
684                 p := s.Prog(v.Op.Asm())
685                 p.Reg = v.Args[0].Reg()
686                 p.From.Type = obj.TYPE_CONST
687                 p.From.Offset = v.AuxInt
688                 p.To.Type = obj.TYPE_REG
689                 p.To.Reg = v.Reg()
690
691         case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
692                 r := v.Reg0() // CA is the first, implied argument.
693                 r1 := v.Args[0].Reg()
694                 r2 := v.Args[1].Reg()
695                 p := s.Prog(v.Op.Asm())
696                 p.From.Type = obj.TYPE_REG
697                 p.From.Reg = r2
698                 p.Reg = r1
699                 p.To.Type = obj.TYPE_REG
700                 p.To.Reg = r
701
702         case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
703                 p := s.Prog(v.Op.Asm())
704                 p.From.Type = obj.TYPE_REG
705                 p.From.Reg = ppc64.REG_R0
706                 p.To.Type = obj.TYPE_REG
707                 p.To.Reg = v.Reg()
708
709         case ssa.OpPPC64ADDCconst:
710                 p := s.Prog(v.Op.Asm())
711                 p.Reg = v.Args[0].Reg()
712                 p.From.Type = obj.TYPE_CONST
713                 p.From.Offset = v.AuxInt
714                 p.To.Type = obj.TYPE_REG
715                 // Output is a pair, the second is the CA, which is implied.
716                 p.To.Reg = v.Reg0()
717
718         case ssa.OpPPC64SUBCconst:
719                 p := s.Prog(v.Op.Asm())
720                 p.SetFrom3Const(v.AuxInt)
721                 p.From.Type = obj.TYPE_REG
722                 p.From.Reg = v.Args[0].Reg()
723                 p.To.Type = obj.TYPE_REG
724                 p.To.Reg = v.Reg0()
725
726         case ssa.OpPPC64SUBFCconst:
727                 p := s.Prog(v.Op.Asm())
728                 p.SetFrom3Const(v.AuxInt)
729                 p.From.Type = obj.TYPE_REG
730                 p.From.Reg = v.Args[0].Reg()
731                 p.To.Type = obj.TYPE_REG
732                 p.To.Reg = v.Reg()
733
734         case ssa.OpPPC64ANDCCconst:
735                 p := s.Prog(v.Op.Asm())
736                 p.Reg = v.Args[0].Reg()
737                 p.From.Type = obj.TYPE_CONST
738                 p.From.Offset = v.AuxInt
739                 p.To.Type = obj.TYPE_REG
740                 //              p.To.Reg = ppc64.REGTMP // discard result
741                 p.To.Reg = v.Reg0()
742
743         case ssa.OpPPC64MOVDaddr:
744                 switch v.Aux.(type) {
745                 default:
746                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
747                 case nil:
748                         // If aux offset and aux int are both 0, and the same
749                         // input and output regs are used, no instruction
750                         // needs to be generated, since it would just be
751                         // addi rx, rx, 0.
752                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
753                                 p := s.Prog(ppc64.AMOVD)
754                                 p.From.Type = obj.TYPE_ADDR
755                                 p.From.Reg = v.Args[0].Reg()
756                                 p.From.Offset = v.AuxInt
757                                 p.To.Type = obj.TYPE_REG
758                                 p.To.Reg = v.Reg()
759                         }
760
761                 case *obj.LSym, ir.Node:
762                         p := s.Prog(ppc64.AMOVD)
763                         p.From.Type = obj.TYPE_ADDR
764                         p.From.Reg = v.Args[0].Reg()
765                         p.To.Type = obj.TYPE_REG
766                         p.To.Reg = v.Reg()
767                         ssagen.AddAux(&p.From, v)
768
769                 }
770
771         case ssa.OpPPC64MOVDconst:
772                 p := s.Prog(v.Op.Asm())
773                 p.From.Type = obj.TYPE_CONST
774                 p.From.Offset = v.AuxInt
775                 p.To.Type = obj.TYPE_REG
776                 p.To.Reg = v.Reg()
777
778         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
779                 p := s.Prog(v.Op.Asm())
780                 p.From.Type = obj.TYPE_FCONST
781                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
782                 p.To.Type = obj.TYPE_REG
783                 p.To.Reg = v.Reg()
784
785         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
786                 p := s.Prog(v.Op.Asm())
787                 p.From.Type = obj.TYPE_REG
788                 p.From.Reg = v.Args[0].Reg()
789                 p.To.Type = obj.TYPE_REG
790                 p.To.Reg = v.Args[1].Reg()
791
792         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
793                 p := s.Prog(v.Op.Asm())
794                 p.From.Type = obj.TYPE_REG
795                 p.From.Reg = v.Args[0].Reg()
796                 p.To.Type = obj.TYPE_CONST
797                 p.To.Offset = v.AuxInt
798
799         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
800                 // Shift in register to required size
801                 p := s.Prog(v.Op.Asm())
802                 p.From.Type = obj.TYPE_REG
803                 p.From.Reg = v.Args[0].Reg()
804                 p.To.Reg = v.Reg()
805                 p.To.Type = obj.TYPE_REG
806
807         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
808
809                 // MOVDload and MOVWload are DS form instructions that are restricted to
810                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
811                 // then the address of the symbol to be loaded is computed (base + offset)
812                 // and used as the new base register and the offset field in the instruction
813                 // can be set to zero.
814
815                 // This same problem can happen with gostrings since the final offset is not
816                 // known yet, but could be unaligned after the relocation is resolved.
817                 // So gostrings are handled the same way.
818
819                 // This allows the MOVDload and MOVWload to be generated in more cases and
820                 // eliminates some offset and alignment checking in the rules file.
821
822                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
823                 ssagen.AddAux(&fromAddr, v)
824
825                 genAddr := false
826
827                 switch fromAddr.Name {
828                 case obj.NAME_EXTERN, obj.NAME_STATIC:
829                         // Special case for a rule that combines the bytes of a gostring.
830                         // The v alignment might seem OK, but we don't want to load it
831                         // using an offset because relocation comes later.
832                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
833                 default:
834                         genAddr = fromAddr.Offset%4 != 0
835                 }
836                 if genAddr {
837                         // Load full address into the temp register.
838                         p := s.Prog(ppc64.AMOVD)
839                         p.From.Type = obj.TYPE_ADDR
840                         p.From.Reg = v.Args[0].Reg()
841                         ssagen.AddAux(&p.From, v)
842                         // Load target using temp as base register
843                         // and offset zero. Setting NAME_NONE
844                         // prevents any extra offsets from being
845                         // added.
846                         p.To.Type = obj.TYPE_REG
847                         p.To.Reg = ppc64.REGTMP
848                         fromAddr.Reg = ppc64.REGTMP
849                         // Clear the offset field and other
850                         // information that might be used
851                         // by the assembler to add to the
852                         // final offset value.
853                         fromAddr.Offset = 0
854                         fromAddr.Name = obj.NAME_NONE
855                         fromAddr.Sym = nil
856                 }
857                 p := s.Prog(v.Op.Asm())
858                 p.From = fromAddr
859                 p.To.Type = obj.TYPE_REG
860                 p.To.Reg = v.Reg()
861                 break
862
863         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
864                 p := s.Prog(v.Op.Asm())
865                 p.From.Type = obj.TYPE_MEM
866                 p.From.Reg = v.Args[0].Reg()
867                 ssagen.AddAux(&p.From, v)
868                 p.To.Type = obj.TYPE_REG
869                 p.To.Reg = v.Reg()
870
871         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
872                 p := s.Prog(v.Op.Asm())
873                 p.From.Type = obj.TYPE_MEM
874                 p.From.Reg = v.Args[0].Reg()
875                 p.To.Type = obj.TYPE_REG
876                 p.To.Reg = v.Reg()
877
878         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
879                 p := s.Prog(v.Op.Asm())
880                 p.To.Type = obj.TYPE_MEM
881                 p.To.Reg = v.Args[0].Reg()
882                 p.From.Type = obj.TYPE_REG
883                 p.From.Reg = v.Args[1].Reg()
884
885         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
886                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
887                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
888                 p := s.Prog(v.Op.Asm())
889                 p.From.Type = obj.TYPE_MEM
890                 p.From.Reg = v.Args[0].Reg()
891                 p.From.Index = v.Args[1].Reg()
892                 p.To.Type = obj.TYPE_REG
893                 p.To.Reg = v.Reg()
894
895         case ssa.OpPPC64DCBT:
896                 p := s.Prog(v.Op.Asm())
897                 p.From.Type = obj.TYPE_MEM
898                 p.From.Reg = v.Args[0].Reg()
899                 p.To.Type = obj.TYPE_CONST
900                 p.To.Offset = v.AuxInt
901
902         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
903                 p := s.Prog(v.Op.Asm())
904                 p.From.Type = obj.TYPE_REG
905                 p.From.Reg = ppc64.REGZERO
906                 p.To.Type = obj.TYPE_MEM
907                 p.To.Reg = v.Args[0].Reg()
908                 ssagen.AddAux(&p.To, v)
909
910         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
911
912                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
913                 // to offset values that are a multiple of 4. If the offset field is not a
914                 // multiple of 4, then the full address of the store target is computed (base +
915                 // offset) and used as the new base register and the offset in the instruction
916                 // is set to 0.
917
918                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
919                 // and prevents checking of the offset value and alignment in the rules.
920
921                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
922                 ssagen.AddAux(&toAddr, v)
923
924                 if toAddr.Offset%4 != 0 {
925                         p := s.Prog(ppc64.AMOVD)
926                         p.From.Type = obj.TYPE_ADDR
927                         p.From.Reg = v.Args[0].Reg()
928                         ssagen.AddAux(&p.From, v)
929                         p.To.Type = obj.TYPE_REG
930                         p.To.Reg = ppc64.REGTMP
931                         toAddr.Reg = ppc64.REGTMP
932                         // Clear the offset field and other
933                         // information that might be used
934                         // by the assembler to add to the
935                         // final offset value.
936                         toAddr.Offset = 0
937                         toAddr.Name = obj.NAME_NONE
938                         toAddr.Sym = nil
939                 }
940                 p := s.Prog(v.Op.Asm())
941                 p.To = toAddr
942                 p.From.Type = obj.TYPE_REG
943                 if v.Op == ssa.OpPPC64MOVDstorezero {
944                         p.From.Reg = ppc64.REGZERO
945                 } else {
946                         p.From.Reg = v.Args[1].Reg()
947                 }
948
949         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
950                 p := s.Prog(v.Op.Asm())
951                 p.From.Type = obj.TYPE_REG
952                 p.From.Reg = v.Args[1].Reg()
953                 p.To.Type = obj.TYPE_MEM
954                 p.To.Reg = v.Args[0].Reg()
955                 ssagen.AddAux(&p.To, v)
956
957         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
958                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
959                 ssa.OpPPC64MOVHBRstoreidx:
960                 p := s.Prog(v.Op.Asm())
961                 p.From.Type = obj.TYPE_REG
962                 p.From.Reg = v.Args[2].Reg()
963                 p.To.Index = v.Args[1].Reg()
964                 p.To.Type = obj.TYPE_MEM
965                 p.To.Reg = v.Args[0].Reg()
966
967         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
968                 // ISEL, ISELB
969                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
970                 // ISEL only accepts 0, 1, 2 condition values but the others can be
971                 // achieved by swapping operand order.
972                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
973                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
974                 // ISELB is used when a boolean result is needed, returning 0 or 1
975                 p := s.Prog(ppc64.AISEL)
976                 p.To.Type = obj.TYPE_REG
977                 p.To.Reg = v.Reg()
978                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
979                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
980                 if v.Op == ssa.OpPPC64ISEL {
981                         r.Reg = v.Args[1].Reg()
982                 }
983                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
984                 if v.AuxInt > 3 {
985                         p.Reg = r.Reg
986                         p.SetFrom3Reg(v.Args[0].Reg())
987                 } else {
988                         p.Reg = v.Args[0].Reg()
989                         p.SetFrom3(r)
990                 }
991                 p.From.Type = obj.TYPE_CONST
992                 p.From.Offset = v.AuxInt & 3
993
994         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
995                 // The LoweredQuad code generation
996                 // generates STXV instructions on
997                 // power9. The Short variation is used
998                 // if no loop is generated.
999
1000                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
1001
1002                 // Set up loop counter in CTR, used by BC
1003                 // XXLXOR clears VS32
1004                 //       XXLXOR VS32,VS32,VS32
1005                 //       MOVD len/64,REG_TMP
1006                 //       MOVD REG_TMP,CTR
1007                 //       loop:
1008                 //       STXV VS32,0(R20)
1009                 //       STXV VS32,16(R20)
1010                 //       STXV VS32,32(R20)
1011                 //       STXV VS32,48(R20)
1012                 //       ADD  $64,R20
1013                 //       BC   16, 0, loop
1014
1015                 // Number of loop iterations (each iteration zeroes 64 bytes)
1016                 ctr := v.AuxInt / 64
1017
1018                 // Remainder bytes
1019                 rem := v.AuxInt % 64
1020
1021                 // Only generate a loop if there is more
1022                 // than 1 iteration.
1023                 if ctr > 1 {
1024                         // Set up VS32 (V0) to hold 0s
1025                         p := s.Prog(ppc64.AXXLXOR)
1026                         p.From.Type = obj.TYPE_REG
1027                         p.From.Reg = ppc64.REG_VS32
1028                         p.To.Type = obj.TYPE_REG
1029                         p.To.Reg = ppc64.REG_VS32
1030                         p.Reg = ppc64.REG_VS32
1031
1032                         // Set up CTR loop counter
1033                         p = s.Prog(ppc64.AMOVD)
1034                         p.From.Type = obj.TYPE_CONST
1035                         p.From.Offset = ctr
1036                         p.To.Type = obj.TYPE_REG
1037                         p.To.Reg = ppc64.REGTMP
1038
1039                         p = s.Prog(ppc64.AMOVD)
1040                         p.From.Type = obj.TYPE_REG
1041                         p.From.Reg = ppc64.REGTMP
1042                         p.To.Type = obj.TYPE_REG
1043                         p.To.Reg = ppc64.REG_CTR
1044
1045                         // Don't generate padding for
1046                         // loops with few iterations.
1047                         if ctr > 3 {
1048                                 p = s.Prog(obj.APCALIGN)
1049                                 p.From.Type = obj.TYPE_CONST
1050                                 p.From.Offset = 16
1051                         }
1052
1053                         // generate 4 STXVs to zero 64 bytes
1054                         var top *obj.Prog
1055
1056                         p = s.Prog(ppc64.ASTXV)
1057                         p.From.Type = obj.TYPE_REG
1058                         p.From.Reg = ppc64.REG_VS32
1059                         p.To.Type = obj.TYPE_MEM
1060                         p.To.Reg = v.Args[0].Reg()
1061
1062                         //  Save the top of loop
1063                         if top == nil {
1064                                 top = p
1065                         }
1066                         p = s.Prog(ppc64.ASTXV)
1067                         p.From.Type = obj.TYPE_REG
1068                         p.From.Reg = ppc64.REG_VS32
1069                         p.To.Type = obj.TYPE_MEM
1070                         p.To.Reg = v.Args[0].Reg()
1071                         p.To.Offset = 16
1072
1073                         p = s.Prog(ppc64.ASTXV)
1074                         p.From.Type = obj.TYPE_REG
1075                         p.From.Reg = ppc64.REG_VS32
1076                         p.To.Type = obj.TYPE_MEM
1077                         p.To.Reg = v.Args[0].Reg()
1078                         p.To.Offset = 32
1079
1080                         p = s.Prog(ppc64.ASTXV)
1081                         p.From.Type = obj.TYPE_REG
1082                         p.From.Reg = ppc64.REG_VS32
1083                         p.To.Type = obj.TYPE_MEM
1084                         p.To.Reg = v.Args[0].Reg()
1085                         p.To.Offset = 48
1086
1087                         // Increment address for the
1088                         // 64 bytes just zeroed.
1089                         p = s.Prog(ppc64.AADD)
1090                         p.Reg = v.Args[0].Reg()
1091                         p.From.Type = obj.TYPE_CONST
1092                         p.From.Offset = 64
1093                         p.To.Type = obj.TYPE_REG
1094                         p.To.Reg = v.Args[0].Reg()
1095
1096                         // Branch back to top of loop
1097                         // based on CTR
1098                         // BC with BO_BCTR generates bdnz
1099                         p = s.Prog(ppc64.ABC)
1100                         p.From.Type = obj.TYPE_CONST
1101                         p.From.Offset = ppc64.BO_BCTR
1102                         p.Reg = ppc64.REG_CR0LT
1103                         p.To.Type = obj.TYPE_BRANCH
1104                         p.To.SetTarget(top)
1105                 }
1106                 // When ctr == 1 the loop was not generated but
1107                 // there are at least 64 bytes to clear, so add
1108                 // that to the remainder to generate the code
1109                 // to clear those doublewords
1110                 if ctr == 1 {
1111                         rem += 64
1112                 }
1113
1114                 // Clear the remainder starting at offset zero
1115                 offset := int64(0)
1116
1117                 if rem >= 16 && ctr <= 1 {
1118                         // If the XXLXOR hasn't already been
1119                         // generated, do it here to initialize
1120                         // VS32 (V0) to 0.
1121                         p := s.Prog(ppc64.AXXLXOR)
1122                         p.From.Type = obj.TYPE_REG
1123                         p.From.Reg = ppc64.REG_VS32
1124                         p.To.Type = obj.TYPE_REG
1125                         p.To.Reg = ppc64.REG_VS32
1126                         p.Reg = ppc64.REG_VS32
1127                 }
1128                 // Generate STXV for 32 or 64
1129                 // bytes.
1130                 for rem >= 32 {
1131                         p := s.Prog(ppc64.ASTXV)
1132                         p.From.Type = obj.TYPE_REG
1133                         p.From.Reg = ppc64.REG_VS32
1134                         p.To.Type = obj.TYPE_MEM
1135                         p.To.Reg = v.Args[0].Reg()
1136                         p.To.Offset = offset
1137
1138                         p = s.Prog(ppc64.ASTXV)
1139                         p.From.Type = obj.TYPE_REG
1140                         p.From.Reg = ppc64.REG_VS32
1141                         p.To.Type = obj.TYPE_MEM
1142                         p.To.Reg = v.Args[0].Reg()
1143                         p.To.Offset = offset + 16
1144                         offset += 32
1145                         rem -= 32
1146                 }
1147                 // Generate 16 bytes
1148                 if rem >= 16 {
1149                         p := s.Prog(ppc64.ASTXV)
1150                         p.From.Type = obj.TYPE_REG
1151                         p.From.Reg = ppc64.REG_VS32
1152                         p.To.Type = obj.TYPE_MEM
1153                         p.To.Reg = v.Args[0].Reg()
1154                         p.To.Offset = offset
1155                         offset += 16
1156                         rem -= 16
1157                 }
1158
1159                 // first clear as many doublewords as possible
1160                 // then clear remaining sizes as available
1161                 for rem > 0 {
1162                         op, size := ppc64.AMOVB, int64(1)
1163                         switch {
1164                         case rem >= 8:
1165                                 op, size = ppc64.AMOVD, 8
1166                         case rem >= 4:
1167                                 op, size = ppc64.AMOVW, 4
1168                         case rem >= 2:
1169                                 op, size = ppc64.AMOVH, 2
1170                         }
1171                         p := s.Prog(op)
1172                         p.From.Type = obj.TYPE_REG
1173                         p.From.Reg = ppc64.REG_R0
1174                         p.To.Type = obj.TYPE_MEM
1175                         p.To.Reg = v.Args[0].Reg()
1176                         p.To.Offset = offset
1177                         rem -= size
1178                         offset += size
1179                 }
1180
1181         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1182
1183                 // Unaligned data doesn't hurt performance
1184                 // for these instructions on power8.
1185
1186                 // For sizes >= 64 generate a loop as follows:
1187
1188                 // Set up loop counter in CTR, used by BC
1189                 //       XXLXOR VS32,VS32,VS32
1190                 //       MOVD len/32,REG_TMP
1191                 //       MOVD REG_TMP,CTR
1192                 //       MOVD $16,REG_TMP
1193                 //       loop:
1194                 //       STXVD2X VS32,(R0)(R20)
1195                 //       STXVD2X VS32,(R31)(R20)
1196                 //       ADD  $32,R20
1197                 //       BC   16, 0, loop
1198                 //
1199                 // any remainder is done as described below
1200
1201                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1202                 // then handle the remainder
1203                 //      MOVD R0,(R20)
1204                 //      MOVD R0,8(R20)
1205                 // .... etc.
1206                 //
1207                 // the remainder bytes are cleared using one or more
1208                 // of the following instructions with the appropriate
1209                 // offsets depending which instructions are needed
1210                 //
1211                 //      MOVW R0,n1(R20) 4 bytes
1212                 //      MOVH R0,n2(R20) 2 bytes
1213                 //      MOVB R0,n3(R20) 1 byte
1214                 //
1215                 // 7 bytes: MOVW, MOVH, MOVB
1216                 // 6 bytes: MOVW, MOVH
1217                 // 5 bytes: MOVW, MOVB
1218                 // 3 bytes: MOVH, MOVB
1219
1220                 // each loop iteration does 32 bytes
1221                 ctr := v.AuxInt / 32
1222
1223                 // remainder bytes
1224                 rem := v.AuxInt % 32
1225
1226                 // only generate a loop if there is more
1227                 // than 1 iteration.
1228                 if ctr > 1 {
1229                         // Set up VS32 (V0) to hold 0s
1230                         p := s.Prog(ppc64.AXXLXOR)
1231                         p.From.Type = obj.TYPE_REG
1232                         p.From.Reg = ppc64.REG_VS32
1233                         p.To.Type = obj.TYPE_REG
1234                         p.To.Reg = ppc64.REG_VS32
1235                         p.Reg = ppc64.REG_VS32
1236
1237                         // Set up CTR loop counter
1238                         p = s.Prog(ppc64.AMOVD)
1239                         p.From.Type = obj.TYPE_CONST
1240                         p.From.Offset = ctr
1241                         p.To.Type = obj.TYPE_REG
1242                         p.To.Reg = ppc64.REGTMP
1243
1244                         p = s.Prog(ppc64.AMOVD)
1245                         p.From.Type = obj.TYPE_REG
1246                         p.From.Reg = ppc64.REGTMP
1247                         p.To.Type = obj.TYPE_REG
1248                         p.To.Reg = ppc64.REG_CTR
1249
1250                         // Set up R31 to hold index value 16
1251                         p = s.Prog(ppc64.AMOVD)
1252                         p.From.Type = obj.TYPE_CONST
1253                         p.From.Offset = 16
1254                         p.To.Type = obj.TYPE_REG
1255                         p.To.Reg = ppc64.REGTMP
1256
1257                         // Don't add padding for alignment
1258                         // with few loop iterations.
1259                         if ctr > 3 {
1260                                 p = s.Prog(obj.APCALIGN)
1261                                 p.From.Type = obj.TYPE_CONST
1262                                 p.From.Offset = 16
1263                         }
1264
1265                         // generate 2 STXVD2Xs to store 32 bytes (16 bytes each)
1266                         // when this is a loop then the top must be saved
1267                         var top *obj.Prog
1268                         // This is the top of loop
1269
1270                         p = s.Prog(ppc64.ASTXVD2X)
1271                         p.From.Type = obj.TYPE_REG
1272                         p.From.Reg = ppc64.REG_VS32
1273                         p.To.Type = obj.TYPE_MEM
1274                         p.To.Reg = v.Args[0].Reg()
1275                         p.To.Index = ppc64.REGZERO
1276                         // Save the top of loop
1277                         if top == nil {
1278                                 top = p
1279                         }
1280                         p = s.Prog(ppc64.ASTXVD2X)
1281                         p.From.Type = obj.TYPE_REG
1282                         p.From.Reg = ppc64.REG_VS32
1283                         p.To.Type = obj.TYPE_MEM
1284                         p.To.Reg = v.Args[0].Reg()
1285                         p.To.Index = ppc64.REGTMP
1286
1287                         // Increment address for the
1288                         // 4 doublewords just zeroed.
1289                         p = s.Prog(ppc64.AADD)
1290                         p.Reg = v.Args[0].Reg()
1291                         p.From.Type = obj.TYPE_CONST
1292                         p.From.Offset = 32
1293                         p.To.Type = obj.TYPE_REG
1294                         p.To.Reg = v.Args[0].Reg()
1295
1296                         // Branch back to top of loop
1297                         // based on CTR
1298                         // BC with BO_BCTR generates bdnz
1299                         p = s.Prog(ppc64.ABC)
1300                         p.From.Type = obj.TYPE_CONST
1301                         p.From.Offset = ppc64.BO_BCTR
1302                         p.Reg = ppc64.REG_CR0LT
1303                         p.To.Type = obj.TYPE_BRANCH
1304                         p.To.SetTarget(top)
1305                 }
1306
1307                 // when ctr == 1 the loop was not generated but
1308                 // there are at least 32 bytes to clear, so add
1309                 // that to the remainder to generate the code
1310                 // to clear those doublewords
1311                 if ctr == 1 {
1312                         rem += 32
1313                 }
1314
1315                 // clear the remainder starting at offset zero
1316                 offset := int64(0)
1317
1318                 // first clear as many doublewords as possible
1319                 // then clear remaining sizes as available
1320                 for rem > 0 {
1321                         op, size := ppc64.AMOVB, int64(1)
1322                         switch {
1323                         case rem >= 8:
1324                                 op, size = ppc64.AMOVD, 8
1325                         case rem >= 4:
1326                                 op, size = ppc64.AMOVW, 4
1327                         case rem >= 2:
1328                                 op, size = ppc64.AMOVH, 2
1329                         }
1330                         p := s.Prog(op)
1331                         p.From.Type = obj.TYPE_REG
1332                         p.From.Reg = ppc64.REG_R0
1333                         p.To.Type = obj.TYPE_MEM
1334                         p.To.Reg = v.Args[0].Reg()
1335                         p.To.Offset = offset
1336                         rem -= size
1337                         offset += size
1338                 }
1339
1340         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1341
1342                 bytesPerLoop := int64(32)
1343                 // This will be used when moving more
1344                 // than 8 bytes.  Moves start with
1345                 // as many 8 byte moves as possible, then
1346                 // 4, 2, or 1 byte(s) as remaining.  This will
1347                 // work and be efficient for power8 or later.
1348                 // If there are 64 or more bytes, then a
1349                 // loop is generated to move 32 bytes and
1350                 // update the src and dst addresses on each
1351                 // iteration. When < 64 bytes, the appropriate
1352                 // number of moves are generated based on the
1353                 // size.
1354                 // When moving >= 64 bytes a loop is used
1355                 //      MOVD len/32,REG_TMP
1356                 //      MOVD REG_TMP,CTR
1357                 //      MOVD $16,REG_TMP
1358                 // top:
1359                 //      LXVD2X (R0)(R21),VS32
1360                 //      LXVD2X (R31)(R21),VS33
1361                 //      ADD $32,R21
1362                 //      STXVD2X VS32,(R0)(R20)
1363                 //      STXVD2X VS33,(R31)(R20)
1364                 //      ADD $32,R20
1365                 //      BC 16,0,top
1366                 // Bytes not moved by this loop are moved
1367                 // with a combination of the following instructions,
1368                 // starting with the largest sizes and generating as
1369                 // many as needed, using the appropriate offset value.
1370                 //      MOVD  n(R21),R31
1371                 //      MOVD  R31,n(R20)
1372                 //      MOVW  n1(R21),R31
1373                 //      MOVW  R31,n1(R20)
1374                 //      MOVH  n2(R21),R31
1375                 //      MOVH  R31,n2(R20)
1376                 //      MOVB  n3(R21),R31
1377                 //      MOVB  R31,n3(R20)
1378
1379                 // Each loop iteration moves 32 bytes
1380                 ctr := v.AuxInt / bytesPerLoop
1381
1382                 // Remainder after the loop
1383                 rem := v.AuxInt % bytesPerLoop
1384
1385                 dstReg := v.Args[0].Reg()
1386                 srcReg := v.Args[1].Reg()
1387
1388                 // The set of registers used here, must match the clobbered reg list
1389                 // in PPC64Ops.go.
1390                 offset := int64(0)
1391
1392                 // top of the loop
1393                 var top *obj.Prog
1394                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1395                 if ctr > 1 {
1396                         // Set up the CTR
1397                         p := s.Prog(ppc64.AMOVD)
1398                         p.From.Type = obj.TYPE_CONST
1399                         p.From.Offset = ctr
1400                         p.To.Type = obj.TYPE_REG
1401                         p.To.Reg = ppc64.REGTMP
1402
1403                         p = s.Prog(ppc64.AMOVD)
1404                         p.From.Type = obj.TYPE_REG
1405                         p.From.Reg = ppc64.REGTMP
1406                         p.To.Type = obj.TYPE_REG
1407                         p.To.Reg = ppc64.REG_CTR
1408
1409                         // Use REGTMP as index reg
1410                         p = s.Prog(ppc64.AMOVD)
1411                         p.From.Type = obj.TYPE_CONST
1412                         p.From.Offset = 16
1413                         p.To.Type = obj.TYPE_REG
1414                         p.To.Reg = ppc64.REGTMP
1415
1416                         // Don't add padding for
1417                         // alignment with small iteration
1418                         // counts.
1419                         if ctr > 3 {
1420                                 p = s.Prog(obj.APCALIGN)
1421                                 p.From.Type = obj.TYPE_CONST
1422                                 p.From.Offset = 16
1423                         }
1424
1425                         // Generate 16 byte loads and stores.
1426                         // Use temp register for index (16)
1427                         // on the second one.
1428
1429                         p = s.Prog(ppc64.ALXVD2X)
1430                         p.From.Type = obj.TYPE_MEM
1431                         p.From.Reg = srcReg
1432                         p.From.Index = ppc64.REGZERO
1433                         p.To.Type = obj.TYPE_REG
1434                         p.To.Reg = ppc64.REG_VS32
1435                         if top == nil {
1436                                 top = p
1437                         }
1438                         p = s.Prog(ppc64.ALXVD2X)
1439                         p.From.Type = obj.TYPE_MEM
1440                         p.From.Reg = srcReg
1441                         p.From.Index = ppc64.REGTMP
1442                         p.To.Type = obj.TYPE_REG
1443                         p.To.Reg = ppc64.REG_VS33
1444
1445                         // increment the src reg for next iteration
1446                         p = s.Prog(ppc64.AADD)
1447                         p.Reg = srcReg
1448                         p.From.Type = obj.TYPE_CONST
1449                         p.From.Offset = bytesPerLoop
1450                         p.To.Type = obj.TYPE_REG
1451                         p.To.Reg = srcReg
1452
1453                         // generate 16 byte stores
1454                         p = s.Prog(ppc64.ASTXVD2X)
1455                         p.From.Type = obj.TYPE_REG
1456                         p.From.Reg = ppc64.REG_VS32
1457                         p.To.Type = obj.TYPE_MEM
1458                         p.To.Reg = dstReg
1459                         p.To.Index = ppc64.REGZERO
1460
1461                         p = s.Prog(ppc64.ASTXVD2X)
1462                         p.From.Type = obj.TYPE_REG
1463                         p.From.Reg = ppc64.REG_VS33
1464                         p.To.Type = obj.TYPE_MEM
1465                         p.To.Reg = dstReg
1466                         p.To.Index = ppc64.REGTMP
1467
1468                         // increment the dst reg for next iteration
1469                         p = s.Prog(ppc64.AADD)
1470                         p.Reg = dstReg
1471                         p.From.Type = obj.TYPE_CONST
1472                         p.From.Offset = bytesPerLoop
1473                         p.To.Type = obj.TYPE_REG
1474                         p.To.Reg = dstReg
1475
1476                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1477                         // to loop top.
1478                         p = s.Prog(ppc64.ABC)
1479                         p.From.Type = obj.TYPE_CONST
1480                         p.From.Offset = ppc64.BO_BCTR
1481                         p.Reg = ppc64.REG_CR0LT
1482                         p.To.Type = obj.TYPE_BRANCH
1483                         p.To.SetTarget(top)
1484
1485                         // srcReg and dstReg were incremented in the loop, so
1486                         // later instructions start with offset 0.
1487                         offset = int64(0)
1488                 }
1489
1490                 // No loop was generated for one iteration, so
1491                 // add 32 bytes to the remainder to move those bytes.
1492                 if ctr == 1 {
1493                         rem += bytesPerLoop
1494                 }
1495
1496                 if rem >= 16 {
1497                         // Generate 16 byte loads and stores.
1498                         // Use temp register for index (value 16)
1499                         // on the second one.
1500                         p := s.Prog(ppc64.ALXVD2X)
1501                         p.From.Type = obj.TYPE_MEM
1502                         p.From.Reg = srcReg
1503                         p.From.Index = ppc64.REGZERO
1504                         p.To.Type = obj.TYPE_REG
1505                         p.To.Reg = ppc64.REG_VS32
1506
1507                         p = s.Prog(ppc64.ASTXVD2X)
1508                         p.From.Type = obj.TYPE_REG
1509                         p.From.Reg = ppc64.REG_VS32
1510                         p.To.Type = obj.TYPE_MEM
1511                         p.To.Reg = dstReg
1512                         p.To.Index = ppc64.REGZERO
1513
1514                         offset = 16
1515                         rem -= 16
1516
1517                         if rem >= 16 {
1518                                 // Use REGTMP as index reg
1519                                 p := s.Prog(ppc64.AMOVD)
1520                                 p.From.Type = obj.TYPE_CONST
1521                                 p.From.Offset = 16
1522                                 p.To.Type = obj.TYPE_REG
1523                                 p.To.Reg = ppc64.REGTMP
1524
1525                                 p = s.Prog(ppc64.ALXVD2X)
1526                                 p.From.Type = obj.TYPE_MEM
1527                                 p.From.Reg = srcReg
1528                                 p.From.Index = ppc64.REGTMP
1529                                 p.To.Type = obj.TYPE_REG
1530                                 p.To.Reg = ppc64.REG_VS32
1531
1532                                 p = s.Prog(ppc64.ASTXVD2X)
1533                                 p.From.Type = obj.TYPE_REG
1534                                 p.From.Reg = ppc64.REG_VS32
1535                                 p.To.Type = obj.TYPE_MEM
1536                                 p.To.Reg = dstReg
1537                                 p.To.Index = ppc64.REGTMP
1538
1539                                 offset = 32
1540                                 rem -= 16
1541                         }
1542                 }
1543
1544                 // Generate all the remaining load and store pairs, starting with
1545                 // as many 8 byte moves as possible, then 4, 2, 1.
1546                 for rem > 0 {
1547                         op, size := ppc64.AMOVB, int64(1)
1548                         switch {
1549                         case rem >= 8:
1550                                 op, size = ppc64.AMOVD, 8
1551                         case rem >= 4:
1552                                 op, size = ppc64.AMOVWZ, 4
1553                         case rem >= 2:
1554                                 op, size = ppc64.AMOVH, 2
1555                         }
1556                         // Load
1557                         p := s.Prog(op)
1558                         p.To.Type = obj.TYPE_REG
1559                         p.To.Reg = ppc64.REGTMP
1560                         p.From.Type = obj.TYPE_MEM
1561                         p.From.Reg = srcReg
1562                         p.From.Offset = offset
1563
1564                         // Store
1565                         p = s.Prog(op)
1566                         p.From.Type = obj.TYPE_REG
1567                         p.From.Reg = ppc64.REGTMP
1568                         p.To.Type = obj.TYPE_MEM
1569                         p.To.Reg = dstReg
1570                         p.To.Offset = offset
1571                         rem -= size
1572                         offset += size
1573                 }
1574
1575         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1576                 bytesPerLoop := int64(64)
1577                 // This is used when moving more
1578                 // than 8 bytes on power9.  Moves start with
1579                 // as many 8 byte moves as possible, then
1580                 // 4, 2, or 1 byte(s) as remaining.  This will
1581                 // work and be efficient for power8 or later.
1582                 // If there are 64 or more bytes, then a
1583                 // loop is generated to move 32 bytes and
1584                 // update the src and dst addresses on each
1585                 // iteration. When < 64 bytes, the appropriate
1586                 // number of moves are generated based on the
1587                 // size.
1588                 // When moving >= 64 bytes a loop is used
1589                 //      MOVD len/32,REG_TMP
1590                 //      MOVD REG_TMP,CTR
1591                 // top:
1592                 //      LXV 0(R21),VS32
1593                 //      LXV 16(R21),VS33
1594                 //      ADD $32,R21
1595                 //      STXV VS32,0(R20)
1596                 //      STXV VS33,16(R20)
1597                 //      ADD $32,R20
1598                 //      BC 16,0,top
1599                 // Bytes not moved by this loop are moved
1600                 // with a combination of the following instructions,
1601                 // starting with the largest sizes and generating as
1602                 // many as needed, using the appropriate offset value.
1603                 //      MOVD  n(R21),R31
1604                 //      MOVD  R31,n(R20)
1605                 //      MOVW  n1(R21),R31
1606                 //      MOVW  R31,n1(R20)
1607                 //      MOVH  n2(R21),R31
1608                 //      MOVH  R31,n2(R20)
1609                 //      MOVB  n3(R21),R31
1610                 //      MOVB  R31,n3(R20)
1611
1612                 // Each loop iteration moves 64 bytes (bytesPerLoop)
1613                 ctr := v.AuxInt / bytesPerLoop
1614
1615                 // Remainder after the loop
1616                 rem := v.AuxInt % bytesPerLoop
1617
1618                 dstReg := v.Args[0].Reg()
1619                 srcReg := v.Args[1].Reg()
1620
1621                 offset := int64(0)
1622
1623                 // top of the loop
1624                 var top *obj.Prog
1625
1626                 // Only generate looping code when loop counter is > 1, i.e. for >= 128 bytes
1627                 if ctr > 1 {
1628                         // Set up the CTR
1629                         p := s.Prog(ppc64.AMOVD)
1630                         p.From.Type = obj.TYPE_CONST
1631                         p.From.Offset = ctr
1632                         p.To.Type = obj.TYPE_REG
1633                         p.To.Reg = ppc64.REGTMP
1634
1635                         p = s.Prog(ppc64.AMOVD)
1636                         p.From.Type = obj.TYPE_REG
1637                         p.From.Reg = ppc64.REGTMP
1638                         p.To.Type = obj.TYPE_REG
1639                         p.To.Reg = ppc64.REG_CTR
1640
1641                         p = s.Prog(obj.APCALIGN)
1642                         p.From.Type = obj.TYPE_CONST
1643                         p.From.Offset = 16
1644
1645                         // Generate 16 byte loads and stores.
1646                         p = s.Prog(ppc64.ALXV)
1647                         p.From.Type = obj.TYPE_MEM
1648                         p.From.Reg = srcReg
1649                         p.From.Offset = offset
1650                         p.To.Type = obj.TYPE_REG
1651                         p.To.Reg = ppc64.REG_VS32
1652                         if top == nil {
1653                                 top = p
1654                         }
1655                         p = s.Prog(ppc64.ALXV)
1656                         p.From.Type = obj.TYPE_MEM
1657                         p.From.Reg = srcReg
1658                         p.From.Offset = offset + 16
1659                         p.To.Type = obj.TYPE_REG
1660                         p.To.Reg = ppc64.REG_VS33
1661
1662                         // generate 16 byte stores
1663                         p = s.Prog(ppc64.ASTXV)
1664                         p.From.Type = obj.TYPE_REG
1665                         p.From.Reg = ppc64.REG_VS32
1666                         p.To.Type = obj.TYPE_MEM
1667                         p.To.Reg = dstReg
1668                         p.To.Offset = offset
1669
1670                         p = s.Prog(ppc64.ASTXV)
1671                         p.From.Type = obj.TYPE_REG
1672                         p.From.Reg = ppc64.REG_VS33
1673                         p.To.Type = obj.TYPE_MEM
1674                         p.To.Reg = dstReg
1675                         p.To.Offset = offset + 16
1676
1677                         // Generate 16 byte loads and stores.
1678                         p = s.Prog(ppc64.ALXV)
1679                         p.From.Type = obj.TYPE_MEM
1680                         p.From.Reg = srcReg
1681                         p.From.Offset = offset + 32
1682                         p.To.Type = obj.TYPE_REG
1683                         p.To.Reg = ppc64.REG_VS32
1684
1685                         p = s.Prog(ppc64.ALXV)
1686                         p.From.Type = obj.TYPE_MEM
1687                         p.From.Reg = srcReg
1688                         p.From.Offset = offset + 48
1689                         p.To.Type = obj.TYPE_REG
1690                         p.To.Reg = ppc64.REG_VS33
1691
1692                         // generate 16 byte stores
1693                         p = s.Prog(ppc64.ASTXV)
1694                         p.From.Type = obj.TYPE_REG
1695                         p.From.Reg = ppc64.REG_VS32
1696                         p.To.Type = obj.TYPE_MEM
1697                         p.To.Reg = dstReg
1698                         p.To.Offset = offset + 32
1699
1700                         p = s.Prog(ppc64.ASTXV)
1701                         p.From.Type = obj.TYPE_REG
1702                         p.From.Reg = ppc64.REG_VS33
1703                         p.To.Type = obj.TYPE_MEM
1704                         p.To.Reg = dstReg
1705                         p.To.Offset = offset + 48
1706
1707                         // increment the src reg for next iteration
1708                         p = s.Prog(ppc64.AADD)
1709                         p.Reg = srcReg
1710                         p.From.Type = obj.TYPE_CONST
1711                         p.From.Offset = bytesPerLoop
1712                         p.To.Type = obj.TYPE_REG
1713                         p.To.Reg = srcReg
1714
1715                         // increment the dst reg for next iteration
1716                         p = s.Prog(ppc64.AADD)
1717                         p.Reg = dstReg
1718                         p.From.Type = obj.TYPE_CONST
1719                         p.From.Offset = bytesPerLoop
1720                         p.To.Type = obj.TYPE_REG
1721                         p.To.Reg = dstReg
1722
1723                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1724                         // to loop top.
1725                         p = s.Prog(ppc64.ABC)
1726                         p.From.Type = obj.TYPE_CONST
1727                         p.From.Offset = ppc64.BO_BCTR
1728                         p.Reg = ppc64.REG_CR0LT
1729                         p.To.Type = obj.TYPE_BRANCH
1730                         p.To.SetTarget(top)
1731
1732                         // srcReg and dstReg were incremented in the loop, so
1733                         // later instructions start with offset 0.
1734                         offset = int64(0)
1735                 }
1736
1737                 // No loop was generated for one iteration, so
1738                 // add 64 bytes (bytesPerLoop) to the remainder to move those bytes.
1739                 if ctr == 1 {
1740                         rem += bytesPerLoop
1741                 }
1742                 if rem >= 32 {
1743                         p := s.Prog(ppc64.ALXV)
1744                         p.From.Type = obj.TYPE_MEM
1745                         p.From.Reg = srcReg
1746                         p.To.Type = obj.TYPE_REG
1747                         p.To.Reg = ppc64.REG_VS32
1748
1749                         p = s.Prog(ppc64.ALXV)
1750                         p.From.Type = obj.TYPE_MEM
1751                         p.From.Reg = srcReg
1752                         p.From.Offset = 16
1753                         p.To.Type = obj.TYPE_REG
1754                         p.To.Reg = ppc64.REG_VS33
1755
1756                         p = s.Prog(ppc64.ASTXV)
1757                         p.From.Type = obj.TYPE_REG
1758                         p.From.Reg = ppc64.REG_VS32
1759                         p.To.Type = obj.TYPE_MEM
1760                         p.To.Reg = dstReg
1761
1762                         p = s.Prog(ppc64.ASTXV)
1763                         p.From.Type = obj.TYPE_REG
1764                         p.From.Reg = ppc64.REG_VS33
1765                         p.To.Type = obj.TYPE_MEM
1766                         p.To.Reg = dstReg
1767                         p.To.Offset = 16
1768
1769                         offset = 32
1770                         rem -= 32
1771                 }
1772
1773                 if rem >= 16 {
1774                         // Generate 16 byte loads and stores.
1775                         p := s.Prog(ppc64.ALXV)
1776                         p.From.Type = obj.TYPE_MEM
1777                         p.From.Reg = srcReg
1778                         p.From.Offset = offset
1779                         p.To.Type = obj.TYPE_REG
1780                         p.To.Reg = ppc64.REG_VS32
1781
1782                         p = s.Prog(ppc64.ASTXV)
1783                         p.From.Type = obj.TYPE_REG
1784                         p.From.Reg = ppc64.REG_VS32
1785                         p.To.Type = obj.TYPE_MEM
1786                         p.To.Reg = dstReg
1787                         p.To.Offset = offset
1788
1789                         offset += 16
1790                         rem -= 16
1791
1792                         if rem >= 16 {
1793                                 p := s.Prog(ppc64.ALXV)
1794                                 p.From.Type = obj.TYPE_MEM
1795                                 p.From.Reg = srcReg
1796                                 p.From.Offset = offset
1797                                 p.To.Type = obj.TYPE_REG
1798                                 p.To.Reg = ppc64.REG_VS32
1799
1800                                 p = s.Prog(ppc64.ASTXV)
1801                                 p.From.Type = obj.TYPE_REG
1802                                 p.From.Reg = ppc64.REG_VS32
1803                                 p.To.Type = obj.TYPE_MEM
1804                                 p.To.Reg = dstReg
1805                                 p.To.Offset = offset
1806
1807                                 offset += 16
1808                                 rem -= 16
1809                         }
1810                 }
1811                 // Generate all the remaining load and store pairs, starting with
1812                 // as many 8 byte moves as possible, then 4, 2, 1.
1813                 for rem > 0 {
1814                         op, size := ppc64.AMOVB, int64(1)
1815                         switch {
1816                         case rem >= 8:
1817                                 op, size = ppc64.AMOVD, 8
1818                         case rem >= 4:
1819                                 op, size = ppc64.AMOVWZ, 4
1820                         case rem >= 2:
1821                                 op, size = ppc64.AMOVH, 2
1822                         }
1823                         // Load
1824                         p := s.Prog(op)
1825                         p.To.Type = obj.TYPE_REG
1826                         p.To.Reg = ppc64.REGTMP
1827                         p.From.Type = obj.TYPE_MEM
1828                         p.From.Reg = srcReg
1829                         p.From.Offset = offset
1830
1831                         // Store
1832                         p = s.Prog(op)
1833                         p.From.Type = obj.TYPE_REG
1834                         p.From.Reg = ppc64.REGTMP
1835                         p.To.Type = obj.TYPE_MEM
1836                         p.To.Reg = dstReg
1837                         p.To.Offset = offset
1838                         rem -= size
1839                         offset += size
1840                 }
1841
1842         case ssa.OpPPC64CALLstatic:
1843                 s.Call(v)
1844
1845         case ssa.OpPPC64CALLtail:
1846                 s.TailCall(v)
1847
1848         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1849                 p := s.Prog(ppc64.AMOVD)
1850                 p.From.Type = obj.TYPE_REG
1851                 p.From.Reg = v.Args[0].Reg()
1852                 p.To.Type = obj.TYPE_REG
1853                 p.To.Reg = ppc64.REG_LR
1854
1855                 if v.Args[0].Reg() != ppc64.REG_R12 {
1856                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1857                 }
1858
1859                 pp := s.Call(v)
1860
1861                 // Convert the call into a blrl with hint this is not a subroutine return.
1862                 // The full bclrl opcode must be specified when passing a hint.
1863                 pp.As = ppc64.ABCL
1864                 pp.From.Type = obj.TYPE_CONST
1865                 pp.From.Offset = ppc64.BO_ALWAYS
1866                 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1867                 pp.To.Reg = ppc64.REG_LR
1868                 pp.SetFrom3Const(1)
1869
1870                 if base.Ctxt.Flag_shared {
1871                         // When compiling Go into PIC, the function we just
1872                         // called via pointer might have been implemented in
1873                         // a separate module and so overwritten the TOC
1874                         // pointer in R2; reload it.
1875                         q := s.Prog(ppc64.AMOVD)
1876                         q.From.Type = obj.TYPE_MEM
1877                         q.From.Offset = 24
1878                         q.From.Reg = ppc64.REGSP
1879                         q.To.Type = obj.TYPE_REG
1880                         q.To.Reg = ppc64.REG_R2
1881                 }
1882
1883         case ssa.OpPPC64LoweredWB:
1884                 p := s.Prog(obj.ACALL)
1885                 p.To.Type = obj.TYPE_MEM
1886                 p.To.Name = obj.NAME_EXTERN
1887                 p.To.Sym = v.Aux.(*obj.LSym)
1888
1889         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1890                 p := s.Prog(obj.ACALL)
1891                 p.To.Type = obj.TYPE_MEM
1892                 p.To.Name = obj.NAME_EXTERN
1893                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1894                 s.UseArgs(16) // space used in callee args area by assembly stubs
1895
1896         case ssa.OpPPC64LoweredNilCheck:
1897                 if buildcfg.GOOS == "aix" {
1898                         // CMP Rarg0, R0
1899                         // BNE 2(PC)
1900                         // STW R0, 0(R0)
1901                         // NOP (so the BNE has somewhere to land)
1902
1903                         // CMP Rarg0, R0
1904                         p := s.Prog(ppc64.ACMP)
1905                         p.From.Type = obj.TYPE_REG
1906                         p.From.Reg = v.Args[0].Reg()
1907                         p.To.Type = obj.TYPE_REG
1908                         p.To.Reg = ppc64.REG_R0
1909
1910                         // BNE 2(PC)
1911                         p2 := s.Prog(ppc64.ABNE)
1912                         p2.To.Type = obj.TYPE_BRANCH
1913
1914                         // STW R0, 0(R0)
1915                         // Write at 0 is forbidden and will trigger a SIGSEGV
1916                         p = s.Prog(ppc64.AMOVW)
1917                         p.From.Type = obj.TYPE_REG
1918                         p.From.Reg = ppc64.REG_R0
1919                         p.To.Type = obj.TYPE_MEM
1920                         p.To.Reg = ppc64.REG_R0
1921
1922                         // NOP (so the BNE has somewhere to land)
1923                         nop := s.Prog(obj.ANOP)
1924                         p2.To.SetTarget(nop)
1925
1926                 } else {
1927                         // Issue a load which will fault if arg is nil.
1928                         p := s.Prog(ppc64.AMOVBZ)
1929                         p.From.Type = obj.TYPE_MEM
1930                         p.From.Reg = v.Args[0].Reg()
1931                         ssagen.AddAux(&p.From, v)
1932                         p.To.Type = obj.TYPE_REG
1933                         p.To.Reg = ppc64.REGTMP
1934                 }
1935                 if logopt.Enabled() {
1936                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1937                 }
1938                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1939                         base.WarnfAt(v.Pos, "generated nil check")
1940                 }
1941
1942         // These should be resolved by rules and not make it here.
1943         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1944                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1945                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1946                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1947         case ssa.OpPPC64InvertFlags:
1948                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1949         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1950                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1951         case ssa.OpClobber, ssa.OpClobberReg:
1952                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1953         default:
1954                 v.Fatalf("genValue not implemented: %s", v.LongString())
1955         }
1956 }
1957
1958 var blockJump = [...]struct {
1959         asm, invasm     obj.As
1960         asmeq, invasmun bool
1961 }{
1962         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1963         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1964
1965         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1966         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1967         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1968         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1969
1970         // TODO: need to work FP comparisons into block jumps
1971         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1972         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1973         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1974         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1975 }
1976
1977 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1978         switch b.Kind {
1979         case ssa.BlockDefer:
1980                 // defer returns in R3:
1981                 // 0 if we should continue executing
1982                 // 1 if we should jump to deferreturn call
1983                 p := s.Prog(ppc64.ACMP)
1984                 p.From.Type = obj.TYPE_REG
1985                 p.From.Reg = ppc64.REG_R3
1986                 p.To.Type = obj.TYPE_REG
1987                 p.To.Reg = ppc64.REG_R0
1988
1989                 p = s.Prog(ppc64.ABNE)
1990                 p.To.Type = obj.TYPE_BRANCH
1991                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1992                 if b.Succs[0].Block() != next {
1993                         p := s.Prog(obj.AJMP)
1994                         p.To.Type = obj.TYPE_BRANCH
1995                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1996                 }
1997
1998         case ssa.BlockPlain:
1999                 if b.Succs[0].Block() != next {
2000                         p := s.Prog(obj.AJMP)
2001                         p.To.Type = obj.TYPE_BRANCH
2002                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2003                 }
2004         case ssa.BlockExit, ssa.BlockRetJmp:
2005         case ssa.BlockRet:
2006                 s.Prog(obj.ARET)
2007
2008         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2009                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2010                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2011                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2012                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2013                 jmp := blockJump[b.Kind]
2014                 switch next {
2015                 case b.Succs[0].Block():
2016                         s.Br(jmp.invasm, b.Succs[1].Block())
2017                         if jmp.invasmun {
2018                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2019                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2020                         }
2021                 case b.Succs[1].Block():
2022                         s.Br(jmp.asm, b.Succs[0].Block())
2023                         if jmp.asmeq {
2024                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2025                         }
2026                 default:
2027                         if b.Likely != ssa.BranchUnlikely {
2028                                 s.Br(jmp.asm, b.Succs[0].Block())
2029                                 if jmp.asmeq {
2030                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2031                                 }
2032                                 s.Br(obj.AJMP, b.Succs[1].Block())
2033                         } else {
2034                                 s.Br(jmp.invasm, b.Succs[1].Block())
2035                                 if jmp.invasmun {
2036                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2037                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2038                                 }
2039                                 s.Br(obj.AJMP, b.Succs[0].Block())
2040                         }
2041                 }
2042         default:
2043                 b.Fatalf("branch not implemented: %s", b.LongString())
2044         }
2045 }
2046
2047 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2048         p := s.Prog(loadByType(t))
2049         p.From.Type = obj.TYPE_MEM
2050         p.From.Name = obj.NAME_AUTO
2051         p.From.Sym = n.Linksym()
2052         p.From.Offset = n.FrameOffset() + off
2053         p.To.Type = obj.TYPE_REG
2054         p.To.Reg = reg
2055         return p
2056 }
2057
2058 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2059         p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2060         p.To.Name = obj.NAME_PARAM
2061         p.To.Sym = n.Linksym()
2062         p.Pos = p.Pos.WithNotStmt()
2063         return p
2064 }