]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
cmd/compile: merge zero constant ISEL in PPC64 lateLower pass
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/objw"
12         "cmd/compile/internal/ssa"
13         "cmd/compile/internal/ssagen"
14         "cmd/compile/internal/types"
15         "cmd/internal/obj"
16         "cmd/internal/obj/ppc64"
17         "internal/buildcfg"
18         "math"
19         "strings"
20 )
21
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24         //      flive := b.FlagsLiveAtEnd
25         //      if b.Control != nil && b.Control.Type.IsFlags() {
26         //              flive = true
27         //      }
28         //      for i := len(b.Values) - 1; i >= 0; i-- {
29         //              v := b.Values[i]
30         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
31         //                      // The "mark" is any non-nil Aux value.
32         //                      v.Aux = v
33         //              }
34         //              if v.Type.IsFlags() {
35         //                      flive = false
36         //              }
37         //              for _, a := range v.Args {
38         //                      if a.Type.IsFlags() {
39         //                              flive = true
40         //                      }
41         //              }
42         //      }
43 }
44
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
47         if t.IsFloat() {
48                 switch t.Size() {
49                 case 4:
50                         return ppc64.AFMOVS
51                 case 8:
52                         return ppc64.AFMOVD
53                 }
54         } else {
55                 switch t.Size() {
56                 case 1:
57                         if t.IsSigned() {
58                                 return ppc64.AMOVB
59                         } else {
60                                 return ppc64.AMOVBZ
61                         }
62                 case 2:
63                         if t.IsSigned() {
64                                 return ppc64.AMOVH
65                         } else {
66                                 return ppc64.AMOVHZ
67                         }
68                 case 4:
69                         if t.IsSigned() {
70                                 return ppc64.AMOVW
71                         } else {
72                                 return ppc64.AMOVWZ
73                         }
74                 case 8:
75                         return ppc64.AMOVD
76                 }
77         }
78         panic("bad load type")
79 }
80
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
83         if t.IsFloat() {
84                 switch t.Size() {
85                 case 4:
86                         return ppc64.AFMOVS
87                 case 8:
88                         return ppc64.AFMOVD
89                 }
90         } else {
91                 switch t.Size() {
92                 case 1:
93                         return ppc64.AMOVB
94                 case 2:
95                         return ppc64.AMOVH
96                 case 4:
97                         return ppc64.AMOVW
98                 case 8:
99                         return ppc64.AMOVD
100                 }
101         }
102         panic("bad store type")
103 }
104
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
106         switch v.Op {
107         case ssa.OpCopy:
108                 t := v.Type
109                 if t.IsMemory() {
110                         return
111                 }
112                 x := v.Args[0].Reg()
113                 y := v.Reg()
114                 if x != y {
115                         rt := obj.TYPE_REG
116                         op := ppc64.AMOVD
117
118                         if t.IsFloat() {
119                                 op = ppc64.AFMOVD
120                         }
121                         p := s.Prog(op)
122                         p.From.Type = rt
123                         p.From.Reg = x
124                         p.To.Type = rt
125                         p.To.Reg = y
126                 }
127
128         case ssa.OpPPC64LoweredAtomicAnd8,
129                 ssa.OpPPC64LoweredAtomicAnd32,
130                 ssa.OpPPC64LoweredAtomicOr8,
131                 ssa.OpPPC64LoweredAtomicOr32:
132                 // LWSYNC
133                 // LBAR/LWAR    (Rarg0), Rtmp
134                 // AND/OR       Rarg1, Rtmp
135                 // STBCCC/STWCCC Rtmp, (Rarg0)
136                 // BNE          -3(PC)
137                 ld := ppc64.ALBAR
138                 st := ppc64.ASTBCCC
139                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
140                         ld = ppc64.ALWAR
141                         st = ppc64.ASTWCCC
142                 }
143                 r0 := v.Args[0].Reg()
144                 r1 := v.Args[1].Reg()
145                 // LWSYNC - Assuming shared data not write-through-required nor
146                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
147                 plwsync := s.Prog(ppc64.ALWSYNC)
148                 plwsync.To.Type = obj.TYPE_NONE
149                 // LBAR or LWAR
150                 p := s.Prog(ld)
151                 p.From.Type = obj.TYPE_MEM
152                 p.From.Reg = r0
153                 p.To.Type = obj.TYPE_REG
154                 p.To.Reg = ppc64.REGTMP
155                 // AND/OR reg1,out
156                 p1 := s.Prog(v.Op.Asm())
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.To.Type = obj.TYPE_REG
160                 p1.To.Reg = ppc64.REGTMP
161                 // STBCCC or STWCCC
162                 p2 := s.Prog(st)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGTMP
165                 p2.To.Type = obj.TYPE_MEM
166                 p2.To.Reg = r0
167                 p2.RegTo2 = ppc64.REGTMP
168                 // BNE retry
169                 p3 := s.Prog(ppc64.ABNE)
170                 p3.To.Type = obj.TYPE_BRANCH
171                 p3.To.SetTarget(p)
172
173         case ssa.OpPPC64LoweredAtomicAdd32,
174                 ssa.OpPPC64LoweredAtomicAdd64:
175                 // LWSYNC
176                 // LDAR/LWAR    (Rarg0), Rout
177                 // ADD          Rarg1, Rout
178                 // STDCCC/STWCCC Rout, (Rarg0)
179                 // BNE         -3(PC)
180                 // MOVW         Rout,Rout (if Add32)
181                 ld := ppc64.ALDAR
182                 st := ppc64.ASTDCCC
183                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
184                         ld = ppc64.ALWAR
185                         st = ppc64.ASTWCCC
186                 }
187                 r0 := v.Args[0].Reg()
188                 r1 := v.Args[1].Reg()
189                 out := v.Reg0()
190                 // LWSYNC - Assuming shared data not write-through-required nor
191                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
192                 plwsync := s.Prog(ppc64.ALWSYNC)
193                 plwsync.To.Type = obj.TYPE_NONE
194                 // LDAR or LWAR
195                 p := s.Prog(ld)
196                 p.From.Type = obj.TYPE_MEM
197                 p.From.Reg = r0
198                 p.To.Type = obj.TYPE_REG
199                 p.To.Reg = out
200                 // ADD reg1,out
201                 p1 := s.Prog(ppc64.AADD)
202                 p1.From.Type = obj.TYPE_REG
203                 p1.From.Reg = r1
204                 p1.To.Reg = out
205                 p1.To.Type = obj.TYPE_REG
206                 // STDCCC or STWCCC
207                 p3 := s.Prog(st)
208                 p3.From.Type = obj.TYPE_REG
209                 p3.From.Reg = out
210                 p3.To.Type = obj.TYPE_MEM
211                 p3.To.Reg = r0
212                 // BNE retry
213                 p4 := s.Prog(ppc64.ABNE)
214                 p4.To.Type = obj.TYPE_BRANCH
215                 p4.To.SetTarget(p)
216
217                 // Ensure a 32 bit result
218                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
219                         p5 := s.Prog(ppc64.AMOVWZ)
220                         p5.To.Type = obj.TYPE_REG
221                         p5.To.Reg = out
222                         p5.From.Type = obj.TYPE_REG
223                         p5.From.Reg = out
224                 }
225
226         case ssa.OpPPC64LoweredAtomicExchange32,
227                 ssa.OpPPC64LoweredAtomicExchange64:
228                 // LWSYNC
229                 // LDAR/LWAR    (Rarg0), Rout
230                 // STDCCC/STWCCC Rout, (Rarg0)
231                 // BNE         -2(PC)
232                 // ISYNC
233                 ld := ppc64.ALDAR
234                 st := ppc64.ASTDCCC
235                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
236                         ld = ppc64.ALWAR
237                         st = ppc64.ASTWCCC
238                 }
239                 r0 := v.Args[0].Reg()
240                 r1 := v.Args[1].Reg()
241                 out := v.Reg0()
242                 // LWSYNC - Assuming shared data not write-through-required nor
243                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
244                 plwsync := s.Prog(ppc64.ALWSYNC)
245                 plwsync.To.Type = obj.TYPE_NONE
246                 // LDAR or LWAR
247                 p := s.Prog(ld)
248                 p.From.Type = obj.TYPE_MEM
249                 p.From.Reg = r0
250                 p.To.Type = obj.TYPE_REG
251                 p.To.Reg = out
252                 // STDCCC or STWCCC
253                 p1 := s.Prog(st)
254                 p1.From.Type = obj.TYPE_REG
255                 p1.From.Reg = r1
256                 p1.To.Type = obj.TYPE_MEM
257                 p1.To.Reg = r0
258                 // BNE retry
259                 p2 := s.Prog(ppc64.ABNE)
260                 p2.To.Type = obj.TYPE_BRANCH
261                 p2.To.SetTarget(p)
262                 // ISYNC
263                 pisync := s.Prog(ppc64.AISYNC)
264                 pisync.To.Type = obj.TYPE_NONE
265
266         case ssa.OpPPC64LoweredAtomicLoad8,
267                 ssa.OpPPC64LoweredAtomicLoad32,
268                 ssa.OpPPC64LoweredAtomicLoad64,
269                 ssa.OpPPC64LoweredAtomicLoadPtr:
270                 // SYNC
271                 // MOVB/MOVD/MOVW (Rarg0), Rout
272                 // CMP Rout,Rout
273                 // BNE 1(PC)
274                 // ISYNC
275                 ld := ppc64.AMOVD
276                 cmp := ppc64.ACMP
277                 switch v.Op {
278                 case ssa.OpPPC64LoweredAtomicLoad8:
279                         ld = ppc64.AMOVBZ
280                 case ssa.OpPPC64LoweredAtomicLoad32:
281                         ld = ppc64.AMOVWZ
282                         cmp = ppc64.ACMPW
283                 }
284                 arg0 := v.Args[0].Reg()
285                 out := v.Reg0()
286                 // SYNC when AuxInt == 1; otherwise, load-acquire
287                 if v.AuxInt == 1 {
288                         psync := s.Prog(ppc64.ASYNC)
289                         psync.To.Type = obj.TYPE_NONE
290                 }
291                 // Load
292                 p := s.Prog(ld)
293                 p.From.Type = obj.TYPE_MEM
294                 p.From.Reg = arg0
295                 p.To.Type = obj.TYPE_REG
296                 p.To.Reg = out
297                 // CMP
298                 p1 := s.Prog(cmp)
299                 p1.From.Type = obj.TYPE_REG
300                 p1.From.Reg = out
301                 p1.To.Type = obj.TYPE_REG
302                 p1.To.Reg = out
303                 // BNE
304                 p2 := s.Prog(ppc64.ABNE)
305                 p2.To.Type = obj.TYPE_BRANCH
306                 // ISYNC
307                 pisync := s.Prog(ppc64.AISYNC)
308                 pisync.To.Type = obj.TYPE_NONE
309                 p2.To.SetTarget(pisync)
310
311         case ssa.OpPPC64LoweredAtomicStore8,
312                 ssa.OpPPC64LoweredAtomicStore32,
313                 ssa.OpPPC64LoweredAtomicStore64:
314                 // SYNC or LWSYNC
315                 // MOVB/MOVW/MOVD arg1,(arg0)
316                 st := ppc64.AMOVD
317                 switch v.Op {
318                 case ssa.OpPPC64LoweredAtomicStore8:
319                         st = ppc64.AMOVB
320                 case ssa.OpPPC64LoweredAtomicStore32:
321                         st = ppc64.AMOVW
322                 }
323                 arg0 := v.Args[0].Reg()
324                 arg1 := v.Args[1].Reg()
325                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
326                 // SYNC
327                 syncOp := ppc64.ASYNC
328                 if v.AuxInt == 0 {
329                         syncOp = ppc64.ALWSYNC
330                 }
331                 psync := s.Prog(syncOp)
332                 psync.To.Type = obj.TYPE_NONE
333                 // Store
334                 p := s.Prog(st)
335                 p.To.Type = obj.TYPE_MEM
336                 p.To.Reg = arg0
337                 p.From.Type = obj.TYPE_REG
338                 p.From.Reg = arg1
339
340         case ssa.OpPPC64LoweredAtomicCas64,
341                 ssa.OpPPC64LoweredAtomicCas32:
342                 // MOVD        $0, Rout
343                 // LWSYNC
344                 // loop:
345                 // LDAR        (Rarg0), MutexHint, Rtmp
346                 // CMP         Rarg1, Rtmp
347                 // BNE         end
348                 // STDCCC      Rarg2, (Rarg0)
349                 // BNE         loop
350                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
351                 // MOVD        $1, Rout
352                 // end:
353                 ld := ppc64.ALDAR
354                 st := ppc64.ASTDCCC
355                 cmp := ppc64.ACMP
356                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
357                         ld = ppc64.ALWAR
358                         st = ppc64.ASTWCCC
359                         cmp = ppc64.ACMPW
360                 }
361                 r0 := v.Args[0].Reg()
362                 r1 := v.Args[1].Reg()
363                 r2 := v.Args[2].Reg()
364                 out := v.Reg0()
365                 // Initialize return value to false
366                 p := s.Prog(ppc64.AMOVD)
367                 p.From.Type = obj.TYPE_CONST
368                 p.From.Offset = 0
369                 p.To.Type = obj.TYPE_REG
370                 p.To.Reg = out
371                 // LWSYNC - Assuming shared data not write-through-required nor
372                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
373                 plwsync1 := s.Prog(ppc64.ALWSYNC)
374                 plwsync1.To.Type = obj.TYPE_NONE
375                 // LDAR or LWAR
376                 p0 := s.Prog(ld)
377                 p0.From.Type = obj.TYPE_MEM
378                 p0.From.Reg = r0
379                 p0.To.Type = obj.TYPE_REG
380                 p0.To.Reg = ppc64.REGTMP
381                 // If it is a Compare-and-Swap-Release operation, set the EH field with
382                 // the release hint.
383                 if v.AuxInt == 0 {
384                         p0.SetFrom3Const(0)
385                 }
386                 // CMP reg1,reg2
387                 p1 := s.Prog(cmp)
388                 p1.From.Type = obj.TYPE_REG
389                 p1.From.Reg = r1
390                 p1.To.Reg = ppc64.REGTMP
391                 p1.To.Type = obj.TYPE_REG
392                 // BNE done with return value = false
393                 p2 := s.Prog(ppc64.ABNE)
394                 p2.To.Type = obj.TYPE_BRANCH
395                 // STDCCC or STWCCC
396                 p3 := s.Prog(st)
397                 p3.From.Type = obj.TYPE_REG
398                 p3.From.Reg = r2
399                 p3.To.Type = obj.TYPE_MEM
400                 p3.To.Reg = r0
401                 // BNE retry
402                 p4 := s.Prog(ppc64.ABNE)
403                 p4.To.Type = obj.TYPE_BRANCH
404                 p4.To.SetTarget(p0)
405                 // LWSYNC - Assuming shared data not write-through-required nor
406                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
407                 // If the operation is a CAS-Release, then synchronization is not necessary.
408                 if v.AuxInt != 0 {
409                         plwsync2 := s.Prog(ppc64.ALWSYNC)
410                         plwsync2.To.Type = obj.TYPE_NONE
411                 }
412                 // return value true
413                 p5 := s.Prog(ppc64.AMOVD)
414                 p5.From.Type = obj.TYPE_CONST
415                 p5.From.Offset = 1
416                 p5.To.Type = obj.TYPE_REG
417                 p5.To.Reg = out
418                 // done (label)
419                 p6 := s.Prog(obj.ANOP)
420                 p2.To.SetTarget(p6)
421
422         case ssa.OpPPC64LoweredPubBarrier:
423                 // LWSYNC
424                 s.Prog(v.Op.Asm())
425
426         case ssa.OpPPC64LoweredGetClosurePtr:
427                 // Closure pointer is R11 (already)
428                 ssagen.CheckLoweredGetClosurePtr(v)
429
430         case ssa.OpPPC64LoweredGetCallerSP:
431                 // caller's SP is FixedFrameSize below the address of the first arg
432                 p := s.Prog(ppc64.AMOVD)
433                 p.From.Type = obj.TYPE_ADDR
434                 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
435                 p.From.Name = obj.NAME_PARAM
436                 p.To.Type = obj.TYPE_REG
437                 p.To.Reg = v.Reg()
438
439         case ssa.OpPPC64LoweredGetCallerPC:
440                 p := s.Prog(obj.AGETCALLERPC)
441                 p.To.Type = obj.TYPE_REG
442                 p.To.Reg = v.Reg()
443
444         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
445                 // input is already rounded
446
447         case ssa.OpLoadReg:
448                 loadOp := loadByType(v.Type)
449                 p := s.Prog(loadOp)
450                 ssagen.AddrAuto(&p.From, v.Args[0])
451                 p.To.Type = obj.TYPE_REG
452                 p.To.Reg = v.Reg()
453
454         case ssa.OpStoreReg:
455                 storeOp := storeByType(v.Type)
456                 p := s.Prog(storeOp)
457                 p.From.Type = obj.TYPE_REG
458                 p.From.Reg = v.Args[0].Reg()
459                 ssagen.AddrAuto(&p.To, v)
460
461         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
462                 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
463                 // The loop only runs once.
464                 for _, a := range v.Block.Func.RegArgs {
465                         // Pass the spill/unspill information along to the assembler, offset by size of
466                         // the saved LR slot.
467                         addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
468                         s.FuncInfo().AddSpill(
469                                 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
470                 }
471                 v.Block.Func.RegArgs = nil
472
473                 ssagen.CheckArgReg(v)
474
475         case ssa.OpPPC64DIVD:
476                 // For now,
477                 //
478                 // cmp arg1, -1
479                 // be  ahead
480                 // v = arg0 / arg1
481                 // b over
482                 // ahead: v = - arg0
483                 // over: nop
484                 r := v.Reg()
485                 r0 := v.Args[0].Reg()
486                 r1 := v.Args[1].Reg()
487
488                 p := s.Prog(ppc64.ACMP)
489                 p.From.Type = obj.TYPE_REG
490                 p.From.Reg = r1
491                 p.To.Type = obj.TYPE_CONST
492                 p.To.Offset = -1
493
494                 pbahead := s.Prog(ppc64.ABEQ)
495                 pbahead.To.Type = obj.TYPE_BRANCH
496
497                 p = s.Prog(v.Op.Asm())
498                 p.From.Type = obj.TYPE_REG
499                 p.From.Reg = r1
500                 p.Reg = r0
501                 p.To.Type = obj.TYPE_REG
502                 p.To.Reg = r
503
504                 pbover := s.Prog(obj.AJMP)
505                 pbover.To.Type = obj.TYPE_BRANCH
506
507                 p = s.Prog(ppc64.ANEG)
508                 p.To.Type = obj.TYPE_REG
509                 p.To.Reg = r
510                 p.From.Type = obj.TYPE_REG
511                 p.From.Reg = r0
512                 pbahead.To.SetTarget(p)
513
514                 p = s.Prog(obj.ANOP)
515                 pbover.To.SetTarget(p)
516
517         case ssa.OpPPC64DIVW:
518                 // word-width version of above
519                 r := v.Reg()
520                 r0 := v.Args[0].Reg()
521                 r1 := v.Args[1].Reg()
522
523                 p := s.Prog(ppc64.ACMPW)
524                 p.From.Type = obj.TYPE_REG
525                 p.From.Reg = r1
526                 p.To.Type = obj.TYPE_CONST
527                 p.To.Offset = -1
528
529                 pbahead := s.Prog(ppc64.ABEQ)
530                 pbahead.To.Type = obj.TYPE_BRANCH
531
532                 p = s.Prog(v.Op.Asm())
533                 p.From.Type = obj.TYPE_REG
534                 p.From.Reg = r1
535                 p.Reg = r0
536                 p.To.Type = obj.TYPE_REG
537                 p.To.Reg = r
538
539                 pbover := s.Prog(obj.AJMP)
540                 pbover.To.Type = obj.TYPE_BRANCH
541
542                 p = s.Prog(ppc64.ANEG)
543                 p.To.Type = obj.TYPE_REG
544                 p.To.Reg = r
545                 p.From.Type = obj.TYPE_REG
546                 p.From.Reg = r0
547                 pbahead.To.SetTarget(p)
548
549                 p = s.Prog(obj.ANOP)
550                 pbover.To.SetTarget(p)
551
552         case ssa.OpPPC64CLRLSLWI:
553                 r := v.Reg()
554                 r1 := v.Args[0].Reg()
555                 shifts := v.AuxInt
556                 p := s.Prog(v.Op.Asm())
557                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
558                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
559                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
560                 p.Reg = r1
561                 p.To.Type = obj.TYPE_REG
562                 p.To.Reg = r
563
564         case ssa.OpPPC64CLRLSLDI:
565                 r := v.Reg()
566                 r1 := v.Args[0].Reg()
567                 shifts := v.AuxInt
568                 p := s.Prog(v.Op.Asm())
569                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
570                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
571                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
572                 p.Reg = r1
573                 p.To.Type = obj.TYPE_REG
574                 p.To.Reg = r
575
576                 // Mask has been set as sh
577         case ssa.OpPPC64RLDICL:
578                 r := v.Reg()
579                 r1 := v.Args[0].Reg()
580                 shifts := v.AuxInt
581                 p := s.Prog(v.Op.Asm())
582                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
583                 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
584                 p.Reg = r1
585                 p.To.Type = obj.TYPE_REG
586                 p.To.Reg = r
587
588         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
589                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
590                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
591                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
592                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
593                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
594                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
595                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
596                 r := v.Reg()
597                 r1 := v.Args[0].Reg()
598                 r2 := v.Args[1].Reg()
599                 p := s.Prog(v.Op.Asm())
600                 p.From.Type = obj.TYPE_REG
601                 p.From.Reg = r2
602                 p.Reg = r1
603                 p.To.Type = obj.TYPE_REG
604                 p.To.Reg = r
605
606         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
607                 r1 := v.Args[0].Reg()
608                 r2 := v.Args[1].Reg()
609                 p := s.Prog(v.Op.Asm())
610                 p.From.Type = obj.TYPE_REG
611                 p.From.Reg = r2
612                 p.Reg = r1
613                 p.To.Type = obj.TYPE_REG
614                 p.To.Reg = v.Reg0()
615
616         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
617                 p := s.Prog(v.Op.Asm())
618                 p.From.Type = obj.TYPE_CONST
619                 p.From.Offset = v.AuxInt
620                 p.Reg = v.Args[0].Reg()
621                 p.To.Type = obj.TYPE_REG
622                 p.To.Reg = v.Reg()
623
624                 // Auxint holds encoded rotate + mask
625         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
626                 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
627                 p := s.Prog(v.Op.Asm())
628                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
629                 p.Reg = v.Args[0].Reg()
630                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
631                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
632
633                 // Auxint holds mask
634         case ssa.OpPPC64RLWNM:
635                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
636                 p := s.Prog(v.Op.Asm())
637                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
638                 p.Reg = v.Args[0].Reg()
639                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
640                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
641
642         case ssa.OpPPC64MADDLD:
643                 r := v.Reg()
644                 r1 := v.Args[0].Reg()
645                 r2 := v.Args[1].Reg()
646                 r3 := v.Args[2].Reg()
647                 // r = r1*r2 ± r3
648                 p := s.Prog(v.Op.Asm())
649                 p.From.Type = obj.TYPE_REG
650                 p.From.Reg = r1
651                 p.Reg = r2
652                 p.SetFrom3Reg(r3)
653                 p.To.Type = obj.TYPE_REG
654                 p.To.Reg = r
655
656         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
657                 r := v.Reg()
658                 r1 := v.Args[0].Reg()
659                 r2 := v.Args[1].Reg()
660                 r3 := v.Args[2].Reg()
661                 // r = r1*r2 ± r3
662                 p := s.Prog(v.Op.Asm())
663                 p.From.Type = obj.TYPE_REG
664                 p.From.Reg = r1
665                 p.Reg = r3
666                 p.SetFrom3Reg(r2)
667                 p.To.Type = obj.TYPE_REG
668                 p.To.Reg = r
669
670         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
671                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
672                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
673                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
674                 r := v.Reg()
675                 p := s.Prog(v.Op.Asm())
676                 p.To.Type = obj.TYPE_REG
677                 p.To.Reg = r
678                 p.From.Type = obj.TYPE_REG
679                 p.From.Reg = v.Args[0].Reg()
680
681         case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
682                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
683                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
684                 p := s.Prog(v.Op.Asm())
685                 p.Reg = v.Args[0].Reg()
686                 p.From.Type = obj.TYPE_CONST
687                 p.From.Offset = v.AuxInt
688                 p.To.Type = obj.TYPE_REG
689                 p.To.Reg = v.Reg()
690
691         case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
692                 r := v.Reg0() // CA is the first, implied argument.
693                 r1 := v.Args[0].Reg()
694                 r2 := v.Args[1].Reg()
695                 p := s.Prog(v.Op.Asm())
696                 p.From.Type = obj.TYPE_REG
697                 p.From.Reg = r2
698                 p.Reg = r1
699                 p.To.Type = obj.TYPE_REG
700                 p.To.Reg = r
701
702         case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
703                 p := s.Prog(v.Op.Asm())
704                 p.From.Type = obj.TYPE_REG
705                 p.From.Reg = ppc64.REG_R0
706                 p.To.Type = obj.TYPE_REG
707                 p.To.Reg = v.Reg()
708
709         case ssa.OpPPC64ADDCconst:
710                 p := s.Prog(v.Op.Asm())
711                 p.Reg = v.Args[0].Reg()
712                 p.From.Type = obj.TYPE_CONST
713                 p.From.Offset = v.AuxInt
714                 p.To.Type = obj.TYPE_REG
715                 // Output is a pair, the second is the CA, which is implied.
716                 p.To.Reg = v.Reg0()
717
718         case ssa.OpPPC64SUBCconst:
719                 p := s.Prog(v.Op.Asm())
720                 p.SetFrom3Const(v.AuxInt)
721                 p.From.Type = obj.TYPE_REG
722                 p.From.Reg = v.Args[0].Reg()
723                 p.To.Type = obj.TYPE_REG
724                 p.To.Reg = v.Reg0()
725
726         case ssa.OpPPC64SUBFCconst:
727                 p := s.Prog(v.Op.Asm())
728                 p.SetFrom3Const(v.AuxInt)
729                 p.From.Type = obj.TYPE_REG
730                 p.From.Reg = v.Args[0].Reg()
731                 p.To.Type = obj.TYPE_REG
732                 p.To.Reg = v.Reg()
733
734         case ssa.OpPPC64ANDCCconst:
735                 p := s.Prog(v.Op.Asm())
736                 p.Reg = v.Args[0].Reg()
737                 p.From.Type = obj.TYPE_CONST
738                 p.From.Offset = v.AuxInt
739                 p.To.Type = obj.TYPE_REG
740                 //              p.To.Reg = ppc64.REGTMP // discard result
741                 p.To.Reg = v.Reg0()
742
743         case ssa.OpPPC64MOVDaddr:
744                 switch v.Aux.(type) {
745                 default:
746                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
747                 case nil:
748                         // If aux offset and aux int are both 0, and the same
749                         // input and output regs are used, no instruction
750                         // needs to be generated, since it would just be
751                         // addi rx, rx, 0.
752                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
753                                 p := s.Prog(ppc64.AMOVD)
754                                 p.From.Type = obj.TYPE_ADDR
755                                 p.From.Reg = v.Args[0].Reg()
756                                 p.From.Offset = v.AuxInt
757                                 p.To.Type = obj.TYPE_REG
758                                 p.To.Reg = v.Reg()
759                         }
760
761                 case *obj.LSym, ir.Node:
762                         p := s.Prog(ppc64.AMOVD)
763                         p.From.Type = obj.TYPE_ADDR
764                         p.From.Reg = v.Args[0].Reg()
765                         p.To.Type = obj.TYPE_REG
766                         p.To.Reg = v.Reg()
767                         ssagen.AddAux(&p.From, v)
768
769                 }
770
771         case ssa.OpPPC64MOVDconst:
772                 p := s.Prog(v.Op.Asm())
773                 p.From.Type = obj.TYPE_CONST
774                 p.From.Offset = v.AuxInt
775                 p.To.Type = obj.TYPE_REG
776                 p.To.Reg = v.Reg()
777
778         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
779                 p := s.Prog(v.Op.Asm())
780                 p.From.Type = obj.TYPE_FCONST
781                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
782                 p.To.Type = obj.TYPE_REG
783                 p.To.Reg = v.Reg()
784
785         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
786                 p := s.Prog(v.Op.Asm())
787                 p.From.Type = obj.TYPE_REG
788                 p.From.Reg = v.Args[0].Reg()
789                 p.To.Type = obj.TYPE_REG
790                 p.To.Reg = v.Args[1].Reg()
791
792         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
793                 p := s.Prog(v.Op.Asm())
794                 p.From.Type = obj.TYPE_REG
795                 p.From.Reg = v.Args[0].Reg()
796                 p.To.Type = obj.TYPE_CONST
797                 p.To.Offset = v.AuxInt
798
799         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
800                 // Shift in register to required size
801                 p := s.Prog(v.Op.Asm())
802                 p.From.Type = obj.TYPE_REG
803                 p.From.Reg = v.Args[0].Reg()
804                 p.To.Reg = v.Reg()
805                 p.To.Type = obj.TYPE_REG
806
807         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
808
809                 // MOVDload and MOVWload are DS form instructions that are restricted to
810                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
811                 // then the address of the symbol to be loaded is computed (base + offset)
812                 // and used as the new base register and the offset field in the instruction
813                 // can be set to zero.
814
815                 // This same problem can happen with gostrings since the final offset is not
816                 // known yet, but could be unaligned after the relocation is resolved.
817                 // So gostrings are handled the same way.
818
819                 // This allows the MOVDload and MOVWload to be generated in more cases and
820                 // eliminates some offset and alignment checking in the rules file.
821
822                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
823                 ssagen.AddAux(&fromAddr, v)
824
825                 genAddr := false
826
827                 switch fromAddr.Name {
828                 case obj.NAME_EXTERN, obj.NAME_STATIC:
829                         // Special case for a rule that combines the bytes of a gostring.
830                         // The v alignment might seem OK, but we don't want to load it
831                         // using an offset because relocation comes later.
832                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
833                 default:
834                         genAddr = fromAddr.Offset%4 != 0
835                 }
836                 if genAddr {
837                         // Load full address into the temp register.
838                         p := s.Prog(ppc64.AMOVD)
839                         p.From.Type = obj.TYPE_ADDR
840                         p.From.Reg = v.Args[0].Reg()
841                         ssagen.AddAux(&p.From, v)
842                         // Load target using temp as base register
843                         // and offset zero. Setting NAME_NONE
844                         // prevents any extra offsets from being
845                         // added.
846                         p.To.Type = obj.TYPE_REG
847                         p.To.Reg = ppc64.REGTMP
848                         fromAddr.Reg = ppc64.REGTMP
849                         // Clear the offset field and other
850                         // information that might be used
851                         // by the assembler to add to the
852                         // final offset value.
853                         fromAddr.Offset = 0
854                         fromAddr.Name = obj.NAME_NONE
855                         fromAddr.Sym = nil
856                 }
857                 p := s.Prog(v.Op.Asm())
858                 p.From = fromAddr
859                 p.To.Type = obj.TYPE_REG
860                 p.To.Reg = v.Reg()
861
862         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
863                 p := s.Prog(v.Op.Asm())
864                 p.From.Type = obj.TYPE_MEM
865                 p.From.Reg = v.Args[0].Reg()
866                 ssagen.AddAux(&p.From, v)
867                 p.To.Type = obj.TYPE_REG
868                 p.To.Reg = v.Reg()
869
870         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
871                 p := s.Prog(v.Op.Asm())
872                 p.From.Type = obj.TYPE_MEM
873                 p.From.Reg = v.Args[0].Reg()
874                 p.To.Type = obj.TYPE_REG
875                 p.To.Reg = v.Reg()
876
877         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
878                 p := s.Prog(v.Op.Asm())
879                 p.To.Type = obj.TYPE_MEM
880                 p.To.Reg = v.Args[0].Reg()
881                 p.From.Type = obj.TYPE_REG
882                 p.From.Reg = v.Args[1].Reg()
883
884         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
885                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
886                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
887                 p := s.Prog(v.Op.Asm())
888                 p.From.Type = obj.TYPE_MEM
889                 p.From.Reg = v.Args[0].Reg()
890                 p.From.Index = v.Args[1].Reg()
891                 p.To.Type = obj.TYPE_REG
892                 p.To.Reg = v.Reg()
893
894         case ssa.OpPPC64DCBT:
895                 p := s.Prog(v.Op.Asm())
896                 p.From.Type = obj.TYPE_MEM
897                 p.From.Reg = v.Args[0].Reg()
898                 p.To.Type = obj.TYPE_CONST
899                 p.To.Offset = v.AuxInt
900
901         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
902                 p := s.Prog(v.Op.Asm())
903                 p.From.Type = obj.TYPE_REG
904                 p.From.Reg = ppc64.REGZERO
905                 p.To.Type = obj.TYPE_MEM
906                 p.To.Reg = v.Args[0].Reg()
907                 ssagen.AddAux(&p.To, v)
908
909         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
910
911                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
912                 // to offset values that are a multiple of 4. If the offset field is not a
913                 // multiple of 4, then the full address of the store target is computed (base +
914                 // offset) and used as the new base register and the offset in the instruction
915                 // is set to 0.
916
917                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
918                 // and prevents checking of the offset value and alignment in the rules.
919
920                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
921                 ssagen.AddAux(&toAddr, v)
922
923                 if toAddr.Offset%4 != 0 {
924                         p := s.Prog(ppc64.AMOVD)
925                         p.From.Type = obj.TYPE_ADDR
926                         p.From.Reg = v.Args[0].Reg()
927                         ssagen.AddAux(&p.From, v)
928                         p.To.Type = obj.TYPE_REG
929                         p.To.Reg = ppc64.REGTMP
930                         toAddr.Reg = ppc64.REGTMP
931                         // Clear the offset field and other
932                         // information that might be used
933                         // by the assembler to add to the
934                         // final offset value.
935                         toAddr.Offset = 0
936                         toAddr.Name = obj.NAME_NONE
937                         toAddr.Sym = nil
938                 }
939                 p := s.Prog(v.Op.Asm())
940                 p.To = toAddr
941                 p.From.Type = obj.TYPE_REG
942                 if v.Op == ssa.OpPPC64MOVDstorezero {
943                         p.From.Reg = ppc64.REGZERO
944                 } else {
945                         p.From.Reg = v.Args[1].Reg()
946                 }
947
948         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
949                 p := s.Prog(v.Op.Asm())
950                 p.From.Type = obj.TYPE_REG
951                 p.From.Reg = v.Args[1].Reg()
952                 p.To.Type = obj.TYPE_MEM
953                 p.To.Reg = v.Args[0].Reg()
954                 ssagen.AddAux(&p.To, v)
955
956         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
957                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
958                 ssa.OpPPC64MOVHBRstoreidx:
959                 p := s.Prog(v.Op.Asm())
960                 p.From.Type = obj.TYPE_REG
961                 p.From.Reg = v.Args[2].Reg()
962                 p.To.Index = v.Args[1].Reg()
963                 p.To.Type = obj.TYPE_MEM
964                 p.To.Reg = v.Args[0].Reg()
965
966         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB, ssa.OpPPC64ISELZ:
967                 // ISEL  AuxInt ? arg0 : arg1
968                 // ISELB is a special case of ISEL where AuxInt ? $1 (arg0) : $0.
969                 // ISELZ is a special case of ISEL where arg1 is implicitly $0.
970                 //
971                 // AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
972                 // ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
973                 // Convert the condition to a CR bit argument by the following conversion:
974                 //
975                 // AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
976                 // AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
977                 p := s.Prog(ppc64.AISEL)
978                 p.To.Type = obj.TYPE_REG
979                 p.To.Reg = v.Reg()
980                 // For ISELB/ISELZ Use R0 for 0 operand to avoid load.
981                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
982                 if v.Op == ssa.OpPPC64ISEL {
983                         r.Reg = v.Args[1].Reg()
984                 }
985                 // AuxInt values 4,5,6,7 implemented with reverse operand order from 0,1,2,3
986                 if v.AuxInt > 3 {
987                         p.Reg = r.Reg
988                         p.SetFrom3Reg(v.Args[0].Reg())
989                 } else {
990                         p.Reg = v.Args[0].Reg()
991                         p.SetFrom3(r)
992                 }
993                 p.From.Type = obj.TYPE_CONST
994                 p.From.Offset = v.AuxInt & 3
995
996         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
997                 // The LoweredQuad code generation
998                 // generates STXV instructions on
999                 // power9. The Short variation is used
1000                 // if no loop is generated.
1001
1002                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
1003
1004                 // Set up loop counter in CTR, used by BC
1005                 // XXLXOR clears VS32
1006                 //       XXLXOR VS32,VS32,VS32
1007                 //       MOVD len/64,REG_TMP
1008                 //       MOVD REG_TMP,CTR
1009                 //       loop:
1010                 //       STXV VS32,0(R20)
1011                 //       STXV VS32,16(R20)
1012                 //       STXV VS32,32(R20)
1013                 //       STXV VS32,48(R20)
1014                 //       ADD  $64,R20
1015                 //       BC   16, 0, loop
1016
1017                 // Number of 64-byte loop iterations
1018                 ctr := v.AuxInt / 64
1019
1020                 // Remainder bytes
1021                 rem := v.AuxInt % 64
1022
1023                 // Only generate a loop if there is more
1024                 // than 1 iteration.
1025                 if ctr > 1 {
1026                         // Set up VS32 (V0) to hold 0s
1027                         p := s.Prog(ppc64.AXXLXOR)
1028                         p.From.Type = obj.TYPE_REG
1029                         p.From.Reg = ppc64.REG_VS32
1030                         p.To.Type = obj.TYPE_REG
1031                         p.To.Reg = ppc64.REG_VS32
1032                         p.Reg = ppc64.REG_VS32
1033
1034                         // Set up CTR loop counter
1035                         p = s.Prog(ppc64.AMOVD)
1036                         p.From.Type = obj.TYPE_CONST
1037                         p.From.Offset = ctr
1038                         p.To.Type = obj.TYPE_REG
1039                         p.To.Reg = ppc64.REGTMP
1040
1041                         p = s.Prog(ppc64.AMOVD)
1042                         p.From.Type = obj.TYPE_REG
1043                         p.From.Reg = ppc64.REGTMP
1044                         p.To.Type = obj.TYPE_REG
1045                         p.To.Reg = ppc64.REG_CTR
1046
1047                         // Don't generate padding for
1048                         // loops with few iterations.
1049                         if ctr > 3 {
1050                                 p = s.Prog(obj.APCALIGN)
1051                                 p.From.Type = obj.TYPE_CONST
1052                                 p.From.Offset = 16
1053                         }
1054
1055                         // generate 4 STXVs to zero 64 bytes
1056                         var top *obj.Prog
1057
1058                         p = s.Prog(ppc64.ASTXV)
1059                         p.From.Type = obj.TYPE_REG
1060                         p.From.Reg = ppc64.REG_VS32
1061                         p.To.Type = obj.TYPE_MEM
1062                         p.To.Reg = v.Args[0].Reg()
1063
1064                         //  Save the top of loop
1065                         if top == nil {
1066                                 top = p
1067                         }
1068                         p = s.Prog(ppc64.ASTXV)
1069                         p.From.Type = obj.TYPE_REG
1070                         p.From.Reg = ppc64.REG_VS32
1071                         p.To.Type = obj.TYPE_MEM
1072                         p.To.Reg = v.Args[0].Reg()
1073                         p.To.Offset = 16
1074
1075                         p = s.Prog(ppc64.ASTXV)
1076                         p.From.Type = obj.TYPE_REG
1077                         p.From.Reg = ppc64.REG_VS32
1078                         p.To.Type = obj.TYPE_MEM
1079                         p.To.Reg = v.Args[0].Reg()
1080                         p.To.Offset = 32
1081
1082                         p = s.Prog(ppc64.ASTXV)
1083                         p.From.Type = obj.TYPE_REG
1084                         p.From.Reg = ppc64.REG_VS32
1085                         p.To.Type = obj.TYPE_MEM
1086                         p.To.Reg = v.Args[0].Reg()
1087                         p.To.Offset = 48
1088
1089                         // Increment address for the
1090                         // 64 bytes just zeroed.
1091                         p = s.Prog(ppc64.AADD)
1092                         p.Reg = v.Args[0].Reg()
1093                         p.From.Type = obj.TYPE_CONST
1094                         p.From.Offset = 64
1095                         p.To.Type = obj.TYPE_REG
1096                         p.To.Reg = v.Args[0].Reg()
1097
1098                         // Branch back to top of loop
1099                         // based on CTR
1100                         // BC with BO_BCTR generates bdnz
1101                         p = s.Prog(ppc64.ABC)
1102                         p.From.Type = obj.TYPE_CONST
1103                         p.From.Offset = ppc64.BO_BCTR
1104                         p.Reg = ppc64.REG_CR0LT
1105                         p.To.Type = obj.TYPE_BRANCH
1106                         p.To.SetTarget(top)
1107                 }
1108                 // When ctr == 1 the loop was not generated but
1109                 // there are at least 64 bytes to clear, so add
1110                 // that to the remainder to generate the code
1111                 // to clear those doublewords
1112                 if ctr == 1 {
1113                         rem += 64
1114                 }
1115
1116                 // Clear the remainder starting at offset zero
1117                 offset := int64(0)
1118
1119                 if rem >= 16 && ctr <= 1 {
1120                         // If the XXLXOR hasn't already been
1121                         // generated, do it here to initialize
1122                         // VS32 (V0) to 0.
1123                         p := s.Prog(ppc64.AXXLXOR)
1124                         p.From.Type = obj.TYPE_REG
1125                         p.From.Reg = ppc64.REG_VS32
1126                         p.To.Type = obj.TYPE_REG
1127                         p.To.Reg = ppc64.REG_VS32
1128                         p.Reg = ppc64.REG_VS32
1129                 }
1130                 // Generate STXV for 32 or 64
1131                 // bytes.
1132                 for rem >= 32 {
1133                         p := s.Prog(ppc64.ASTXV)
1134                         p.From.Type = obj.TYPE_REG
1135                         p.From.Reg = ppc64.REG_VS32
1136                         p.To.Type = obj.TYPE_MEM
1137                         p.To.Reg = v.Args[0].Reg()
1138                         p.To.Offset = offset
1139
1140                         p = s.Prog(ppc64.ASTXV)
1141                         p.From.Type = obj.TYPE_REG
1142                         p.From.Reg = ppc64.REG_VS32
1143                         p.To.Type = obj.TYPE_MEM
1144                         p.To.Reg = v.Args[0].Reg()
1145                         p.To.Offset = offset + 16
1146                         offset += 32
1147                         rem -= 32
1148                 }
1149                 // Generate 16 bytes
1150                 if rem >= 16 {
1151                         p := s.Prog(ppc64.ASTXV)
1152                         p.From.Type = obj.TYPE_REG
1153                         p.From.Reg = ppc64.REG_VS32
1154                         p.To.Type = obj.TYPE_MEM
1155                         p.To.Reg = v.Args[0].Reg()
1156                         p.To.Offset = offset
1157                         offset += 16
1158                         rem -= 16
1159                 }
1160
1161                 // first clear as many doublewords as possible
1162                 // then clear remaining sizes as available
1163                 for rem > 0 {
1164                         op, size := ppc64.AMOVB, int64(1)
1165                         switch {
1166                         case rem >= 8:
1167                                 op, size = ppc64.AMOVD, 8
1168                         case rem >= 4:
1169                                 op, size = ppc64.AMOVW, 4
1170                         case rem >= 2:
1171                                 op, size = ppc64.AMOVH, 2
1172                         }
1173                         p := s.Prog(op)
1174                         p.From.Type = obj.TYPE_REG
1175                         p.From.Reg = ppc64.REG_R0
1176                         p.To.Type = obj.TYPE_MEM
1177                         p.To.Reg = v.Args[0].Reg()
1178                         p.To.Offset = offset
1179                         rem -= size
1180                         offset += size
1181                 }
1182
1183         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1184
1185                 // Unaligned data doesn't hurt performance
1186                 // for these instructions on power8.
1187
1188                 // For sizes >= 64 generate a loop as follows:
1189
1190                 // Set up loop counter in CTR, used by BC
1191                 //       XXLXOR VS32,VS32,VS32
1192                 //       MOVD len/32,REG_TMP
1193                 //       MOVD REG_TMP,CTR
1194                 //       MOVD $16,REG_TMP
1195                 //       loop:
1196                 //       STXVD2X VS32,(R0)(R20)
1197                 //       STXVD2X VS32,(R31)(R20)
1198                 //       ADD  $32,R20
1199                 //       BC   16, 0, loop
1200                 //
1201                 // any remainder is done as described below
1202
1203                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1204                 // then handle the remainder
1205                 //      MOVD R0,(R20)
1206                 //      MOVD R0,8(R20)
1207                 // .... etc.
1208                 //
1209                 // the remainder bytes are cleared using one or more
1210                 // of the following instructions with the appropriate
1211                 // offsets depending which instructions are needed
1212                 //
1213                 //      MOVW R0,n1(R20) 4 bytes
1214                 //      MOVH R0,n2(R20) 2 bytes
1215                 //      MOVB R0,n3(R20) 1 byte
1216                 //
1217                 // 7 bytes: MOVW, MOVH, MOVB
1218                 // 6 bytes: MOVW, MOVH
1219                 // 5 bytes: MOVW, MOVB
1220                 // 3 bytes: MOVH, MOVB
1221
1222                 // each loop iteration does 32 bytes
1223                 ctr := v.AuxInt / 32
1224
1225                 // remainder bytes
1226                 rem := v.AuxInt % 32
1227
1228                 // only generate a loop if there is more
1229                 // than 1 iteration.
1230                 if ctr > 1 {
1231                         // Set up VS32 (V0) to hold 0s
1232                         p := s.Prog(ppc64.AXXLXOR)
1233                         p.From.Type = obj.TYPE_REG
1234                         p.From.Reg = ppc64.REG_VS32
1235                         p.To.Type = obj.TYPE_REG
1236                         p.To.Reg = ppc64.REG_VS32
1237                         p.Reg = ppc64.REG_VS32
1238
1239                         // Set up CTR loop counter
1240                         p = s.Prog(ppc64.AMOVD)
1241                         p.From.Type = obj.TYPE_CONST
1242                         p.From.Offset = ctr
1243                         p.To.Type = obj.TYPE_REG
1244                         p.To.Reg = ppc64.REGTMP
1245
1246                         p = s.Prog(ppc64.AMOVD)
1247                         p.From.Type = obj.TYPE_REG
1248                         p.From.Reg = ppc64.REGTMP
1249                         p.To.Type = obj.TYPE_REG
1250                         p.To.Reg = ppc64.REG_CTR
1251
1252                         // Set up R31 to hold index value 16
1253                         p = s.Prog(ppc64.AMOVD)
1254                         p.From.Type = obj.TYPE_CONST
1255                         p.From.Offset = 16
1256                         p.To.Type = obj.TYPE_REG
1257                         p.To.Reg = ppc64.REGTMP
1258
1259                         // Don't add padding for alignment
1260                         // with few loop iterations.
1261                         if ctr > 3 {
1262                                 p = s.Prog(obj.APCALIGN)
1263                                 p.From.Type = obj.TYPE_CONST
1264                                 p.From.Offset = 16
1265                         }
1266
1267                         // generate 2 STXVD2Xs, each storing 16 bytes
1268                         // when this is a loop then the top must be saved
1269                         var top *obj.Prog
1270                         // This is the top of loop
1271
1272                         p = s.Prog(ppc64.ASTXVD2X)
1273                         p.From.Type = obj.TYPE_REG
1274                         p.From.Reg = ppc64.REG_VS32
1275                         p.To.Type = obj.TYPE_MEM
1276                         p.To.Reg = v.Args[0].Reg()
1277                         p.To.Index = ppc64.REGZERO
1278                         // Save the top of loop
1279                         if top == nil {
1280                                 top = p
1281                         }
1282                         p = s.Prog(ppc64.ASTXVD2X)
1283                         p.From.Type = obj.TYPE_REG
1284                         p.From.Reg = ppc64.REG_VS32
1285                         p.To.Type = obj.TYPE_MEM
1286                         p.To.Reg = v.Args[0].Reg()
1287                         p.To.Index = ppc64.REGTMP
1288
1289                         // Increment address for the
1290                         // 4 doublewords just zeroed.
1291                         p = s.Prog(ppc64.AADD)
1292                         p.Reg = v.Args[0].Reg()
1293                         p.From.Type = obj.TYPE_CONST
1294                         p.From.Offset = 32
1295                         p.To.Type = obj.TYPE_REG
1296                         p.To.Reg = v.Args[0].Reg()
1297
1298                         // Branch back to top of loop
1299                         // based on CTR
1300                         // BC with BO_BCTR generates bdnz
1301                         p = s.Prog(ppc64.ABC)
1302                         p.From.Type = obj.TYPE_CONST
1303                         p.From.Offset = ppc64.BO_BCTR
1304                         p.Reg = ppc64.REG_CR0LT
1305                         p.To.Type = obj.TYPE_BRANCH
1306                         p.To.SetTarget(top)
1307                 }
1308
1309                 // when ctr == 1 the loop was not generated but
1310                 // there are at least 32 bytes to clear, so add
1311                 // that to the remainder to generate the code
1312                 // to clear those doublewords
1313                 if ctr == 1 {
1314                         rem += 32
1315                 }
1316
1317                 // clear the remainder starting at offset zero
1318                 offset := int64(0)
1319
1320                 // first clear as many doublewords as possible
1321                 // then clear remaining sizes as available
1322                 for rem > 0 {
1323                         op, size := ppc64.AMOVB, int64(1)
1324                         switch {
1325                         case rem >= 8:
1326                                 op, size = ppc64.AMOVD, 8
1327                         case rem >= 4:
1328                                 op, size = ppc64.AMOVW, 4
1329                         case rem >= 2:
1330                                 op, size = ppc64.AMOVH, 2
1331                         }
1332                         p := s.Prog(op)
1333                         p.From.Type = obj.TYPE_REG
1334                         p.From.Reg = ppc64.REG_R0
1335                         p.To.Type = obj.TYPE_MEM
1336                         p.To.Reg = v.Args[0].Reg()
1337                         p.To.Offset = offset
1338                         rem -= size
1339                         offset += size
1340                 }
1341
1342         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1343
1344                 bytesPerLoop := int64(32)
1345                 // This will be used when moving more
1346                 // than 8 bytes.  Moves start with
1347                 // as many 8 byte moves as possible, then
1348                 // 4, 2, or 1 byte(s) as remaining.  This will
1349                 // work and be efficient for power8 or later.
1350                 // If there are 64 or more bytes, then a
1351                 // loop is generated to move 32 bytes and
1352                 // update the src and dst addresses on each
1353                 // iteration. When < 64 bytes, the appropriate
1354                 // number of moves are generated based on the
1355                 // size.
1356                 // When moving >= 64 bytes a loop is used
1357                 //      MOVD len/32,REG_TMP
1358                 //      MOVD REG_TMP,CTR
1359                 //      MOVD $16,REG_TMP
1360                 // top:
1361                 //      LXVD2X (R0)(R21),VS32
1362                 //      LXVD2X (R31)(R21),VS33
1363                 //      ADD $32,R21
1364                 //      STXVD2X VS32,(R0)(R20)
1365                 //      STXVD2X VS33,(R31)(R20)
1366                 //      ADD $32,R20
1367                 //      BC 16,0,top
1368                 // Bytes not moved by this loop are moved
1369                 // with a combination of the following instructions,
1370                 // starting with the largest sizes and generating as
1371                 // many as needed, using the appropriate offset value.
1372                 //      MOVD  n(R21),R31
1373                 //      MOVD  R31,n(R20)
1374                 //      MOVW  n1(R21),R31
1375                 //      MOVW  R31,n1(R20)
1376                 //      MOVH  n2(R21),R31
1377                 //      MOVH  R31,n2(R20)
1378                 //      MOVB  n3(R21),R31
1379                 //      MOVB  R31,n3(R20)
1380
1381                 // Each loop iteration moves 32 bytes
1382                 ctr := v.AuxInt / bytesPerLoop
1383
1384                 // Remainder after the loop
1385                 rem := v.AuxInt % bytesPerLoop
1386
1387                 dstReg := v.Args[0].Reg()
1388                 srcReg := v.Args[1].Reg()
1389
1390                 // The set of registers used here, must match the clobbered reg list
1391                 // in PPC64Ops.go.
1392                 offset := int64(0)
1393
1394                 // top of the loop
1395                 var top *obj.Prog
1396                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1397                 if ctr > 1 {
1398                         // Set up the CTR
1399                         p := s.Prog(ppc64.AMOVD)
1400                         p.From.Type = obj.TYPE_CONST
1401                         p.From.Offset = ctr
1402                         p.To.Type = obj.TYPE_REG
1403                         p.To.Reg = ppc64.REGTMP
1404
1405                         p = s.Prog(ppc64.AMOVD)
1406                         p.From.Type = obj.TYPE_REG
1407                         p.From.Reg = ppc64.REGTMP
1408                         p.To.Type = obj.TYPE_REG
1409                         p.To.Reg = ppc64.REG_CTR
1410
1411                         // Use REGTMP as index reg
1412                         p = s.Prog(ppc64.AMOVD)
1413                         p.From.Type = obj.TYPE_CONST
1414                         p.From.Offset = 16
1415                         p.To.Type = obj.TYPE_REG
1416                         p.To.Reg = ppc64.REGTMP
1417
1418                         // Don't add padding for
1419                         // alignment with small iteration
1420                         // counts.
1421                         if ctr > 3 {
1422                                 p = s.Prog(obj.APCALIGN)
1423                                 p.From.Type = obj.TYPE_CONST
1424                                 p.From.Offset = 16
1425                         }
1426
1427                         // Generate 16 byte loads and stores.
1428                         // Use temp register for index (16)
1429                         // on the second one.
1430
1431                         p = s.Prog(ppc64.ALXVD2X)
1432                         p.From.Type = obj.TYPE_MEM
1433                         p.From.Reg = srcReg
1434                         p.From.Index = ppc64.REGZERO
1435                         p.To.Type = obj.TYPE_REG
1436                         p.To.Reg = ppc64.REG_VS32
1437                         if top == nil {
1438                                 top = p
1439                         }
1440                         p = s.Prog(ppc64.ALXVD2X)
1441                         p.From.Type = obj.TYPE_MEM
1442                         p.From.Reg = srcReg
1443                         p.From.Index = ppc64.REGTMP
1444                         p.To.Type = obj.TYPE_REG
1445                         p.To.Reg = ppc64.REG_VS33
1446
1447                         // increment the src reg for next iteration
1448                         p = s.Prog(ppc64.AADD)
1449                         p.Reg = srcReg
1450                         p.From.Type = obj.TYPE_CONST
1451                         p.From.Offset = bytesPerLoop
1452                         p.To.Type = obj.TYPE_REG
1453                         p.To.Reg = srcReg
1454
1455                         // generate 16 byte stores
1456                         p = s.Prog(ppc64.ASTXVD2X)
1457                         p.From.Type = obj.TYPE_REG
1458                         p.From.Reg = ppc64.REG_VS32
1459                         p.To.Type = obj.TYPE_MEM
1460                         p.To.Reg = dstReg
1461                         p.To.Index = ppc64.REGZERO
1462
1463                         p = s.Prog(ppc64.ASTXVD2X)
1464                         p.From.Type = obj.TYPE_REG
1465                         p.From.Reg = ppc64.REG_VS33
1466                         p.To.Type = obj.TYPE_MEM
1467                         p.To.Reg = dstReg
1468                         p.To.Index = ppc64.REGTMP
1469
1470                         // increment the dst reg for next iteration
1471                         p = s.Prog(ppc64.AADD)
1472                         p.Reg = dstReg
1473                         p.From.Type = obj.TYPE_CONST
1474                         p.From.Offset = bytesPerLoop
1475                         p.To.Type = obj.TYPE_REG
1476                         p.To.Reg = dstReg
1477
1478                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1479                         // to loop top.
1480                         p = s.Prog(ppc64.ABC)
1481                         p.From.Type = obj.TYPE_CONST
1482                         p.From.Offset = ppc64.BO_BCTR
1483                         p.Reg = ppc64.REG_CR0LT
1484                         p.To.Type = obj.TYPE_BRANCH
1485                         p.To.SetTarget(top)
1486
1487                         // srcReg and dstReg were incremented in the loop, so
1488                         // later instructions start with offset 0.
1489                         offset = int64(0)
1490                 }
1491
1492                 // No loop was generated for one iteration, so
1493                 // add 32 bytes to the remainder to move those bytes.
1494                 if ctr == 1 {
1495                         rem += bytesPerLoop
1496                 }
1497
1498                 if rem >= 16 {
1499                         // Generate 16 byte loads and stores.
1500                         // Use temp register for index (value 16)
1501                         // on the second one.
1502                         p := s.Prog(ppc64.ALXVD2X)
1503                         p.From.Type = obj.TYPE_MEM
1504                         p.From.Reg = srcReg
1505                         p.From.Index = ppc64.REGZERO
1506                         p.To.Type = obj.TYPE_REG
1507                         p.To.Reg = ppc64.REG_VS32
1508
1509                         p = s.Prog(ppc64.ASTXVD2X)
1510                         p.From.Type = obj.TYPE_REG
1511                         p.From.Reg = ppc64.REG_VS32
1512                         p.To.Type = obj.TYPE_MEM
1513                         p.To.Reg = dstReg
1514                         p.To.Index = ppc64.REGZERO
1515
1516                         offset = 16
1517                         rem -= 16
1518
1519                         if rem >= 16 {
1520                                 // Use REGTMP as index reg
1521                                 p := s.Prog(ppc64.AMOVD)
1522                                 p.From.Type = obj.TYPE_CONST
1523                                 p.From.Offset = 16
1524                                 p.To.Type = obj.TYPE_REG
1525                                 p.To.Reg = ppc64.REGTMP
1526
1527                                 p = s.Prog(ppc64.ALXVD2X)
1528                                 p.From.Type = obj.TYPE_MEM
1529                                 p.From.Reg = srcReg
1530                                 p.From.Index = ppc64.REGTMP
1531                                 p.To.Type = obj.TYPE_REG
1532                                 p.To.Reg = ppc64.REG_VS32
1533
1534                                 p = s.Prog(ppc64.ASTXVD2X)
1535                                 p.From.Type = obj.TYPE_REG
1536                                 p.From.Reg = ppc64.REG_VS32
1537                                 p.To.Type = obj.TYPE_MEM
1538                                 p.To.Reg = dstReg
1539                                 p.To.Index = ppc64.REGTMP
1540
1541                                 offset = 32
1542                                 rem -= 16
1543                         }
1544                 }
1545
1546                 // Generate all the remaining load and store pairs, starting with
1547                 // as many 8 byte moves as possible, then 4, 2, 1.
1548                 for rem > 0 {
1549                         op, size := ppc64.AMOVB, int64(1)
1550                         switch {
1551                         case rem >= 8:
1552                                 op, size = ppc64.AMOVD, 8
1553                         case rem >= 4:
1554                                 op, size = ppc64.AMOVWZ, 4
1555                         case rem >= 2:
1556                                 op, size = ppc64.AMOVH, 2
1557                         }
1558                         // Load
1559                         p := s.Prog(op)
1560                         p.To.Type = obj.TYPE_REG
1561                         p.To.Reg = ppc64.REGTMP
1562                         p.From.Type = obj.TYPE_MEM
1563                         p.From.Reg = srcReg
1564                         p.From.Offset = offset
1565
1566                         // Store
1567                         p = s.Prog(op)
1568                         p.From.Type = obj.TYPE_REG
1569                         p.From.Reg = ppc64.REGTMP
1570                         p.To.Type = obj.TYPE_MEM
1571                         p.To.Reg = dstReg
1572                         p.To.Offset = offset
1573                         rem -= size
1574                         offset += size
1575                 }
1576
1577         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1578                 bytesPerLoop := int64(64)
1579                 // This is used when moving more
1580                 // than 8 bytes on power9.  Moves start with
1581                 // as many 8 byte moves as possible, then
1582                 // 4, 2, or 1 byte(s) as remaining.  This will
1583                 // work and be efficient for power8 or later.
1584                 // If there are 64 or more bytes, then a
1585                 // loop is generated to move 32 bytes and
1586                 // update the src and dst addresses on each
1587                 // iteration. When < 64 bytes, the appropriate
1588                 // number of moves are generated based on the
1589                 // size.
1590                 // When moving >= 64 bytes a loop is used
1591                 //      MOVD len/32,REG_TMP
1592                 //      MOVD REG_TMP,CTR
1593                 // top:
1594                 //      LXV 0(R21),VS32
1595                 //      LXV 16(R21),VS33
1596                 //      ADD $32,R21
1597                 //      STXV VS32,0(R20)
1598                 //      STXV VS33,16(R20)
1599                 //      ADD $32,R20
1600                 //      BC 16,0,top
1601                 // Bytes not moved by this loop are moved
1602                 // with a combination of the following instructions,
1603                 // starting with the largest sizes and generating as
1604                 // many as needed, using the appropriate offset value.
1605                 //      MOVD  n(R21),R31
1606                 //      MOVD  R31,n(R20)
1607                 //      MOVW  n1(R21),R31
1608                 //      MOVW  R31,n1(R20)
1609                 //      MOVH  n2(R21),R31
1610                 //      MOVH  R31,n2(R20)
1611                 //      MOVB  n3(R21),R31
1612                 //      MOVB  R31,n3(R20)
1613
1614                 // Each loop iteration moves 64 bytes (bytesPerLoop)
1615                 ctr := v.AuxInt / bytesPerLoop
1616
1617                 // Remainder after the loop
1618                 rem := v.AuxInt % bytesPerLoop
1619
1620                 dstReg := v.Args[0].Reg()
1621                 srcReg := v.Args[1].Reg()
1622
1623                 offset := int64(0)
1624
1625                 // top of the loop
1626                 var top *obj.Prog
1627
1628                 // Only generate looping code when loop counter is > 1, i.e. for >= 128 bytes
1629                 if ctr > 1 {
1630                         // Set up the CTR
1631                         p := s.Prog(ppc64.AMOVD)
1632                         p.From.Type = obj.TYPE_CONST
1633                         p.From.Offset = ctr
1634                         p.To.Type = obj.TYPE_REG
1635                         p.To.Reg = ppc64.REGTMP
1636
1637                         p = s.Prog(ppc64.AMOVD)
1638                         p.From.Type = obj.TYPE_REG
1639                         p.From.Reg = ppc64.REGTMP
1640                         p.To.Type = obj.TYPE_REG
1641                         p.To.Reg = ppc64.REG_CTR
1642
1643                         p = s.Prog(obj.APCALIGN)
1644                         p.From.Type = obj.TYPE_CONST
1645                         p.From.Offset = 16
1646
1647                         // Generate 16 byte loads and stores.
1648                         p = s.Prog(ppc64.ALXV)
1649                         p.From.Type = obj.TYPE_MEM
1650                         p.From.Reg = srcReg
1651                         p.From.Offset = offset
1652                         p.To.Type = obj.TYPE_REG
1653                         p.To.Reg = ppc64.REG_VS32
1654                         if top == nil {
1655                                 top = p
1656                         }
1657                         p = s.Prog(ppc64.ALXV)
1658                         p.From.Type = obj.TYPE_MEM
1659                         p.From.Reg = srcReg
1660                         p.From.Offset = offset + 16
1661                         p.To.Type = obj.TYPE_REG
1662                         p.To.Reg = ppc64.REG_VS33
1663
1664                         // generate 16 byte stores
1665                         p = s.Prog(ppc64.ASTXV)
1666                         p.From.Type = obj.TYPE_REG
1667                         p.From.Reg = ppc64.REG_VS32
1668                         p.To.Type = obj.TYPE_MEM
1669                         p.To.Reg = dstReg
1670                         p.To.Offset = offset
1671
1672                         p = s.Prog(ppc64.ASTXV)
1673                         p.From.Type = obj.TYPE_REG
1674                         p.From.Reg = ppc64.REG_VS33
1675                         p.To.Type = obj.TYPE_MEM
1676                         p.To.Reg = dstReg
1677                         p.To.Offset = offset + 16
1678
1679                         // Generate 16 byte loads and stores.
1680                         p = s.Prog(ppc64.ALXV)
1681                         p.From.Type = obj.TYPE_MEM
1682                         p.From.Reg = srcReg
1683                         p.From.Offset = offset + 32
1684                         p.To.Type = obj.TYPE_REG
1685                         p.To.Reg = ppc64.REG_VS32
1686
1687                         p = s.Prog(ppc64.ALXV)
1688                         p.From.Type = obj.TYPE_MEM
1689                         p.From.Reg = srcReg
1690                         p.From.Offset = offset + 48
1691                         p.To.Type = obj.TYPE_REG
1692                         p.To.Reg = ppc64.REG_VS33
1693
1694                         // generate 16 byte stores
1695                         p = s.Prog(ppc64.ASTXV)
1696                         p.From.Type = obj.TYPE_REG
1697                         p.From.Reg = ppc64.REG_VS32
1698                         p.To.Type = obj.TYPE_MEM
1699                         p.To.Reg = dstReg
1700                         p.To.Offset = offset + 32
1701
1702                         p = s.Prog(ppc64.ASTXV)
1703                         p.From.Type = obj.TYPE_REG
1704                         p.From.Reg = ppc64.REG_VS33
1705                         p.To.Type = obj.TYPE_MEM
1706                         p.To.Reg = dstReg
1707                         p.To.Offset = offset + 48
1708
1709                         // increment the src reg for next iteration
1710                         p = s.Prog(ppc64.AADD)
1711                         p.Reg = srcReg
1712                         p.From.Type = obj.TYPE_CONST
1713                         p.From.Offset = bytesPerLoop
1714                         p.To.Type = obj.TYPE_REG
1715                         p.To.Reg = srcReg
1716
1717                         // increment the dst reg for next iteration
1718                         p = s.Prog(ppc64.AADD)
1719                         p.Reg = dstReg
1720                         p.From.Type = obj.TYPE_CONST
1721                         p.From.Offset = bytesPerLoop
1722                         p.To.Type = obj.TYPE_REG
1723                         p.To.Reg = dstReg
1724
1725                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1726                         // to loop top.
1727                         p = s.Prog(ppc64.ABC)
1728                         p.From.Type = obj.TYPE_CONST
1729                         p.From.Offset = ppc64.BO_BCTR
1730                         p.Reg = ppc64.REG_CR0LT
1731                         p.To.Type = obj.TYPE_BRANCH
1732                         p.To.SetTarget(top)
1733
1734                         // srcReg and dstReg were incremented in the loop, so
1735                         // later instructions start with offset 0.
1736                         offset = int64(0)
1737                 }
1738
1739                 // No loop was generated for one iteration, so
1740                 // add 64 bytes (bytesPerLoop) to the remainder to move those bytes.
1741                 if ctr == 1 {
1742                         rem += bytesPerLoop
1743                 }
1744                 if rem >= 32 {
1745                         p := s.Prog(ppc64.ALXV)
1746                         p.From.Type = obj.TYPE_MEM
1747                         p.From.Reg = srcReg
1748                         p.To.Type = obj.TYPE_REG
1749                         p.To.Reg = ppc64.REG_VS32
1750
1751                         p = s.Prog(ppc64.ALXV)
1752                         p.From.Type = obj.TYPE_MEM
1753                         p.From.Reg = srcReg
1754                         p.From.Offset = 16
1755                         p.To.Type = obj.TYPE_REG
1756                         p.To.Reg = ppc64.REG_VS33
1757
1758                         p = s.Prog(ppc64.ASTXV)
1759                         p.From.Type = obj.TYPE_REG
1760                         p.From.Reg = ppc64.REG_VS32
1761                         p.To.Type = obj.TYPE_MEM
1762                         p.To.Reg = dstReg
1763
1764                         p = s.Prog(ppc64.ASTXV)
1765                         p.From.Type = obj.TYPE_REG
1766                         p.From.Reg = ppc64.REG_VS33
1767                         p.To.Type = obj.TYPE_MEM
1768                         p.To.Reg = dstReg
1769                         p.To.Offset = 16
1770
1771                         offset = 32
1772                         rem -= 32
1773                 }
1774
1775                 if rem >= 16 {
1776                         // Generate 16 byte loads and stores.
1777                         p := s.Prog(ppc64.ALXV)
1778                         p.From.Type = obj.TYPE_MEM
1779                         p.From.Reg = srcReg
1780                         p.From.Offset = offset
1781                         p.To.Type = obj.TYPE_REG
1782                         p.To.Reg = ppc64.REG_VS32
1783
1784                         p = s.Prog(ppc64.ASTXV)
1785                         p.From.Type = obj.TYPE_REG
1786                         p.From.Reg = ppc64.REG_VS32
1787                         p.To.Type = obj.TYPE_MEM
1788                         p.To.Reg = dstReg
1789                         p.To.Offset = offset
1790
1791                         offset += 16
1792                         rem -= 16
1793
1794                         if rem >= 16 {
1795                                 p := s.Prog(ppc64.ALXV)
1796                                 p.From.Type = obj.TYPE_MEM
1797                                 p.From.Reg = srcReg
1798                                 p.From.Offset = offset
1799                                 p.To.Type = obj.TYPE_REG
1800                                 p.To.Reg = ppc64.REG_VS32
1801
1802                                 p = s.Prog(ppc64.ASTXV)
1803                                 p.From.Type = obj.TYPE_REG
1804                                 p.From.Reg = ppc64.REG_VS32
1805                                 p.To.Type = obj.TYPE_MEM
1806                                 p.To.Reg = dstReg
1807                                 p.To.Offset = offset
1808
1809                                 offset += 16
1810                                 rem -= 16
1811                         }
1812                 }
1813                 // Generate all the remaining load and store pairs, starting with
1814                 // as many 8 byte moves as possible, then 4, 2, 1.
1815                 for rem > 0 {
1816                         op, size := ppc64.AMOVB, int64(1)
1817                         switch {
1818                         case rem >= 8:
1819                                 op, size = ppc64.AMOVD, 8
1820                         case rem >= 4:
1821                                 op, size = ppc64.AMOVWZ, 4
1822                         case rem >= 2:
1823                                 op, size = ppc64.AMOVH, 2
1824                         }
1825                         // Load
1826                         p := s.Prog(op)
1827                         p.To.Type = obj.TYPE_REG
1828                         p.To.Reg = ppc64.REGTMP
1829                         p.From.Type = obj.TYPE_MEM
1830                         p.From.Reg = srcReg
1831                         p.From.Offset = offset
1832
1833                         // Store
1834                         p = s.Prog(op)
1835                         p.From.Type = obj.TYPE_REG
1836                         p.From.Reg = ppc64.REGTMP
1837                         p.To.Type = obj.TYPE_MEM
1838                         p.To.Reg = dstReg
1839                         p.To.Offset = offset
1840                         rem -= size
1841                         offset += size
1842                 }
1843
1844         case ssa.OpPPC64CALLstatic:
1845                 s.Call(v)
1846
1847         case ssa.OpPPC64CALLtail:
1848                 s.TailCall(v)
1849
1850         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1851                 p := s.Prog(ppc64.AMOVD)
1852                 p.From.Type = obj.TYPE_REG
1853                 p.From.Reg = v.Args[0].Reg()
1854                 p.To.Type = obj.TYPE_REG
1855                 p.To.Reg = ppc64.REG_LR
1856
1857                 if v.Args[0].Reg() != ppc64.REG_R12 {
1858                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1859                 }
1860
1861                 pp := s.Call(v)
1862
1863                 // Convert the call into a blrl with hint this is not a subroutine return.
1864                 // The full bclrl opcode must be specified when passing a hint.
1865                 pp.As = ppc64.ABCL
1866                 pp.From.Type = obj.TYPE_CONST
1867                 pp.From.Offset = ppc64.BO_ALWAYS
1868                 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1869                 pp.To.Reg = ppc64.REG_LR
1870                 pp.SetFrom3Const(1)
1871
1872                 if base.Ctxt.Flag_shared {
1873                         // When compiling Go into PIC, the function we just
1874                         // called via pointer might have been implemented in
1875                         // a separate module and so overwritten the TOC
1876                         // pointer in R2; reload it.
1877                         q := s.Prog(ppc64.AMOVD)
1878                         q.From.Type = obj.TYPE_MEM
1879                         q.From.Offset = 24
1880                         q.From.Reg = ppc64.REGSP
1881                         q.To.Type = obj.TYPE_REG
1882                         q.To.Reg = ppc64.REG_R2
1883                 }
1884
1885         case ssa.OpPPC64LoweredWB:
1886                 p := s.Prog(obj.ACALL)
1887                 p.To.Type = obj.TYPE_MEM
1888                 p.To.Name = obj.NAME_EXTERN
1889                 p.To.Sym = v.Aux.(*obj.LSym)
1890
1891         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1892                 p := s.Prog(obj.ACALL)
1893                 p.To.Type = obj.TYPE_MEM
1894                 p.To.Name = obj.NAME_EXTERN
1895                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1896                 s.UseArgs(16) // space used in callee args area by assembly stubs
1897
1898         case ssa.OpPPC64LoweredNilCheck:
1899                 if buildcfg.GOOS == "aix" {
1900                         // CMP Rarg0, R0
1901                         // BNE 2(PC)
1902                         // STW R0, 0(R0)
1903                         // NOP (so the BNE has somewhere to land)
1904
1905                         // CMP Rarg0, R0
1906                         p := s.Prog(ppc64.ACMP)
1907                         p.From.Type = obj.TYPE_REG
1908                         p.From.Reg = v.Args[0].Reg()
1909                         p.To.Type = obj.TYPE_REG
1910                         p.To.Reg = ppc64.REG_R0
1911
1912                         // BNE 2(PC)
1913                         p2 := s.Prog(ppc64.ABNE)
1914                         p2.To.Type = obj.TYPE_BRANCH
1915
1916                         // STW R0, 0(R0)
1917                         // Write at 0 is forbidden and will trigger a SIGSEGV
1918                         p = s.Prog(ppc64.AMOVW)
1919                         p.From.Type = obj.TYPE_REG
1920                         p.From.Reg = ppc64.REG_R0
1921                         p.To.Type = obj.TYPE_MEM
1922                         p.To.Reg = ppc64.REG_R0
1923
1924                         // NOP (so the BNE has somewhere to land)
1925                         nop := s.Prog(obj.ANOP)
1926                         p2.To.SetTarget(nop)
1927
1928                 } else {
1929                         // Issue a load which will fault if arg is nil.
1930                         p := s.Prog(ppc64.AMOVBZ)
1931                         p.From.Type = obj.TYPE_MEM
1932                         p.From.Reg = v.Args[0].Reg()
1933                         ssagen.AddAux(&p.From, v)
1934                         p.To.Type = obj.TYPE_REG
1935                         p.To.Reg = ppc64.REGTMP
1936                 }
1937                 if logopt.Enabled() {
1938                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1939                 }
1940                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1941                         base.WarnfAt(v.Pos, "generated nil check")
1942                 }
1943
1944         // These should be resolved by rules and not make it here.
1945         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1946                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1947                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1948                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1949         case ssa.OpPPC64InvertFlags:
1950                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1951         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1952                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1953         case ssa.OpClobber, ssa.OpClobberReg:
1954                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1955         default:
1956                 v.Fatalf("genValue not implemented: %s", v.LongString())
1957         }
1958 }
1959
1960 var blockJump = [...]struct {
1961         asm, invasm     obj.As
1962         asmeq, invasmun bool
1963 }{
1964         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1965         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1966
1967         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1968         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1969         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1970         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1971
1972         // TODO: need to work FP comparisons into block jumps
1973         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1974         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1975         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1976         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1977 }
1978
1979 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1980         switch b.Kind {
1981         case ssa.BlockDefer:
1982                 // defer returns in R3:
1983                 // 0 if we should continue executing
1984                 // 1 if we should jump to deferreturn call
1985                 p := s.Prog(ppc64.ACMP)
1986                 p.From.Type = obj.TYPE_REG
1987                 p.From.Reg = ppc64.REG_R3
1988                 p.To.Type = obj.TYPE_REG
1989                 p.To.Reg = ppc64.REG_R0
1990
1991                 p = s.Prog(ppc64.ABNE)
1992                 p.To.Type = obj.TYPE_BRANCH
1993                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1994                 if b.Succs[0].Block() != next {
1995                         p := s.Prog(obj.AJMP)
1996                         p.To.Type = obj.TYPE_BRANCH
1997                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1998                 }
1999
2000         case ssa.BlockPlain:
2001                 if b.Succs[0].Block() != next {
2002                         p := s.Prog(obj.AJMP)
2003                         p.To.Type = obj.TYPE_BRANCH
2004                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2005                 }
2006         case ssa.BlockExit, ssa.BlockRetJmp:
2007         case ssa.BlockRet:
2008                 s.Prog(obj.ARET)
2009
2010         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2011                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2012                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2013                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2014                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2015                 jmp := blockJump[b.Kind]
2016                 switch next {
2017                 case b.Succs[0].Block():
2018                         s.Br(jmp.invasm, b.Succs[1].Block())
2019                         if jmp.invasmun {
2020                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2021                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2022                         }
2023                 case b.Succs[1].Block():
2024                         s.Br(jmp.asm, b.Succs[0].Block())
2025                         if jmp.asmeq {
2026                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2027                         }
2028                 default:
2029                         if b.Likely != ssa.BranchUnlikely {
2030                                 s.Br(jmp.asm, b.Succs[0].Block())
2031                                 if jmp.asmeq {
2032                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2033                                 }
2034                                 s.Br(obj.AJMP, b.Succs[1].Block())
2035                         } else {
2036                                 s.Br(jmp.invasm, b.Succs[1].Block())
2037                                 if jmp.invasmun {
2038                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2039                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2040                                 }
2041                                 s.Br(obj.AJMP, b.Succs[0].Block())
2042                         }
2043                 }
2044         default:
2045                 b.Fatalf("branch not implemented: %s", b.LongString())
2046         }
2047 }
2048
2049 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2050         p := s.Prog(loadByType(t))
2051         p.From.Type = obj.TYPE_MEM
2052         p.From.Name = obj.NAME_AUTO
2053         p.From.Sym = n.Linksym()
2054         p.From.Offset = n.FrameOffset() + off
2055         p.To.Type = obj.TYPE_REG
2056         p.To.Reg = reg
2057         return p
2058 }
2059
2060 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2061         p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2062         p.To.Name = obj.NAME_PARAM
2063         p.To.Sym = n.Linksym()
2064         p.Pos = p.Pos.WithNotStmt()
2065         return p
2066 }