]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
9ba66b35f39167c80cf51a02b5b5e82b3a1858f9
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/objw"
12         "cmd/compile/internal/ssa"
13         "cmd/compile/internal/ssagen"
14         "cmd/compile/internal/types"
15         "cmd/internal/obj"
16         "cmd/internal/obj/ppc64"
17         "internal/buildcfg"
18         "math"
19         "strings"
20 )
21
22 // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24         //      flive := b.FlagsLiveAtEnd
25         //      if b.Control != nil && b.Control.Type.IsFlags() {
26         //              flive = true
27         //      }
28         //      for i := len(b.Values) - 1; i >= 0; i-- {
29         //              v := b.Values[i]
30         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
31         //                      // The "mark" is any non-nil Aux value.
32         //                      v.Aux = v
33         //              }
34         //              if v.Type.IsFlags() {
35         //                      flive = false
36         //              }
37         //              for _, a := range v.Args {
38         //                      if a.Type.IsFlags() {
39         //                              flive = true
40         //                      }
41         //              }
42         //      }
43 }
44
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
47         if t.IsFloat() {
48                 switch t.Size() {
49                 case 4:
50                         return ppc64.AFMOVS
51                 case 8:
52                         return ppc64.AFMOVD
53                 }
54         } else {
55                 switch t.Size() {
56                 case 1:
57                         if t.IsSigned() {
58                                 return ppc64.AMOVB
59                         } else {
60                                 return ppc64.AMOVBZ
61                         }
62                 case 2:
63                         if t.IsSigned() {
64                                 return ppc64.AMOVH
65                         } else {
66                                 return ppc64.AMOVHZ
67                         }
68                 case 4:
69                         if t.IsSigned() {
70                                 return ppc64.AMOVW
71                         } else {
72                                 return ppc64.AMOVWZ
73                         }
74                 case 8:
75                         return ppc64.AMOVD
76                 }
77         }
78         panic("bad load type")
79 }
80
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
83         if t.IsFloat() {
84                 switch t.Size() {
85                 case 4:
86                         return ppc64.AFMOVS
87                 case 8:
88                         return ppc64.AFMOVD
89                 }
90         } else {
91                 switch t.Size() {
92                 case 1:
93                         return ppc64.AMOVB
94                 case 2:
95                         return ppc64.AMOVH
96                 case 4:
97                         return ppc64.AMOVW
98                 case 8:
99                         return ppc64.AMOVD
100                 }
101         }
102         panic("bad store type")
103 }
104
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
106         switch v.Op {
107         case ssa.OpCopy:
108                 t := v.Type
109                 if t.IsMemory() {
110                         return
111                 }
112                 x := v.Args[0].Reg()
113                 y := v.Reg()
114                 if x != y {
115                         rt := obj.TYPE_REG
116                         op := ppc64.AMOVD
117
118                         if t.IsFloat() {
119                                 op = ppc64.AFMOVD
120                         }
121                         p := s.Prog(op)
122                         p.From.Type = rt
123                         p.From.Reg = x
124                         p.To.Type = rt
125                         p.To.Reg = y
126                 }
127
128         case ssa.OpPPC64LoweredAtomicAnd8,
129                 ssa.OpPPC64LoweredAtomicAnd32,
130                 ssa.OpPPC64LoweredAtomicOr8,
131                 ssa.OpPPC64LoweredAtomicOr32:
132                 // LWSYNC
133                 // LBAR/LWAR    (Rarg0), Rtmp
134                 // AND/OR       Rarg1, Rtmp
135                 // STBCCC/STWCCC Rtmp, (Rarg0)
136                 // BNE          -3(PC)
137                 ld := ppc64.ALBAR
138                 st := ppc64.ASTBCCC
139                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
140                         ld = ppc64.ALWAR
141                         st = ppc64.ASTWCCC
142                 }
143                 r0 := v.Args[0].Reg()
144                 r1 := v.Args[1].Reg()
145                 // LWSYNC - Assuming shared data not write-through-required nor
146                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
147                 plwsync := s.Prog(ppc64.ALWSYNC)
148                 plwsync.To.Type = obj.TYPE_NONE
149                 // LBAR or LWAR
150                 p := s.Prog(ld)
151                 p.From.Type = obj.TYPE_MEM
152                 p.From.Reg = r0
153                 p.To.Type = obj.TYPE_REG
154                 p.To.Reg = ppc64.REGTMP
155                 // AND/OR reg1,out
156                 p1 := s.Prog(v.Op.Asm())
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.To.Type = obj.TYPE_REG
160                 p1.To.Reg = ppc64.REGTMP
161                 // STBCCC or STWCCC
162                 p2 := s.Prog(st)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGTMP
165                 p2.To.Type = obj.TYPE_MEM
166                 p2.To.Reg = r0
167                 p2.RegTo2 = ppc64.REGTMP
168                 // BNE retry
169                 p3 := s.Prog(ppc64.ABNE)
170                 p3.To.Type = obj.TYPE_BRANCH
171                 p3.To.SetTarget(p)
172
173         case ssa.OpPPC64LoweredAtomicAdd32,
174                 ssa.OpPPC64LoweredAtomicAdd64:
175                 // LWSYNC
176                 // LDAR/LWAR    (Rarg0), Rout
177                 // ADD          Rarg1, Rout
178                 // STDCCC/STWCCC Rout, (Rarg0)
179                 // BNE         -3(PC)
180                 // MOVW         Rout,Rout (if Add32)
181                 ld := ppc64.ALDAR
182                 st := ppc64.ASTDCCC
183                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
184                         ld = ppc64.ALWAR
185                         st = ppc64.ASTWCCC
186                 }
187                 r0 := v.Args[0].Reg()
188                 r1 := v.Args[1].Reg()
189                 out := v.Reg0()
190                 // LWSYNC - Assuming shared data not write-through-required nor
191                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
192                 plwsync := s.Prog(ppc64.ALWSYNC)
193                 plwsync.To.Type = obj.TYPE_NONE
194                 // LDAR or LWAR
195                 p := s.Prog(ld)
196                 p.From.Type = obj.TYPE_MEM
197                 p.From.Reg = r0
198                 p.To.Type = obj.TYPE_REG
199                 p.To.Reg = out
200                 // ADD reg1,out
201                 p1 := s.Prog(ppc64.AADD)
202                 p1.From.Type = obj.TYPE_REG
203                 p1.From.Reg = r1
204                 p1.To.Reg = out
205                 p1.To.Type = obj.TYPE_REG
206                 // STDCCC or STWCCC
207                 p3 := s.Prog(st)
208                 p3.From.Type = obj.TYPE_REG
209                 p3.From.Reg = out
210                 p3.To.Type = obj.TYPE_MEM
211                 p3.To.Reg = r0
212                 // BNE retry
213                 p4 := s.Prog(ppc64.ABNE)
214                 p4.To.Type = obj.TYPE_BRANCH
215                 p4.To.SetTarget(p)
216
217                 // Ensure a 32 bit result
218                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
219                         p5 := s.Prog(ppc64.AMOVWZ)
220                         p5.To.Type = obj.TYPE_REG
221                         p5.To.Reg = out
222                         p5.From.Type = obj.TYPE_REG
223                         p5.From.Reg = out
224                 }
225
226         case ssa.OpPPC64LoweredAtomicExchange32,
227                 ssa.OpPPC64LoweredAtomicExchange64:
228                 // LWSYNC
229                 // LDAR/LWAR    (Rarg0), Rout
230                 // STDCCC/STWCCC Rout, (Rarg0)
231                 // BNE         -2(PC)
232                 // ISYNC
233                 ld := ppc64.ALDAR
234                 st := ppc64.ASTDCCC
235                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
236                         ld = ppc64.ALWAR
237                         st = ppc64.ASTWCCC
238                 }
239                 r0 := v.Args[0].Reg()
240                 r1 := v.Args[1].Reg()
241                 out := v.Reg0()
242                 // LWSYNC - Assuming shared data not write-through-required nor
243                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
244                 plwsync := s.Prog(ppc64.ALWSYNC)
245                 plwsync.To.Type = obj.TYPE_NONE
246                 // LDAR or LWAR
247                 p := s.Prog(ld)
248                 p.From.Type = obj.TYPE_MEM
249                 p.From.Reg = r0
250                 p.To.Type = obj.TYPE_REG
251                 p.To.Reg = out
252                 // STDCCC or STWCCC
253                 p1 := s.Prog(st)
254                 p1.From.Type = obj.TYPE_REG
255                 p1.From.Reg = r1
256                 p1.To.Type = obj.TYPE_MEM
257                 p1.To.Reg = r0
258                 // BNE retry
259                 p2 := s.Prog(ppc64.ABNE)
260                 p2.To.Type = obj.TYPE_BRANCH
261                 p2.To.SetTarget(p)
262                 // ISYNC
263                 pisync := s.Prog(ppc64.AISYNC)
264                 pisync.To.Type = obj.TYPE_NONE
265
266         case ssa.OpPPC64LoweredAtomicLoad8,
267                 ssa.OpPPC64LoweredAtomicLoad32,
268                 ssa.OpPPC64LoweredAtomicLoad64,
269                 ssa.OpPPC64LoweredAtomicLoadPtr:
270                 // SYNC
271                 // MOVB/MOVD/MOVW (Rarg0), Rout
272                 // CMP Rout,Rout
273                 // BNE 1(PC)
274                 // ISYNC
275                 ld := ppc64.AMOVD
276                 cmp := ppc64.ACMP
277                 switch v.Op {
278                 case ssa.OpPPC64LoweredAtomicLoad8:
279                         ld = ppc64.AMOVBZ
280                 case ssa.OpPPC64LoweredAtomicLoad32:
281                         ld = ppc64.AMOVWZ
282                         cmp = ppc64.ACMPW
283                 }
284                 arg0 := v.Args[0].Reg()
285                 out := v.Reg0()
286                 // SYNC when AuxInt == 1; otherwise, load-acquire
287                 if v.AuxInt == 1 {
288                         psync := s.Prog(ppc64.ASYNC)
289                         psync.To.Type = obj.TYPE_NONE
290                 }
291                 // Load
292                 p := s.Prog(ld)
293                 p.From.Type = obj.TYPE_MEM
294                 p.From.Reg = arg0
295                 p.To.Type = obj.TYPE_REG
296                 p.To.Reg = out
297                 // CMP
298                 p1 := s.Prog(cmp)
299                 p1.From.Type = obj.TYPE_REG
300                 p1.From.Reg = out
301                 p1.To.Type = obj.TYPE_REG
302                 p1.To.Reg = out
303                 // BNE
304                 p2 := s.Prog(ppc64.ABNE)
305                 p2.To.Type = obj.TYPE_BRANCH
306                 // ISYNC
307                 pisync := s.Prog(ppc64.AISYNC)
308                 pisync.To.Type = obj.TYPE_NONE
309                 p2.To.SetTarget(pisync)
310
311         case ssa.OpPPC64LoweredAtomicStore8,
312                 ssa.OpPPC64LoweredAtomicStore32,
313                 ssa.OpPPC64LoweredAtomicStore64:
314                 // SYNC or LWSYNC
315                 // MOVB/MOVW/MOVD arg1,(arg0)
316                 st := ppc64.AMOVD
317                 switch v.Op {
318                 case ssa.OpPPC64LoweredAtomicStore8:
319                         st = ppc64.AMOVB
320                 case ssa.OpPPC64LoweredAtomicStore32:
321                         st = ppc64.AMOVW
322                 }
323                 arg0 := v.Args[0].Reg()
324                 arg1 := v.Args[1].Reg()
325                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
326                 // SYNC
327                 syncOp := ppc64.ASYNC
328                 if v.AuxInt == 0 {
329                         syncOp = ppc64.ALWSYNC
330                 }
331                 psync := s.Prog(syncOp)
332                 psync.To.Type = obj.TYPE_NONE
333                 // Store
334                 p := s.Prog(st)
335                 p.To.Type = obj.TYPE_MEM
336                 p.To.Reg = arg0
337                 p.From.Type = obj.TYPE_REG
338                 p.From.Reg = arg1
339
340         case ssa.OpPPC64LoweredAtomicCas64,
341                 ssa.OpPPC64LoweredAtomicCas32:
342                 // MOVD        $0, Rout
343                 // LWSYNC
344                 // loop:
345                 // LDAR        (Rarg0), MutexHint, Rtmp
346                 // CMP         Rarg1, Rtmp
347                 // BNE         end
348                 // STDCCC      Rarg2, (Rarg0)
349                 // BNE         loop
350                 // MOVD        $1, Rout
351                 // end:
352                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
353                 ld := ppc64.ALDAR
354                 st := ppc64.ASTDCCC
355                 cmp := ppc64.ACMP
356                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
357                         ld = ppc64.ALWAR
358                         st = ppc64.ASTWCCC
359                         cmp = ppc64.ACMPW
360                 }
361                 r0 := v.Args[0].Reg()
362                 r1 := v.Args[1].Reg()
363                 r2 := v.Args[2].Reg()
364                 out := v.Reg0()
365                 // Initialize return value to false
366                 p := s.Prog(ppc64.AMOVD)
367                 p.From.Type = obj.TYPE_CONST
368                 p.From.Offset = 0
369                 p.To.Type = obj.TYPE_REG
370                 p.To.Reg = out
371                 // LWSYNC - Assuming shared data not write-through-required nor
372                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
373                 plwsync1 := s.Prog(ppc64.ALWSYNC)
374                 plwsync1.To.Type = obj.TYPE_NONE
375                 // LDAR or LWAR
376                 p0 := s.Prog(ld)
377                 p0.From.Type = obj.TYPE_MEM
378                 p0.From.Reg = r0
379                 p0.To.Type = obj.TYPE_REG
380                 p0.To.Reg = ppc64.REGTMP
381                 // If it is a Compare-and-Swap-Release operation, set the EH field with
382                 // the release hint.
383                 if v.AuxInt == 0 {
384                         p0.AddRestSourceConst(0)
385                 }
386                 // CMP reg1,reg2
387                 p1 := s.Prog(cmp)
388                 p1.From.Type = obj.TYPE_REG
389                 p1.From.Reg = r1
390                 p1.To.Reg = ppc64.REGTMP
391                 p1.To.Type = obj.TYPE_REG
392                 // BNE done with return value = false
393                 p2 := s.Prog(ppc64.ABNE)
394                 p2.To.Type = obj.TYPE_BRANCH
395                 // STDCCC or STWCCC
396                 p3 := s.Prog(st)
397                 p3.From.Type = obj.TYPE_REG
398                 p3.From.Reg = r2
399                 p3.To.Type = obj.TYPE_MEM
400                 p3.To.Reg = r0
401                 // BNE retry
402                 p4 := s.Prog(ppc64.ABNE)
403                 p4.To.Type = obj.TYPE_BRANCH
404                 p4.To.SetTarget(p0)
405                 // return value true
406                 p5 := s.Prog(ppc64.AMOVD)
407                 p5.From.Type = obj.TYPE_CONST
408                 p5.From.Offset = 1
409                 p5.To.Type = obj.TYPE_REG
410                 p5.To.Reg = out
411                 // LWSYNC - Assuming shared data not write-through-required nor
412                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
413                 // If the operation is a CAS-Release, then synchronization is not necessary.
414                 if v.AuxInt != 0 {
415                         plwsync2 := s.Prog(ppc64.ALWSYNC)
416                         plwsync2.To.Type = obj.TYPE_NONE
417                         p2.To.SetTarget(plwsync2)
418                 } else {
419                         // done (label)
420                         p6 := s.Prog(obj.ANOP)
421                         p2.To.SetTarget(p6)
422                 }
423
424         case ssa.OpPPC64LoweredPubBarrier:
425                 // LWSYNC
426                 s.Prog(v.Op.Asm())
427
428         case ssa.OpPPC64LoweredGetClosurePtr:
429                 // Closure pointer is R11 (already)
430                 ssagen.CheckLoweredGetClosurePtr(v)
431
432         case ssa.OpPPC64LoweredGetCallerSP:
433                 // caller's SP is FixedFrameSize below the address of the first arg
434                 p := s.Prog(ppc64.AMOVD)
435                 p.From.Type = obj.TYPE_ADDR
436                 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
437                 p.From.Name = obj.NAME_PARAM
438                 p.To.Type = obj.TYPE_REG
439                 p.To.Reg = v.Reg()
440
441         case ssa.OpPPC64LoweredGetCallerPC:
442                 p := s.Prog(obj.AGETCALLERPC)
443                 p.To.Type = obj.TYPE_REG
444                 p.To.Reg = v.Reg()
445
446         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
447                 // input is already rounded
448
449         case ssa.OpLoadReg:
450                 loadOp := loadByType(v.Type)
451                 p := s.Prog(loadOp)
452                 ssagen.AddrAuto(&p.From, v.Args[0])
453                 p.To.Type = obj.TYPE_REG
454                 p.To.Reg = v.Reg()
455
456         case ssa.OpStoreReg:
457                 storeOp := storeByType(v.Type)
458                 p := s.Prog(storeOp)
459                 p.From.Type = obj.TYPE_REG
460                 p.From.Reg = v.Args[0].Reg()
461                 ssagen.AddrAuto(&p.To, v)
462
463         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
464                 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
465                 // The loop only runs once.
466                 for _, a := range v.Block.Func.RegArgs {
467                         // Pass the spill/unspill information along to the assembler, offset by size of
468                         // the saved LR slot.
469                         addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
470                         s.FuncInfo().AddSpill(
471                                 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
472                 }
473                 v.Block.Func.RegArgs = nil
474
475                 ssagen.CheckArgReg(v)
476
477         case ssa.OpPPC64DIVD:
478                 // For now,
479                 //
480                 // cmp arg1, -1
481                 // be  ahead
482                 // v = arg0 / arg1
483                 // b over
484                 // ahead: v = - arg0
485                 // over: nop
486                 r := v.Reg()
487                 r0 := v.Args[0].Reg()
488                 r1 := v.Args[1].Reg()
489
490                 p := s.Prog(ppc64.ACMP)
491                 p.From.Type = obj.TYPE_REG
492                 p.From.Reg = r1
493                 p.To.Type = obj.TYPE_CONST
494                 p.To.Offset = -1
495
496                 pbahead := s.Prog(ppc64.ABEQ)
497                 pbahead.To.Type = obj.TYPE_BRANCH
498
499                 p = s.Prog(v.Op.Asm())
500                 p.From.Type = obj.TYPE_REG
501                 p.From.Reg = r1
502                 p.Reg = r0
503                 p.To.Type = obj.TYPE_REG
504                 p.To.Reg = r
505
506                 pbover := s.Prog(obj.AJMP)
507                 pbover.To.Type = obj.TYPE_BRANCH
508
509                 p = s.Prog(ppc64.ANEG)
510                 p.To.Type = obj.TYPE_REG
511                 p.To.Reg = r
512                 p.From.Type = obj.TYPE_REG
513                 p.From.Reg = r0
514                 pbahead.To.SetTarget(p)
515
516                 p = s.Prog(obj.ANOP)
517                 pbover.To.SetTarget(p)
518
519         case ssa.OpPPC64DIVW:
520                 // word-width version of above
521                 r := v.Reg()
522                 r0 := v.Args[0].Reg()
523                 r1 := v.Args[1].Reg()
524
525                 p := s.Prog(ppc64.ACMPW)
526                 p.From.Type = obj.TYPE_REG
527                 p.From.Reg = r1
528                 p.To.Type = obj.TYPE_CONST
529                 p.To.Offset = -1
530
531                 pbahead := s.Prog(ppc64.ABEQ)
532                 pbahead.To.Type = obj.TYPE_BRANCH
533
534                 p = s.Prog(v.Op.Asm())
535                 p.From.Type = obj.TYPE_REG
536                 p.From.Reg = r1
537                 p.Reg = r0
538                 p.To.Type = obj.TYPE_REG
539                 p.To.Reg = r
540
541                 pbover := s.Prog(obj.AJMP)
542                 pbover.To.Type = obj.TYPE_BRANCH
543
544                 p = s.Prog(ppc64.ANEG)
545                 p.To.Type = obj.TYPE_REG
546                 p.To.Reg = r
547                 p.From.Type = obj.TYPE_REG
548                 p.From.Reg = r0
549                 pbahead.To.SetTarget(p)
550
551                 p = s.Prog(obj.ANOP)
552                 pbover.To.SetTarget(p)
553
554         case ssa.OpPPC64CLRLSLWI:
555                 r := v.Reg()
556                 r1 := v.Args[0].Reg()
557                 shifts := v.AuxInt
558                 p := s.Prog(v.Op.Asm())
559                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
560                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
561                 p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
562                 p.Reg = r1
563                 p.To.Type = obj.TYPE_REG
564                 p.To.Reg = r
565
566         case ssa.OpPPC64CLRLSLDI:
567                 r := v.Reg()
568                 r1 := v.Args[0].Reg()
569                 shifts := v.AuxInt
570                 p := s.Prog(v.Op.Asm())
571                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
572                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
573                 p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
574                 p.Reg = r1
575                 p.To.Type = obj.TYPE_REG
576                 p.To.Reg = r
577
578         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
579                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
580                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
581                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
582                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
583                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
584                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
585                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
586                 r := v.Reg()
587                 r1 := v.Args[0].Reg()
588                 r2 := v.Args[1].Reg()
589                 p := s.Prog(v.Op.Asm())
590                 p.From.Type = obj.TYPE_REG
591                 p.From.Reg = r2
592                 p.Reg = r1
593                 p.To.Type = obj.TYPE_REG
594                 p.To.Reg = r
595
596         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
597                 r1 := v.Args[0].Reg()
598                 r2 := v.Args[1].Reg()
599                 p := s.Prog(v.Op.Asm())
600                 p.From.Type = obj.TYPE_REG
601                 p.From.Reg = r2
602                 p.Reg = r1
603                 p.To.Type = obj.TYPE_REG
604                 p.To.Reg = v.Reg0()
605
606         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
607                 p := s.Prog(v.Op.Asm())
608                 p.From.Type = obj.TYPE_CONST
609                 p.From.Offset = v.AuxInt
610                 p.Reg = v.Args[0].Reg()
611                 p.To.Type = obj.TYPE_REG
612                 p.To.Reg = v.Reg()
613
614                 // Auxint holds encoded rotate + mask
615         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
616                 sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
617                 p := s.Prog(v.Op.Asm())
618                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
619                 p.Reg = v.Args[0].Reg()
620                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
621                 p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
622                 // Auxint holds mask
623
624         case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICR:
625                 sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
626                 p := s.Prog(v.Op.Asm())
627                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
628                 switch v.Op {
629                 case ssa.OpPPC64RLDICL:
630                         p.AddRestSourceConst(mb)
631                 case ssa.OpPPC64RLDICR:
632                         p.AddRestSourceConst(me)
633                 }
634                 p.Reg = v.Args[0].Reg()
635                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
636
637         case ssa.OpPPC64RLWNM:
638                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
639                 p := s.Prog(v.Op.Asm())
640                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
641                 p.Reg = v.Args[0].Reg()
642                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
643                 p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
644
645         case ssa.OpPPC64MADDLD:
646                 r := v.Reg()
647                 r1 := v.Args[0].Reg()
648                 r2 := v.Args[1].Reg()
649                 r3 := v.Args[2].Reg()
650                 // r = r1*r2 ± r3
651                 p := s.Prog(v.Op.Asm())
652                 p.From.Type = obj.TYPE_REG
653                 p.From.Reg = r1
654                 p.Reg = r2
655                 p.AddRestSourceReg(r3)
656                 p.To.Type = obj.TYPE_REG
657                 p.To.Reg = r
658
659         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
660                 r := v.Reg()
661                 r1 := v.Args[0].Reg()
662                 r2 := v.Args[1].Reg()
663                 r3 := v.Args[2].Reg()
664                 // r = r1*r2 ± r3
665                 p := s.Prog(v.Op.Asm())
666                 p.From.Type = obj.TYPE_REG
667                 p.From.Reg = r1
668                 p.Reg = r3
669                 p.AddRestSourceReg(r2)
670                 p.To.Type = obj.TYPE_REG
671                 p.To.Reg = r
672
673         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
674                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
675                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
676                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
677                 r := v.Reg()
678                 p := s.Prog(v.Op.Asm())
679                 p.To.Type = obj.TYPE_REG
680                 p.To.Reg = r
681                 p.From.Type = obj.TYPE_REG
682                 p.From.Reg = v.Args[0].Reg()
683
684         case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
685                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
686                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
687                 p := s.Prog(v.Op.Asm())
688                 p.Reg = v.Args[0].Reg()
689                 p.From.Type = obj.TYPE_CONST
690                 p.From.Offset = v.AuxInt
691                 p.To.Type = obj.TYPE_REG
692                 p.To.Reg = v.Reg()
693
694         case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
695                 r := v.Reg0() // CA is the first, implied argument.
696                 r1 := v.Args[0].Reg()
697                 r2 := v.Args[1].Reg()
698                 p := s.Prog(v.Op.Asm())
699                 p.From.Type = obj.TYPE_REG
700                 p.From.Reg = r2
701                 p.Reg = r1
702                 p.To.Type = obj.TYPE_REG
703                 p.To.Reg = r
704
705         case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
706                 p := s.Prog(v.Op.Asm())
707                 p.From.Type = obj.TYPE_REG
708                 p.From.Reg = ppc64.REG_R0
709                 p.To.Type = obj.TYPE_REG
710                 p.To.Reg = v.Reg()
711
712         case ssa.OpPPC64ADDCconst:
713                 p := s.Prog(v.Op.Asm())
714                 p.Reg = v.Args[0].Reg()
715                 p.From.Type = obj.TYPE_CONST
716                 p.From.Offset = v.AuxInt
717                 p.To.Type = obj.TYPE_REG
718                 // Output is a pair, the second is the CA, which is implied.
719                 p.To.Reg = v.Reg0()
720
721         case ssa.OpPPC64SUBCconst:
722                 p := s.Prog(v.Op.Asm())
723                 p.AddRestSourceConst(v.AuxInt)
724                 p.From.Type = obj.TYPE_REG
725                 p.From.Reg = v.Args[0].Reg()
726                 p.To.Type = obj.TYPE_REG
727                 p.To.Reg = v.Reg0()
728
729         case ssa.OpPPC64SUBFCconst:
730                 p := s.Prog(v.Op.Asm())
731                 p.AddRestSourceConst(v.AuxInt)
732                 p.From.Type = obj.TYPE_REG
733                 p.From.Reg = v.Args[0].Reg()
734                 p.To.Type = obj.TYPE_REG
735                 p.To.Reg = v.Reg()
736
737         case ssa.OpPPC64ANDCCconst:
738                 p := s.Prog(v.Op.Asm())
739                 p.Reg = v.Args[0].Reg()
740                 p.From.Type = obj.TYPE_CONST
741                 p.From.Offset = v.AuxInt
742                 p.To.Type = obj.TYPE_REG
743                 //              p.To.Reg = ppc64.REGTMP // discard result
744                 p.To.Reg = v.Reg0()
745
746         case ssa.OpPPC64MOVDaddr:
747                 switch v.Aux.(type) {
748                 default:
749                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
750                 case nil:
751                         // If aux offset and aux int are both 0, and the same
752                         // input and output regs are used, no instruction
753                         // needs to be generated, since it would just be
754                         // addi rx, rx, 0.
755                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
756                                 p := s.Prog(ppc64.AMOVD)
757                                 p.From.Type = obj.TYPE_ADDR
758                                 p.From.Reg = v.Args[0].Reg()
759                                 p.From.Offset = v.AuxInt
760                                 p.To.Type = obj.TYPE_REG
761                                 p.To.Reg = v.Reg()
762                         }
763
764                 case *obj.LSym, ir.Node:
765                         p := s.Prog(ppc64.AMOVD)
766                         p.From.Type = obj.TYPE_ADDR
767                         p.From.Reg = v.Args[0].Reg()
768                         p.To.Type = obj.TYPE_REG
769                         p.To.Reg = v.Reg()
770                         ssagen.AddAux(&p.From, v)
771
772                 }
773
774         case ssa.OpPPC64MOVDconst:
775                 p := s.Prog(v.Op.Asm())
776                 p.From.Type = obj.TYPE_CONST
777                 p.From.Offset = v.AuxInt
778                 p.To.Type = obj.TYPE_REG
779                 p.To.Reg = v.Reg()
780
781         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
782                 p := s.Prog(v.Op.Asm())
783                 p.From.Type = obj.TYPE_FCONST
784                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
785                 p.To.Type = obj.TYPE_REG
786                 p.To.Reg = v.Reg()
787
788         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
789                 p := s.Prog(v.Op.Asm())
790                 p.From.Type = obj.TYPE_REG
791                 p.From.Reg = v.Args[0].Reg()
792                 p.To.Type = obj.TYPE_REG
793                 p.To.Reg = v.Args[1].Reg()
794
795         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
796                 p := s.Prog(v.Op.Asm())
797                 p.From.Type = obj.TYPE_REG
798                 p.From.Reg = v.Args[0].Reg()
799                 p.To.Type = obj.TYPE_CONST
800                 p.To.Offset = v.AuxInt
801
802         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
803                 // Shift in register to required size
804                 p := s.Prog(v.Op.Asm())
805                 p.From.Type = obj.TYPE_REG
806                 p.From.Reg = v.Args[0].Reg()
807                 p.To.Reg = v.Reg()
808                 p.To.Type = obj.TYPE_REG
809
810         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
811
812                 // MOVDload and MOVWload are DS form instructions that are restricted to
813                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
814                 // then the address of the symbol to be loaded is computed (base + offset)
815                 // and used as the new base register and the offset field in the instruction
816                 // can be set to zero.
817
818                 // This same problem can happen with gostrings since the final offset is not
819                 // known yet, but could be unaligned after the relocation is resolved.
820                 // So gostrings are handled the same way.
821
822                 // This allows the MOVDload and MOVWload to be generated in more cases and
823                 // eliminates some offset and alignment checking in the rules file.
824
825                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
826                 ssagen.AddAux(&fromAddr, v)
827
828                 genAddr := false
829
830                 switch fromAddr.Name {
831                 case obj.NAME_EXTERN, obj.NAME_STATIC:
832                         // Special case for a rule that combines the bytes of a gostring.
833                         // The v alignment might seem OK, but we don't want to load it
834                         // using an offset because relocation comes later.
835                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
836                 default:
837                         genAddr = fromAddr.Offset%4 != 0
838                 }
839                 if genAddr {
840                         // Load full address into the temp register.
841                         p := s.Prog(ppc64.AMOVD)
842                         p.From.Type = obj.TYPE_ADDR
843                         p.From.Reg = v.Args[0].Reg()
844                         ssagen.AddAux(&p.From, v)
845                         // Load target using temp as base register
846                         // and offset zero. Setting NAME_NONE
847                         // prevents any extra offsets from being
848                         // added.
849                         p.To.Type = obj.TYPE_REG
850                         p.To.Reg = ppc64.REGTMP
851                         fromAddr.Reg = ppc64.REGTMP
852                         // Clear the offset field and other
853                         // information that might be used
854                         // by the assembler to add to the
855                         // final offset value.
856                         fromAddr.Offset = 0
857                         fromAddr.Name = obj.NAME_NONE
858                         fromAddr.Sym = nil
859                 }
860                 p := s.Prog(v.Op.Asm())
861                 p.From = fromAddr
862                 p.To.Type = obj.TYPE_REG
863                 p.To.Reg = v.Reg()
864
865         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
866                 p := s.Prog(v.Op.Asm())
867                 p.From.Type = obj.TYPE_MEM
868                 p.From.Reg = v.Args[0].Reg()
869                 ssagen.AddAux(&p.From, v)
870                 p.To.Type = obj.TYPE_REG
871                 p.To.Reg = v.Reg()
872
873         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
874                 p := s.Prog(v.Op.Asm())
875                 p.From.Type = obj.TYPE_MEM
876                 p.From.Reg = v.Args[0].Reg()
877                 p.To.Type = obj.TYPE_REG
878                 p.To.Reg = v.Reg()
879
880         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
881                 p := s.Prog(v.Op.Asm())
882                 p.To.Type = obj.TYPE_MEM
883                 p.To.Reg = v.Args[0].Reg()
884                 p.From.Type = obj.TYPE_REG
885                 p.From.Reg = v.Args[1].Reg()
886
887         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
888                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
889                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
890                 p := s.Prog(v.Op.Asm())
891                 p.From.Type = obj.TYPE_MEM
892                 p.From.Reg = v.Args[0].Reg()
893                 p.From.Index = v.Args[1].Reg()
894                 p.To.Type = obj.TYPE_REG
895                 p.To.Reg = v.Reg()
896
897         case ssa.OpPPC64DCBT:
898                 p := s.Prog(v.Op.Asm())
899                 p.From.Type = obj.TYPE_MEM
900                 p.From.Reg = v.Args[0].Reg()
901                 p.To.Type = obj.TYPE_CONST
902                 p.To.Offset = v.AuxInt
903
904         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
905                 p := s.Prog(v.Op.Asm())
906                 p.From.Type = obj.TYPE_REG
907                 p.From.Reg = ppc64.REGZERO
908                 p.To.Type = obj.TYPE_MEM
909                 p.To.Reg = v.Args[0].Reg()
910                 ssagen.AddAux(&p.To, v)
911
912         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
913
914                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
915                 // to offset values that are a multiple of 4. If the offset field is not a
916                 // multiple of 4, then the full address of the store target is computed (base +
917                 // offset) and used as the new base register and the offset in the instruction
918                 // is set to 0.
919
920                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
921                 // and prevents checking of the offset value and alignment in the rules.
922
923                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
924                 ssagen.AddAux(&toAddr, v)
925
926                 if toAddr.Offset%4 != 0 {
927                         p := s.Prog(ppc64.AMOVD)
928                         p.From.Type = obj.TYPE_ADDR
929                         p.From.Reg = v.Args[0].Reg()
930                         ssagen.AddAux(&p.From, v)
931                         p.To.Type = obj.TYPE_REG
932                         p.To.Reg = ppc64.REGTMP
933                         toAddr.Reg = ppc64.REGTMP
934                         // Clear the offset field and other
935                         // information that might be used
936                         // by the assembler to add to the
937                         // final offset value.
938                         toAddr.Offset = 0
939                         toAddr.Name = obj.NAME_NONE
940                         toAddr.Sym = nil
941                 }
942                 p := s.Prog(v.Op.Asm())
943                 p.To = toAddr
944                 p.From.Type = obj.TYPE_REG
945                 if v.Op == ssa.OpPPC64MOVDstorezero {
946                         p.From.Reg = ppc64.REGZERO
947                 } else {
948                         p.From.Reg = v.Args[1].Reg()
949                 }
950
951         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
952                 p := s.Prog(v.Op.Asm())
953                 p.From.Type = obj.TYPE_REG
954                 p.From.Reg = v.Args[1].Reg()
955                 p.To.Type = obj.TYPE_MEM
956                 p.To.Reg = v.Args[0].Reg()
957                 ssagen.AddAux(&p.To, v)
958
959         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
960                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
961                 ssa.OpPPC64MOVHBRstoreidx:
962                 p := s.Prog(v.Op.Asm())
963                 p.From.Type = obj.TYPE_REG
964                 p.From.Reg = v.Args[2].Reg()
965                 p.To.Index = v.Args[1].Reg()
966                 p.To.Type = obj.TYPE_MEM
967                 p.To.Reg = v.Args[0].Reg()
968
969         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
970                 // ISEL  AuxInt ? arg0 : arg1
971                 // ISELZ is a special case of ISEL where arg1 is implicitly $0.
972                 //
973                 // AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
974                 // ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
975                 // Convert the condition to a CR bit argument by the following conversion:
976                 //
977                 // AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
978                 // AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
979                 p := s.Prog(v.Op.Asm())
980                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
981                 p.Reg = v.Args[0].Reg()
982                 if v.Op == ssa.OpPPC64ISEL {
983                         p.AddRestSourceReg(v.Args[1].Reg())
984                 } else {
985                         p.AddRestSourceReg(ppc64.REG_R0)
986                 }
987                 // AuxInt values 4,5,6,7 are implemented with reversed operand order from 0,1,2,3
988                 if v.AuxInt > 3 {
989                         p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
990                 }
991                 p.From.SetConst(v.AuxInt & 3)
992
993         case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
994                 p := s.Prog(v.Op.Asm())
995                 p.To.Type = obj.TYPE_REG
996                 p.To.Reg = v.Reg()
997                 p.From.Type = obj.TYPE_REG
998                 p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)
999
1000         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
1001                 // The LoweredQuad code generation
1002                 // generates STXV instructions on
1003                 // power9. The Short variation is used
1004                 // if no loop is generated.
1005
1006                 // sizes >= 64 generate a loop as follows:
1007
1008                 // Set up loop counter in CTR, used by BC
1009                 // XXLXOR clears VS32
1010                 //       XXLXOR VS32,VS32,VS32
1011                 //       MOVD len/64,REG_TMP
1012                 //       MOVD REG_TMP,CTR
1013                 //       loop:
1014                 //       STXV VS32,0(R20)
1015                 //       STXV VS32,16(R20)
1016                 //       STXV VS32,32(R20)
1017                 //       STXV VS32,48(R20)
1018                 //       ADD  $64,R20
1019                 //       BC   16, 0, loop
1020
1021                 // Number of 64-byte loop iterations
1022                 ctr := v.AuxInt / 64
1023
1024                 // Remainder bytes
1025                 rem := v.AuxInt % 64
1026
1027                 // Only generate a loop if there is more
1028                 // than 1 iteration.
1029                 if ctr > 1 {
1030                         // Set up VS32 (V0) to hold 0s
1031                         p := s.Prog(ppc64.AXXLXOR)
1032                         p.From.Type = obj.TYPE_REG
1033                         p.From.Reg = ppc64.REG_VS32
1034                         p.To.Type = obj.TYPE_REG
1035                         p.To.Reg = ppc64.REG_VS32
1036                         p.Reg = ppc64.REG_VS32
1037
1038                         // Set up CTR loop counter
1039                         p = s.Prog(ppc64.AMOVD)
1040                         p.From.Type = obj.TYPE_CONST
1041                         p.From.Offset = ctr
1042                         p.To.Type = obj.TYPE_REG
1043                         p.To.Reg = ppc64.REGTMP
1044
1045                         p = s.Prog(ppc64.AMOVD)
1046                         p.From.Type = obj.TYPE_REG
1047                         p.From.Reg = ppc64.REGTMP
1048                         p.To.Type = obj.TYPE_REG
1049                         p.To.Reg = ppc64.REG_CTR
1050
1051                         // Don't generate padding for
1052                         // loops with few iterations.
1053                         if ctr > 3 {
1054                                 p = s.Prog(obj.APCALIGN)
1055                                 p.From.Type = obj.TYPE_CONST
1056                                 p.From.Offset = 16
1057                         }
1058
1059                         // generate 4 STXVs to zero 64 bytes
1060                         var top *obj.Prog
1061
1062                         p = s.Prog(ppc64.ASTXV)
1063                         p.From.Type = obj.TYPE_REG
1064                         p.From.Reg = ppc64.REG_VS32
1065                         p.To.Type = obj.TYPE_MEM
1066                         p.To.Reg = v.Args[0].Reg()
1067
1068                         //  Save the top of loop
1069                         if top == nil {
1070                                 top = p
1071                         }
1072                         p = s.Prog(ppc64.ASTXV)
1073                         p.From.Type = obj.TYPE_REG
1074                         p.From.Reg = ppc64.REG_VS32
1075                         p.To.Type = obj.TYPE_MEM
1076                         p.To.Reg = v.Args[0].Reg()
1077                         p.To.Offset = 16
1078
1079                         p = s.Prog(ppc64.ASTXV)
1080                         p.From.Type = obj.TYPE_REG
1081                         p.From.Reg = ppc64.REG_VS32
1082                         p.To.Type = obj.TYPE_MEM
1083                         p.To.Reg = v.Args[0].Reg()
1084                         p.To.Offset = 32
1085
1086                         p = s.Prog(ppc64.ASTXV)
1087                         p.From.Type = obj.TYPE_REG
1088                         p.From.Reg = ppc64.REG_VS32
1089                         p.To.Type = obj.TYPE_MEM
1090                         p.To.Reg = v.Args[0].Reg()
1091                         p.To.Offset = 48
1092
1093                         // Increment address for the
1094                         // 64 bytes just zeroed.
1095                         p = s.Prog(ppc64.AADD)
1096                         p.Reg = v.Args[0].Reg()
1097                         p.From.Type = obj.TYPE_CONST
1098                         p.From.Offset = 64
1099                         p.To.Type = obj.TYPE_REG
1100                         p.To.Reg = v.Args[0].Reg()
1101
1102                         // Branch back to top of loop
1103                         // based on CTR
1104                         // BC with BO_BCTR generates bdnz
1105                         p = s.Prog(ppc64.ABC)
1106                         p.From.Type = obj.TYPE_CONST
1107                         p.From.Offset = ppc64.BO_BCTR
1108                         p.Reg = ppc64.REG_CR0LT
1109                         p.To.Type = obj.TYPE_BRANCH
1110                         p.To.SetTarget(top)
1111                 }
1112                 // When ctr == 1 the loop was not generated but
1113                 // there are at least 64 bytes to clear, so add
1114                 // that to the remainder to generate the code
1115                 // to clear those doublewords
1116                 if ctr == 1 {
1117                         rem += 64
1118                 }
1119
1120                 // Clear the remainder starting at offset zero
1121                 offset := int64(0)
1122
1123                 if rem >= 16 && ctr <= 1 {
1124                         // If the XXLXOR hasn't already been
1125                         // generated, do it here to initialize
1126                         // VS32 (V0) to 0.
1127                         p := s.Prog(ppc64.AXXLXOR)
1128                         p.From.Type = obj.TYPE_REG
1129                         p.From.Reg = ppc64.REG_VS32
1130                         p.To.Type = obj.TYPE_REG
1131                         p.To.Reg = ppc64.REG_VS32
1132                         p.Reg = ppc64.REG_VS32
1133                 }
1134                 // Generate STXV for 32 or 64
1135                 // bytes.
1136                 for rem >= 32 {
1137                         p := s.Prog(ppc64.ASTXV)
1138                         p.From.Type = obj.TYPE_REG
1139                         p.From.Reg = ppc64.REG_VS32
1140                         p.To.Type = obj.TYPE_MEM
1141                         p.To.Reg = v.Args[0].Reg()
1142                         p.To.Offset = offset
1143
1144                         p = s.Prog(ppc64.ASTXV)
1145                         p.From.Type = obj.TYPE_REG
1146                         p.From.Reg = ppc64.REG_VS32
1147                         p.To.Type = obj.TYPE_MEM
1148                         p.To.Reg = v.Args[0].Reg()
1149                         p.To.Offset = offset + 16
1150                         offset += 32
1151                         rem -= 32
1152                 }
1153                 // Generate 16 bytes
1154                 if rem >= 16 {
1155                         p := s.Prog(ppc64.ASTXV)
1156                         p.From.Type = obj.TYPE_REG
1157                         p.From.Reg = ppc64.REG_VS32
1158                         p.To.Type = obj.TYPE_MEM
1159                         p.To.Reg = v.Args[0].Reg()
1160                         p.To.Offset = offset
1161                         offset += 16
1162                         rem -= 16
1163                 }
1164
1165                 // first clear as many doublewords as possible
1166                 // then clear remaining sizes as available
1167                 for rem > 0 {
1168                         op, size := ppc64.AMOVB, int64(1)
1169                         switch {
1170                         case rem >= 8:
1171                                 op, size = ppc64.AMOVD, 8
1172                         case rem >= 4:
1173                                 op, size = ppc64.AMOVW, 4
1174                         case rem >= 2:
1175                                 op, size = ppc64.AMOVH, 2
1176                         }
1177                         p := s.Prog(op)
1178                         p.From.Type = obj.TYPE_REG
1179                         p.From.Reg = ppc64.REG_R0
1180                         p.To.Type = obj.TYPE_MEM
1181                         p.To.Reg = v.Args[0].Reg()
1182                         p.To.Offset = offset
1183                         rem -= size
1184                         offset += size
1185                 }
1186
1187         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1188
1189                 // Unaligned data doesn't hurt performance
1190                 // for these instructions on power8.
1191
1192                 // For sizes >= 64 generate a loop as follows:
1193
1194                 // Set up loop counter in CTR, used by BC
1195                 //       XXLXOR VS32,VS32,VS32
1196                 //       MOVD len/32,REG_TMP
1197                 //       MOVD REG_TMP,CTR
1198                 //       MOVD $16,REG_TMP
1199                 //       loop:
1200                 //       STXVD2X VS32,(R0)(R20)
1201                 //       STXVD2X VS32,(R31)(R20)
1202                 //       ADD  $32,R20
1203                 //       BC   16, 0, loop
1204                 //
1205                 // any remainder is done as described below
1206
1207                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1208                 // then handle the remainder
1209                 //      MOVD R0,(R20)
1210                 //      MOVD R0,8(R20)
1211                 // .... etc.
1212                 //
1213                 // the remainder bytes are cleared using one or more
1214                 // of the following instructions with the appropriate
1215                 // offsets depending which instructions are needed
1216                 //
1217                 //      MOVW R0,n1(R20) 4 bytes
1218                 //      MOVH R0,n2(R20) 2 bytes
1219                 //      MOVB R0,n3(R20) 1 byte
1220                 //
1221                 // 7 bytes: MOVW, MOVH, MOVB
1222                 // 6 bytes: MOVW, MOVH
1223                 // 5 bytes: MOVW, MOVB
1224                 // 3 bytes: MOVH, MOVB
1225
1226                 // each loop iteration does 32 bytes
1227                 ctr := v.AuxInt / 32
1228
1229                 // remainder bytes
1230                 rem := v.AuxInt % 32
1231
1232                 // only generate a loop if there is more
1233                 // than 1 iteration.
1234                 if ctr > 1 {
1235                         // Set up VS32 (V0) to hold 0s
1236                         p := s.Prog(ppc64.AXXLXOR)
1237                         p.From.Type = obj.TYPE_REG
1238                         p.From.Reg = ppc64.REG_VS32
1239                         p.To.Type = obj.TYPE_REG
1240                         p.To.Reg = ppc64.REG_VS32
1241                         p.Reg = ppc64.REG_VS32
1242
1243                         // Set up CTR loop counter
1244                         p = s.Prog(ppc64.AMOVD)
1245                         p.From.Type = obj.TYPE_CONST
1246                         p.From.Offset = ctr
1247                         p.To.Type = obj.TYPE_REG
1248                         p.To.Reg = ppc64.REGTMP
1249
1250                         p = s.Prog(ppc64.AMOVD)
1251                         p.From.Type = obj.TYPE_REG
1252                         p.From.Reg = ppc64.REGTMP
1253                         p.To.Type = obj.TYPE_REG
1254                         p.To.Reg = ppc64.REG_CTR
1255
1256                         // Set up R31 to hold index value 16
1257                         p = s.Prog(ppc64.AMOVD)
1258                         p.From.Type = obj.TYPE_CONST
1259                         p.From.Offset = 16
1260                         p.To.Type = obj.TYPE_REG
1261                         p.To.Reg = ppc64.REGTMP
1262
1263                         // Don't add padding for alignment
1264                         // with few loop iterations.
1265                         if ctr > 3 {
1266                                 p = s.Prog(obj.APCALIGN)
1267                                 p.From.Type = obj.TYPE_CONST
1268                                 p.From.Offset = 16
1269                         }
1270
1271                         // generate 2 STXVD2Xs to store 32 bytes (16 bytes each)
1272                         // when this is a loop then the top must be saved
1273                         var top *obj.Prog
1274                         // This is the top of loop
1275
1276                         p = s.Prog(ppc64.ASTXVD2X)
1277                         p.From.Type = obj.TYPE_REG
1278                         p.From.Reg = ppc64.REG_VS32
1279                         p.To.Type = obj.TYPE_MEM
1280                         p.To.Reg = v.Args[0].Reg()
1281                         p.To.Index = ppc64.REGZERO
1282                         // Save the top of loop
1283                         if top == nil {
1284                                 top = p
1285                         }
1286                         p = s.Prog(ppc64.ASTXVD2X)
1287                         p.From.Type = obj.TYPE_REG
1288                         p.From.Reg = ppc64.REG_VS32
1289                         p.To.Type = obj.TYPE_MEM
1290                         p.To.Reg = v.Args[0].Reg()
1291                         p.To.Index = ppc64.REGTMP
1292
1293                         // Increment address for the
1294                         // 4 doublewords just zeroed.
1295                         p = s.Prog(ppc64.AADD)
1296                         p.Reg = v.Args[0].Reg()
1297                         p.From.Type = obj.TYPE_CONST
1298                         p.From.Offset = 32
1299                         p.To.Type = obj.TYPE_REG
1300                         p.To.Reg = v.Args[0].Reg()
1301
1302                         // Branch back to top of loop
1303                         // based on CTR
1304                         // BC with BO_BCTR generates bdnz
1305                         p = s.Prog(ppc64.ABC)
1306                         p.From.Type = obj.TYPE_CONST
1307                         p.From.Offset = ppc64.BO_BCTR
1308                         p.Reg = ppc64.REG_CR0LT
1309                         p.To.Type = obj.TYPE_BRANCH
1310                         p.To.SetTarget(top)
1311                 }
1312
1313                 // when ctr == 1 the loop was not generated but
1314                 // there are at least 32 bytes to clear, so add
1315                 // that to the remainder to generate the code
1316                 // to clear those doublewords
1317                 if ctr == 1 {
1318                         rem += 32
1319                 }
1320
1321                 // clear the remainder starting at offset zero
1322                 offset := int64(0)
1323
1324                 // first clear as many doublewords as possible
1325                 // then clear remaining sizes as available
1326                 for rem > 0 {
1327                         op, size := ppc64.AMOVB, int64(1)
1328                         switch {
1329                         case rem >= 8:
1330                                 op, size = ppc64.AMOVD, 8
1331                         case rem >= 4:
1332                                 op, size = ppc64.AMOVW, 4
1333                         case rem >= 2:
1334                                 op, size = ppc64.AMOVH, 2
1335                         }
1336                         p := s.Prog(op)
1337                         p.From.Type = obj.TYPE_REG
1338                         p.From.Reg = ppc64.REG_R0
1339                         p.To.Type = obj.TYPE_MEM
1340                         p.To.Reg = v.Args[0].Reg()
1341                         p.To.Offset = offset
1342                         rem -= size
1343                         offset += size
1344                 }
1345
1346         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1347
1348                 bytesPerLoop := int64(32)
1349                 // This will be used when moving more
1350                 // than 8 bytes.  Moves start with
1351                 // as many 8 byte moves as possible, then
1352                 // 4, 2, or 1 byte(s) as remaining.  This will
1353                 // work and be efficient for power8 or later.
1354                 // If there are 64 or more bytes, then a
1355                 // loop is generated to move 32 bytes and
1356                 // update the src and dst addresses on each
1357                 // iteration. When < 64 bytes, the appropriate
1358                 // number of moves are generated based on the
1359                 // size.
1360                 // When moving >= 64 bytes a loop is used
1361                 //      MOVD len/32,REG_TMP
1362                 //      MOVD REG_TMP,CTR
1363                 //      MOVD $16,REG_TMP
1364                 // top:
1365                 //      LXVD2X (R0)(R21),VS32
1366                 //      LXVD2X (R31)(R21),VS33
1367                 //      ADD $32,R21
1368                 //      STXVD2X VS32,(R0)(R20)
1369                 //      STXVD2X VS33,(R31)(R20)
1370                 //      ADD $32,R20
1371                 //      BC 16,0,top
1372                 // Bytes not moved by this loop are moved
1373                 // with a combination of the following instructions,
1374                 // starting with the largest sizes and generating as
1375                 // many as needed, using the appropriate offset value.
1376                 //      MOVD  n(R21),R31
1377                 //      MOVD  R31,n(R20)
1378                 //      MOVW  n1(R21),R31
1379                 //      MOVW  R31,n1(R20)
1380                 //      MOVH  n2(R21),R31
1381                 //      MOVH  R31,n2(R20)
1382                 //      MOVB  n3(R21),R31
1383                 //      MOVB  R31,n3(R20)
1384
1385                 // Each loop iteration moves 32 bytes
1386                 ctr := v.AuxInt / bytesPerLoop
1387
1388                 // Remainder after the loop
1389                 rem := v.AuxInt % bytesPerLoop
1390
1391                 dstReg := v.Args[0].Reg()
1392                 srcReg := v.Args[1].Reg()
1393
1394                 // The set of registers used here, must match the clobbered reg list
1395                 // in PPC64Ops.go.
1396                 offset := int64(0)
1397
1398                 // top of the loop
1399                 var top *obj.Prog
1400                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1401                 if ctr > 1 {
1402                         // Set up the CTR
1403                         p := s.Prog(ppc64.AMOVD)
1404                         p.From.Type = obj.TYPE_CONST
1405                         p.From.Offset = ctr
1406                         p.To.Type = obj.TYPE_REG
1407                         p.To.Reg = ppc64.REGTMP
1408
1409                         p = s.Prog(ppc64.AMOVD)
1410                         p.From.Type = obj.TYPE_REG
1411                         p.From.Reg = ppc64.REGTMP
1412                         p.To.Type = obj.TYPE_REG
1413                         p.To.Reg = ppc64.REG_CTR
1414
1415                         // Use REGTMP as index reg
1416                         p = s.Prog(ppc64.AMOVD)
1417                         p.From.Type = obj.TYPE_CONST
1418                         p.From.Offset = 16
1419                         p.To.Type = obj.TYPE_REG
1420                         p.To.Reg = ppc64.REGTMP
1421
1422                         // Don't add padding for
1423                         // alignment with small iteration
1424                         // counts.
1425                         if ctr > 3 {
1426                                 p = s.Prog(obj.APCALIGN)
1427                                 p.From.Type = obj.TYPE_CONST
1428                                 p.From.Offset = 16
1429                         }
1430
1431                         // Generate 16 byte loads and stores.
1432                         // Use temp register for index (16)
1433                         // on the second one.
1434
1435                         p = s.Prog(ppc64.ALXVD2X)
1436                         p.From.Type = obj.TYPE_MEM
1437                         p.From.Reg = srcReg
1438                         p.From.Index = ppc64.REGZERO
1439                         p.To.Type = obj.TYPE_REG
1440                         p.To.Reg = ppc64.REG_VS32
1441                         if top == nil {
1442                                 top = p
1443                         }
1444                         p = s.Prog(ppc64.ALXVD2X)
1445                         p.From.Type = obj.TYPE_MEM
1446                         p.From.Reg = srcReg
1447                         p.From.Index = ppc64.REGTMP
1448                         p.To.Type = obj.TYPE_REG
1449                         p.To.Reg = ppc64.REG_VS33
1450
1451                         // increment the src reg for next iteration
1452                         p = s.Prog(ppc64.AADD)
1453                         p.Reg = srcReg
1454                         p.From.Type = obj.TYPE_CONST
1455                         p.From.Offset = bytesPerLoop
1456                         p.To.Type = obj.TYPE_REG
1457                         p.To.Reg = srcReg
1458
1459                         // generate 16 byte stores
1460                         p = s.Prog(ppc64.ASTXVD2X)
1461                         p.From.Type = obj.TYPE_REG
1462                         p.From.Reg = ppc64.REG_VS32
1463                         p.To.Type = obj.TYPE_MEM
1464                         p.To.Reg = dstReg
1465                         p.To.Index = ppc64.REGZERO
1466
1467                         p = s.Prog(ppc64.ASTXVD2X)
1468                         p.From.Type = obj.TYPE_REG
1469                         p.From.Reg = ppc64.REG_VS33
1470                         p.To.Type = obj.TYPE_MEM
1471                         p.To.Reg = dstReg
1472                         p.To.Index = ppc64.REGTMP
1473
1474                         // increment the dst reg for next iteration
1475                         p = s.Prog(ppc64.AADD)
1476                         p.Reg = dstReg
1477                         p.From.Type = obj.TYPE_CONST
1478                         p.From.Offset = bytesPerLoop
1479                         p.To.Type = obj.TYPE_REG
1480                         p.To.Reg = dstReg
1481
1482                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1483                         // to loop top.
1484                         p = s.Prog(ppc64.ABC)
1485                         p.From.Type = obj.TYPE_CONST
1486                         p.From.Offset = ppc64.BO_BCTR
1487                         p.Reg = ppc64.REG_CR0LT
1488                         p.To.Type = obj.TYPE_BRANCH
1489                         p.To.SetTarget(top)
1490
1491                         // srcReg and dstReg were incremented in the loop, so
1492                         // later instructions start with offset 0.
1493                         offset = int64(0)
1494                 }
1495
1496                 // No loop was generated for one iteration, so
1497                 // add 32 bytes to the remainder to move those bytes.
1498                 if ctr == 1 {
1499                         rem += bytesPerLoop
1500                 }
1501
1502                 if rem >= 16 {
1503                         // Generate 16 byte loads and stores.
1504                         // Use temp register for index (value 16)
1505                         // on the second one.
1506                         p := s.Prog(ppc64.ALXVD2X)
1507                         p.From.Type = obj.TYPE_MEM
1508                         p.From.Reg = srcReg
1509                         p.From.Index = ppc64.REGZERO
1510                         p.To.Type = obj.TYPE_REG
1511                         p.To.Reg = ppc64.REG_VS32
1512
1513                         p = s.Prog(ppc64.ASTXVD2X)
1514                         p.From.Type = obj.TYPE_REG
1515                         p.From.Reg = ppc64.REG_VS32
1516                         p.To.Type = obj.TYPE_MEM
1517                         p.To.Reg = dstReg
1518                         p.To.Index = ppc64.REGZERO
1519
1520                         offset = 16
1521                         rem -= 16
1522
1523                         if rem >= 16 {
1524                                 // Use REGTMP as index reg
1525                                 p := s.Prog(ppc64.AMOVD)
1526                                 p.From.Type = obj.TYPE_CONST
1527                                 p.From.Offset = 16
1528                                 p.To.Type = obj.TYPE_REG
1529                                 p.To.Reg = ppc64.REGTMP
1530
1531                                 p = s.Prog(ppc64.ALXVD2X)
1532                                 p.From.Type = obj.TYPE_MEM
1533                                 p.From.Reg = srcReg
1534                                 p.From.Index = ppc64.REGTMP
1535                                 p.To.Type = obj.TYPE_REG
1536                                 p.To.Reg = ppc64.REG_VS32
1537
1538                                 p = s.Prog(ppc64.ASTXVD2X)
1539                                 p.From.Type = obj.TYPE_REG
1540                                 p.From.Reg = ppc64.REG_VS32
1541                                 p.To.Type = obj.TYPE_MEM
1542                                 p.To.Reg = dstReg
1543                                 p.To.Index = ppc64.REGTMP
1544
1545                                 offset = 32
1546                                 rem -= 16
1547                         }
1548                 }
1549
1550                 // Generate all the remaining load and store pairs, starting with
1551                 // as many 8 byte moves as possible, then 4, 2, 1.
1552                 for rem > 0 {
1553                         op, size := ppc64.AMOVB, int64(1)
1554                         switch {
1555                         case rem >= 8:
1556                                 op, size = ppc64.AMOVD, 8
1557                         case rem >= 4:
1558                                 op, size = ppc64.AMOVWZ, 4
1559                         case rem >= 2:
1560                                 op, size = ppc64.AMOVH, 2
1561                         }
1562                         // Load
1563                         p := s.Prog(op)
1564                         p.To.Type = obj.TYPE_REG
1565                         p.To.Reg = ppc64.REGTMP
1566                         p.From.Type = obj.TYPE_MEM
1567                         p.From.Reg = srcReg
1568                         p.From.Offset = offset
1569
1570                         // Store
1571                         p = s.Prog(op)
1572                         p.From.Type = obj.TYPE_REG
1573                         p.From.Reg = ppc64.REGTMP
1574                         p.To.Type = obj.TYPE_MEM
1575                         p.To.Reg = dstReg
1576                         p.To.Offset = offset
1577                         rem -= size
1578                         offset += size
1579                 }
1580
1581         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1582                 bytesPerLoop := int64(64)
1583                 // This is used when moving more
1584                 // than 8 bytes on power9.  Moves start with
1585                 // as many 8 byte moves as possible, then
1586                 // 4, 2, or 1 byte(s) as remaining.  This will
1587                 // work and be efficient for power8 or later.
1588                 // If there are 128 or more bytes, then a
1589                 // loop is generated to move 64 bytes and
1590                 // update the src and dst addresses on each
1591                 // iteration. When < 128 bytes, the appropriate
1592                 // number of moves is generated based on the
1593                 // size.
1594                 // When moving >= 128 bytes a loop is used
1595                 //      MOVD len/64,REG_TMP
1596                 //      MOVD REG_TMP,CTR
1597                 // top:
1598                 //      LXV 0(R21),VS32;   LXV 16(R21),VS33
1599                 //      STXV VS32,0(R20);  STXV VS33,16(R20)
1600                 //      LXV 32(R21),VS32;  LXV 48(R21),VS33
1601                 //      STXV VS32,32(R20); STXV VS33,48(R20)
1602                 //      ADD $64,R21
1603                 //      ADD $64,R20
1604                 //      BC 16,0,top
1605                 // Bytes not moved by this loop are moved
1606                 // with a combination of the following instructions,
1607                 // starting with the largest sizes and generating as
1608                 // many as needed, using the appropriate offset value.
1609                 //      MOVD  n(R21),R31
1610                 //      MOVD  R31,n(R20)
1611                 //      MOVW  n1(R21),R31
1612                 //      MOVW  R31,n1(R20)
1613                 //      MOVH  n2(R21),R31
1614                 //      MOVH  R31,n2(R20)
1615                 //      MOVB  n3(R21),R31
1616                 //      MOVB  R31,n3(R20)
1617
1618                 // Each loop iteration moves 64 bytes
1619                 ctr := v.AuxInt / bytesPerLoop
1620
1621                 // Remainder after the loop
1622                 rem := v.AuxInt % bytesPerLoop
1623
1624                 dstReg := v.Args[0].Reg()
1625                 srcReg := v.Args[1].Reg()
1626
1627                 offset := int64(0)
1628
1629                 // top of the loop
1630                 var top *obj.Prog
1631
1632                 // Only generate looping code when loop counter is > 1 for >= 128 bytes
1633                 if ctr > 1 {
1634                         // Set up the CTR
1635                         p := s.Prog(ppc64.AMOVD)
1636                         p.From.Type = obj.TYPE_CONST
1637                         p.From.Offset = ctr
1638                         p.To.Type = obj.TYPE_REG
1639                         p.To.Reg = ppc64.REGTMP
1640
1641                         p = s.Prog(ppc64.AMOVD)
1642                         p.From.Type = obj.TYPE_REG
1643                         p.From.Reg = ppc64.REGTMP
1644                         p.To.Type = obj.TYPE_REG
1645                         p.To.Reg = ppc64.REG_CTR
1646
1647                         p = s.Prog(obj.APCALIGN)
1648                         p.From.Type = obj.TYPE_CONST
1649                         p.From.Offset = 16
1650
1651                         // Generate 16 byte loads and stores.
1652                         p = s.Prog(ppc64.ALXV)
1653                         p.From.Type = obj.TYPE_MEM
1654                         p.From.Reg = srcReg
1655                         p.From.Offset = offset
1656                         p.To.Type = obj.TYPE_REG
1657                         p.To.Reg = ppc64.REG_VS32
1658                         if top == nil {
1659                                 top = p
1660                         }
1661                         p = s.Prog(ppc64.ALXV)
1662                         p.From.Type = obj.TYPE_MEM
1663                         p.From.Reg = srcReg
1664                         p.From.Offset = offset + 16
1665                         p.To.Type = obj.TYPE_REG
1666                         p.To.Reg = ppc64.REG_VS33
1667
1668                         // generate 16 byte stores
1669                         p = s.Prog(ppc64.ASTXV)
1670                         p.From.Type = obj.TYPE_REG
1671                         p.From.Reg = ppc64.REG_VS32
1672                         p.To.Type = obj.TYPE_MEM
1673                         p.To.Reg = dstReg
1674                         p.To.Offset = offset
1675
1676                         p = s.Prog(ppc64.ASTXV)
1677                         p.From.Type = obj.TYPE_REG
1678                         p.From.Reg = ppc64.REG_VS33
1679                         p.To.Type = obj.TYPE_MEM
1680                         p.To.Reg = dstReg
1681                         p.To.Offset = offset + 16
1682
1683                         // Generate 16 byte loads and stores.
1684                         p = s.Prog(ppc64.ALXV)
1685                         p.From.Type = obj.TYPE_MEM
1686                         p.From.Reg = srcReg
1687                         p.From.Offset = offset + 32
1688                         p.To.Type = obj.TYPE_REG
1689                         p.To.Reg = ppc64.REG_VS32
1690
1691                         p = s.Prog(ppc64.ALXV)
1692                         p.From.Type = obj.TYPE_MEM
1693                         p.From.Reg = srcReg
1694                         p.From.Offset = offset + 48
1695                         p.To.Type = obj.TYPE_REG
1696                         p.To.Reg = ppc64.REG_VS33
1697
1698                         // generate 16 byte stores
1699                         p = s.Prog(ppc64.ASTXV)
1700                         p.From.Type = obj.TYPE_REG
1701                         p.From.Reg = ppc64.REG_VS32
1702                         p.To.Type = obj.TYPE_MEM
1703                         p.To.Reg = dstReg
1704                         p.To.Offset = offset + 32
1705
1706                         p = s.Prog(ppc64.ASTXV)
1707                         p.From.Type = obj.TYPE_REG
1708                         p.From.Reg = ppc64.REG_VS33
1709                         p.To.Type = obj.TYPE_MEM
1710                         p.To.Reg = dstReg
1711                         p.To.Offset = offset + 48
1712
1713                         // increment the src reg for next iteration
1714                         p = s.Prog(ppc64.AADD)
1715                         p.Reg = srcReg
1716                         p.From.Type = obj.TYPE_CONST
1717                         p.From.Offset = bytesPerLoop
1718                         p.To.Type = obj.TYPE_REG
1719                         p.To.Reg = srcReg
1720
1721                         // increment the dst reg for next iteration
1722                         p = s.Prog(ppc64.AADD)
1723                         p.Reg = dstReg
1724                         p.From.Type = obj.TYPE_CONST
1725                         p.From.Offset = bytesPerLoop
1726                         p.To.Type = obj.TYPE_REG
1727                         p.To.Reg = dstReg
1728
1729                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1730                         // to loop top.
1731                         p = s.Prog(ppc64.ABC)
1732                         p.From.Type = obj.TYPE_CONST
1733                         p.From.Offset = ppc64.BO_BCTR
1734                         p.Reg = ppc64.REG_CR0LT
1735                         p.To.Type = obj.TYPE_BRANCH
1736                         p.To.SetTarget(top)
1737
1738                         // srcReg and dstReg were incremented in the loop, so
1739                         // later instructions start with offset 0.
1740                         offset = int64(0)
1741                 }
1742
1743                 // No loop was generated for one iteration, so
1744                 // add 64 bytes to the remainder to move those bytes.
1745                 if ctr == 1 {
1746                         rem += bytesPerLoop
1747                 }
1748                 if rem >= 32 {
1749                         p := s.Prog(ppc64.ALXV)
1750                         p.From.Type = obj.TYPE_MEM
1751                         p.From.Reg = srcReg
1752                         p.To.Type = obj.TYPE_REG
1753                         p.To.Reg = ppc64.REG_VS32
1754
1755                         p = s.Prog(ppc64.ALXV)
1756                         p.From.Type = obj.TYPE_MEM
1757                         p.From.Reg = srcReg
1758                         p.From.Offset = 16
1759                         p.To.Type = obj.TYPE_REG
1760                         p.To.Reg = ppc64.REG_VS33
1761
1762                         p = s.Prog(ppc64.ASTXV)
1763                         p.From.Type = obj.TYPE_REG
1764                         p.From.Reg = ppc64.REG_VS32
1765                         p.To.Type = obj.TYPE_MEM
1766                         p.To.Reg = dstReg
1767
1768                         p = s.Prog(ppc64.ASTXV)
1769                         p.From.Type = obj.TYPE_REG
1770                         p.From.Reg = ppc64.REG_VS33
1771                         p.To.Type = obj.TYPE_MEM
1772                         p.To.Reg = dstReg
1773                         p.To.Offset = 16
1774
1775                         offset = 32
1776                         rem -= 32
1777                 }
1778
1779                 if rem >= 16 {
1780                         // Generate 16 byte loads and stores.
1781                         p := s.Prog(ppc64.ALXV)
1782                         p.From.Type = obj.TYPE_MEM
1783                         p.From.Reg = srcReg
1784                         p.From.Offset = offset
1785                         p.To.Type = obj.TYPE_REG
1786                         p.To.Reg = ppc64.REG_VS32
1787
1788                         p = s.Prog(ppc64.ASTXV)
1789                         p.From.Type = obj.TYPE_REG
1790                         p.From.Reg = ppc64.REG_VS32
1791                         p.To.Type = obj.TYPE_MEM
1792                         p.To.Reg = dstReg
1793                         p.To.Offset = offset
1794
1795                         offset += 16
1796                         rem -= 16
1797
1798                         if rem >= 16 {
1799                                 p := s.Prog(ppc64.ALXV)
1800                                 p.From.Type = obj.TYPE_MEM
1801                                 p.From.Reg = srcReg
1802                                 p.From.Offset = offset
1803                                 p.To.Type = obj.TYPE_REG
1804                                 p.To.Reg = ppc64.REG_VS32
1805
1806                                 p = s.Prog(ppc64.ASTXV)
1807                                 p.From.Type = obj.TYPE_REG
1808                                 p.From.Reg = ppc64.REG_VS32
1809                                 p.To.Type = obj.TYPE_MEM
1810                                 p.To.Reg = dstReg
1811                                 p.To.Offset = offset
1812
1813                                 offset += 16
1814                                 rem -= 16
1815                         }
1816                 }
1817                 // Generate all the remaining load and store pairs, starting with
1818                 // as many 8 byte moves as possible, then 4, 2, 1.
1819                 for rem > 0 {
1820                         op, size := ppc64.AMOVB, int64(1)
1821                         switch {
1822                         case rem >= 8:
1823                                 op, size = ppc64.AMOVD, 8
1824                         case rem >= 4:
1825                                 op, size = ppc64.AMOVWZ, 4
1826                         case rem >= 2:
1827                                 op, size = ppc64.AMOVH, 2
1828                         }
1829                         // Load
1830                         p := s.Prog(op)
1831                         p.To.Type = obj.TYPE_REG
1832                         p.To.Reg = ppc64.REGTMP
1833                         p.From.Type = obj.TYPE_MEM
1834                         p.From.Reg = srcReg
1835                         p.From.Offset = offset
1836
1837                         // Store
1838                         p = s.Prog(op)
1839                         p.From.Type = obj.TYPE_REG
1840                         p.From.Reg = ppc64.REGTMP
1841                         p.To.Type = obj.TYPE_MEM
1842                         p.To.Reg = dstReg
1843                         p.To.Offset = offset
1844                         rem -= size
1845                         offset += size
1846                 }
1847
1848         case ssa.OpPPC64CALLstatic:
1849                 s.Call(v)
1850
1851         case ssa.OpPPC64CALLtail:
1852                 s.TailCall(v)
1853
1854         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1855                 p := s.Prog(ppc64.AMOVD)
1856                 p.From.Type = obj.TYPE_REG
1857                 p.From.Reg = v.Args[0].Reg()
1858                 p.To.Type = obj.TYPE_REG
1859                 p.To.Reg = ppc64.REG_LR
1860
1861                 if v.Args[0].Reg() != ppc64.REG_R12 {
1862                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1863                 }
1864
1865                 pp := s.Call(v)
1866
1867                 // Convert the call into a blrl with hint this is not a subroutine return.
1868                 // The full bclrl opcode must be specified when passing a hint.
1869                 pp.As = ppc64.ABCL
1870                 pp.From.Type = obj.TYPE_CONST
1871                 pp.From.Offset = ppc64.BO_ALWAYS
1872                 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1873                 pp.To.Reg = ppc64.REG_LR
1874                 pp.AddRestSourceConst(1)
1875
1876                 if ppc64.NeedTOCpointer(base.Ctxt) {
1877                         // When compiling Go into PIC, the function we just
1878                         // called via pointer might have been implemented in
1879                         // a separate module and so overwritten the TOC
1880                         // pointer in R2; reload it.
1881                         q := s.Prog(ppc64.AMOVD)
1882                         q.From.Type = obj.TYPE_MEM
1883                         q.From.Offset = 24
1884                         q.From.Reg = ppc64.REGSP
1885                         q.To.Type = obj.TYPE_REG
1886                         q.To.Reg = ppc64.REG_R2
1887                 }
1888
1889         case ssa.OpPPC64LoweredWB:
1890                 p := s.Prog(obj.ACALL)
1891                 p.To.Type = obj.TYPE_MEM
1892                 p.To.Name = obj.NAME_EXTERN
1893                 // AuxInt encodes how many buffer entries we need.
1894                 p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
1895
1896         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1897                 p := s.Prog(obj.ACALL)
1898                 p.To.Type = obj.TYPE_MEM
1899                 p.To.Name = obj.NAME_EXTERN
1900                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1901                 s.UseArgs(16) // space used in callee args area by assembly stubs
1902
1903         case ssa.OpPPC64LoweredNilCheck:
1904                 if buildcfg.GOOS == "aix" {
1905                         // CMP Rarg0, R0
1906                         // BNE 2(PC)
1907                         // STW R0, 0(R0)
1908                         // NOP (so the BNE has somewhere to land)
1909
1910                         // CMP Rarg0, R0
1911                         p := s.Prog(ppc64.ACMP)
1912                         p.From.Type = obj.TYPE_REG
1913                         p.From.Reg = v.Args[0].Reg()
1914                         p.To.Type = obj.TYPE_REG
1915                         p.To.Reg = ppc64.REG_R0
1916
1917                         // BNE 2(PC)
1918                         p2 := s.Prog(ppc64.ABNE)
1919                         p2.To.Type = obj.TYPE_BRANCH
1920
1921                         // STW R0, 0(R0)
1922                         // Write at 0 is forbidden and will trigger a SIGSEGV
1923                         p = s.Prog(ppc64.AMOVW)
1924                         p.From.Type = obj.TYPE_REG
1925                         p.From.Reg = ppc64.REG_R0
1926                         p.To.Type = obj.TYPE_MEM
1927                         p.To.Reg = ppc64.REG_R0
1928
1929                         // NOP (so the BNE has somewhere to land)
1930                         nop := s.Prog(obj.ANOP)
1931                         p2.To.SetTarget(nop)
1932
1933                 } else {
1934                         // Issue a load which will fault if arg is nil.
1935                         p := s.Prog(ppc64.AMOVBZ)
1936                         p.From.Type = obj.TYPE_MEM
1937                         p.From.Reg = v.Args[0].Reg()
1938                         ssagen.AddAux(&p.From, v)
1939                         p.To.Type = obj.TYPE_REG
1940                         p.To.Reg = ppc64.REGTMP
1941                 }
1942                 if logopt.Enabled() {
1943                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1944                 }
1945                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1946                         base.WarnfAt(v.Pos, "generated nil check")
1947                 }
1948
1949         // These should be resolved by rules and not make it here.
1950         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1951                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1952                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1953                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1954         case ssa.OpPPC64InvertFlags:
1955                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1956         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1957                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1958         case ssa.OpClobber, ssa.OpClobberReg:
1959                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1960         default:
1961                 v.Fatalf("genValue not implemented: %s", v.LongString())
1962         }
1963 }
1964
1965 var blockJump = [...]struct {
1966         asm, invasm     obj.As
1967         asmeq, invasmun bool
1968 }{
1969         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1970         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1971
1972         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1973         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1974         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1975         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1976
1977         // TODO: need to work FP comparisons into block jumps
1978         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1979         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1980         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1981         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1982 }
1983
1984 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1985         switch b.Kind {
1986         case ssa.BlockDefer:
1987                 // defer returns in R3:
1988                 // 0 if we should continue executing
1989                 // 1 if we should jump to deferreturn call
1990                 p := s.Prog(ppc64.ACMP)
1991                 p.From.Type = obj.TYPE_REG
1992                 p.From.Reg = ppc64.REG_R3
1993                 p.To.Type = obj.TYPE_REG
1994                 p.To.Reg = ppc64.REG_R0
1995
1996                 p = s.Prog(ppc64.ABNE)
1997                 p.To.Type = obj.TYPE_BRANCH
1998                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1999                 if b.Succs[0].Block() != next {
2000                         p := s.Prog(obj.AJMP)
2001                         p.To.Type = obj.TYPE_BRANCH
2002                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2003                 }
2004
2005         case ssa.BlockPlain:
2006                 if b.Succs[0].Block() != next {
2007                         p := s.Prog(obj.AJMP)
2008                         p.To.Type = obj.TYPE_BRANCH
2009                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2010                 }
2011         case ssa.BlockExit, ssa.BlockRetJmp:
2012         case ssa.BlockRet:
2013                 s.Prog(obj.ARET)
2014
2015         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2016                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2017                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2018                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2019                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2020                 jmp := blockJump[b.Kind]
2021                 switch next {
2022                 case b.Succs[0].Block():
2023                         s.Br(jmp.invasm, b.Succs[1].Block())
2024                         if jmp.invasmun {
2025                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2026                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2027                         }
2028                 case b.Succs[1].Block():
2029                         s.Br(jmp.asm, b.Succs[0].Block())
2030                         if jmp.asmeq {
2031                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2032                         }
2033                 default:
2034                         if b.Likely != ssa.BranchUnlikely {
2035                                 s.Br(jmp.asm, b.Succs[0].Block())
2036                                 if jmp.asmeq {
2037                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2038                                 }
2039                                 s.Br(obj.AJMP, b.Succs[1].Block())
2040                         } else {
2041                                 s.Br(jmp.invasm, b.Succs[1].Block())
2042                                 if jmp.invasmun {
2043                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2044                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2045                                 }
2046                                 s.Br(obj.AJMP, b.Succs[0].Block())
2047                         }
2048                 }
2049         default:
2050                 b.Fatalf("branch not implemented: %s", b.LongString())
2051         }
2052 }
2053
2054 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2055         p := s.Prog(loadByType(t))
2056         p.From.Type = obj.TYPE_MEM
2057         p.From.Name = obj.NAME_AUTO
2058         p.From.Sym = n.Linksym()
2059         p.From.Offset = n.FrameOffset() + off
2060         p.To.Type = obj.TYPE_REG
2061         p.To.Reg = reg
2062         return p
2063 }
2064
2065 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2066         p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2067         p.To.Name = obj.NAME_PARAM
2068         p.To.Sym = n.Linksym()
2069         p.Pos = p.Pos.WithNotStmt()
2070         return p
2071 }