]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
cmd/compile/internal/ssa: on PPC64, merge (CMPconst [0] (op ...)) more aggressively
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/objw"
12         "cmd/compile/internal/ssa"
13         "cmd/compile/internal/ssagen"
14         "cmd/compile/internal/types"
15         "cmd/internal/obj"
16         "cmd/internal/obj/ppc64"
17         "internal/buildcfg"
18         "math"
19         "strings"
20 )
21
22 // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24         //      flive := b.FlagsLiveAtEnd
25         //      if b.Control != nil && b.Control.Type.IsFlags() {
26         //              flive = true
27         //      }
28         //      for i := len(b.Values) - 1; i >= 0; i-- {
29         //              v := b.Values[i]
30         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
31         //                      // The "mark" is any non-nil Aux value.
32         //                      v.Aux = v
33         //              }
34         //              if v.Type.IsFlags() {
35         //                      flive = false
36         //              }
37         //              for _, a := range v.Args {
38         //                      if a.Type.IsFlags() {
39         //                              flive = true
40         //                      }
41         //              }
42         //      }
43 }
44
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
47         if t.IsFloat() {
48                 switch t.Size() {
49                 case 4:
50                         return ppc64.AFMOVS
51                 case 8:
52                         return ppc64.AFMOVD
53                 }
54         } else {
55                 switch t.Size() {
56                 case 1:
57                         if t.IsSigned() {
58                                 return ppc64.AMOVB
59                         } else {
60                                 return ppc64.AMOVBZ
61                         }
62                 case 2:
63                         if t.IsSigned() {
64                                 return ppc64.AMOVH
65                         } else {
66                                 return ppc64.AMOVHZ
67                         }
68                 case 4:
69                         if t.IsSigned() {
70                                 return ppc64.AMOVW
71                         } else {
72                                 return ppc64.AMOVWZ
73                         }
74                 case 8:
75                         return ppc64.AMOVD
76                 }
77         }
78         panic("bad load type")
79 }
80
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
83         if t.IsFloat() {
84                 switch t.Size() {
85                 case 4:
86                         return ppc64.AFMOVS
87                 case 8:
88                         return ppc64.AFMOVD
89                 }
90         } else {
91                 switch t.Size() {
92                 case 1:
93                         return ppc64.AMOVB
94                 case 2:
95                         return ppc64.AMOVH
96                 case 4:
97                         return ppc64.AMOVW
98                 case 8:
99                         return ppc64.AMOVD
100                 }
101         }
102         panic("bad store type")
103 }
104
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
106         switch v.Op {
107         case ssa.OpCopy:
108                 t := v.Type
109                 if t.IsMemory() {
110                         return
111                 }
112                 x := v.Args[0].Reg()
113                 y := v.Reg()
114                 if x != y {
115                         rt := obj.TYPE_REG
116                         op := ppc64.AMOVD
117
118                         if t.IsFloat() {
119                                 op = ppc64.AFMOVD
120                         }
121                         p := s.Prog(op)
122                         p.From.Type = rt
123                         p.From.Reg = x
124                         p.To.Type = rt
125                         p.To.Reg = y
126                 }
127
128         case ssa.OpPPC64LoweredAtomicAnd8,
129                 ssa.OpPPC64LoweredAtomicAnd32,
130                 ssa.OpPPC64LoweredAtomicOr8,
131                 ssa.OpPPC64LoweredAtomicOr32:
132                 // LWSYNC
133                 // LBAR/LWAR    (Rarg0), Rtmp
134                 // AND/OR       Rarg1, Rtmp
135                 // STBCCC/STWCCC Rtmp, (Rarg0)
136                 // BNE          -3(PC)
137                 ld := ppc64.ALBAR
138                 st := ppc64.ASTBCCC
139                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
140                         ld = ppc64.ALWAR
141                         st = ppc64.ASTWCCC
142                 }
143                 r0 := v.Args[0].Reg()
144                 r1 := v.Args[1].Reg()
145                 // LWSYNC - Assuming shared data not write-through-required nor
146                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
147                 plwsync := s.Prog(ppc64.ALWSYNC)
148                 plwsync.To.Type = obj.TYPE_NONE
149                 // LBAR or LWAR
150                 p := s.Prog(ld)
151                 p.From.Type = obj.TYPE_MEM
152                 p.From.Reg = r0
153                 p.To.Type = obj.TYPE_REG
154                 p.To.Reg = ppc64.REGTMP
155                 // AND/OR reg1,out
156                 p1 := s.Prog(v.Op.Asm())
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.To.Type = obj.TYPE_REG
160                 p1.To.Reg = ppc64.REGTMP
161                 // STBCCC or STWCCC
162                 p2 := s.Prog(st)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGTMP
165                 p2.To.Type = obj.TYPE_MEM
166                 p2.To.Reg = r0
167                 p2.RegTo2 = ppc64.REGTMP
168                 // BNE retry
169                 p3 := s.Prog(ppc64.ABNE)
170                 p3.To.Type = obj.TYPE_BRANCH
171                 p3.To.SetTarget(p)
172
173         case ssa.OpPPC64LoweredAtomicAdd32,
174                 ssa.OpPPC64LoweredAtomicAdd64:
175                 // LWSYNC
176                 // LDAR/LWAR    (Rarg0), Rout
177                 // ADD          Rarg1, Rout
178                 // STDCCC/STWCCC Rout, (Rarg0)
179                 // BNE         -3(PC)
180                 // MOVWZ        Rout,Rout (if Add32)
181                 ld := ppc64.ALDAR
182                 st := ppc64.ASTDCCC
183                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
184                         ld = ppc64.ALWAR
185                         st = ppc64.ASTWCCC
186                 }
187                 r0 := v.Args[0].Reg()
188                 r1 := v.Args[1].Reg()
189                 out := v.Reg0()
190                 // LWSYNC - Assuming shared data not write-through-required nor
191                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
192                 plwsync := s.Prog(ppc64.ALWSYNC)
193                 plwsync.To.Type = obj.TYPE_NONE
194                 // LDAR or LWAR
195                 p := s.Prog(ld)
196                 p.From.Type = obj.TYPE_MEM
197                 p.From.Reg = r0
198                 p.To.Type = obj.TYPE_REG
199                 p.To.Reg = out
200                 // ADD reg1,out
201                 p1 := s.Prog(ppc64.AADD)
202                 p1.From.Type = obj.TYPE_REG
203                 p1.From.Reg = r1
204                 p1.To.Reg = out
205                 p1.To.Type = obj.TYPE_REG
206                 // STDCCC or STWCCC
207                 p3 := s.Prog(st)
208                 p3.From.Type = obj.TYPE_REG
209                 p3.From.Reg = out
210                 p3.To.Type = obj.TYPE_MEM
211                 p3.To.Reg = r0
212                 // BNE retry
213                 p4 := s.Prog(ppc64.ABNE)
214                 p4.To.Type = obj.TYPE_BRANCH
215                 p4.To.SetTarget(p)
216
217                 // Ensure a 32 bit result
218                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
219                         p5 := s.Prog(ppc64.AMOVWZ)
220                         p5.To.Type = obj.TYPE_REG
221                         p5.To.Reg = out
222                         p5.From.Type = obj.TYPE_REG
223                         p5.From.Reg = out
224                 }
225
226         case ssa.OpPPC64LoweredAtomicExchange32,
227                 ssa.OpPPC64LoweredAtomicExchange64:
228                 // LWSYNC
229                 // LDAR/LWAR    (Rarg0), Rout
230                 // STDCCC/STWCCC Rarg1, (Rarg0)
231                 // BNE         -2(PC)
232                 // ISYNC
233                 ld := ppc64.ALDAR
234                 st := ppc64.ASTDCCC
235                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
236                         ld = ppc64.ALWAR
237                         st = ppc64.ASTWCCC
238                 }
239                 r0 := v.Args[0].Reg()
240                 r1 := v.Args[1].Reg()
241                 out := v.Reg0()
242                 // LWSYNC - Assuming shared data not write-through-required nor
243                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
244                 plwsync := s.Prog(ppc64.ALWSYNC)
245                 plwsync.To.Type = obj.TYPE_NONE
246                 // LDAR or LWAR
247                 p := s.Prog(ld)
248                 p.From.Type = obj.TYPE_MEM
249                 p.From.Reg = r0
250                 p.To.Type = obj.TYPE_REG
251                 p.To.Reg = out
252                 // STDCCC or STWCCC
253                 p1 := s.Prog(st)
254                 p1.From.Type = obj.TYPE_REG
255                 p1.From.Reg = r1
256                 p1.To.Type = obj.TYPE_MEM
257                 p1.To.Reg = r0
258                 // BNE retry
259                 p2 := s.Prog(ppc64.ABNE)
260                 p2.To.Type = obj.TYPE_BRANCH
261                 p2.To.SetTarget(p)
262                 // ISYNC
263                 pisync := s.Prog(ppc64.AISYNC)
264                 pisync.To.Type = obj.TYPE_NONE
265
266         case ssa.OpPPC64LoweredAtomicLoad8,
267                 ssa.OpPPC64LoweredAtomicLoad32,
268                 ssa.OpPPC64LoweredAtomicLoad64,
269                 ssa.OpPPC64LoweredAtomicLoadPtr:
270                 // SYNC
271                 // MOVB/MOVD/MOVW (Rarg0), Rout
272                 // CMP Rout,Rout
273                 // BNE 1(PC)
274                 // ISYNC
275                 ld := ppc64.AMOVD
276                 cmp := ppc64.ACMP
277                 switch v.Op {
278                 case ssa.OpPPC64LoweredAtomicLoad8:
279                         ld = ppc64.AMOVBZ
280                 case ssa.OpPPC64LoweredAtomicLoad32:
281                         ld = ppc64.AMOVWZ
282                         cmp = ppc64.ACMPW
283                 }
284                 arg0 := v.Args[0].Reg()
285                 out := v.Reg0()
286                 // SYNC when AuxInt == 1; otherwise, load-acquire
287                 if v.AuxInt == 1 {
288                         psync := s.Prog(ppc64.ASYNC)
289                         psync.To.Type = obj.TYPE_NONE
290                 }
291                 // Load
292                 p := s.Prog(ld)
293                 p.From.Type = obj.TYPE_MEM
294                 p.From.Reg = arg0
295                 p.To.Type = obj.TYPE_REG
296                 p.To.Reg = out
297                 // CMP
298                 p1 := s.Prog(cmp)
299                 p1.From.Type = obj.TYPE_REG
300                 p1.From.Reg = out
301                 p1.To.Type = obj.TYPE_REG
302                 p1.To.Reg = out
303                 // BNE
304                 p2 := s.Prog(ppc64.ABNE)
305                 p2.To.Type = obj.TYPE_BRANCH
306                 // ISYNC
307                 pisync := s.Prog(ppc64.AISYNC)
308                 pisync.To.Type = obj.TYPE_NONE
309                 p2.To.SetTarget(pisync)
310
311         case ssa.OpPPC64LoweredAtomicStore8,
312                 ssa.OpPPC64LoweredAtomicStore32,
313                 ssa.OpPPC64LoweredAtomicStore64:
314                 // SYNC or LWSYNC
315                 // MOVB/MOVW/MOVD arg1,(arg0)
316                 st := ppc64.AMOVD
317                 switch v.Op {
318                 case ssa.OpPPC64LoweredAtomicStore8:
319                         st = ppc64.AMOVB
320                 case ssa.OpPPC64LoweredAtomicStore32:
321                         st = ppc64.AMOVW
322                 }
323                 arg0 := v.Args[0].Reg()
324                 arg1 := v.Args[1].Reg()
325                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
326                 // SYNC
327                 syncOp := ppc64.ASYNC
328                 if v.AuxInt == 0 {
329                         syncOp = ppc64.ALWSYNC
330                 }
331                 psync := s.Prog(syncOp)
332                 psync.To.Type = obj.TYPE_NONE
333                 // Store
334                 p := s.Prog(st)
335                 p.To.Type = obj.TYPE_MEM
336                 p.To.Reg = arg0
337                 p.From.Type = obj.TYPE_REG
338                 p.From.Reg = arg1
339
340         case ssa.OpPPC64LoweredAtomicCas64,
341                 ssa.OpPPC64LoweredAtomicCas32:
342                 // MOVD        $0, Rout
343                 // LWSYNC
344                 // loop:
345                 // LDAR        (Rarg0), MutexHint, Rtmp
346                 // CMP         Rarg1, Rtmp
347                 // BNE         end
348                 // STDCCC      Rarg2, (Rarg0)
349                 // BNE         loop
350                 // MOVD        $1, Rout
351                 // end:
352                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
353                 ld := ppc64.ALDAR
354                 st := ppc64.ASTDCCC
355                 cmp := ppc64.ACMP
356                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
357                         ld = ppc64.ALWAR
358                         st = ppc64.ASTWCCC
359                         cmp = ppc64.ACMPW
360                 }
361                 r0 := v.Args[0].Reg()
362                 r1 := v.Args[1].Reg()
363                 r2 := v.Args[2].Reg()
364                 out := v.Reg0()
365                 // Initialize return value to false
366                 p := s.Prog(ppc64.AMOVD)
367                 p.From.Type = obj.TYPE_CONST
368                 p.From.Offset = 0
369                 p.To.Type = obj.TYPE_REG
370                 p.To.Reg = out
371                 // LWSYNC - Assuming shared data not write-through-required nor
372                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
373                 plwsync1 := s.Prog(ppc64.ALWSYNC)
374                 plwsync1.To.Type = obj.TYPE_NONE
375                 // LDAR or LWAR
376                 p0 := s.Prog(ld)
377                 p0.From.Type = obj.TYPE_MEM
378                 p0.From.Reg = r0
379                 p0.To.Type = obj.TYPE_REG
380                 p0.To.Reg = ppc64.REGTMP
381                 // If it is a Compare-and-Swap-Release operation, set the EH field with
382                 // the release hint.
383                 if v.AuxInt == 0 {
384                         p0.AddRestSourceConst(0)
385                 }
386                 // CMP reg1,reg2
387                 p1 := s.Prog(cmp)
388                 p1.From.Type = obj.TYPE_REG
389                 p1.From.Reg = r1
390                 p1.To.Reg = ppc64.REGTMP
391                 p1.To.Type = obj.TYPE_REG
392                 // BNE done with return value = false
393                 p2 := s.Prog(ppc64.ABNE)
394                 p2.To.Type = obj.TYPE_BRANCH
395                 // STDCCC or STWCCC
396                 p3 := s.Prog(st)
397                 p3.From.Type = obj.TYPE_REG
398                 p3.From.Reg = r2
399                 p3.To.Type = obj.TYPE_MEM
400                 p3.To.Reg = r0
401                 // BNE retry
402                 p4 := s.Prog(ppc64.ABNE)
403                 p4.To.Type = obj.TYPE_BRANCH
404                 p4.To.SetTarget(p0)
405                 // return value true
406                 p5 := s.Prog(ppc64.AMOVD)
407                 p5.From.Type = obj.TYPE_CONST
408                 p5.From.Offset = 1
409                 p5.To.Type = obj.TYPE_REG
410                 p5.To.Reg = out
411                 // LWSYNC - Assuming shared data not write-through-required nor
412                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
413                 // If the operation is a CAS-Release, then synchronization is not necessary.
414                 if v.AuxInt != 0 {
415                         plwsync2 := s.Prog(ppc64.ALWSYNC)
416                         plwsync2.To.Type = obj.TYPE_NONE
417                         p2.To.SetTarget(plwsync2)
418                 } else {
419                         // done (label)
420                         p6 := s.Prog(obj.ANOP)
421                         p2.To.SetTarget(p6)
422                 }
423
424         case ssa.OpPPC64LoweredPubBarrier:
425                 // LWSYNC
426                 s.Prog(v.Op.Asm())
427
428         case ssa.OpPPC64LoweredGetClosurePtr:
429                 // Closure pointer is R11 (already)
430                 ssagen.CheckLoweredGetClosurePtr(v)
431
432         case ssa.OpPPC64LoweredGetCallerSP:
433                 // caller's SP is FixedFrameSize below the address of the first arg
434                 p := s.Prog(ppc64.AMOVD)
435                 p.From.Type = obj.TYPE_ADDR
436                 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
437                 p.From.Name = obj.NAME_PARAM
438                 p.To.Type = obj.TYPE_REG
439                 p.To.Reg = v.Reg()
440
441         case ssa.OpPPC64LoweredGetCallerPC:
442                 p := s.Prog(obj.AGETCALLERPC)
443                 p.To.Type = obj.TYPE_REG
444                 p.To.Reg = v.Reg()
445
446         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
447                 // input is already rounded
448
449         case ssa.OpLoadReg:
450                 loadOp := loadByType(v.Type)
451                 p := s.Prog(loadOp)
452                 ssagen.AddrAuto(&p.From, v.Args[0])
453                 p.To.Type = obj.TYPE_REG
454                 p.To.Reg = v.Reg()
455
456         case ssa.OpStoreReg:
457                 storeOp := storeByType(v.Type)
458                 p := s.Prog(storeOp)
459                 p.From.Type = obj.TYPE_REG
460                 p.From.Reg = v.Args[0].Reg()
461                 ssagen.AddrAuto(&p.To, v)
462
463         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
464                 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
465                 // The loop only runs once.
466                 for _, a := range v.Block.Func.RegArgs {
467                         // Pass the spill/unspill information along to the assembler, offset by size of
468                         // the saved LR slot.
469                         addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
470                         s.FuncInfo().AddSpill(
471                                 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
472                 }
473                 v.Block.Func.RegArgs = nil
474
475                 ssagen.CheckArgReg(v)
476
477         case ssa.OpPPC64DIVD:
478                 // For now,
479                 //
480                 // cmp arg1, -1
481                 // be  ahead
482                 // v = arg0 / arg1
483                 // b over
484                 // ahead: v = - arg0
485                 // over: nop
486                 r := v.Reg()
487                 r0 := v.Args[0].Reg()
488                 r1 := v.Args[1].Reg()
489
490                 p := s.Prog(ppc64.ACMP)
491                 p.From.Type = obj.TYPE_REG
492                 p.From.Reg = r1
493                 p.To.Type = obj.TYPE_CONST
494                 p.To.Offset = -1
495
496                 pbahead := s.Prog(ppc64.ABEQ)
497                 pbahead.To.Type = obj.TYPE_BRANCH
498
499                 p = s.Prog(v.Op.Asm())
500                 p.From.Type = obj.TYPE_REG
501                 p.From.Reg = r1
502                 p.Reg = r0
503                 p.To.Type = obj.TYPE_REG
504                 p.To.Reg = r
505
506                 pbover := s.Prog(obj.AJMP)
507                 pbover.To.Type = obj.TYPE_BRANCH
508
509                 p = s.Prog(ppc64.ANEG)
510                 p.To.Type = obj.TYPE_REG
511                 p.To.Reg = r
512                 p.From.Type = obj.TYPE_REG
513                 p.From.Reg = r0
514                 pbahead.To.SetTarget(p)
515
516                 p = s.Prog(obj.ANOP)
517                 pbover.To.SetTarget(p)
518
519         case ssa.OpPPC64DIVW:
520                 // word-width version of above
521                 r := v.Reg()
522                 r0 := v.Args[0].Reg()
523                 r1 := v.Args[1].Reg()
524
525                 p := s.Prog(ppc64.ACMPW)
526                 p.From.Type = obj.TYPE_REG
527                 p.From.Reg = r1
528                 p.To.Type = obj.TYPE_CONST
529                 p.To.Offset = -1
530
531                 pbahead := s.Prog(ppc64.ABEQ)
532                 pbahead.To.Type = obj.TYPE_BRANCH
533
534                 p = s.Prog(v.Op.Asm())
535                 p.From.Type = obj.TYPE_REG
536                 p.From.Reg = r1
537                 p.Reg = r0
538                 p.To.Type = obj.TYPE_REG
539                 p.To.Reg = r
540
541                 pbover := s.Prog(obj.AJMP)
542                 pbover.To.Type = obj.TYPE_BRANCH
543
544                 p = s.Prog(ppc64.ANEG)
545                 p.To.Type = obj.TYPE_REG
546                 p.To.Reg = r
547                 p.From.Type = obj.TYPE_REG
548                 p.From.Reg = r0
549                 pbahead.To.SetTarget(p)
550
551                 p = s.Prog(obj.ANOP)
552                 pbover.To.SetTarget(p)
553
554         case ssa.OpPPC64CLRLSLWI:
555                 r := v.Reg()
556                 r1 := v.Args[0].Reg()
557                 shifts := v.AuxInt
558                 p := s.Prog(v.Op.Asm())
559                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
560                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
561                 p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
562                 p.Reg = r1
563                 p.To.Type = obj.TYPE_REG
564                 p.To.Reg = r
565
566         case ssa.OpPPC64CLRLSLDI:
567                 r := v.Reg()
568                 r1 := v.Args[0].Reg()
569                 shifts := v.AuxInt
570                 p := s.Prog(v.Op.Asm())
571                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
572                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
573                 p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
574                 p.Reg = r1
575                 p.To.Type = obj.TYPE_REG
576                 p.To.Reg = r
577
578         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
579                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
580                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
581                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
582                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
583                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
584                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
585                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
586                 r := v.Reg()
587                 r1 := v.Args[0].Reg()
588                 r2 := v.Args[1].Reg()
589                 p := s.Prog(v.Op.Asm())
590                 p.From.Type = obj.TYPE_REG
591                 p.From.Reg = r2
592                 p.Reg = r1
593                 p.To.Type = obj.TYPE_REG
594                 p.To.Reg = r
595
596         case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
597                 ssa.OpPPC64ANDNCC:
598                 r1 := v.Args[0].Reg()
599                 r2 := v.Args[1].Reg()
600                 p := s.Prog(v.Op.Asm())
601                 p.From.Type = obj.TYPE_REG
602                 p.From.Reg = r2
603                 p.Reg = r1
604                 p.To.Type = obj.TYPE_REG
605                 p.To.Reg = v.Reg0()
606
607         case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
608                 p := s.Prog(v.Op.Asm())
609                 p.To.Type = obj.TYPE_REG
610                 p.To.Reg = v.Reg0()
611                 p.From.Type = obj.TYPE_REG
612                 p.From.Reg = v.Args[0].Reg()
613
614         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
615                 p := s.Prog(v.Op.Asm())
616                 p.From.Type = obj.TYPE_CONST
617                 p.From.Offset = v.AuxInt
618                 p.Reg = v.Args[0].Reg()
619                 p.To.Type = obj.TYPE_REG
620                 p.To.Reg = v.Reg()
621
622                 // Auxint holds encoded rotate + mask
623         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
624                 sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
625                 p := s.Prog(v.Op.Asm())
626                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
627                 p.Reg = v.Args[0].Reg()
628                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
629                 p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
630                 // Auxint holds mask
631
632         case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICR:
633                 sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
634                 p := s.Prog(v.Op.Asm())
635                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
636                 switch v.Op {
637                 case ssa.OpPPC64RLDICL:
638                         p.AddRestSourceConst(mb)
639                 case ssa.OpPPC64RLDICR:
640                         p.AddRestSourceConst(me)
641                 }
642                 p.Reg = v.Args[0].Reg()
643                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
644
645         case ssa.OpPPC64RLWNM:
646                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
647                 p := s.Prog(v.Op.Asm())
648                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
649                 p.Reg = v.Args[0].Reg()
650                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
651                 p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
652
653         case ssa.OpPPC64MADDLD:
654                 r := v.Reg()
655                 r1 := v.Args[0].Reg()
656                 r2 := v.Args[1].Reg()
657                 r3 := v.Args[2].Reg()
658                 // r = r1*r2 ± r3
659                 p := s.Prog(v.Op.Asm())
660                 p.From.Type = obj.TYPE_REG
661                 p.From.Reg = r1
662                 p.Reg = r2
663                 p.AddRestSourceReg(r3)
664                 p.To.Type = obj.TYPE_REG
665                 p.To.Reg = r
666
667         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
668                 r := v.Reg()
669                 r1 := v.Args[0].Reg()
670                 r2 := v.Args[1].Reg()
671                 r3 := v.Args[2].Reg()
672                 // r = r1*r2 ± r3
673                 p := s.Prog(v.Op.Asm())
674                 p.From.Type = obj.TYPE_REG
675                 p.From.Reg = r1
676                 p.Reg = r3
677                 p.AddRestSourceReg(r2)
678                 p.To.Type = obj.TYPE_REG
679                 p.To.Reg = r
680
681         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
682                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
683                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
684                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
685                 r := v.Reg()
686                 p := s.Prog(v.Op.Asm())
687                 p.To.Type = obj.TYPE_REG
688                 p.To.Reg = r
689                 p.From.Type = obj.TYPE_REG
690                 p.From.Reg = v.Args[0].Reg()
691
692         case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
693                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
694                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
695                 p := s.Prog(v.Op.Asm())
696                 p.Reg = v.Args[0].Reg()
697                 p.From.Type = obj.TYPE_CONST
698                 p.From.Offset = v.AuxInt
699                 p.To.Type = obj.TYPE_REG
700                 p.To.Reg = v.Reg()
701
702         case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
703                 r := v.Reg0() // CA is the first, implied argument.
704                 r1 := v.Args[0].Reg()
705                 r2 := v.Args[1].Reg()
706                 p := s.Prog(v.Op.Asm())
707                 p.From.Type = obj.TYPE_REG
708                 p.From.Reg = r2
709                 p.Reg = r1
710                 p.To.Type = obj.TYPE_REG
711                 p.To.Reg = r
712
713         case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
714                 p := s.Prog(v.Op.Asm())
715                 p.From.Type = obj.TYPE_REG
716                 p.From.Reg = ppc64.REG_R0
717                 p.To.Type = obj.TYPE_REG
718                 p.To.Reg = v.Reg()
719
720         case ssa.OpPPC64ADDCconst:
721                 p := s.Prog(v.Op.Asm())
722                 p.Reg = v.Args[0].Reg()
723                 p.From.Type = obj.TYPE_CONST
724                 p.From.Offset = v.AuxInt
725                 p.To.Type = obj.TYPE_REG
726                 // Output is a pair, the second is the CA, which is implied.
727                 p.To.Reg = v.Reg0()
728
729         case ssa.OpPPC64SUBCconst:
730                 p := s.Prog(v.Op.Asm())
731                 p.AddRestSourceConst(v.AuxInt)
732                 p.From.Type = obj.TYPE_REG
733                 p.From.Reg = v.Args[0].Reg()
734                 p.To.Type = obj.TYPE_REG
735                 p.To.Reg = v.Reg0()
736
737         case ssa.OpPPC64SUBFCconst:
738                 p := s.Prog(v.Op.Asm())
739                 p.AddRestSourceConst(v.AuxInt)
740                 p.From.Type = obj.TYPE_REG
741                 p.From.Reg = v.Args[0].Reg()
742                 p.To.Type = obj.TYPE_REG
743                 p.To.Reg = v.Reg()
744
745         case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
746                 p := s.Prog(v.Op.Asm())
747                 p.Reg = v.Args[0].Reg()
748                 p.From.Type = obj.TYPE_CONST
749                 p.From.Offset = v.AuxInt
750                 p.To.Type = obj.TYPE_REG
751                 p.To.Reg = v.Reg0()
752
753         case ssa.OpPPC64MOVDaddr:
754                 switch v.Aux.(type) {
755                 default:
756                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
757                 case nil:
758                         // If aux offset and aux int are both 0, and the same
759                         // input and output regs are used, no instruction
760                         // needs to be generated, since it would just be
761                         // addi rx, rx, 0.
762                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
763                                 p := s.Prog(ppc64.AMOVD)
764                                 p.From.Type = obj.TYPE_ADDR
765                                 p.From.Reg = v.Args[0].Reg()
766                                 p.From.Offset = v.AuxInt
767                                 p.To.Type = obj.TYPE_REG
768                                 p.To.Reg = v.Reg()
769                         }
770
771                 case *obj.LSym, ir.Node:
772                         p := s.Prog(ppc64.AMOVD)
773                         p.From.Type = obj.TYPE_ADDR
774                         p.From.Reg = v.Args[0].Reg()
775                         p.To.Type = obj.TYPE_REG
776                         p.To.Reg = v.Reg()
777                         ssagen.AddAux(&p.From, v)
778
779                 }
780
781         case ssa.OpPPC64MOVDconst:
782                 p := s.Prog(v.Op.Asm())
783                 p.From.Type = obj.TYPE_CONST
784                 p.From.Offset = v.AuxInt
785                 p.To.Type = obj.TYPE_REG
786                 p.To.Reg = v.Reg()
787
788         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
789                 p := s.Prog(v.Op.Asm())
790                 p.From.Type = obj.TYPE_FCONST
791                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
792                 p.To.Type = obj.TYPE_REG
793                 p.To.Reg = v.Reg()
794
795         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
796                 p := s.Prog(v.Op.Asm())
797                 p.From.Type = obj.TYPE_REG
798                 p.From.Reg = v.Args[0].Reg()
799                 p.To.Type = obj.TYPE_REG
800                 p.To.Reg = v.Args[1].Reg()
801
802         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
803                 p := s.Prog(v.Op.Asm())
804                 p.From.Type = obj.TYPE_REG
805                 p.From.Reg = v.Args[0].Reg()
806                 p.To.Type = obj.TYPE_CONST
807                 p.To.Offset = v.AuxInt
808
809         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
810                 // Shift in register to required size
811                 p := s.Prog(v.Op.Asm())
812                 p.From.Type = obj.TYPE_REG
813                 p.From.Reg = v.Args[0].Reg()
814                 p.To.Reg = v.Reg()
815                 p.To.Type = obj.TYPE_REG
816
817         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
818
819                 // MOVDload and MOVWload are DS form instructions that are restricted to
820                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
821                 // then the address of the symbol to be loaded is computed (base + offset)
822                 // and used as the new base register and the offset field in the instruction
823                 // can be set to zero.
824
825                 // This same problem can happen with gostrings since the final offset is not
826                 // known yet, but could be unaligned after the relocation is resolved.
827                 // So gostrings are handled the same way.
828
829                 // This allows the MOVDload and MOVWload to be generated in more cases and
830                 // eliminates some offset and alignment checking in the rules file.
831
832                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
833                 ssagen.AddAux(&fromAddr, v)
834
835                 genAddr := false
836
837                 switch fromAddr.Name {
838                 case obj.NAME_EXTERN, obj.NAME_STATIC:
839                         // Special case for a rule that combines the bytes of a gostring.
840                         // The v alignment might seem OK, but we don't want to load it
841                         // using an offset because relocation comes later.
842                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
843                 default:
844                         genAddr = fromAddr.Offset%4 != 0
845                 }
846                 if genAddr {
847                         // Load full address into the temp register.
848                         p := s.Prog(ppc64.AMOVD)
849                         p.From.Type = obj.TYPE_ADDR
850                         p.From.Reg = v.Args[0].Reg()
851                         ssagen.AddAux(&p.From, v)
852                         // Load target using temp as base register
853                         // and offset zero. Setting NAME_NONE
854                         // prevents any extra offsets from being
855                         // added.
856                         p.To.Type = obj.TYPE_REG
857                         p.To.Reg = ppc64.REGTMP
858                         fromAddr.Reg = ppc64.REGTMP
859                         // Clear the offset field and other
860                         // information that might be used
861                         // by the assembler to add to the
862                         // final offset value.
863                         fromAddr.Offset = 0
864                         fromAddr.Name = obj.NAME_NONE
865                         fromAddr.Sym = nil
866                 }
867                 p := s.Prog(v.Op.Asm())
868                 p.From = fromAddr
869                 p.To.Type = obj.TYPE_REG
870                 p.To.Reg = v.Reg()
871
872         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
873                 p := s.Prog(v.Op.Asm())
874                 p.From.Type = obj.TYPE_MEM
875                 p.From.Reg = v.Args[0].Reg()
876                 ssagen.AddAux(&p.From, v)
877                 p.To.Type = obj.TYPE_REG
878                 p.To.Reg = v.Reg()
879
880         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
881                 p := s.Prog(v.Op.Asm())
882                 p.From.Type = obj.TYPE_MEM
883                 p.From.Reg = v.Args[0].Reg()
884                 p.To.Type = obj.TYPE_REG
885                 p.To.Reg = v.Reg()
886
887         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
888                 p := s.Prog(v.Op.Asm())
889                 p.To.Type = obj.TYPE_MEM
890                 p.To.Reg = v.Args[0].Reg()
891                 p.From.Type = obj.TYPE_REG
892                 p.From.Reg = v.Args[1].Reg()
893
894         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
895                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
896                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
897                 p := s.Prog(v.Op.Asm())
898                 p.From.Type = obj.TYPE_MEM
899                 p.From.Reg = v.Args[0].Reg()
900                 p.From.Index = v.Args[1].Reg()
901                 p.To.Type = obj.TYPE_REG
902                 p.To.Reg = v.Reg()
903
904         case ssa.OpPPC64DCBT:
905                 p := s.Prog(v.Op.Asm())
906                 p.From.Type = obj.TYPE_MEM
907                 p.From.Reg = v.Args[0].Reg()
908                 p.To.Type = obj.TYPE_CONST
909                 p.To.Offset = v.AuxInt
910
911         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
912                 p := s.Prog(v.Op.Asm())
913                 p.From.Type = obj.TYPE_REG
914                 p.From.Reg = ppc64.REGZERO
915                 p.To.Type = obj.TYPE_MEM
916                 p.To.Reg = v.Args[0].Reg()
917                 ssagen.AddAux(&p.To, v)
918
919         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
920
921                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
922                 // to offset values that are a multiple of 4. If the offset field is not a
923                 // multiple of 4, then the full address of the store target is computed (base +
924                 // offset) and used as the new base register and the offset in the instruction
925                 // is set to 0.
926
927                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
928                 // and prevents checking of the offset value and alignment in the rules.
929
930                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
931                 ssagen.AddAux(&toAddr, v)
932
933                 if toAddr.Offset%4 != 0 {
934                         p := s.Prog(ppc64.AMOVD)
935                         p.From.Type = obj.TYPE_ADDR
936                         p.From.Reg = v.Args[0].Reg()
937                         ssagen.AddAux(&p.From, v)
938                         p.To.Type = obj.TYPE_REG
939                         p.To.Reg = ppc64.REGTMP
940                         toAddr.Reg = ppc64.REGTMP
941                         // Clear the offset field and other
942                         // information that might be used
943                         // by the assembler to add to the
944                         // final offset value.
945                         toAddr.Offset = 0
946                         toAddr.Name = obj.NAME_NONE
947                         toAddr.Sym = nil
948                 }
949                 p := s.Prog(v.Op.Asm())
950                 p.To = toAddr
951                 p.From.Type = obj.TYPE_REG
952                 if v.Op == ssa.OpPPC64MOVDstorezero {
953                         p.From.Reg = ppc64.REGZERO
954                 } else {
955                         p.From.Reg = v.Args[1].Reg()
956                 }
957
958         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
959                 p := s.Prog(v.Op.Asm())
960                 p.From.Type = obj.TYPE_REG
961                 p.From.Reg = v.Args[1].Reg()
962                 p.To.Type = obj.TYPE_MEM
963                 p.To.Reg = v.Args[0].Reg()
964                 ssagen.AddAux(&p.To, v)
965
966         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
967                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
968                 ssa.OpPPC64MOVHBRstoreidx:
969                 p := s.Prog(v.Op.Asm())
970                 p.From.Type = obj.TYPE_REG
971                 p.From.Reg = v.Args[2].Reg()
972                 p.To.Index = v.Args[1].Reg()
973                 p.To.Type = obj.TYPE_MEM
974                 p.To.Reg = v.Args[0].Reg()
975
976         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
977                 // ISEL  AuxInt ? arg0 : arg1
978                 // ISELZ is a special case of ISEL where arg1 is implicitly $0.
979                 //
980                 // AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
981                 // ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
982                 // Convert the condition to a CR bit argument by the following conversion:
983                 //
984                 // AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
985                 // AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
986                 p := s.Prog(v.Op.Asm())
987                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
988                 p.Reg = v.Args[0].Reg()
989                 if v.Op == ssa.OpPPC64ISEL {
990                         p.AddRestSourceReg(v.Args[1].Reg())
991                 } else {
992                         p.AddRestSourceReg(ppc64.REG_R0)
993                 }
994                 // AuxInt values 4,5,6,7 are implemented with reverse operand order from 0,1,2,3
995                 if v.AuxInt > 3 {
996                         p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
997                 }
998                 p.From.SetConst(v.AuxInt & 3)
999
1000         case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
1001                 p := s.Prog(v.Op.Asm())
1002                 p.To.Type = obj.TYPE_REG
1003                 p.To.Reg = v.Reg()
1004                 p.From.Type = obj.TYPE_REG
1005                 p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)
1006
1007         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
1008                 // The LoweredQuad code generation
1009                 // generates STXV instructions on
1010                 // power9. The Short variation is used
1011                 // if no loop is generated.
1012
1013                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
1014
1015                 // Set up loop counter in CTR, used by BC
1016                 // XXLXOR clears VS32
1017                 //       XXLXOR VS32,VS32,VS32
1018                 //       MOVD len/64,REG_TMP
1019                 //       MOVD REG_TMP,CTR
1020                 //       loop:
1021                 //       STXV VS32,0(R20)
1022                 //       STXV VS32,16(R20)
1023                 //       STXV VS32,32(R20)
1024                 //       STXV VS32,48(R20)
1025                 //       ADD  $64,R20
1026                 //       BC   16, 0, loop
1027
1028                 // Number of 64-byte loop iterations
1029                 ctr := v.AuxInt / 64
1030
1031                 // Remainder bytes
1032                 rem := v.AuxInt % 64
1033
1034                 // Only generate a loop if there is more
1035                 // than 1 iteration.
1036                 if ctr > 1 {
1037                         // Set up VS32 (V0) to hold 0s
1038                         p := s.Prog(ppc64.AXXLXOR)
1039                         p.From.Type = obj.TYPE_REG
1040                         p.From.Reg = ppc64.REG_VS32
1041                         p.To.Type = obj.TYPE_REG
1042                         p.To.Reg = ppc64.REG_VS32
1043                         p.Reg = ppc64.REG_VS32
1044
1045                         // Set up CTR loop counter
1046                         p = s.Prog(ppc64.AMOVD)
1047                         p.From.Type = obj.TYPE_CONST
1048                         p.From.Offset = ctr
1049                         p.To.Type = obj.TYPE_REG
1050                         p.To.Reg = ppc64.REGTMP
1051
1052                         p = s.Prog(ppc64.AMOVD)
1053                         p.From.Type = obj.TYPE_REG
1054                         p.From.Reg = ppc64.REGTMP
1055                         p.To.Type = obj.TYPE_REG
1056                         p.To.Reg = ppc64.REG_CTR
1057
1058                         // Don't generate padding for
1059                         // loops with few iterations.
1060                         if ctr > 3 {
1061                                 p = s.Prog(obj.APCALIGN)
1062                                 p.From.Type = obj.TYPE_CONST
1063                                 p.From.Offset = 16
1064                         }
1065
1066                         // generate 4 STXVs to zero 64 bytes
1067                         var top *obj.Prog
1068
1069                         p = s.Prog(ppc64.ASTXV)
1070                         p.From.Type = obj.TYPE_REG
1071                         p.From.Reg = ppc64.REG_VS32
1072                         p.To.Type = obj.TYPE_MEM
1073                         p.To.Reg = v.Args[0].Reg()
1074
1075                         //  Save the top of loop
1076                         if top == nil {
1077                                 top = p
1078                         }
1079                         p = s.Prog(ppc64.ASTXV)
1080                         p.From.Type = obj.TYPE_REG
1081                         p.From.Reg = ppc64.REG_VS32
1082                         p.To.Type = obj.TYPE_MEM
1083                         p.To.Reg = v.Args[0].Reg()
1084                         p.To.Offset = 16
1085
1086                         p = s.Prog(ppc64.ASTXV)
1087                         p.From.Type = obj.TYPE_REG
1088                         p.From.Reg = ppc64.REG_VS32
1089                         p.To.Type = obj.TYPE_MEM
1090                         p.To.Reg = v.Args[0].Reg()
1091                         p.To.Offset = 32
1092
1093                         p = s.Prog(ppc64.ASTXV)
1094                         p.From.Type = obj.TYPE_REG
1095                         p.From.Reg = ppc64.REG_VS32
1096                         p.To.Type = obj.TYPE_MEM
1097                         p.To.Reg = v.Args[0].Reg()
1098                         p.To.Offset = 48
1099
1100                         // Increment address for the
1101                         // 64 bytes just zeroed.
1102                         p = s.Prog(ppc64.AADD)
1103                         p.Reg = v.Args[0].Reg()
1104                         p.From.Type = obj.TYPE_CONST
1105                         p.From.Offset = 64
1106                         p.To.Type = obj.TYPE_REG
1107                         p.To.Reg = v.Args[0].Reg()
1108
1109                         // Branch back to top of loop
1110                         // based on CTR
1111                         // BC with BO_BCTR generates bdnz
1112                         p = s.Prog(ppc64.ABC)
1113                         p.From.Type = obj.TYPE_CONST
1114                         p.From.Offset = ppc64.BO_BCTR
1115                         p.Reg = ppc64.REG_CR0LT
1116                         p.To.Type = obj.TYPE_BRANCH
1117                         p.To.SetTarget(top)
1118                 }
1119                 // When ctr == 1 the loop was not generated but
1120                 // there are at least 64 bytes to clear, so add
1121                 // that to the remainder to generate the code
1122                 // to clear those doublewords
1123                 if ctr == 1 {
1124                         rem += 64
1125                 }
1126
1127                 // Clear the remainder starting at offset zero
1128                 offset := int64(0)
1129
1130                 if rem >= 16 && ctr <= 1 {
1131                         // If the XXLXOR hasn't already been
1132                         // generated, do it here to initialize
1133                         // VS32 (V0) to 0.
1134                         p := s.Prog(ppc64.AXXLXOR)
1135                         p.From.Type = obj.TYPE_REG
1136                         p.From.Reg = ppc64.REG_VS32
1137                         p.To.Type = obj.TYPE_REG
1138                         p.To.Reg = ppc64.REG_VS32
1139                         p.Reg = ppc64.REG_VS32
1140                 }
1141                 // Generate STXV for 32 or 64
1142                 // bytes.
1143                 for rem >= 32 {
1144                         p := s.Prog(ppc64.ASTXV)
1145                         p.From.Type = obj.TYPE_REG
1146                         p.From.Reg = ppc64.REG_VS32
1147                         p.To.Type = obj.TYPE_MEM
1148                         p.To.Reg = v.Args[0].Reg()
1149                         p.To.Offset = offset
1150
1151                         p = s.Prog(ppc64.ASTXV)
1152                         p.From.Type = obj.TYPE_REG
1153                         p.From.Reg = ppc64.REG_VS32
1154                         p.To.Type = obj.TYPE_MEM
1155                         p.To.Reg = v.Args[0].Reg()
1156                         p.To.Offset = offset + 16
1157                         offset += 32
1158                         rem -= 32
1159                 }
1160                 // Generate 16 bytes
1161                 if rem >= 16 {
1162                         p := s.Prog(ppc64.ASTXV)
1163                         p.From.Type = obj.TYPE_REG
1164                         p.From.Reg = ppc64.REG_VS32
1165                         p.To.Type = obj.TYPE_MEM
1166                         p.To.Reg = v.Args[0].Reg()
1167                         p.To.Offset = offset
1168                         offset += 16
1169                         rem -= 16
1170                 }
1171
1172                 // first clear as many doublewords as possible
1173                 // then clear remaining sizes as available
1174                 for rem > 0 {
1175                         op, size := ppc64.AMOVB, int64(1)
1176                         switch {
1177                         case rem >= 8:
1178                                 op, size = ppc64.AMOVD, 8
1179                         case rem >= 4:
1180                                 op, size = ppc64.AMOVW, 4
1181                         case rem >= 2:
1182                                 op, size = ppc64.AMOVH, 2
1183                         }
1184                         p := s.Prog(op)
1185                         p.From.Type = obj.TYPE_REG
1186                         p.From.Reg = ppc64.REG_R0
1187                         p.To.Type = obj.TYPE_MEM
1188                         p.To.Reg = v.Args[0].Reg()
1189                         p.To.Offset = offset
1190                         rem -= size
1191                         offset += size
1192                 }
1193
1194         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1195
1196                 // Unaligned data doesn't hurt performance
1197                 // for these instructions on power8.
1198
1199                 // For sizes >= 64 generate a loop as follows:
1200
1201                 // Set up loop counter in CTR, used by BC
1202                 //       XXLXOR VS32,VS32,VS32
1203                 //       MOVD len/32,REG_TMP
1204                 //       MOVD REG_TMP,CTR
1205                 //       MOVD $16,REG_TMP
1206                 //       loop:
1207                 //       STXVD2X VS32,(R0)(R20)
1208                 //       STXVD2X VS32,(R31)(R20)
1209                 //       ADD  $32,R20
1210                 //       BC   16, 0, loop
1211                 //
1212                 // any remainder is done as described below
1213
1214                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1215                 // then handle the remainder
1216                 //      MOVD R0,(R20)
1217                 //      MOVD R0,8(R20)
1218                 // .... etc.
1219                 //
1220                 // the remainder bytes are cleared using one or more
1221                 // of the following instructions with the appropriate
1222                 // offsets depending which instructions are needed
1223                 //
1224                 //      MOVW R0,n1(R20) 4 bytes
1225                 //      MOVH R0,n2(R20) 2 bytes
1226                 //      MOVB R0,n3(R20) 1 byte
1227                 //
1228                 // 7 bytes: MOVW, MOVH, MOVB
1229                 // 6 bytes: MOVW, MOVH
1230                 // 5 bytes: MOVW, MOVB
1231                 // 3 bytes: MOVH, MOVB
1232
1233                 // each loop iteration does 32 bytes
1234                 ctr := v.AuxInt / 32
1235
1236                 // remainder bytes
1237                 rem := v.AuxInt % 32
1238
1239                 // only generate a loop if there is more
1240                 // than 1 iteration.
1241                 if ctr > 1 {
1242                         // Set up VS32 (V0) to hold 0s
1243                         p := s.Prog(ppc64.AXXLXOR)
1244                         p.From.Type = obj.TYPE_REG
1245                         p.From.Reg = ppc64.REG_VS32
1246                         p.To.Type = obj.TYPE_REG
1247                         p.To.Reg = ppc64.REG_VS32
1248                         p.Reg = ppc64.REG_VS32
1249
1250                         // Set up CTR loop counter
1251                         p = s.Prog(ppc64.AMOVD)
1252                         p.From.Type = obj.TYPE_CONST
1253                         p.From.Offset = ctr
1254                         p.To.Type = obj.TYPE_REG
1255                         p.To.Reg = ppc64.REGTMP
1256
1257                         p = s.Prog(ppc64.AMOVD)
1258                         p.From.Type = obj.TYPE_REG
1259                         p.From.Reg = ppc64.REGTMP
1260                         p.To.Type = obj.TYPE_REG
1261                         p.To.Reg = ppc64.REG_CTR
1262
1263                         // Set up R31 to hold index value 16
1264                         p = s.Prog(ppc64.AMOVD)
1265                         p.From.Type = obj.TYPE_CONST
1266                         p.From.Offset = 16
1267                         p.To.Type = obj.TYPE_REG
1268                         p.To.Reg = ppc64.REGTMP
1269
1270                         // Don't add padding for alignment
1271                         // with few loop iterations.
1272                         if ctr > 3 {
1273                                 p = s.Prog(obj.APCALIGN)
1274                                 p.From.Type = obj.TYPE_CONST
1275                                 p.From.Offset = 16
1276                         }
1277
1278                         // generate 2 STXVD2Xs, each storing 16 bytes
1279                         // when this is a loop then the top must be saved
1280                         var top *obj.Prog
1281                         // This is the top of loop
1282
1283                         p = s.Prog(ppc64.ASTXVD2X)
1284                         p.From.Type = obj.TYPE_REG
1285                         p.From.Reg = ppc64.REG_VS32
1286                         p.To.Type = obj.TYPE_MEM
1287                         p.To.Reg = v.Args[0].Reg()
1288                         p.To.Index = ppc64.REGZERO
1289                         // Save the top of loop
1290                         if top == nil {
1291                                 top = p
1292                         }
1293                         p = s.Prog(ppc64.ASTXVD2X)
1294                         p.From.Type = obj.TYPE_REG
1295                         p.From.Reg = ppc64.REG_VS32
1296                         p.To.Type = obj.TYPE_MEM
1297                         p.To.Reg = v.Args[0].Reg()
1298                         p.To.Index = ppc64.REGTMP
1299
1300                         // Increment address for the
1301                         // 4 doublewords just zeroed.
1302                         p = s.Prog(ppc64.AADD)
1303                         p.Reg = v.Args[0].Reg()
1304                         p.From.Type = obj.TYPE_CONST
1305                         p.From.Offset = 32
1306                         p.To.Type = obj.TYPE_REG
1307                         p.To.Reg = v.Args[0].Reg()
1308
1309                         // Branch back to top of loop
1310                         // based on CTR
1311                         // BC with BO_BCTR generates bdnz
1312                         p = s.Prog(ppc64.ABC)
1313                         p.From.Type = obj.TYPE_CONST
1314                         p.From.Offset = ppc64.BO_BCTR
1315                         p.Reg = ppc64.REG_CR0LT
1316                         p.To.Type = obj.TYPE_BRANCH
1317                         p.To.SetTarget(top)
1318                 }
1319
1320                 // when ctr == 1 the loop was not generated but
1321                 // there are at least 32 bytes to clear, so add
1322                 // that to the remainder to generate the code
1323                 // to clear those doublewords
1324                 if ctr == 1 {
1325                         rem += 32
1326                 }
1327
1328                 // clear the remainder starting at offset zero
1329                 offset := int64(0)
1330
1331                 // first clear as many doublewords as possible
1332                 // then clear remaining sizes as available
1333                 for rem > 0 {
1334                         op, size := ppc64.AMOVB, int64(1)
1335                         switch {
1336                         case rem >= 8:
1337                                 op, size = ppc64.AMOVD, 8
1338                         case rem >= 4:
1339                                 op, size = ppc64.AMOVW, 4
1340                         case rem >= 2:
1341                                 op, size = ppc64.AMOVH, 2
1342                         }
1343                         p := s.Prog(op)
1344                         p.From.Type = obj.TYPE_REG
1345                         p.From.Reg = ppc64.REG_R0
1346                         p.To.Type = obj.TYPE_MEM
1347                         p.To.Reg = v.Args[0].Reg()
1348                         p.To.Offset = offset
1349                         rem -= size
1350                         offset += size
1351                 }
1352
1353         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1354
1355                 bytesPerLoop := int64(32)
1356                 // This will be used when moving more
1357                 // than 8 bytes.  Moves start with
1358                 // as many 8 byte moves as possible, then
1359                 // 4, 2, or 1 byte(s) as remaining.  This will
1360                 // work and be efficient for power8 or later.
1361                 // If there are 64 or more bytes, then a
1362                 // loop is generated to move 32 bytes and
1363                 // update the src and dst addresses on each
1364                 // iteration. When < 64 bytes, the appropriate
1365                 // number of moves are generated based on the
1366                 // size.
1367                 // When moving >= 64 bytes a loop is used
1368                 //      MOVD len/32,REG_TMP
1369                 //      MOVD REG_TMP,CTR
1370                 //      MOVD $16,REG_TMP
1371                 // top:
1372                 //      LXVD2X (R0)(R21),VS32
1373                 //      LXVD2X (R31)(R21),VS33
1374                 //      ADD $32,R21
1375                 //      STXVD2X VS32,(R0)(R20)
1376                 //      STXVD2X VS33,(R31)(R20)
1377                 //      ADD $32,R20
1378                 //      BC 16,0,top
1379                 // Bytes not moved by this loop are moved
1380                 // with a combination of the following instructions,
1381                 // starting with the largest sizes and generating as
1382                 // many as needed, using the appropriate offset value.
1383                 //      MOVD  n(R21),R31
1384                 //      MOVD  R31,n(R20)
1385                 //      MOVW  n1(R21),R31
1386                 //      MOVW  R31,n1(R20)
1387                 //      MOVH  n2(R21),R31
1388                 //      MOVH  R31,n2(R20)
1389                 //      MOVB  n3(R21),R31
1390                 //      MOVB  R31,n3(R20)
1391
1392                 // Each loop iteration moves 32 bytes
1393                 ctr := v.AuxInt / bytesPerLoop
1394
1395                 // Remainder after the loop
1396                 rem := v.AuxInt % bytesPerLoop
1397
1398                 dstReg := v.Args[0].Reg()
1399                 srcReg := v.Args[1].Reg()
1400
1401                 // The set of registers used here, must match the clobbered reg list
1402                 // in PPC64Ops.go.
1403                 offset := int64(0)
1404
1405                 // top of the loop
1406                 var top *obj.Prog
1407                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1408                 if ctr > 1 {
1409                         // Set up the CTR
1410                         p := s.Prog(ppc64.AMOVD)
1411                         p.From.Type = obj.TYPE_CONST
1412                         p.From.Offset = ctr
1413                         p.To.Type = obj.TYPE_REG
1414                         p.To.Reg = ppc64.REGTMP
1415
1416                         p = s.Prog(ppc64.AMOVD)
1417                         p.From.Type = obj.TYPE_REG
1418                         p.From.Reg = ppc64.REGTMP
1419                         p.To.Type = obj.TYPE_REG
1420                         p.To.Reg = ppc64.REG_CTR
1421
1422                         // Use REGTMP as index reg
1423                         p = s.Prog(ppc64.AMOVD)
1424                         p.From.Type = obj.TYPE_CONST
1425                         p.From.Offset = 16
1426                         p.To.Type = obj.TYPE_REG
1427                         p.To.Reg = ppc64.REGTMP
1428
1429                         // Don't add padding for
1430                         // alignment with small iteration
1431                         // counts.
1432                         if ctr > 3 {
1433                                 p = s.Prog(obj.APCALIGN)
1434                                 p.From.Type = obj.TYPE_CONST
1435                                 p.From.Offset = 16
1436                         }
1437
1438                         // Generate 16 byte loads and stores.
1439                         // Use temp register for index (16)
1440                         // on the second one.
1441
1442                         p = s.Prog(ppc64.ALXVD2X)
1443                         p.From.Type = obj.TYPE_MEM
1444                         p.From.Reg = srcReg
1445                         p.From.Index = ppc64.REGZERO
1446                         p.To.Type = obj.TYPE_REG
1447                         p.To.Reg = ppc64.REG_VS32
1448                         if top == nil {
1449                                 top = p
1450                         }
1451                         p = s.Prog(ppc64.ALXVD2X)
1452                         p.From.Type = obj.TYPE_MEM
1453                         p.From.Reg = srcReg
1454                         p.From.Index = ppc64.REGTMP
1455                         p.To.Type = obj.TYPE_REG
1456                         p.To.Reg = ppc64.REG_VS33
1457
1458                         // increment the src reg for next iteration
1459                         p = s.Prog(ppc64.AADD)
1460                         p.Reg = srcReg
1461                         p.From.Type = obj.TYPE_CONST
1462                         p.From.Offset = bytesPerLoop
1463                         p.To.Type = obj.TYPE_REG
1464                         p.To.Reg = srcReg
1465
1466                         // generate 16 byte stores
1467                         p = s.Prog(ppc64.ASTXVD2X)
1468                         p.From.Type = obj.TYPE_REG
1469                         p.From.Reg = ppc64.REG_VS32
1470                         p.To.Type = obj.TYPE_MEM
1471                         p.To.Reg = dstReg
1472                         p.To.Index = ppc64.REGZERO
1473
1474                         p = s.Prog(ppc64.ASTXVD2X)
1475                         p.From.Type = obj.TYPE_REG
1476                         p.From.Reg = ppc64.REG_VS33
1477                         p.To.Type = obj.TYPE_MEM
1478                         p.To.Reg = dstReg
1479                         p.To.Index = ppc64.REGTMP
1480
1481                         // increment the dst reg for next iteration
1482                         p = s.Prog(ppc64.AADD)
1483                         p.Reg = dstReg
1484                         p.From.Type = obj.TYPE_CONST
1485                         p.From.Offset = bytesPerLoop
1486                         p.To.Type = obj.TYPE_REG
1487                         p.To.Reg = dstReg
1488
1489                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1490                         // to loop top.
1491                         p = s.Prog(ppc64.ABC)
1492                         p.From.Type = obj.TYPE_CONST
1493                         p.From.Offset = ppc64.BO_BCTR
1494                         p.Reg = ppc64.REG_CR0LT
1495                         p.To.Type = obj.TYPE_BRANCH
1496                         p.To.SetTarget(top)
1497
1498                         // srcReg and dstReg were incremented in the loop, so
1499                         // later instructions start with offset 0.
1500                         offset = int64(0)
1501                 }
1502
1503                 // No loop was generated for one iteration, so
1504                 // add 32 bytes to the remainder to move those bytes.
1505                 if ctr == 1 {
1506                         rem += bytesPerLoop
1507                 }
1508
1509                 if rem >= 16 {
1510                         // Generate 16 byte loads and stores.
1511                         // Use temp register for index (value 16)
1512                         // on the second one.
1513                         p := s.Prog(ppc64.ALXVD2X)
1514                         p.From.Type = obj.TYPE_MEM
1515                         p.From.Reg = srcReg
1516                         p.From.Index = ppc64.REGZERO
1517                         p.To.Type = obj.TYPE_REG
1518                         p.To.Reg = ppc64.REG_VS32
1519
1520                         p = s.Prog(ppc64.ASTXVD2X)
1521                         p.From.Type = obj.TYPE_REG
1522                         p.From.Reg = ppc64.REG_VS32
1523                         p.To.Type = obj.TYPE_MEM
1524                         p.To.Reg = dstReg
1525                         p.To.Index = ppc64.REGZERO
1526
1527                         offset = 16
1528                         rem -= 16
1529
1530                         if rem >= 16 {
1531                                 // Use REGTMP as index reg
1532                                 p := s.Prog(ppc64.AMOVD)
1533                                 p.From.Type = obj.TYPE_CONST
1534                                 p.From.Offset = 16
1535                                 p.To.Type = obj.TYPE_REG
1536                                 p.To.Reg = ppc64.REGTMP
1537
1538                                 p = s.Prog(ppc64.ALXVD2X)
1539                                 p.From.Type = obj.TYPE_MEM
1540                                 p.From.Reg = srcReg
1541                                 p.From.Index = ppc64.REGTMP
1542                                 p.To.Type = obj.TYPE_REG
1543                                 p.To.Reg = ppc64.REG_VS32
1544
1545                                 p = s.Prog(ppc64.ASTXVD2X)
1546                                 p.From.Type = obj.TYPE_REG
1547                                 p.From.Reg = ppc64.REG_VS32
1548                                 p.To.Type = obj.TYPE_MEM
1549                                 p.To.Reg = dstReg
1550                                 p.To.Index = ppc64.REGTMP
1551
1552                                 offset = 32
1553                                 rem -= 16
1554                         }
1555                 }
1556
1557                 // Generate all the remaining load and store pairs, starting with
1558                 // as many 8 byte moves as possible, then 4, 2, 1.
1559                 for rem > 0 {
1560                         op, size := ppc64.AMOVB, int64(1)
1561                         switch {
1562                         case rem >= 8:
1563                                 op, size = ppc64.AMOVD, 8
1564                         case rem >= 4:
1565                                 op, size = ppc64.AMOVWZ, 4
1566                         case rem >= 2:
1567                                 op, size = ppc64.AMOVH, 2
1568                         }
1569                         // Load
1570                         p := s.Prog(op)
1571                         p.To.Type = obj.TYPE_REG
1572                         p.To.Reg = ppc64.REGTMP
1573                         p.From.Type = obj.TYPE_MEM
1574                         p.From.Reg = srcReg
1575                         p.From.Offset = offset
1576
1577                         // Store
1578                         p = s.Prog(op)
1579                         p.From.Type = obj.TYPE_REG
1580                         p.From.Reg = ppc64.REGTMP
1581                         p.To.Type = obj.TYPE_MEM
1582                         p.To.Reg = dstReg
1583                         p.To.Offset = offset
1584                         rem -= size
1585                         offset += size
1586                 }
1587
1588         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1589                 bytesPerLoop := int64(64)
1590                 // This is used when moving more
1591                 // than 8 bytes on power9.  Moves start with
1592                 // as many 8 byte moves as possible, then
1593                 // 4, 2, or 1 byte(s) as remaining.  This will
1594                 // work and be efficient for power8 or later.
1595                 // If there are 128 or more bytes, then a
1596                 // loop is generated to move 64 bytes and
1597                 // update the src and dst addresses on each
1598                 // iteration. When < 128 bytes, the appropriate
1599                 // number of moves are generated based on the
1600                 // size.
1601                 // When moving >= 128 bytes a loop is used
1602                 //      MOVD len/64,REG_TMP
1603                 //      MOVD REG_TMP,CTR
1604                 // top:
1605                 //      LXV 0(R21),VS32;   LXV 16(R21),VS33
1606                 //      STXV VS32,0(R20);  STXV VS33,16(R20)
1607                 //      LXV 32(R21),VS32;  LXV 48(R21),VS33
1608                 //      STXV VS32,32(R20); STXV VS33,48(R20)
1609                 //      ADD $64,R21
1610                 //      ADD $64,R20
1611                 //      BC 16,0,top
1612                 // Bytes not moved by this loop are moved
1613                 // with a combination of the following instructions,
1614                 // starting with the largest sizes and generating as
1615                 // many as needed, using the appropriate offset value.
1616                 //      MOVD  n(R21),R31
1617                 //      MOVD  R31,n(R20)
1618                 //      MOVW  n1(R21),R31
1619                 //      MOVW  R31,n1(R20)
1620                 //      MOVH  n2(R21),R31
1621                 //      MOVH  R31,n2(R20)
1622                 //      MOVB  n3(R21),R31
1623                 //      MOVB  R31,n3(R20)
1624
1625                 // Each loop iteration moves 64 bytes
1626                 ctr := v.AuxInt / bytesPerLoop
1627
1628                 // Remainder after the loop
1629                 rem := v.AuxInt % bytesPerLoop
1630
1631                 dstReg := v.Args[0].Reg()
1632                 srcReg := v.Args[1].Reg()
1633
1634                 offset := int64(0)
1635
1636                 // top of the loop
1637                 var top *obj.Prog
1638
1639                 // Only generate looping code when loop counter is > 1, i.e. for >= 128 bytes
1640                 if ctr > 1 {
1641                         // Set up the CTR
1642                         p := s.Prog(ppc64.AMOVD)
1643                         p.From.Type = obj.TYPE_CONST
1644                         p.From.Offset = ctr
1645                         p.To.Type = obj.TYPE_REG
1646                         p.To.Reg = ppc64.REGTMP
1647
1648                         p = s.Prog(ppc64.AMOVD)
1649                         p.From.Type = obj.TYPE_REG
1650                         p.From.Reg = ppc64.REGTMP
1651                         p.To.Type = obj.TYPE_REG
1652                         p.To.Reg = ppc64.REG_CTR
1653
1654                         p = s.Prog(obj.APCALIGN)
1655                         p.From.Type = obj.TYPE_CONST
1656                         p.From.Offset = 16
1657
1658                         // Generate 16 byte loads and stores.
1659                         p = s.Prog(ppc64.ALXV)
1660                         p.From.Type = obj.TYPE_MEM
1661                         p.From.Reg = srcReg
1662                         p.From.Offset = offset
1663                         p.To.Type = obj.TYPE_REG
1664                         p.To.Reg = ppc64.REG_VS32
1665                         if top == nil {
1666                                 top = p
1667                         }
1668                         p = s.Prog(ppc64.ALXV)
1669                         p.From.Type = obj.TYPE_MEM
1670                         p.From.Reg = srcReg
1671                         p.From.Offset = offset + 16
1672                         p.To.Type = obj.TYPE_REG
1673                         p.To.Reg = ppc64.REG_VS33
1674
1675                         // generate 16 byte stores
1676                         p = s.Prog(ppc64.ASTXV)
1677                         p.From.Type = obj.TYPE_REG
1678                         p.From.Reg = ppc64.REG_VS32
1679                         p.To.Type = obj.TYPE_MEM
1680                         p.To.Reg = dstReg
1681                         p.To.Offset = offset
1682
1683                         p = s.Prog(ppc64.ASTXV)
1684                         p.From.Type = obj.TYPE_REG
1685                         p.From.Reg = ppc64.REG_VS33
1686                         p.To.Type = obj.TYPE_MEM
1687                         p.To.Reg = dstReg
1688                         p.To.Offset = offset + 16
1689
1690                         // Generate 16 byte loads and stores.
1691                         p = s.Prog(ppc64.ALXV)
1692                         p.From.Type = obj.TYPE_MEM
1693                         p.From.Reg = srcReg
1694                         p.From.Offset = offset + 32
1695                         p.To.Type = obj.TYPE_REG
1696                         p.To.Reg = ppc64.REG_VS32
1697
1698                         p = s.Prog(ppc64.ALXV)
1699                         p.From.Type = obj.TYPE_MEM
1700                         p.From.Reg = srcReg
1701                         p.From.Offset = offset + 48
1702                         p.To.Type = obj.TYPE_REG
1703                         p.To.Reg = ppc64.REG_VS33
1704
1705                         // generate 16 byte stores
1706                         p = s.Prog(ppc64.ASTXV)
1707                         p.From.Type = obj.TYPE_REG
1708                         p.From.Reg = ppc64.REG_VS32
1709                         p.To.Type = obj.TYPE_MEM
1710                         p.To.Reg = dstReg
1711                         p.To.Offset = offset + 32
1712
1713                         p = s.Prog(ppc64.ASTXV)
1714                         p.From.Type = obj.TYPE_REG
1715                         p.From.Reg = ppc64.REG_VS33
1716                         p.To.Type = obj.TYPE_MEM
1717                         p.To.Reg = dstReg
1718                         p.To.Offset = offset + 48
1719
1720                         // increment the src reg for next iteration
1721                         p = s.Prog(ppc64.AADD)
1722                         p.Reg = srcReg
1723                         p.From.Type = obj.TYPE_CONST
1724                         p.From.Offset = bytesPerLoop
1725                         p.To.Type = obj.TYPE_REG
1726                         p.To.Reg = srcReg
1727
1728                         // increment the dst reg for next iteration
1729                         p = s.Prog(ppc64.AADD)
1730                         p.Reg = dstReg
1731                         p.From.Type = obj.TYPE_CONST
1732                         p.From.Offset = bytesPerLoop
1733                         p.To.Type = obj.TYPE_REG
1734                         p.To.Reg = dstReg
1735
1736                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1737                         // to loop top.
1738                         p = s.Prog(ppc64.ABC)
1739                         p.From.Type = obj.TYPE_CONST
1740                         p.From.Offset = ppc64.BO_BCTR
1741                         p.Reg = ppc64.REG_CR0LT
1742                         p.To.Type = obj.TYPE_BRANCH
1743                         p.To.SetTarget(top)
1744
1745                         // srcReg and dstReg were incremented in the loop, so
1746                         // later instructions start with offset 0.
1747                         offset = int64(0)
1748                 }
1749
1750                 // No loop was generated for one iteration, so
1751                 // add 64 bytes to the remainder to move those bytes.
1752                 if ctr == 1 {
1753                         rem += bytesPerLoop
1754                 }
1755                 if rem >= 32 {
1756                         p := s.Prog(ppc64.ALXV)
1757                         p.From.Type = obj.TYPE_MEM
1758                         p.From.Reg = srcReg
1759                         p.To.Type = obj.TYPE_REG
1760                         p.To.Reg = ppc64.REG_VS32
1761
1762                         p = s.Prog(ppc64.ALXV)
1763                         p.From.Type = obj.TYPE_MEM
1764                         p.From.Reg = srcReg
1765                         p.From.Offset = 16
1766                         p.To.Type = obj.TYPE_REG
1767                         p.To.Reg = ppc64.REG_VS33
1768
1769                         p = s.Prog(ppc64.ASTXV)
1770                         p.From.Type = obj.TYPE_REG
1771                         p.From.Reg = ppc64.REG_VS32
1772                         p.To.Type = obj.TYPE_MEM
1773                         p.To.Reg = dstReg
1774
1775                         p = s.Prog(ppc64.ASTXV)
1776                         p.From.Type = obj.TYPE_REG
1777                         p.From.Reg = ppc64.REG_VS33
1778                         p.To.Type = obj.TYPE_MEM
1779                         p.To.Reg = dstReg
1780                         p.To.Offset = 16
1781
1782                         offset = 32
1783                         rem -= 32
1784                 }
1785
1786                 if rem >= 16 {
1787                         // Generate 16 byte loads and stores.
1788                         p := s.Prog(ppc64.ALXV)
1789                         p.From.Type = obj.TYPE_MEM
1790                         p.From.Reg = srcReg
1791                         p.From.Offset = offset
1792                         p.To.Type = obj.TYPE_REG
1793                         p.To.Reg = ppc64.REG_VS32
1794
1795                         p = s.Prog(ppc64.ASTXV)
1796                         p.From.Type = obj.TYPE_REG
1797                         p.From.Reg = ppc64.REG_VS32
1798                         p.To.Type = obj.TYPE_MEM
1799                         p.To.Reg = dstReg
1800                         p.To.Offset = offset
1801
1802                         offset += 16
1803                         rem -= 16
1804
1805                         if rem >= 16 {
1806                                 p := s.Prog(ppc64.ALXV)
1807                                 p.From.Type = obj.TYPE_MEM
1808                                 p.From.Reg = srcReg
1809                                 p.From.Offset = offset
1810                                 p.To.Type = obj.TYPE_REG
1811                                 p.To.Reg = ppc64.REG_VS32
1812
1813                                 p = s.Prog(ppc64.ASTXV)
1814                                 p.From.Type = obj.TYPE_REG
1815                                 p.From.Reg = ppc64.REG_VS32
1816                                 p.To.Type = obj.TYPE_MEM
1817                                 p.To.Reg = dstReg
1818                                 p.To.Offset = offset
1819
1820                                 offset += 16
1821                                 rem -= 16
1822                         }
1823                 }
1824                 // Generate all the remaining load and store pairs, starting with
1825                 // as many 8 byte moves as possible, then 4, 2, 1.
1826                 for rem > 0 {
1827                         op, size := ppc64.AMOVB, int64(1)
1828                         switch {
1829                         case rem >= 8:
1830                                 op, size = ppc64.AMOVD, 8
1831                         case rem >= 4:
1832                                 op, size = ppc64.AMOVWZ, 4
1833                         case rem >= 2:
1834                                 op, size = ppc64.AMOVH, 2
1835                         }
1836                         // Load
1837                         p := s.Prog(op)
1838                         p.To.Type = obj.TYPE_REG
1839                         p.To.Reg = ppc64.REGTMP
1840                         p.From.Type = obj.TYPE_MEM
1841                         p.From.Reg = srcReg
1842                         p.From.Offset = offset
1843
1844                         // Store
1845                         p = s.Prog(op)
1846                         p.From.Type = obj.TYPE_REG
1847                         p.From.Reg = ppc64.REGTMP
1848                         p.To.Type = obj.TYPE_MEM
1849                         p.To.Reg = dstReg
1850                         p.To.Offset = offset
1851                         rem -= size
1852                         offset += size
1853                 }
1854
1855         case ssa.OpPPC64CALLstatic:
1856                 s.Call(v)
1857
1858         case ssa.OpPPC64CALLtail:
1859                 s.TailCall(v)
1860
1861         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1862                 p := s.Prog(ppc64.AMOVD)
1863                 p.From.Type = obj.TYPE_REG
1864                 p.From.Reg = v.Args[0].Reg()
1865                 p.To.Type = obj.TYPE_REG
1866                 p.To.Reg = ppc64.REG_LR
1867
1868                 if v.Args[0].Reg() != ppc64.REG_R12 {
1869                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1870                 }
1871
1872                 pp := s.Call(v)
1873
1874                 // Convert the call into a blrl with hint this is not a subroutine return.
1875                 // The full bclrl opcode must be specified when passing a hint.
1876                 pp.As = ppc64.ABCL
1877                 pp.From.Type = obj.TYPE_CONST
1878                 pp.From.Offset = ppc64.BO_ALWAYS
1879                 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1880                 pp.To.Reg = ppc64.REG_LR
1881                 pp.AddRestSourceConst(1)
1882
1883                 if ppc64.NeedTOCpointer(base.Ctxt) {
1884                         // When compiling Go into PIC, the function we just
1885                         // called via pointer might have been implemented in
1886                         // a separate module and so overwritten the TOC
1887                         // pointer in R2; reload it.
1888                         q := s.Prog(ppc64.AMOVD)
1889                         q.From.Type = obj.TYPE_MEM
1890                         q.From.Offset = 24
1891                         q.From.Reg = ppc64.REGSP
1892                         q.To.Type = obj.TYPE_REG
1893                         q.To.Reg = ppc64.REG_R2
1894                 }
1895
1896         case ssa.OpPPC64LoweredWB:
1897                 p := s.Prog(obj.ACALL)
1898                 p.To.Type = obj.TYPE_MEM
1899                 p.To.Name = obj.NAME_EXTERN
1900                 // AuxInt encodes how many buffer entries we need.
1901                 p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
1902
1903         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1904                 p := s.Prog(obj.ACALL)
1905                 p.To.Type = obj.TYPE_MEM
1906                 p.To.Name = obj.NAME_EXTERN
1907                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1908                 s.UseArgs(16) // space used in callee args area by assembly stubs
1909
1910         case ssa.OpPPC64LoweredNilCheck:
1911                 if buildcfg.GOOS == "aix" {
1912                         // CMP Rarg0, R0
1913                         // BNE 2(PC)
1914                         // STW R0, 0(R0)
1915                         // NOP (so the BNE has somewhere to land)
1916
1917                         // CMP Rarg0, R0
1918                         p := s.Prog(ppc64.ACMP)
1919                         p.From.Type = obj.TYPE_REG
1920                         p.From.Reg = v.Args[0].Reg()
1921                         p.To.Type = obj.TYPE_REG
1922                         p.To.Reg = ppc64.REG_R0
1923
1924                         // BNE 2(PC)
1925                         p2 := s.Prog(ppc64.ABNE)
1926                         p2.To.Type = obj.TYPE_BRANCH
1927
1928                         // STW R0, 0(R0)
1929                         // Write at 0 is forbidden and will trigger a SIGSEGV
1930                         p = s.Prog(ppc64.AMOVW)
1931                         p.From.Type = obj.TYPE_REG
1932                         p.From.Reg = ppc64.REG_R0
1933                         p.To.Type = obj.TYPE_MEM
1934                         p.To.Reg = ppc64.REG_R0
1935
1936                         // NOP (so the BNE has somewhere to land)
1937                         nop := s.Prog(obj.ANOP)
1938                         p2.To.SetTarget(nop)
1939
1940                 } else {
1941                         // Issue a load which will fault if arg is nil.
1942                         p := s.Prog(ppc64.AMOVBZ)
1943                         p.From.Type = obj.TYPE_MEM
1944                         p.From.Reg = v.Args[0].Reg()
1945                         ssagen.AddAux(&p.From, v)
1946                         p.To.Type = obj.TYPE_REG
1947                         p.To.Reg = ppc64.REGTMP
1948                 }
1949                 if logopt.Enabled() {
1950                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1951                 }
1952                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1953                         base.WarnfAt(v.Pos, "generated nil check")
1954                 }
1955
1956         // These should be resolved by rules and not make it here.
1957         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1958                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1959                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1960                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1961         case ssa.OpPPC64InvertFlags:
1962                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1963         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1964                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1965         case ssa.OpClobber, ssa.OpClobberReg:
1966                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1967         default:
1968                 v.Fatalf("genValue not implemented: %s", v.LongString())
1969         }
1970 }
1971
1972 var blockJump = [...]struct {
1973         asm, invasm     obj.As
1974         asmeq, invasmun bool
1975 }{
1976         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1977         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1978
1979         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1980         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1981         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1982         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1983
1984         // TODO: need to work FP comparisons into block jumps
1985         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1986         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1987         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1988         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1989 }
1990
1991 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1992         switch b.Kind {
1993         case ssa.BlockDefer:
1994                 // defer returns in R3:
1995                 // 0 if we should continue executing
1996                 // 1 if we should jump to deferreturn call
1997                 p := s.Prog(ppc64.ACMP)
1998                 p.From.Type = obj.TYPE_REG
1999                 p.From.Reg = ppc64.REG_R3
2000                 p.To.Type = obj.TYPE_REG
2001                 p.To.Reg = ppc64.REG_R0
2002
2003                 p = s.Prog(ppc64.ABNE)
2004                 p.To.Type = obj.TYPE_BRANCH
2005                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
2006                 if b.Succs[0].Block() != next {
2007                         p := s.Prog(obj.AJMP)
2008                         p.To.Type = obj.TYPE_BRANCH
2009                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2010                 }
2011
2012         case ssa.BlockPlain:
2013                 if b.Succs[0].Block() != next {
2014                         p := s.Prog(obj.AJMP)
2015                         p.To.Type = obj.TYPE_BRANCH
2016                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2017                 }
2018         case ssa.BlockExit, ssa.BlockRetJmp:
2019         case ssa.BlockRet:
2020                 s.Prog(obj.ARET)
2021
2022         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2023                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2024                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2025                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2026                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2027                 jmp := blockJump[b.Kind]
2028                 switch next {
2029                 case b.Succs[0].Block():
2030                         s.Br(jmp.invasm, b.Succs[1].Block())
2031                         if jmp.invasmun {
2032                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2033                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2034                         }
2035                 case b.Succs[1].Block():
2036                         s.Br(jmp.asm, b.Succs[0].Block())
2037                         if jmp.asmeq {
2038                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2039                         }
2040                 default:
2041                         if b.Likely != ssa.BranchUnlikely {
2042                                 s.Br(jmp.asm, b.Succs[0].Block())
2043                                 if jmp.asmeq {
2044                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2045                                 }
2046                                 s.Br(obj.AJMP, b.Succs[1].Block())
2047                         } else {
2048                                 s.Br(jmp.invasm, b.Succs[1].Block())
2049                                 if jmp.invasmun {
2050                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2051                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2052                                 }
2053                                 s.Br(obj.AJMP, b.Succs[0].Block())
2054                         }
2055                 }
2056         default:
2057                 b.Fatalf("branch not implemented: %s", b.LongString())
2058         }
2059 }
2060
2061 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2062         p := s.Prog(loadByType(t))
2063         p.From.Type = obj.TYPE_MEM
2064         p.From.Name = obj.NAME_AUTO
2065         p.From.Sym = n.Linksym()
2066         p.From.Offset = n.FrameOffset() + off
2067         p.To.Type = obj.TYPE_REG
2068         p.To.Reg = reg
2069         return p
2070 }
2071
2072 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2073         p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2074         p.To.Name = obj.NAME_PARAM
2075         p.To.Sym = n.Linksym()
2076         p.Pos = p.Pos.WithNotStmt()
2077         return p
2078 }