]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
all: fix spelling
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/objw"
12         "cmd/compile/internal/ssa"
13         "cmd/compile/internal/ssagen"
14         "cmd/compile/internal/types"
15         "cmd/internal/obj"
16         "cmd/internal/obj/ppc64"
17         "internal/buildcfg"
18         "math"
19         "strings"
20 )
21
// ssaMarkMoves is meant to mark any MOVXconst ops that need to avoid
// clobbering flags. It is currently a no-op: the whole implementation below
// is commented out, so neither s nor b is read or modified.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	// Disabled implementation, kept for reference. It walks the block's values
	// backwards while tracking whether flags are live, and tags MOVDconst
	// values seen while flags are live.
	// NOTE(review): the condition on the MOVDconst line repeats "v.Op ==" and
	// likely would not compile as written if this code were re-enabled.
	//
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}
44
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
47         if t.IsFloat() {
48                 switch t.Size() {
49                 case 4:
50                         return ppc64.AFMOVS
51                 case 8:
52                         return ppc64.AFMOVD
53                 }
54         } else {
55                 switch t.Size() {
56                 case 1:
57                         if t.IsSigned() {
58                                 return ppc64.AMOVB
59                         } else {
60                                 return ppc64.AMOVBZ
61                         }
62                 case 2:
63                         if t.IsSigned() {
64                                 return ppc64.AMOVH
65                         } else {
66                                 return ppc64.AMOVHZ
67                         }
68                 case 4:
69                         if t.IsSigned() {
70                                 return ppc64.AMOVW
71                         } else {
72                                 return ppc64.AMOVWZ
73                         }
74                 case 8:
75                         return ppc64.AMOVD
76                 }
77         }
78         panic("bad load type")
79 }
80
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
83         if t.IsFloat() {
84                 switch t.Size() {
85                 case 4:
86                         return ppc64.AFMOVS
87                 case 8:
88                         return ppc64.AFMOVD
89                 }
90         } else {
91                 switch t.Size() {
92                 case 1:
93                         return ppc64.AMOVB
94                 case 2:
95                         return ppc64.AMOVH
96                 case 4:
97                         return ppc64.AMOVW
98                 case 8:
99                         return ppc64.AMOVD
100                 }
101         }
102         panic("bad store type")
103 }
104
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
106         switch v.Op {
107         case ssa.OpCopy:
108                 t := v.Type
109                 if t.IsMemory() {
110                         return
111                 }
112                 x := v.Args[0].Reg()
113                 y := v.Reg()
114                 if x != y {
115                         rt := obj.TYPE_REG
116                         op := ppc64.AMOVD
117
118                         if t.IsFloat() {
119                                 op = ppc64.AFMOVD
120                         }
121                         p := s.Prog(op)
122                         p.From.Type = rt
123                         p.From.Reg = x
124                         p.To.Type = rt
125                         p.To.Reg = y
126                 }
127
128         case ssa.OpPPC64LoweredMuluhilo:
129                 // MULHDU       Rarg1, Rarg0, Reg0
130                 // MULLD        Rarg1, Rarg0, Reg1
131                 r0 := v.Args[0].Reg()
132                 r1 := v.Args[1].Reg()
133                 p := s.Prog(ppc64.AMULHDU)
134                 p.From.Type = obj.TYPE_REG
135                 p.From.Reg = r1
136                 p.Reg = r0
137                 p.To.Type = obj.TYPE_REG
138                 p.To.Reg = v.Reg0()
139                 p1 := s.Prog(ppc64.AMULLD)
140                 p1.From.Type = obj.TYPE_REG
141                 p1.From.Reg = r1
142                 p1.Reg = r0
143                 p1.To.Type = obj.TYPE_REG
144                 p1.To.Reg = v.Reg1()
145
146         case ssa.OpPPC64LoweredAtomicAnd8,
147                 ssa.OpPPC64LoweredAtomicAnd32,
148                 ssa.OpPPC64LoweredAtomicOr8,
149                 ssa.OpPPC64LoweredAtomicOr32:
150                 // LWSYNC
151                 // LBAR/LWAR    (Rarg0), Rtmp
152                 // AND/OR       Rarg1, Rtmp
153                 // STBCCC/STWCCC Rtmp, (Rarg0)
154                 // BNE          -3(PC)
155                 ld := ppc64.ALBAR
156                 st := ppc64.ASTBCCC
157                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
158                         ld = ppc64.ALWAR
159                         st = ppc64.ASTWCCC
160                 }
161                 r0 := v.Args[0].Reg()
162                 r1 := v.Args[1].Reg()
163                 // LWSYNC - Assuming shared data not write-through-required nor
164                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
165                 plwsync := s.Prog(ppc64.ALWSYNC)
166                 plwsync.To.Type = obj.TYPE_NONE
167                 // LBAR or LWAR
168                 p := s.Prog(ld)
169                 p.From.Type = obj.TYPE_MEM
170                 p.From.Reg = r0
171                 p.To.Type = obj.TYPE_REG
172                 p.To.Reg = ppc64.REGTMP
173                 // AND/OR reg1,out
174                 p1 := s.Prog(v.Op.Asm())
175                 p1.From.Type = obj.TYPE_REG
176                 p1.From.Reg = r1
177                 p1.To.Type = obj.TYPE_REG
178                 p1.To.Reg = ppc64.REGTMP
179                 // STBCCC or STWCCC
180                 p2 := s.Prog(st)
181                 p2.From.Type = obj.TYPE_REG
182                 p2.From.Reg = ppc64.REGTMP
183                 p2.To.Type = obj.TYPE_MEM
184                 p2.To.Reg = r0
185                 p2.RegTo2 = ppc64.REGTMP
186                 // BNE retry
187                 p3 := s.Prog(ppc64.ABNE)
188                 p3.To.Type = obj.TYPE_BRANCH
189                 p3.To.SetTarget(p)
190
191         case ssa.OpPPC64LoweredAtomicAdd32,
192                 ssa.OpPPC64LoweredAtomicAdd64:
193                 // LWSYNC
194                 // LDAR/LWAR    (Rarg0), Rout
195                 // ADD          Rarg1, Rout
196                 // STDCCC/STWCCC Rout, (Rarg0)
197                 // BNE         -3(PC)
198                 // MOVWZ        Rout,Rout (if Add32)
199                 ld := ppc64.ALDAR
200                 st := ppc64.ASTDCCC
201                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
202                         ld = ppc64.ALWAR
203                         st = ppc64.ASTWCCC
204                 }
205                 r0 := v.Args[0].Reg()
206                 r1 := v.Args[1].Reg()
207                 out := v.Reg0()
208                 // LWSYNC - Assuming shared data not write-through-required nor
209                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
210                 plwsync := s.Prog(ppc64.ALWSYNC)
211                 plwsync.To.Type = obj.TYPE_NONE
212                 // LDAR or LWAR
213                 p := s.Prog(ld)
214                 p.From.Type = obj.TYPE_MEM
215                 p.From.Reg = r0
216                 p.To.Type = obj.TYPE_REG
217                 p.To.Reg = out
218                 // ADD reg1,out
219                 p1 := s.Prog(ppc64.AADD)
220                 p1.From.Type = obj.TYPE_REG
221                 p1.From.Reg = r1
222                 p1.To.Reg = out
223                 p1.To.Type = obj.TYPE_REG
224                 // STDCCC or STWCCC
225                 p3 := s.Prog(st)
226                 p3.From.Type = obj.TYPE_REG
227                 p3.From.Reg = out
228                 p3.To.Type = obj.TYPE_MEM
229                 p3.To.Reg = r0
230                 // BNE retry
231                 p4 := s.Prog(ppc64.ABNE)
232                 p4.To.Type = obj.TYPE_BRANCH
233                 p4.To.SetTarget(p)
234
235                 // Ensure a 32 bit result
236                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
237                         p5 := s.Prog(ppc64.AMOVWZ)
238                         p5.To.Type = obj.TYPE_REG
239                         p5.To.Reg = out
240                         p5.From.Type = obj.TYPE_REG
241                         p5.From.Reg = out
242                 }
243
244         case ssa.OpPPC64LoweredAtomicExchange32,
245                 ssa.OpPPC64LoweredAtomicExchange64:
246                 // LWSYNC
247                 // LDAR/LWAR    (Rarg0), Rout
248                 // STDCCC/STWCCC Rout, (Rarg0)
249                 // BNE         -2(PC)
250                 // ISYNC
251                 ld := ppc64.ALDAR
252                 st := ppc64.ASTDCCC
253                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
254                         ld = ppc64.ALWAR
255                         st = ppc64.ASTWCCC
256                 }
257                 r0 := v.Args[0].Reg()
258                 r1 := v.Args[1].Reg()
259                 out := v.Reg0()
260                 // LWSYNC - Assuming shared data not write-through-required nor
261                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
262                 plwsync := s.Prog(ppc64.ALWSYNC)
263                 plwsync.To.Type = obj.TYPE_NONE
264                 // LDAR or LWAR
265                 p := s.Prog(ld)
266                 p.From.Type = obj.TYPE_MEM
267                 p.From.Reg = r0
268                 p.To.Type = obj.TYPE_REG
269                 p.To.Reg = out
270                 // STDCCC or STWCCC
271                 p1 := s.Prog(st)
272                 p1.From.Type = obj.TYPE_REG
273                 p1.From.Reg = r1
274                 p1.To.Type = obj.TYPE_MEM
275                 p1.To.Reg = r0
276                 // BNE retry
277                 p2 := s.Prog(ppc64.ABNE)
278                 p2.To.Type = obj.TYPE_BRANCH
279                 p2.To.SetTarget(p)
280                 // ISYNC
281                 pisync := s.Prog(ppc64.AISYNC)
282                 pisync.To.Type = obj.TYPE_NONE
283
284         case ssa.OpPPC64LoweredAtomicLoad8,
285                 ssa.OpPPC64LoweredAtomicLoad32,
286                 ssa.OpPPC64LoweredAtomicLoad64,
287                 ssa.OpPPC64LoweredAtomicLoadPtr:
288                 // SYNC
289                 // MOVBZ/MOVD/MOVWZ (Rarg0), Rout
290                 // CMP Rout,Rout
291                 // BNE 1(PC)
292                 // ISYNC
293                 ld := ppc64.AMOVD
294                 cmp := ppc64.ACMP
295                 switch v.Op {
296                 case ssa.OpPPC64LoweredAtomicLoad8:
297                         ld = ppc64.AMOVBZ
298                 case ssa.OpPPC64LoweredAtomicLoad32:
299                         ld = ppc64.AMOVWZ
300                         cmp = ppc64.ACMPW
301                 }
302                 arg0 := v.Args[0].Reg()
303                 out := v.Reg0()
304                 // SYNC when AuxInt == 1; otherwise, load-acquire
305                 if v.AuxInt == 1 {
306                         psync := s.Prog(ppc64.ASYNC)
307                         psync.To.Type = obj.TYPE_NONE
308                 }
309                 // Load
310                 p := s.Prog(ld)
311                 p.From.Type = obj.TYPE_MEM
312                 p.From.Reg = arg0
313                 p.To.Type = obj.TYPE_REG
314                 p.To.Reg = out
315                 // CMP
316                 p1 := s.Prog(cmp)
317                 p1.From.Type = obj.TYPE_REG
318                 p1.From.Reg = out
319                 p1.To.Type = obj.TYPE_REG
320                 p1.To.Reg = out
321                 // BNE
322                 p2 := s.Prog(ppc64.ABNE)
323                 p2.To.Type = obj.TYPE_BRANCH
324                 // ISYNC
325                 pisync := s.Prog(ppc64.AISYNC)
326                 pisync.To.Type = obj.TYPE_NONE
327                 p2.To.SetTarget(pisync)
328
329         case ssa.OpPPC64LoweredAtomicStore8,
330                 ssa.OpPPC64LoweredAtomicStore32,
331                 ssa.OpPPC64LoweredAtomicStore64:
332                 // SYNC or LWSYNC
333                 // MOVB/MOVW/MOVD arg1,(arg0)
334                 st := ppc64.AMOVD
335                 switch v.Op {
336                 case ssa.OpPPC64LoweredAtomicStore8:
337                         st = ppc64.AMOVB
338                 case ssa.OpPPC64LoweredAtomicStore32:
339                         st = ppc64.AMOVW
340                 }
341                 arg0 := v.Args[0].Reg()
342                 arg1 := v.Args[1].Reg()
343                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
344                 // SYNC
345                 syncOp := ppc64.ASYNC
346                 if v.AuxInt == 0 {
347                         syncOp = ppc64.ALWSYNC
348                 }
349                 psync := s.Prog(syncOp)
350                 psync.To.Type = obj.TYPE_NONE
351                 // Store
352                 p := s.Prog(st)
353                 p.To.Type = obj.TYPE_MEM
354                 p.To.Reg = arg0
355                 p.From.Type = obj.TYPE_REG
356                 p.From.Reg = arg1
357
358         case ssa.OpPPC64LoweredAtomicCas64,
359                 ssa.OpPPC64LoweredAtomicCas32:
360                 // LWSYNC
361                 // loop:
362                 // LDAR        (Rarg0), MutexHint, Rtmp
363                 // CMP         Rarg1, Rtmp
364                 // BNE         fail
365                 // STDCCC      Rarg2, (Rarg0)
366                 // BNE         loop
367                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
368                 // MOVD        $1, Rout
369                 // BR          end
370                 // fail:
371                 // MOVD        $0, Rout
372                 // end:
373                 ld := ppc64.ALDAR
374                 st := ppc64.ASTDCCC
375                 cmp := ppc64.ACMP
376                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
377                         ld = ppc64.ALWAR
378                         st = ppc64.ASTWCCC
379                         cmp = ppc64.ACMPW
380                 }
381                 r0 := v.Args[0].Reg()
382                 r1 := v.Args[1].Reg()
383                 r2 := v.Args[2].Reg()
384                 out := v.Reg0()
385                 // LWSYNC - Assuming shared data not write-through-required nor
386                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
387                 plwsync1 := s.Prog(ppc64.ALWSYNC)
388                 plwsync1.To.Type = obj.TYPE_NONE
389                 // LDAR or LWAR
390                 p := s.Prog(ld)
391                 p.From.Type = obj.TYPE_MEM
392                 p.From.Reg = r0
393                 p.To.Type = obj.TYPE_REG
394                 p.To.Reg = ppc64.REGTMP
395                 // If it is a Compare-and-Swap-Release operation, set the EH field with
396                 // the release hint.
397                 if v.AuxInt == 0 {
398                         p.SetFrom3Const(0)
399                 }
400                 // CMP reg1,reg2
401                 p1 := s.Prog(cmp)
402                 p1.From.Type = obj.TYPE_REG
403                 p1.From.Reg = r1
404                 p1.To.Reg = ppc64.REGTMP
405                 p1.To.Type = obj.TYPE_REG
406                 // BNE cas_fail
407                 p2 := s.Prog(ppc64.ABNE)
408                 p2.To.Type = obj.TYPE_BRANCH
409                 // STDCCC or STWCCC
410                 p3 := s.Prog(st)
411                 p3.From.Type = obj.TYPE_REG
412                 p3.From.Reg = r2
413                 p3.To.Type = obj.TYPE_MEM
414                 p3.To.Reg = r0
415                 // BNE retry
416                 p4 := s.Prog(ppc64.ABNE)
417                 p4.To.Type = obj.TYPE_BRANCH
418                 p4.To.SetTarget(p)
419                 // LWSYNC - Assuming shared data not write-through-required nor
420                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
421                 // If the operation is a CAS-Release, then synchronization is not necessary.
422                 if v.AuxInt != 0 {
423                         plwsync2 := s.Prog(ppc64.ALWSYNC)
424                         plwsync2.To.Type = obj.TYPE_NONE
425                 }
426                 // return true
427                 p5 := s.Prog(ppc64.AMOVD)
428                 p5.From.Type = obj.TYPE_CONST
429                 p5.From.Offset = 1
430                 p5.To.Type = obj.TYPE_REG
431                 p5.To.Reg = out
432                 // BR done
433                 p6 := s.Prog(obj.AJMP)
434                 p6.To.Type = obj.TYPE_BRANCH
435                 // return false
436                 p7 := s.Prog(ppc64.AMOVD)
437                 p7.From.Type = obj.TYPE_CONST
438                 p7.From.Offset = 0
439                 p7.To.Type = obj.TYPE_REG
440                 p7.To.Reg = out
441                 p2.To.SetTarget(p7)
442                 // done (label)
443                 p8 := s.Prog(obj.ANOP)
444                 p6.To.SetTarget(p8)
445
446         case ssa.OpPPC64LoweredPubBarrier:
447                 // LWSYNC
448                 s.Prog(v.Op.Asm())
449
450         case ssa.OpPPC64LoweredGetClosurePtr:
451                 // Closure pointer is R11 (already)
452                 ssagen.CheckLoweredGetClosurePtr(v)
453
454         case ssa.OpPPC64LoweredGetCallerSP:
455                 // caller's SP is FixedFrameSize below the address of the first arg
456                 p := s.Prog(ppc64.AMOVD)
457                 p.From.Type = obj.TYPE_ADDR
458                 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
459                 p.From.Name = obj.NAME_PARAM
460                 p.To.Type = obj.TYPE_REG
461                 p.To.Reg = v.Reg()
462
463         case ssa.OpPPC64LoweredGetCallerPC:
464                 p := s.Prog(obj.AGETCALLERPC)
465                 p.To.Type = obj.TYPE_REG
466                 p.To.Reg = v.Reg()
467
468         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
469                 // input is already rounded
470
471         case ssa.OpLoadReg:
472                 loadOp := loadByType(v.Type)
473                 p := s.Prog(loadOp)
474                 ssagen.AddrAuto(&p.From, v.Args[0])
475                 p.To.Type = obj.TYPE_REG
476                 p.To.Reg = v.Reg()
477
478         case ssa.OpStoreReg:
479                 storeOp := storeByType(v.Type)
480                 p := s.Prog(storeOp)
481                 p.From.Type = obj.TYPE_REG
482                 p.From.Reg = v.Args[0].Reg()
483                 ssagen.AddrAuto(&p.To, v)
484
485         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
486                 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
487                 // The loop only runs once.
488                 for _, a := range v.Block.Func.RegArgs {
489                         // Pass the spill/unspill information along to the assembler, offset by size of
490                         // the saved LR slot.
491                         addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
492                         s.FuncInfo().AddSpill(
493                                 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
494                 }
495                 v.Block.Func.RegArgs = nil
496
497                 ssagen.CheckArgReg(v)
498
499         case ssa.OpPPC64DIVD:
500                 // For now,
501                 //
502                 // cmp arg1, -1
503                 // be  ahead
504                 // v = arg0 / arg1
505                 // b over
506                 // ahead: v = - arg0
507                 // over: nop
508                 r := v.Reg()
509                 r0 := v.Args[0].Reg()
510                 r1 := v.Args[1].Reg()
511
512                 p := s.Prog(ppc64.ACMP)
513                 p.From.Type = obj.TYPE_REG
514                 p.From.Reg = r1
515                 p.To.Type = obj.TYPE_CONST
516                 p.To.Offset = -1
517
518                 pbahead := s.Prog(ppc64.ABEQ)
519                 pbahead.To.Type = obj.TYPE_BRANCH
520
521                 p = s.Prog(v.Op.Asm())
522                 p.From.Type = obj.TYPE_REG
523                 p.From.Reg = r1
524                 p.Reg = r0
525                 p.To.Type = obj.TYPE_REG
526                 p.To.Reg = r
527
528                 pbover := s.Prog(obj.AJMP)
529                 pbover.To.Type = obj.TYPE_BRANCH
530
531                 p = s.Prog(ppc64.ANEG)
532                 p.To.Type = obj.TYPE_REG
533                 p.To.Reg = r
534                 p.From.Type = obj.TYPE_REG
535                 p.From.Reg = r0
536                 pbahead.To.SetTarget(p)
537
538                 p = s.Prog(obj.ANOP)
539                 pbover.To.SetTarget(p)
540
541         case ssa.OpPPC64DIVW:
542                 // word-width version of above
543                 r := v.Reg()
544                 r0 := v.Args[0].Reg()
545                 r1 := v.Args[1].Reg()
546
547                 p := s.Prog(ppc64.ACMPW)
548                 p.From.Type = obj.TYPE_REG
549                 p.From.Reg = r1
550                 p.To.Type = obj.TYPE_CONST
551                 p.To.Offset = -1
552
553                 pbahead := s.Prog(ppc64.ABEQ)
554                 pbahead.To.Type = obj.TYPE_BRANCH
555
556                 p = s.Prog(v.Op.Asm())
557                 p.From.Type = obj.TYPE_REG
558                 p.From.Reg = r1
559                 p.Reg = r0
560                 p.To.Type = obj.TYPE_REG
561                 p.To.Reg = r
562
563                 pbover := s.Prog(obj.AJMP)
564                 pbover.To.Type = obj.TYPE_BRANCH
565
566                 p = s.Prog(ppc64.ANEG)
567                 p.To.Type = obj.TYPE_REG
568                 p.To.Reg = r
569                 p.From.Type = obj.TYPE_REG
570                 p.From.Reg = r0
571                 pbahead.To.SetTarget(p)
572
573                 p = s.Prog(obj.ANOP)
574                 pbover.To.SetTarget(p)
575
576         case ssa.OpPPC64CLRLSLWI:
577                 r := v.Reg()
578                 r1 := v.Args[0].Reg()
579                 shifts := v.AuxInt
580                 p := s.Prog(v.Op.Asm())
581                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
582                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
583                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
584                 p.Reg = r1
585                 p.To.Type = obj.TYPE_REG
586                 p.To.Reg = r
587
588         case ssa.OpPPC64CLRLSLDI:
589                 r := v.Reg()
590                 r1 := v.Args[0].Reg()
591                 shifts := v.AuxInt
592                 p := s.Prog(v.Op.Asm())
593                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
594                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
595                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
596                 p.Reg = r1
597                 p.To.Type = obj.TYPE_REG
598                 p.To.Reg = r
599
600                 // Mask has been set as sh
601         case ssa.OpPPC64RLDICL:
602                 r := v.Reg()
603                 r1 := v.Args[0].Reg()
604                 shifts := v.AuxInt
605                 p := s.Prog(v.Op.Asm())
606                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
607                 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
608                 p.Reg = r1
609                 p.To.Type = obj.TYPE_REG
610                 p.To.Reg = r
611
612         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
613                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
614                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
615                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
616                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
617                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
618                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
619                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
620                 r := v.Reg()
621                 r1 := v.Args[0].Reg()
622                 r2 := v.Args[1].Reg()
623                 p := s.Prog(v.Op.Asm())
624                 p.From.Type = obj.TYPE_REG
625                 p.From.Reg = r2
626                 p.Reg = r1
627                 p.To.Type = obj.TYPE_REG
628                 p.To.Reg = r
629
630         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
631                 r1 := v.Args[0].Reg()
632                 r2 := v.Args[1].Reg()
633                 p := s.Prog(v.Op.Asm())
634                 p.From.Type = obj.TYPE_REG
635                 p.From.Reg = r2
636                 p.Reg = r1
637                 p.To.Type = obj.TYPE_REG
638                 p.To.Reg = ppc64.REGTMP // result is not needed
639
640         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
641                 p := s.Prog(v.Op.Asm())
642                 p.From.Type = obj.TYPE_CONST
643                 p.From.Offset = v.AuxInt
644                 p.Reg = v.Args[0].Reg()
645                 p.To.Type = obj.TYPE_REG
646                 p.To.Reg = v.Reg()
647
648                 // Auxint holds encoded rotate + mask
649         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
650                 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
651                 p := s.Prog(v.Op.Asm())
652                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
653                 p.Reg = v.Args[0].Reg()
654                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
655                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
656
657                 // Auxint holds mask
658         case ssa.OpPPC64RLWNM:
659                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
660                 p := s.Prog(v.Op.Asm())
661                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
662                 p.Reg = v.Args[0].Reg()
663                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
664                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
665
666         case ssa.OpPPC64MADDLD:
667                 r := v.Reg()
668                 r1 := v.Args[0].Reg()
669                 r2 := v.Args[1].Reg()
670                 r3 := v.Args[2].Reg()
671                 // r = r1*r2 ± r3
672                 p := s.Prog(v.Op.Asm())
673                 p.From.Type = obj.TYPE_REG
674                 p.From.Reg = r1
675                 p.Reg = r2
676                 p.SetFrom3Reg(r3)
677                 p.To.Type = obj.TYPE_REG
678                 p.To.Reg = r
679
680         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
681                 r := v.Reg()
682                 r1 := v.Args[0].Reg()
683                 r2 := v.Args[1].Reg()
684                 r3 := v.Args[2].Reg()
685                 // r = r1*r2 ± r3
686                 p := s.Prog(v.Op.Asm())
687                 p.From.Type = obj.TYPE_REG
688                 p.From.Reg = r1
689                 p.Reg = r3
690                 p.SetFrom3Reg(r2)
691                 p.To.Type = obj.TYPE_REG
692                 p.To.Reg = r
693
694         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
695                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
696                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
697                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
698                 r := v.Reg()
699                 p := s.Prog(v.Op.Asm())
700                 p.To.Type = obj.TYPE_REG
701                 p.To.Reg = r
702                 p.From.Type = obj.TYPE_REG
703                 p.From.Reg = v.Args[0].Reg()
704
705         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
706                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
707                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
708                 p := s.Prog(v.Op.Asm())
709                 p.Reg = v.Args[0].Reg()
710                 p.From.Type = obj.TYPE_CONST
711                 p.From.Offset = v.AuxInt
712                 p.To.Type = obj.TYPE_REG
713                 p.To.Reg = v.Reg()
714
715         case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
716                 r := v.Reg0() // CA is the first, implied argument.
717                 r1 := v.Args[0].Reg()
718                 r2 := v.Args[1].Reg()
719                 p := s.Prog(v.Op.Asm())
720                 p.From.Type = obj.TYPE_REG
721                 p.From.Reg = r2
722                 p.Reg = r1
723                 p.To.Type = obj.TYPE_REG
724                 p.To.Reg = r
725
726         case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
727                 p := s.Prog(v.Op.Asm())
728                 p.From.Type = obj.TYPE_REG
729                 p.From.Reg = ppc64.REG_R0
730                 p.To.Type = obj.TYPE_REG
731                 p.To.Reg = v.Reg()
732
733         case ssa.OpPPC64ADDCconst:
734                 p := s.Prog(v.Op.Asm())
735                 p.Reg = v.Args[0].Reg()
736                 p.From.Type = obj.TYPE_CONST
737                 p.From.Offset = v.AuxInt
738                 p.To.Type = obj.TYPE_REG
739                 // Output is a pair, the second is the CA, which is implied.
740                 p.To.Reg = v.Reg0()
741
742         case ssa.OpPPC64SUBCconst:
743                 p := s.Prog(v.Op.Asm())
744                 p.SetFrom3Const(v.AuxInt)
745                 p.From.Type = obj.TYPE_REG
746                 p.From.Reg = v.Args[0].Reg()
747                 p.To.Type = obj.TYPE_REG
748                 p.To.Reg = v.Reg0()
749
750         case ssa.OpPPC64SUBFCconst:
751                 p := s.Prog(v.Op.Asm())
752                 p.SetFrom3Const(v.AuxInt)
753                 p.From.Type = obj.TYPE_REG
754                 p.From.Reg = v.Args[0].Reg()
755                 p.To.Type = obj.TYPE_REG
756                 p.To.Reg = v.Reg()
757
758         case ssa.OpPPC64ANDCCconst:
759                 p := s.Prog(v.Op.Asm())
760                 p.Reg = v.Args[0].Reg()
761                 p.From.Type = obj.TYPE_CONST
762                 p.From.Offset = v.AuxInt
763                 p.To.Type = obj.TYPE_REG
764                 p.To.Reg = ppc64.REGTMP // discard result
765
766         case ssa.OpPPC64MOVDaddr:
767                 switch v.Aux.(type) {
768                 default:
769                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
770                 case nil:
771                         // If aux offset and aux int are both 0, and the same
772                         // input and output regs are used, no instruction
773                         // needs to be generated, since it would just be
774                         // addi rx, rx, 0.
775                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
776                                 p := s.Prog(ppc64.AMOVD)
777                                 p.From.Type = obj.TYPE_ADDR
778                                 p.From.Reg = v.Args[0].Reg()
779                                 p.From.Offset = v.AuxInt
780                                 p.To.Type = obj.TYPE_REG
781                                 p.To.Reg = v.Reg()
782                         }
783
784                 case *obj.LSym, ir.Node:
785                         p := s.Prog(ppc64.AMOVD)
786                         p.From.Type = obj.TYPE_ADDR
787                         p.From.Reg = v.Args[0].Reg()
788                         p.To.Type = obj.TYPE_REG
789                         p.To.Reg = v.Reg()
790                         ssagen.AddAux(&p.From, v)
791
792                 }
793
794         case ssa.OpPPC64MOVDconst:
795                 p := s.Prog(v.Op.Asm())
796                 p.From.Type = obj.TYPE_CONST
797                 p.From.Offset = v.AuxInt
798                 p.To.Type = obj.TYPE_REG
799                 p.To.Reg = v.Reg()
800
801         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
802                 p := s.Prog(v.Op.Asm())
803                 p.From.Type = obj.TYPE_FCONST
804                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
805                 p.To.Type = obj.TYPE_REG
806                 p.To.Reg = v.Reg()
807
808         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
809                 p := s.Prog(v.Op.Asm())
810                 p.From.Type = obj.TYPE_REG
811                 p.From.Reg = v.Args[0].Reg()
812                 p.To.Type = obj.TYPE_REG
813                 p.To.Reg = v.Args[1].Reg()
814
815         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
816                 p := s.Prog(v.Op.Asm())
817                 p.From.Type = obj.TYPE_REG
818                 p.From.Reg = v.Args[0].Reg()
819                 p.To.Type = obj.TYPE_CONST
820                 p.To.Offset = v.AuxInt
821
822         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
823                 // Shift in register to required size
824                 p := s.Prog(v.Op.Asm())
825                 p.From.Type = obj.TYPE_REG
826                 p.From.Reg = v.Args[0].Reg()
827                 p.To.Reg = v.Reg()
828                 p.To.Type = obj.TYPE_REG
829
830         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
831
832                 // MOVDload and MOVWload are DS form instructions that are restricted to
833                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
834                 // then the address of the symbol to be loaded is computed (base + offset)
835                 // and used as the new base register and the offset field in the instruction
836                 // can be set to zero.
837
838                 // This same problem can happen with gostrings since the final offset is not
839                 // known yet, but could be unaligned after the relocation is resolved.
840                 // So gostrings are handled the same way.
841
842                 // This allows the MOVDload and MOVWload to be generated in more cases and
843                 // eliminates some offset and alignment checking in the rules file.
844
845                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
846                 ssagen.AddAux(&fromAddr, v)
847
848                 genAddr := false
849
850                 switch fromAddr.Name {
851                 case obj.NAME_EXTERN, obj.NAME_STATIC:
852                         // Special case for a rule that combines the bytes of a gostring.
853                         // The v alignment might seem OK, but we don't want to load it
854                         // using an offset because relocation comes later.
855                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
856                 default:
857                         genAddr = fromAddr.Offset%4 != 0
858                 }
859                 if genAddr {
860                         // Load full address into the temp register.
861                         p := s.Prog(ppc64.AMOVD)
862                         p.From.Type = obj.TYPE_ADDR
863                         p.From.Reg = v.Args[0].Reg()
864                         ssagen.AddAux(&p.From, v)
865                         // Load target using temp as base register
866                         // and offset zero. Setting NAME_NONE
867                         // prevents any extra offsets from being
868                         // added.
869                         p.To.Type = obj.TYPE_REG
870                         p.To.Reg = ppc64.REGTMP
871                         fromAddr.Reg = ppc64.REGTMP
872                         // Clear the offset field and other
873                         // information that might be used
874                         // by the assembler to add to the
875                         // final offset value.
876                         fromAddr.Offset = 0
877                         fromAddr.Name = obj.NAME_NONE
878                         fromAddr.Sym = nil
879                 }
880                 p := s.Prog(v.Op.Asm())
881                 p.From = fromAddr
882                 p.To.Type = obj.TYPE_REG
883                 p.To.Reg = v.Reg()
884                 break
885
886         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
887                 p := s.Prog(v.Op.Asm())
888                 p.From.Type = obj.TYPE_MEM
889                 p.From.Reg = v.Args[0].Reg()
890                 ssagen.AddAux(&p.From, v)
891                 p.To.Type = obj.TYPE_REG
892                 p.To.Reg = v.Reg()
893
894         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
895                 p := s.Prog(v.Op.Asm())
896                 p.From.Type = obj.TYPE_MEM
897                 p.From.Reg = v.Args[0].Reg()
898                 p.To.Type = obj.TYPE_REG
899                 p.To.Reg = v.Reg()
900
901         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
902                 p := s.Prog(v.Op.Asm())
903                 p.To.Type = obj.TYPE_MEM
904                 p.To.Reg = v.Args[0].Reg()
905                 p.From.Type = obj.TYPE_REG
906                 p.From.Reg = v.Args[1].Reg()
907
908         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
909                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
910                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
911                 p := s.Prog(v.Op.Asm())
912                 p.From.Type = obj.TYPE_MEM
913                 p.From.Reg = v.Args[0].Reg()
914                 p.From.Index = v.Args[1].Reg()
915                 p.To.Type = obj.TYPE_REG
916                 p.To.Reg = v.Reg()
917
918         case ssa.OpPPC64DCBT:
919                 p := s.Prog(v.Op.Asm())
920                 p.From.Type = obj.TYPE_MEM
921                 p.From.Reg = v.Args[0].Reg()
922                 p.To.Type = obj.TYPE_CONST
923                 p.To.Offset = v.AuxInt
924
925         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
926                 p := s.Prog(v.Op.Asm())
927                 p.From.Type = obj.TYPE_REG
928                 p.From.Reg = ppc64.REGZERO
929                 p.To.Type = obj.TYPE_MEM
930                 p.To.Reg = v.Args[0].Reg()
931                 ssagen.AddAux(&p.To, v)
932
933         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
934
935                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
936                 // to offset values that are a multiple of 4. If the offset field is not a
937                 // multiple of 4, then the full address of the store target is computed (base +
938                 // offset) and used as the new base register and the offset in the instruction
939                 // is set to 0.
940
941                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
942                 // and prevents checking of the offset value and alignment in the rules.
943
944                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
945                 ssagen.AddAux(&toAddr, v)
946
947                 if toAddr.Offset%4 != 0 {
948                         p := s.Prog(ppc64.AMOVD)
949                         p.From.Type = obj.TYPE_ADDR
950                         p.From.Reg = v.Args[0].Reg()
951                         ssagen.AddAux(&p.From, v)
952                         p.To.Type = obj.TYPE_REG
953                         p.To.Reg = ppc64.REGTMP
954                         toAddr.Reg = ppc64.REGTMP
955                         // Clear the offset field and other
956                         // information that might be used
957                         // by the assembler to add to the
958                         // final offset value.
959                         toAddr.Offset = 0
960                         toAddr.Name = obj.NAME_NONE
961                         toAddr.Sym = nil
962                 }
963                 p := s.Prog(v.Op.Asm())
964                 p.To = toAddr
965                 p.From.Type = obj.TYPE_REG
966                 if v.Op == ssa.OpPPC64MOVDstorezero {
967                         p.From.Reg = ppc64.REGZERO
968                 } else {
969                         p.From.Reg = v.Args[1].Reg()
970                 }
971
972         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
973                 p := s.Prog(v.Op.Asm())
974                 p.From.Type = obj.TYPE_REG
975                 p.From.Reg = v.Args[1].Reg()
976                 p.To.Type = obj.TYPE_MEM
977                 p.To.Reg = v.Args[0].Reg()
978                 ssagen.AddAux(&p.To, v)
979
980         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
981                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
982                 ssa.OpPPC64MOVHBRstoreidx:
983                 p := s.Prog(v.Op.Asm())
984                 p.From.Type = obj.TYPE_REG
985                 p.From.Reg = v.Args[2].Reg()
986                 p.To.Index = v.Args[1].Reg()
987                 p.To.Type = obj.TYPE_MEM
988                 p.To.Reg = v.Args[0].Reg()
989
990         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
991                 // ISEL, ISELB
992                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
993                 // ISEL only accepts 0, 1, 2 condition values but the others can be
994                 // achieved by swapping operand order.
995                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
996                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
997                 // ISELB is used when a boolean result is needed, returning 0 or 1
998                 p := s.Prog(ppc64.AISEL)
999                 p.To.Type = obj.TYPE_REG
1000                 p.To.Reg = v.Reg()
1001                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
1002                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
1003                 if v.Op == ssa.OpPPC64ISEL {
1004                         r.Reg = v.Args[1].Reg()
1005                 }
1006                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
1007                 if v.AuxInt > 3 {
1008                         p.Reg = r.Reg
1009                         p.SetFrom3Reg(v.Args[0].Reg())
1010                 } else {
1011                         p.Reg = v.Args[0].Reg()
1012                         p.SetFrom3(r)
1013                 }
1014                 p.From.Type = obj.TYPE_CONST
1015                 p.From.Offset = v.AuxInt & 3
1016
1017         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
1018                 // The LoweredQuad code generation
1019                 // generates STXV instructions on
1020                 // power9. The Short variation is used
1021                 // if no loop is generated.
1022
1023                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
1024
1025                 // Set up loop counter in CTR, used by BC
1026                 // XXLXOR clears VS32
1027                 //       XXLXOR VS32,VS32,VS32
1028                 //       MOVD len/64,REG_TMP
1029                 //       MOVD REG_TMP,CTR
1030                 //       loop:
1031                 //       STXV VS32,0(R20)
1032                 //       STXV VS32,16(R20)
1033                 //       STXV VS32,32(R20)
1034                 //       STXV VS32,48(R20)
1035                 //       ADD  $64,R20
1036                 //       BC   16, 0, loop
1037
1038                 // Number of 64-byte loop iterations
1039                 ctr := v.AuxInt / 64
1040
1041                 // Remainder bytes
1042                 rem := v.AuxInt % 64
1043
1044                 // Only generate a loop if there is more
1045                 // than 1 iteration.
1046                 if ctr > 1 {
1047                         // Set up VS32 (V0) to hold 0s
1048                         p := s.Prog(ppc64.AXXLXOR)
1049                         p.From.Type = obj.TYPE_REG
1050                         p.From.Reg = ppc64.REG_VS32
1051                         p.To.Type = obj.TYPE_REG
1052                         p.To.Reg = ppc64.REG_VS32
1053                         p.Reg = ppc64.REG_VS32
1054
1055                         // Set up CTR loop counter
1056                         p = s.Prog(ppc64.AMOVD)
1057                         p.From.Type = obj.TYPE_CONST
1058                         p.From.Offset = ctr
1059                         p.To.Type = obj.TYPE_REG
1060                         p.To.Reg = ppc64.REGTMP
1061
1062                         p = s.Prog(ppc64.AMOVD)
1063                         p.From.Type = obj.TYPE_REG
1064                         p.From.Reg = ppc64.REGTMP
1065                         p.To.Type = obj.TYPE_REG
1066                         p.To.Reg = ppc64.REG_CTR
1067
1068                         // Don't generate padding for
1069                         // loops with few iterations.
1070                         if ctr > 3 {
1071                                 p = s.Prog(obj.APCALIGN)
1072                                 p.From.Type = obj.TYPE_CONST
1073                                 p.From.Offset = 16
1074                         }
1075
1076                         // generate 4 STXVs to zero 64 bytes
1077                         var top *obj.Prog
1078
1079                         p = s.Prog(ppc64.ASTXV)
1080                         p.From.Type = obj.TYPE_REG
1081                         p.From.Reg = ppc64.REG_VS32
1082                         p.To.Type = obj.TYPE_MEM
1083                         p.To.Reg = v.Args[0].Reg()
1084
1085                         //  Save the top of loop
1086                         if top == nil {
1087                                 top = p
1088                         }
1089                         p = s.Prog(ppc64.ASTXV)
1090                         p.From.Type = obj.TYPE_REG
1091                         p.From.Reg = ppc64.REG_VS32
1092                         p.To.Type = obj.TYPE_MEM
1093                         p.To.Reg = v.Args[0].Reg()
1094                         p.To.Offset = 16
1095
1096                         p = s.Prog(ppc64.ASTXV)
1097                         p.From.Type = obj.TYPE_REG
1098                         p.From.Reg = ppc64.REG_VS32
1099                         p.To.Type = obj.TYPE_MEM
1100                         p.To.Reg = v.Args[0].Reg()
1101                         p.To.Offset = 32
1102
1103                         p = s.Prog(ppc64.ASTXV)
1104                         p.From.Type = obj.TYPE_REG
1105                         p.From.Reg = ppc64.REG_VS32
1106                         p.To.Type = obj.TYPE_MEM
1107                         p.To.Reg = v.Args[0].Reg()
1108                         p.To.Offset = 48
1109
1110                         // Increment address for the
1111                         // 64 bytes just zeroed.
1112                         p = s.Prog(ppc64.AADD)
1113                         p.Reg = v.Args[0].Reg()
1114                         p.From.Type = obj.TYPE_CONST
1115                         p.From.Offset = 64
1116                         p.To.Type = obj.TYPE_REG
1117                         p.To.Reg = v.Args[0].Reg()
1118
1119                         // Branch back to top of loop
1120                         // based on CTR
1121                         // BC with BO_BCTR generates bdnz
1122                         p = s.Prog(ppc64.ABC)
1123                         p.From.Type = obj.TYPE_CONST
1124                         p.From.Offset = ppc64.BO_BCTR
1125                         p.Reg = ppc64.REG_CR0LT
1126                         p.To.Type = obj.TYPE_BRANCH
1127                         p.To.SetTarget(top)
1128                 }
1129                 // When ctr == 1 the loop was not generated but
1130                 // there are at least 64 bytes to clear, so add
1131                 // that to the remainder to generate the code
1132                 // to clear those doublewords
1133                 if ctr == 1 {
1134                         rem += 64
1135                 }
1136
1137                 // Clear the remainder starting at offset zero
1138                 offset := int64(0)
1139
1140                 if rem >= 16 && ctr <= 1 {
1141                         // If the XXLXOR hasn't already been
1142                         // generated, do it here to initialize
1143                         // VS32 (V0) to 0.
1144                         p := s.Prog(ppc64.AXXLXOR)
1145                         p.From.Type = obj.TYPE_REG
1146                         p.From.Reg = ppc64.REG_VS32
1147                         p.To.Type = obj.TYPE_REG
1148                         p.To.Reg = ppc64.REG_VS32
1149                         p.Reg = ppc64.REG_VS32
1150                 }
1151                 // Generate STXV for 32 or 64
1152                 // bytes.
1153                 for rem >= 32 {
1154                         p := s.Prog(ppc64.ASTXV)
1155                         p.From.Type = obj.TYPE_REG
1156                         p.From.Reg = ppc64.REG_VS32
1157                         p.To.Type = obj.TYPE_MEM
1158                         p.To.Reg = v.Args[0].Reg()
1159                         p.To.Offset = offset
1160
1161                         p = s.Prog(ppc64.ASTXV)
1162                         p.From.Type = obj.TYPE_REG
1163                         p.From.Reg = ppc64.REG_VS32
1164                         p.To.Type = obj.TYPE_MEM
1165                         p.To.Reg = v.Args[0].Reg()
1166                         p.To.Offset = offset + 16
1167                         offset += 32
1168                         rem -= 32
1169                 }
1170                 // Generate 16 bytes
1171                 if rem >= 16 {
1172                         p := s.Prog(ppc64.ASTXV)
1173                         p.From.Type = obj.TYPE_REG
1174                         p.From.Reg = ppc64.REG_VS32
1175                         p.To.Type = obj.TYPE_MEM
1176                         p.To.Reg = v.Args[0].Reg()
1177                         p.To.Offset = offset
1178                         offset += 16
1179                         rem -= 16
1180                 }
1181
1182                 // first clear as many doublewords as possible
1183                 // then clear remaining sizes as available
1184                 for rem > 0 {
1185                         op, size := ppc64.AMOVB, int64(1)
1186                         switch {
1187                         case rem >= 8:
1188                                 op, size = ppc64.AMOVD, 8
1189                         case rem >= 4:
1190                                 op, size = ppc64.AMOVW, 4
1191                         case rem >= 2:
1192                                 op, size = ppc64.AMOVH, 2
1193                         }
1194                         p := s.Prog(op)
1195                         p.From.Type = obj.TYPE_REG
1196                         p.From.Reg = ppc64.REG_R0
1197                         p.To.Type = obj.TYPE_MEM
1198                         p.To.Reg = v.Args[0].Reg()
1199                         p.To.Offset = offset
1200                         rem -= size
1201                         offset += size
1202                 }
1203
1204         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1205
1206                 // Unaligned data doesn't hurt performance
1207                 // for these instructions on power8.
1208
1209                 // For sizes >= 64 generate a loop as follows:
1210
1211                 // Set up loop counter in CTR, used by BC
1212                 //       XXLXOR VS32,VS32,VS32
1213                 //       MOVD len/32,REG_TMP
1214                 //       MOVD REG_TMP,CTR
1215                 //       MOVD $16,REG_TMP
1216                 //       loop:
1217                 //       STXVD2X VS32,(R0)(R20)
1218                 //       STXVD2X VS32,(R31)(R20)
1219                 //       ADD  $32,R20
1220                 //       BC   16, 0, loop
1221                 //
1222                 // any remainder is done as described below
1223
1224                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1225                 // then handle the remainder
1226                 //      MOVD R0,(R20)
1227                 //      MOVD R0,8(R20)
1228                 // .... etc.
1229                 //
1230                 // the remainder bytes are cleared using one or more
1231                 // of the following instructions with the appropriate
1232                 // offsets depending which instructions are needed
1233                 //
1234                 //      MOVW R0,n1(R20) 4 bytes
1235                 //      MOVH R0,n2(R20) 2 bytes
1236                 //      MOVB R0,n3(R20) 1 byte
1237                 //
1238                 // 7 bytes: MOVW, MOVH, MOVB
1239                 // 6 bytes: MOVW, MOVH
1240                 // 5 bytes: MOVW, MOVB
1241                 // 3 bytes: MOVH, MOVB
1242
1243                 // each loop iteration does 32 bytes
1244                 ctr := v.AuxInt / 32
1245
1246                 // remainder bytes
1247                 rem := v.AuxInt % 32
1248
1249                 // only generate a loop if there is more
1250                 // than 1 iteration.
1251                 if ctr > 1 {
1252                         // Set up VS32 (V0) to hold 0s
1253                         p := s.Prog(ppc64.AXXLXOR)
1254                         p.From.Type = obj.TYPE_REG
1255                         p.From.Reg = ppc64.REG_VS32
1256                         p.To.Type = obj.TYPE_REG
1257                         p.To.Reg = ppc64.REG_VS32
1258                         p.Reg = ppc64.REG_VS32
1259
1260                         // Set up CTR loop counter
1261                         p = s.Prog(ppc64.AMOVD)
1262                         p.From.Type = obj.TYPE_CONST
1263                         p.From.Offset = ctr
1264                         p.To.Type = obj.TYPE_REG
1265                         p.To.Reg = ppc64.REGTMP
1266
1267                         p = s.Prog(ppc64.AMOVD)
1268                         p.From.Type = obj.TYPE_REG
1269                         p.From.Reg = ppc64.REGTMP
1270                         p.To.Type = obj.TYPE_REG
1271                         p.To.Reg = ppc64.REG_CTR
1272
1273                         // Set up REGTMP to hold index value 16
1274                         p = s.Prog(ppc64.AMOVD)
1275                         p.From.Type = obj.TYPE_CONST
1276                         p.From.Offset = 16
1277                         p.To.Type = obj.TYPE_REG
1278                         p.To.Reg = ppc64.REGTMP
1279
1280                         // Don't add padding for alignment
1281                         // with few loop iterations.
1282                         if ctr > 3 {
1283                                 p = s.Prog(obj.APCALIGN)
1284                                 p.From.Type = obj.TYPE_CONST
1285                                 p.From.Offset = 16
1286                         }
1287
1288                         // generate 2 STXVD2Xs, each storing 16 bytes (32 bytes per iteration);
1289                         // since this is a loop, the top must be saved
1290                         var top *obj.Prog
1291                         // This is the top of loop
1292
1293                         p = s.Prog(ppc64.ASTXVD2X)
1294                         p.From.Type = obj.TYPE_REG
1295                         p.From.Reg = ppc64.REG_VS32
1296                         p.To.Type = obj.TYPE_MEM
1297                         p.To.Reg = v.Args[0].Reg()
1298                         p.To.Index = ppc64.REGZERO
1299                         // Save the top of loop
1300                         if top == nil {
1301                                 top = p
1302                         }
1303                         p = s.Prog(ppc64.ASTXVD2X)
1304                         p.From.Type = obj.TYPE_REG
1305                         p.From.Reg = ppc64.REG_VS32
1306                         p.To.Type = obj.TYPE_MEM
1307                         p.To.Reg = v.Args[0].Reg()
1308                         p.To.Index = ppc64.REGTMP
1309
1310                         // Increment address for the
1311                         // 4 doublewords just zeroed.
1312                         p = s.Prog(ppc64.AADD)
1313                         p.Reg = v.Args[0].Reg()
1314                         p.From.Type = obj.TYPE_CONST
1315                         p.From.Offset = 32
1316                         p.To.Type = obj.TYPE_REG
1317                         p.To.Reg = v.Args[0].Reg()
1318
1319                         // Branch back to top of loop
1320                         // based on CTR
1321                         // BC with BO_BCTR generates bdnz
1322                         p = s.Prog(ppc64.ABC)
1323                         p.From.Type = obj.TYPE_CONST
1324                         p.From.Offset = ppc64.BO_BCTR
1325                         p.Reg = ppc64.REG_CR0LT
1326                         p.To.Type = obj.TYPE_BRANCH
1327                         p.To.SetTarget(top)
1328                 }
1329
1330                 // when ctr == 1 the loop was not generated but
1331                 // there are at least 32 bytes to clear, so add
1332                 // that to the remainder to generate the code
1333                 // to clear those doublewords
1334                 if ctr == 1 {
1335                         rem += 32
1336                 }
1337
1338                 // clear the remainder starting at offset zero
1339                 offset := int64(0)
1340
1341                 // first clear as many doublewords as possible
1342                 // then clear remaining sizes as available
1343                 for rem > 0 {
1344                         op, size := ppc64.AMOVB, int64(1)
1345                         switch {
1346                         case rem >= 8:
1347                                 op, size = ppc64.AMOVD, 8
1348                         case rem >= 4:
1349                                 op, size = ppc64.AMOVW, 4
1350                         case rem >= 2:
1351                                 op, size = ppc64.AMOVH, 2
1352                         }
1353                         p := s.Prog(op)
1354                         p.From.Type = obj.TYPE_REG
1355                         p.From.Reg = ppc64.REG_R0
1356                         p.To.Type = obj.TYPE_MEM
1357                         p.To.Reg = v.Args[0].Reg()
1358                         p.To.Offset = offset
1359                         rem -= size
1360                         offset += size
1361                 }
1362
1363         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1364
1365                 bytesPerLoop := int64(32)
1366                 // This will be used when moving more
1367                 // than 8 bytes.  Moves start with
1368                 // as many 8 byte moves as possible, then
1369                 // 4, 2, or 1 byte(s) as remaining.  This will
1370                 // work and be efficient for power8 or later.
1371                 // If there are 64 or more bytes, then a
1372                 // loop is generated to move 32 bytes and
1373                 // update the src and dst addresses on each
1374                 // iteration. When < 64 bytes, the appropriate
1375                 // number of moves are generated based on the
1376                 // size.
1377                 // When moving >= 64 bytes a loop is used
1378                 //      MOVD len/32,REG_TMP
1379                 //      MOVD REG_TMP,CTR
1380                 //      MOVD $16,REG_TMP
1381                 // top:
1382                 //      LXVD2X (R0)(R21),VS32
1383                 //      LXVD2X (R31)(R21),VS33
1384                 //      ADD $32,R21
1385                 //      STXVD2X VS32,(R0)(R20)
1386                 //      STXVD2X VS33,(R31)(R20)
1387                 //      ADD $32,R20
1388                 //      BC 16,0,top
1389                 // Bytes not moved by this loop are moved
1390                 // with a combination of the following instructions,
1391                 // starting with the largest sizes and generating as
1392                 // many as needed, using the appropriate offset value.
1393                 //      MOVD  n(R21),R31
1394                 //      MOVD  R31,n(R20)
1395                 //      MOVW  n1(R21),R31
1396                 //      MOVW  R31,n1(R20)
1397                 //      MOVH  n2(R21),R31
1398                 //      MOVH  R31,n2(R20)
1399                 //      MOVB  n3(R21),R31
1400                 //      MOVB  R31,n3(R20)
1401
1402                 // Each loop iteration moves 32 bytes
1403                 ctr := v.AuxInt / bytesPerLoop
1404
1405                 // Remainder after the loop
1406                 rem := v.AuxInt % bytesPerLoop
1407
1408                 dstReg := v.Args[0].Reg()
1409                 srcReg := v.Args[1].Reg()
1410
1411                 // The set of registers used here, must match the clobbered reg list
1412                 // in PPC64Ops.go.
1413                 offset := int64(0)
1414
1415                 // top of the loop
1416                 var top *obj.Prog
1417                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1418                 if ctr > 1 {
1419                         // Set up the CTR
1420                         p := s.Prog(ppc64.AMOVD)
1421                         p.From.Type = obj.TYPE_CONST
1422                         p.From.Offset = ctr
1423                         p.To.Type = obj.TYPE_REG
1424                         p.To.Reg = ppc64.REGTMP
1425
1426                         p = s.Prog(ppc64.AMOVD)
1427                         p.From.Type = obj.TYPE_REG
1428                         p.From.Reg = ppc64.REGTMP
1429                         p.To.Type = obj.TYPE_REG
1430                         p.To.Reg = ppc64.REG_CTR
1431
1432                         // Use REGTMP as index reg
1433                         p = s.Prog(ppc64.AMOVD)
1434                         p.From.Type = obj.TYPE_CONST
1435                         p.From.Offset = 16
1436                         p.To.Type = obj.TYPE_REG
1437                         p.To.Reg = ppc64.REGTMP
1438
1439                         // Don't add padding for
1440                         // alignment with small iteration
1441                         // counts.
1442                         if ctr > 3 {
1443                                 p = s.Prog(obj.APCALIGN)
1444                                 p.From.Type = obj.TYPE_CONST
1445                                 p.From.Offset = 16
1446                         }
1447
1448                         // Generate 16 byte loads and stores.
1449                         // Use temp register for index (16)
1450                         // on the second one.
1451
1452                         p = s.Prog(ppc64.ALXVD2X)
1453                         p.From.Type = obj.TYPE_MEM
1454                         p.From.Reg = srcReg
1455                         p.From.Index = ppc64.REGZERO
1456                         p.To.Type = obj.TYPE_REG
1457                         p.To.Reg = ppc64.REG_VS32
1458                         if top == nil {
1459                                 top = p
1460                         }
1461                         p = s.Prog(ppc64.ALXVD2X)
1462                         p.From.Type = obj.TYPE_MEM
1463                         p.From.Reg = srcReg
1464                         p.From.Index = ppc64.REGTMP
1465                         p.To.Type = obj.TYPE_REG
1466                         p.To.Reg = ppc64.REG_VS33
1467
1468                         // increment the src reg for next iteration
1469                         p = s.Prog(ppc64.AADD)
1470                         p.Reg = srcReg
1471                         p.From.Type = obj.TYPE_CONST
1472                         p.From.Offset = bytesPerLoop
1473                         p.To.Type = obj.TYPE_REG
1474                         p.To.Reg = srcReg
1475
1476                         // generate 16 byte stores
1477                         p = s.Prog(ppc64.ASTXVD2X)
1478                         p.From.Type = obj.TYPE_REG
1479                         p.From.Reg = ppc64.REG_VS32
1480                         p.To.Type = obj.TYPE_MEM
1481                         p.To.Reg = dstReg
1482                         p.To.Index = ppc64.REGZERO
1483
1484                         p = s.Prog(ppc64.ASTXVD2X)
1485                         p.From.Type = obj.TYPE_REG
1486                         p.From.Reg = ppc64.REG_VS33
1487                         p.To.Type = obj.TYPE_MEM
1488                         p.To.Reg = dstReg
1489                         p.To.Index = ppc64.REGTMP
1490
1491                         // increment the dst reg for next iteration
1492                         p = s.Prog(ppc64.AADD)
1493                         p.Reg = dstReg
1494                         p.From.Type = obj.TYPE_CONST
1495                         p.From.Offset = bytesPerLoop
1496                         p.To.Type = obj.TYPE_REG
1497                         p.To.Reg = dstReg
1498
1499                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1500                         // to loop top.
1501                         p = s.Prog(ppc64.ABC)
1502                         p.From.Type = obj.TYPE_CONST
1503                         p.From.Offset = ppc64.BO_BCTR
1504                         p.Reg = ppc64.REG_CR0LT
1505                         p.To.Type = obj.TYPE_BRANCH
1506                         p.To.SetTarget(top)
1507
1508                         // srcReg and dstReg were incremented in the loop, so
1509                         // later instructions start with offset 0.
1510                         offset = int64(0)
1511                 }
1512
1513                 // No loop was generated for one iteration, so
1514                 // add 32 bytes to the remainder to move those bytes.
1515                 if ctr == 1 {
1516                         rem += bytesPerLoop
1517                 }
1518
1519                 if rem >= 16 {
1520                         // Generate 16 byte loads and stores.
1521                         // Use temp register for index (value 16)
1522                         // on the second one.
1523                         p := s.Prog(ppc64.ALXVD2X)
1524                         p.From.Type = obj.TYPE_MEM
1525                         p.From.Reg = srcReg
1526                         p.From.Index = ppc64.REGZERO
1527                         p.To.Type = obj.TYPE_REG
1528                         p.To.Reg = ppc64.REG_VS32
1529
1530                         p = s.Prog(ppc64.ASTXVD2X)
1531                         p.From.Type = obj.TYPE_REG
1532                         p.From.Reg = ppc64.REG_VS32
1533                         p.To.Type = obj.TYPE_MEM
1534                         p.To.Reg = dstReg
1535                         p.To.Index = ppc64.REGZERO
1536
1537                         offset = 16
1538                         rem -= 16
1539
1540                         if rem >= 16 {
1541                                 // Use REGTMP as index reg
1542                                 p := s.Prog(ppc64.AMOVD)
1543                                 p.From.Type = obj.TYPE_CONST
1544                                 p.From.Offset = 16
1545                                 p.To.Type = obj.TYPE_REG
1546                                 p.To.Reg = ppc64.REGTMP
1547
1548                                 p = s.Prog(ppc64.ALXVD2X)
1549                                 p.From.Type = obj.TYPE_MEM
1550                                 p.From.Reg = srcReg
1551                                 p.From.Index = ppc64.REGTMP
1552                                 p.To.Type = obj.TYPE_REG
1553                                 p.To.Reg = ppc64.REG_VS32
1554
1555                                 p = s.Prog(ppc64.ASTXVD2X)
1556                                 p.From.Type = obj.TYPE_REG
1557                                 p.From.Reg = ppc64.REG_VS32
1558                                 p.To.Type = obj.TYPE_MEM
1559                                 p.To.Reg = dstReg
1560                                 p.To.Index = ppc64.REGTMP
1561
1562                                 offset = 32
1563                                 rem -= 16
1564                         }
1565                 }
1566
1567                 // Generate all the remaining load and store pairs, starting with
1568                 // as many 8 byte moves as possible, then 4, 2, 1.
1569                 for rem > 0 {
1570                         op, size := ppc64.AMOVB, int64(1)
1571                         switch {
1572                         case rem >= 8:
1573                                 op, size = ppc64.AMOVD, 8
1574                         case rem >= 4:
1575                                 op, size = ppc64.AMOVWZ, 4
1576                         case rem >= 2:
1577                                 op, size = ppc64.AMOVH, 2
1578                         }
1579                         // Load
1580                         p := s.Prog(op)
1581                         p.To.Type = obj.TYPE_REG
1582                         p.To.Reg = ppc64.REGTMP
1583                         p.From.Type = obj.TYPE_MEM
1584                         p.From.Reg = srcReg
1585                         p.From.Offset = offset
1586
1587                         // Store
1588                         p = s.Prog(op)
1589                         p.From.Type = obj.TYPE_REG
1590                         p.From.Reg = ppc64.REGTMP
1591                         p.To.Type = obj.TYPE_MEM
1592                         p.To.Reg = dstReg
1593                         p.To.Offset = offset
1594                         rem -= size
1595                         offset += size
1596                 }
1597
1598         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1599                 bytesPerLoop := int64(64)
1600                 // This is used when moving more
1601                 // than 8 bytes on power9.  Moves start with
1602                 // as many 8 byte moves as possible, then
1603                 // 4, 2, or 1 byte(s) as remaining.  This will
1604                 // work and be efficient for power8 or later.
1605                 // If there are 128 or more bytes, then a loop
1606                 // is generated to move 64 bytes and update the
1607                 // src and dst addresses on each iteration.
1608                 // When < 128 bytes, the appropriate number of
1609                 // moves are generated based on the size.
1610                 // When moving >= 128 bytes a loop is used
1611                 //      MOVD len/64,REG_TMP
1612                 //      MOVD REG_TMP,CTR
1613                 // top:
1614                 //      LXV 0(R21),VS32
1615                 //      LXV 16(R21),VS33
1616                 //      STXV VS32,0(R20)
1617                 //      STXV VS33,16(R20)
1618                 //      (the same LXV/STXV pairs at offsets 32 and 48)
1619                 //      ADD $64,R21
1620                 //      ADD $64,R20
1621                 //      BC 16,0,top
1622                 // Bytes not moved by this loop are moved
1623                 // with a combination of the following instructions,
1624                 // starting with the largest sizes and generating as
1625                 // many as needed, using the appropriate offset value.
1626                 //      MOVD  n(R21),R31
1627                 //      MOVD  R31,n(R20)
1628                 //      MOVW  n1(R21),R31
1629                 //      MOVW  R31,n1(R20)
1630                 //      MOVH  n2(R21),R31
1631                 //      MOVH  R31,n2(R20)
1632                 //      MOVB  n3(R21),R31
1633                 //      MOVB  R31,n3(R20)
1634
1635                 // Each loop iteration moves 64 bytes
1636                 ctr := v.AuxInt / bytesPerLoop
1637
1638                 // Remainder after the loop
1639                 rem := v.AuxInt % bytesPerLoop
1640
1641                 dstReg := v.Args[0].Reg()
1642                 srcReg := v.Args[1].Reg()
1643
1644                 offset := int64(0)
1645
1646                 // top of the loop
1647                 var top *obj.Prog
1648
1649                 // Only generate looping code when loop counter is > 1 for >= 128 bytes
1650                 if ctr > 1 {
1651                         // Set up the CTR
1652                         p := s.Prog(ppc64.AMOVD)
1653                         p.From.Type = obj.TYPE_CONST
1654                         p.From.Offset = ctr
1655                         p.To.Type = obj.TYPE_REG
1656                         p.To.Reg = ppc64.REGTMP
1657
1658                         p = s.Prog(ppc64.AMOVD)
1659                         p.From.Type = obj.TYPE_REG
1660                         p.From.Reg = ppc64.REGTMP
1661                         p.To.Type = obj.TYPE_REG
1662                         p.To.Reg = ppc64.REG_CTR
1663
1664                         p = s.Prog(obj.APCALIGN)
1665                         p.From.Type = obj.TYPE_CONST
1666                         p.From.Offset = 16
1667
1668                         // Generate 16 byte loads and stores.
1669                         p = s.Prog(ppc64.ALXV)
1670                         p.From.Type = obj.TYPE_MEM
1671                         p.From.Reg = srcReg
1672                         p.From.Offset = offset
1673                         p.To.Type = obj.TYPE_REG
1674                         p.To.Reg = ppc64.REG_VS32
1675                         if top == nil {
1676                                 top = p
1677                         }
1678                         p = s.Prog(ppc64.ALXV)
1679                         p.From.Type = obj.TYPE_MEM
1680                         p.From.Reg = srcReg
1681                         p.From.Offset = offset + 16
1682                         p.To.Type = obj.TYPE_REG
1683                         p.To.Reg = ppc64.REG_VS33
1684
1685                         // generate 16 byte stores
1686                         p = s.Prog(ppc64.ASTXV)
1687                         p.From.Type = obj.TYPE_REG
1688                         p.From.Reg = ppc64.REG_VS32
1689                         p.To.Type = obj.TYPE_MEM
1690                         p.To.Reg = dstReg
1691                         p.To.Offset = offset
1692
1693                         p = s.Prog(ppc64.ASTXV)
1694                         p.From.Type = obj.TYPE_REG
1695                         p.From.Reg = ppc64.REG_VS33
1696                         p.To.Type = obj.TYPE_MEM
1697                         p.To.Reg = dstReg
1698                         p.To.Offset = offset + 16
1699
1700                         // Generate 16 byte loads and stores.
1701                         p = s.Prog(ppc64.ALXV)
1702                         p.From.Type = obj.TYPE_MEM
1703                         p.From.Reg = srcReg
1704                         p.From.Offset = offset + 32
1705                         p.To.Type = obj.TYPE_REG
1706                         p.To.Reg = ppc64.REG_VS32
1707
1708                         p = s.Prog(ppc64.ALXV)
1709                         p.From.Type = obj.TYPE_MEM
1710                         p.From.Reg = srcReg
1711                         p.From.Offset = offset + 48
1712                         p.To.Type = obj.TYPE_REG
1713                         p.To.Reg = ppc64.REG_VS33
1714
1715                         // generate 16 byte stores
1716                         p = s.Prog(ppc64.ASTXV)
1717                         p.From.Type = obj.TYPE_REG
1718                         p.From.Reg = ppc64.REG_VS32
1719                         p.To.Type = obj.TYPE_MEM
1720                         p.To.Reg = dstReg
1721                         p.To.Offset = offset + 32
1722
1723                         p = s.Prog(ppc64.ASTXV)
1724                         p.From.Type = obj.TYPE_REG
1725                         p.From.Reg = ppc64.REG_VS33
1726                         p.To.Type = obj.TYPE_MEM
1727                         p.To.Reg = dstReg
1728                         p.To.Offset = offset + 48
1729
1730                         // increment the src reg for next iteration
1731                         p = s.Prog(ppc64.AADD)
1732                         p.Reg = srcReg
1733                         p.From.Type = obj.TYPE_CONST
1734                         p.From.Offset = bytesPerLoop
1735                         p.To.Type = obj.TYPE_REG
1736                         p.To.Reg = srcReg
1737
1738                         // increment the dst reg for next iteration
1739                         p = s.Prog(ppc64.AADD)
1740                         p.Reg = dstReg
1741                         p.From.Type = obj.TYPE_CONST
1742                         p.From.Offset = bytesPerLoop
1743                         p.To.Type = obj.TYPE_REG
1744                         p.To.Reg = dstReg
1745
1746                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1747                         // to loop top.
1748                         p = s.Prog(ppc64.ABC)
1749                         p.From.Type = obj.TYPE_CONST
1750                         p.From.Offset = ppc64.BO_BCTR
1751                         p.Reg = ppc64.REG_CR0LT
1752                         p.To.Type = obj.TYPE_BRANCH
1753                         p.To.SetTarget(top)
1754
1755                         // srcReg and dstReg were incremented in the loop, so
1756                         // later instructions start with offset 0.
1757                         offset = int64(0)
1758                 }
1759
1760                 // No loop was generated for one iteration, so
1761                 // add 64 bytes to the remainder to move those bytes.
1762                 if ctr == 1 {
1763                         rem += bytesPerLoop
1764                 }
1765                 if rem >= 32 {
1766                         p := s.Prog(ppc64.ALXV)
1767                         p.From.Type = obj.TYPE_MEM
1768                         p.From.Reg = srcReg
1769                         p.To.Type = obj.TYPE_REG
1770                         p.To.Reg = ppc64.REG_VS32
1771
1772                         p = s.Prog(ppc64.ALXV)
1773                         p.From.Type = obj.TYPE_MEM
1774                         p.From.Reg = srcReg
1775                         p.From.Offset = 16
1776                         p.To.Type = obj.TYPE_REG
1777                         p.To.Reg = ppc64.REG_VS33
1778
1779                         p = s.Prog(ppc64.ASTXV)
1780                         p.From.Type = obj.TYPE_REG
1781                         p.From.Reg = ppc64.REG_VS32
1782                         p.To.Type = obj.TYPE_MEM
1783                         p.To.Reg = dstReg
1784
1785                         p = s.Prog(ppc64.ASTXV)
1786                         p.From.Type = obj.TYPE_REG
1787                         p.From.Reg = ppc64.REG_VS33
1788                         p.To.Type = obj.TYPE_MEM
1789                         p.To.Reg = dstReg
1790                         p.To.Offset = 16
1791
1792                         offset = 32
1793                         rem -= 32
1794                 }
1795
1796                 if rem >= 16 {
1797                         // Generate 16 byte loads and stores.
1798                         p := s.Prog(ppc64.ALXV)
1799                         p.From.Type = obj.TYPE_MEM
1800                         p.From.Reg = srcReg
1801                         p.From.Offset = offset
1802                         p.To.Type = obj.TYPE_REG
1803                         p.To.Reg = ppc64.REG_VS32
1804
1805                         p = s.Prog(ppc64.ASTXV)
1806                         p.From.Type = obj.TYPE_REG
1807                         p.From.Reg = ppc64.REG_VS32
1808                         p.To.Type = obj.TYPE_MEM
1809                         p.To.Reg = dstReg
1810                         p.To.Offset = offset
1811
1812                         offset += 16
1813                         rem -= 16
1814
1815                         if rem >= 16 {
1816                                 p := s.Prog(ppc64.ALXV)
1817                                 p.From.Type = obj.TYPE_MEM
1818                                 p.From.Reg = srcReg
1819                                 p.From.Offset = offset
1820                                 p.To.Type = obj.TYPE_REG
1821                                 p.To.Reg = ppc64.REG_VS32
1822
1823                                 p = s.Prog(ppc64.ASTXV)
1824                                 p.From.Type = obj.TYPE_REG
1825                                 p.From.Reg = ppc64.REG_VS32
1826                                 p.To.Type = obj.TYPE_MEM
1827                                 p.To.Reg = dstReg
1828                                 p.To.Offset = offset
1829
1830                                 offset += 16
1831                                 rem -= 16
1832                         }
1833                 }
1834                 // Generate all the remaining load and store pairs, starting with
1835                 // as many 8 byte moves as possible, then 4, 2, 1.
1836                 for rem > 0 {
1837                         op, size := ppc64.AMOVB, int64(1)
1838                         switch {
1839                         case rem >= 8:
1840                                 op, size = ppc64.AMOVD, 8
1841                         case rem >= 4:
1842                                 op, size = ppc64.AMOVWZ, 4
1843                         case rem >= 2:
1844                                 op, size = ppc64.AMOVH, 2
1845                         }
1846                         // Load
1847                         p := s.Prog(op)
1848                         p.To.Type = obj.TYPE_REG
1849                         p.To.Reg = ppc64.REGTMP
1850                         p.From.Type = obj.TYPE_MEM
1851                         p.From.Reg = srcReg
1852                         p.From.Offset = offset
1853
1854                         // Store
1855                         p = s.Prog(op)
1856                         p.From.Type = obj.TYPE_REG
1857                         p.From.Reg = ppc64.REGTMP
1858                         p.To.Type = obj.TYPE_MEM
1859                         p.To.Reg = dstReg
1860                         p.To.Offset = offset
1861                         rem -= size
1862                         offset += size
1863                 }
1864
1865         case ssa.OpPPC64CALLstatic:
1866                 s.Call(v)
1867
1868         case ssa.OpPPC64CALLtail:
1869                 s.TailCall(v)
1870
1871         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1872                 p := s.Prog(ppc64.AMOVD)
1873                 p.From.Type = obj.TYPE_REG
1874                 p.From.Reg = v.Args[0].Reg()
1875                 p.To.Type = obj.TYPE_REG
1876                 p.To.Reg = ppc64.REG_LR
1877
1878                 if v.Args[0].Reg() != ppc64.REG_R12 {
1879                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1880                 }
1881
1882                 pp := s.Call(v)
1883
1884                 // Convert the call into a blrl with hint this is not a subroutine return.
1885                 // The full bclrl opcode must be specified when passing a hint.
1886                 pp.As = ppc64.ABCL
1887                 pp.From.Type = obj.TYPE_CONST
1888                 pp.From.Offset = ppc64.BO_ALWAYS
1889                 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
1890                 pp.To.Reg = ppc64.REG_LR
1891                 pp.SetFrom3Const(1)
1892
1893                 if base.Ctxt.Flag_shared {
1894                         // When compiling Go into PIC, the function we just
1895                         // called via pointer might have been implemented in
1896                         // a separate module and so overwritten the TOC
1897                         // pointer in R2; reload it.
1898                         q := s.Prog(ppc64.AMOVD)
1899                         q.From.Type = obj.TYPE_MEM
1900                         q.From.Offset = 24
1901                         q.From.Reg = ppc64.REGSP
1902                         q.To.Type = obj.TYPE_REG
1903                         q.To.Reg = ppc64.REG_R2
1904                 }
1905
1906         case ssa.OpPPC64LoweredWB:
1907                 p := s.Prog(obj.ACALL)
1908                 p.To.Type = obj.TYPE_MEM
1909                 p.To.Name = obj.NAME_EXTERN
1910                 p.To.Sym = v.Aux.(*obj.LSym)
1911
1912         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1913                 p := s.Prog(obj.ACALL)
1914                 p.To.Type = obj.TYPE_MEM
1915                 p.To.Name = obj.NAME_EXTERN
1916                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1917                 s.UseArgs(16) // space used in callee args area by assembly stubs
1918
1919         case ssa.OpPPC64LoweredNilCheck:
1920                 if buildcfg.GOOS == "aix" {
1921                         // CMP Rarg0, R0
1922                         // BNE 2(PC)
1923                         // STW R0, 0(R0)
1924                         // NOP (so the BNE has somewhere to land)
1925
1926                         // CMP Rarg0, R0
1927                         p := s.Prog(ppc64.ACMP)
1928                         p.From.Type = obj.TYPE_REG
1929                         p.From.Reg = v.Args[0].Reg()
1930                         p.To.Type = obj.TYPE_REG
1931                         p.To.Reg = ppc64.REG_R0
1932
1933                         // BNE 2(PC)
1934                         p2 := s.Prog(ppc64.ABNE)
1935                         p2.To.Type = obj.TYPE_BRANCH
1936
1937                         // STW R0, 0(R0)
1938                         // Write at 0 is forbidden and will trigger a SIGSEGV
1939                         p = s.Prog(ppc64.AMOVW)
1940                         p.From.Type = obj.TYPE_REG
1941                         p.From.Reg = ppc64.REG_R0
1942                         p.To.Type = obj.TYPE_MEM
1943                         p.To.Reg = ppc64.REG_R0
1944
1945                         // NOP (so the BNE has somewhere to land)
1946                         nop := s.Prog(obj.ANOP)
1947                         p2.To.SetTarget(nop)
1948
1949                 } else {
1950                         // Issue a load which will fault if arg is nil.
1951                         p := s.Prog(ppc64.AMOVBZ)
1952                         p.From.Type = obj.TYPE_MEM
1953                         p.From.Reg = v.Args[0].Reg()
1954                         ssagen.AddAux(&p.From, v)
1955                         p.To.Type = obj.TYPE_REG
1956                         p.To.Reg = ppc64.REGTMP
1957                 }
1958                 if logopt.Enabled() {
1959                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1960                 }
1961                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1962                         base.WarnfAt(v.Pos, "generated nil check")
1963                 }
1964
1965         // These should be resolved by rules and not make it here.
1966         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1967                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1968                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1969                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1970         case ssa.OpPPC64InvertFlags:
1971                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1972         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1973                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1974         case ssa.OpClobber, ssa.OpClobberReg:
1975                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1976         default:
1977                 v.Fatalf("genValue not implemented: %s", v.LongString())
1978         }
1979 }
1980
1981 var blockJump = [...]struct {
1982         asm, invasm     obj.As
1983         asmeq, invasmun bool
1984 }{
1985         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1986         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1987
1988         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1989         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1990         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1991         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1992
1993         // TODO: need to work FP comparisons into block jumps
1994         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1995         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1996         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1997         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1998 }
1999
2000 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
2001         switch b.Kind {
2002         case ssa.BlockDefer:
2003                 // defer returns in R3:
2004                 // 0 if we should continue executing
2005                 // 1 if we should jump to deferreturn call
2006                 p := s.Prog(ppc64.ACMP)
2007                 p.From.Type = obj.TYPE_REG
2008                 p.From.Reg = ppc64.REG_R3
2009                 p.To.Type = obj.TYPE_REG
2010                 p.To.Reg = ppc64.REG_R0
2011
2012                 p = s.Prog(ppc64.ABNE)
2013                 p.To.Type = obj.TYPE_BRANCH
2014                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
2015                 if b.Succs[0].Block() != next {
2016                         p := s.Prog(obj.AJMP)
2017                         p.To.Type = obj.TYPE_BRANCH
2018                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2019                 }
2020
2021         case ssa.BlockPlain:
2022                 if b.Succs[0].Block() != next {
2023                         p := s.Prog(obj.AJMP)
2024                         p.To.Type = obj.TYPE_BRANCH
2025                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2026                 }
2027         case ssa.BlockExit, ssa.BlockRetJmp:
2028         case ssa.BlockRet:
2029                 s.Prog(obj.ARET)
2030
2031         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2032                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2033                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2034                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2035                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2036                 jmp := blockJump[b.Kind]
2037                 switch next {
2038                 case b.Succs[0].Block():
2039                         s.Br(jmp.invasm, b.Succs[1].Block())
2040                         if jmp.invasmun {
2041                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2042                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2043                         }
2044                 case b.Succs[1].Block():
2045                         s.Br(jmp.asm, b.Succs[0].Block())
2046                         if jmp.asmeq {
2047                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2048                         }
2049                 default:
2050                         if b.Likely != ssa.BranchUnlikely {
2051                                 s.Br(jmp.asm, b.Succs[0].Block())
2052                                 if jmp.asmeq {
2053                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2054                                 }
2055                                 s.Br(obj.AJMP, b.Succs[1].Block())
2056                         } else {
2057                                 s.Br(jmp.invasm, b.Succs[1].Block())
2058                                 if jmp.invasmun {
2059                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2060                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2061                                 }
2062                                 s.Br(obj.AJMP, b.Succs[0].Block())
2063                         }
2064                 }
2065         default:
2066                 b.Fatalf("branch not implemented: %s", b.LongString())
2067         }
2068 }
2069
2070 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2071         p := s.Prog(loadByType(t))
2072         p.From.Type = obj.TYPE_MEM
2073         p.From.Name = obj.NAME_AUTO
2074         p.From.Sym = n.Linksym()
2075         p.From.Offset = n.FrameOffset() + off
2076         p.To.Type = obj.TYPE_REG
2077         p.To.Reg = reg
2078         return p
2079 }
2080
2081 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2082         p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2083         p.To.Name = obj.NAME_PARAM
2084         p.To.Sym = n.Linksym()
2085         p.Pos = p.Pos.WithNotStmt()
2086         return p
2087 }