]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
[dev.boringcrypto] all: merge master into dev.boringcrypto
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/objw"
12         "cmd/compile/internal/ssa"
13         "cmd/compile/internal/ssagen"
14         "cmd/compile/internal/types"
15         "cmd/internal/obj"
16         "cmd/internal/obj/ppc64"
17         "internal/buildcfg"
18         "math"
19         "strings"
20 )
21
22 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
24         //      flive := b.FlagsLiveAtEnd
25         //      if b.Control != nil && b.Control.Type.IsFlags() {
26         //              flive = true
27         //      }
28         //      for i := len(b.Values) - 1; i >= 0; i-- {
29         //              v := b.Values[i]
30         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
31         //                      // The "mark" is any non-nil Aux value.
32         //                      v.Aux = v
33         //              }
34         //              if v.Type.IsFlags() {
35         //                      flive = false
36         //              }
37         //              for _, a := range v.Args {
38         //                      if a.Type.IsFlags() {
39         //                              flive = true
40         //                      }
41         //              }
42         //      }
43 }
44
45 // loadByType returns the load instruction of the given type.
46 func loadByType(t *types.Type) obj.As {
47         if t.IsFloat() {
48                 switch t.Size() {
49                 case 4:
50                         return ppc64.AFMOVS
51                 case 8:
52                         return ppc64.AFMOVD
53                 }
54         } else {
55                 switch t.Size() {
56                 case 1:
57                         if t.IsSigned() {
58                                 return ppc64.AMOVB
59                         } else {
60                                 return ppc64.AMOVBZ
61                         }
62                 case 2:
63                         if t.IsSigned() {
64                                 return ppc64.AMOVH
65                         } else {
66                                 return ppc64.AMOVHZ
67                         }
68                 case 4:
69                         if t.IsSigned() {
70                                 return ppc64.AMOVW
71                         } else {
72                                 return ppc64.AMOVWZ
73                         }
74                 case 8:
75                         return ppc64.AMOVD
76                 }
77         }
78         panic("bad load type")
79 }
80
81 // storeByType returns the store instruction of the given type.
82 func storeByType(t *types.Type) obj.As {
83         if t.IsFloat() {
84                 switch t.Size() {
85                 case 4:
86                         return ppc64.AFMOVS
87                 case 8:
88                         return ppc64.AFMOVD
89                 }
90         } else {
91                 switch t.Size() {
92                 case 1:
93                         return ppc64.AMOVB
94                 case 2:
95                         return ppc64.AMOVH
96                 case 4:
97                         return ppc64.AMOVW
98                 case 8:
99                         return ppc64.AMOVD
100                 }
101         }
102         panic("bad store type")
103 }
104
105 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
106         switch v.Op {
107         case ssa.OpCopy:
108                 t := v.Type
109                 if t.IsMemory() {
110                         return
111                 }
112                 x := v.Args[0].Reg()
113                 y := v.Reg()
114                 if x != y {
115                         rt := obj.TYPE_REG
116                         op := ppc64.AMOVD
117
118                         if t.IsFloat() {
119                                 op = ppc64.AFMOVD
120                         }
121                         p := s.Prog(op)
122                         p.From.Type = rt
123                         p.From.Reg = x
124                         p.To.Type = rt
125                         p.To.Reg = y
126                 }
127
128         case ssa.OpPPC64LoweredMuluhilo:
129                 // MULHDU       Rarg1, Rarg0, Reg0
130                 // MULLD        Rarg1, Rarg0, Reg1
131                 r0 := v.Args[0].Reg()
132                 r1 := v.Args[1].Reg()
133                 p := s.Prog(ppc64.AMULHDU)
134                 p.From.Type = obj.TYPE_REG
135                 p.From.Reg = r1
136                 p.Reg = r0
137                 p.To.Type = obj.TYPE_REG
138                 p.To.Reg = v.Reg0()
139                 p1 := s.Prog(ppc64.AMULLD)
140                 p1.From.Type = obj.TYPE_REG
141                 p1.From.Reg = r1
142                 p1.Reg = r0
143                 p1.To.Type = obj.TYPE_REG
144                 p1.To.Reg = v.Reg1()
145
146         case ssa.OpPPC64LoweredAdd64Carry:
147                 // ADDC         Rarg2, -1, Rtmp
148                 // ADDE         Rarg1, Rarg0, Reg0
149                 // ADDZE        Rzero, Reg1
150                 r0 := v.Args[0].Reg()
151                 r1 := v.Args[1].Reg()
152                 r2 := v.Args[2].Reg()
153                 p := s.Prog(ppc64.AADDC)
154                 p.From.Type = obj.TYPE_CONST
155                 p.From.Offset = -1
156                 p.Reg = r2
157                 p.To.Type = obj.TYPE_REG
158                 p.To.Reg = ppc64.REGTMP
159                 p1 := s.Prog(ppc64.AADDE)
160                 p1.From.Type = obj.TYPE_REG
161                 p1.From.Reg = r1
162                 p1.Reg = r0
163                 p1.To.Type = obj.TYPE_REG
164                 p1.To.Reg = v.Reg0()
165                 p2 := s.Prog(ppc64.AADDZE)
166                 p2.From.Type = obj.TYPE_REG
167                 p2.From.Reg = ppc64.REGZERO
168                 p2.To.Type = obj.TYPE_REG
169                 p2.To.Reg = v.Reg1()
170
171         case ssa.OpPPC64LoweredAtomicAnd8,
172                 ssa.OpPPC64LoweredAtomicAnd32,
173                 ssa.OpPPC64LoweredAtomicOr8,
174                 ssa.OpPPC64LoweredAtomicOr32:
175                 // LWSYNC
176                 // LBAR/LWAR    (Rarg0), Rtmp
177                 // AND/OR       Rarg1, Rtmp
178                 // STBCCC/STWCCC Rtmp, (Rarg0)
179                 // BNE          -3(PC)
180                 ld := ppc64.ALBAR
181                 st := ppc64.ASTBCCC
182                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
183                         ld = ppc64.ALWAR
184                         st = ppc64.ASTWCCC
185                 }
186                 r0 := v.Args[0].Reg()
187                 r1 := v.Args[1].Reg()
188                 // LWSYNC - Assuming shared data not write-through-required nor
189                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
190                 plwsync := s.Prog(ppc64.ALWSYNC)
191                 plwsync.To.Type = obj.TYPE_NONE
192                 // LBAR or LWAR
193                 p := s.Prog(ld)
194                 p.From.Type = obj.TYPE_MEM
195                 p.From.Reg = r0
196                 p.To.Type = obj.TYPE_REG
197                 p.To.Reg = ppc64.REGTMP
198                 // AND/OR reg1,out
199                 p1 := s.Prog(v.Op.Asm())
200                 p1.From.Type = obj.TYPE_REG
201                 p1.From.Reg = r1
202                 p1.To.Type = obj.TYPE_REG
203                 p1.To.Reg = ppc64.REGTMP
204                 // STBCCC or STWCCC
205                 p2 := s.Prog(st)
206                 p2.From.Type = obj.TYPE_REG
207                 p2.From.Reg = ppc64.REGTMP
208                 p2.To.Type = obj.TYPE_MEM
209                 p2.To.Reg = r0
210                 p2.RegTo2 = ppc64.REGTMP
211                 // BNE retry
212                 p3 := s.Prog(ppc64.ABNE)
213                 p3.To.Type = obj.TYPE_BRANCH
214                 p3.To.SetTarget(p)
215
216         case ssa.OpPPC64LoweredAtomicAdd32,
217                 ssa.OpPPC64LoweredAtomicAdd64:
218                 // LWSYNC
219                 // LDAR/LWAR    (Rarg0), Rout
220                 // ADD          Rarg1, Rout
221                 // STDCCC/STWCCC Rout, (Rarg0)
222                 // BNE         -3(PC)
223                 // MOVW         Rout,Rout (if Add32)
224                 ld := ppc64.ALDAR
225                 st := ppc64.ASTDCCC
226                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
227                         ld = ppc64.ALWAR
228                         st = ppc64.ASTWCCC
229                 }
230                 r0 := v.Args[0].Reg()
231                 r1 := v.Args[1].Reg()
232                 out := v.Reg0()
233                 // LWSYNC - Assuming shared data not write-through-required nor
234                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
235                 plwsync := s.Prog(ppc64.ALWSYNC)
236                 plwsync.To.Type = obj.TYPE_NONE
237                 // LDAR or LWAR
238                 p := s.Prog(ld)
239                 p.From.Type = obj.TYPE_MEM
240                 p.From.Reg = r0
241                 p.To.Type = obj.TYPE_REG
242                 p.To.Reg = out
243                 // ADD reg1,out
244                 p1 := s.Prog(ppc64.AADD)
245                 p1.From.Type = obj.TYPE_REG
246                 p1.From.Reg = r1
247                 p1.To.Reg = out
248                 p1.To.Type = obj.TYPE_REG
249                 // STDCCC or STWCCC
250                 p3 := s.Prog(st)
251                 p3.From.Type = obj.TYPE_REG
252                 p3.From.Reg = out
253                 p3.To.Type = obj.TYPE_MEM
254                 p3.To.Reg = r0
255                 // BNE retry
256                 p4 := s.Prog(ppc64.ABNE)
257                 p4.To.Type = obj.TYPE_BRANCH
258                 p4.To.SetTarget(p)
259
260                 // Ensure a 32 bit result
261                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
262                         p5 := s.Prog(ppc64.AMOVWZ)
263                         p5.To.Type = obj.TYPE_REG
264                         p5.To.Reg = out
265                         p5.From.Type = obj.TYPE_REG
266                         p5.From.Reg = out
267                 }
268
269         case ssa.OpPPC64LoweredAtomicExchange32,
270                 ssa.OpPPC64LoweredAtomicExchange64:
271                 // LWSYNC
272                 // LDAR/LWAR    (Rarg0), Rout
273                 // STDCCC/STWCCC Rout, (Rarg0)
274                 // BNE         -2(PC)
275                 // ISYNC
276                 ld := ppc64.ALDAR
277                 st := ppc64.ASTDCCC
278                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
279                         ld = ppc64.ALWAR
280                         st = ppc64.ASTWCCC
281                 }
282                 r0 := v.Args[0].Reg()
283                 r1 := v.Args[1].Reg()
284                 out := v.Reg0()
285                 // LWSYNC - Assuming shared data not write-through-required nor
286                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
287                 plwsync := s.Prog(ppc64.ALWSYNC)
288                 plwsync.To.Type = obj.TYPE_NONE
289                 // LDAR or LWAR
290                 p := s.Prog(ld)
291                 p.From.Type = obj.TYPE_MEM
292                 p.From.Reg = r0
293                 p.To.Type = obj.TYPE_REG
294                 p.To.Reg = out
295                 // STDCCC or STWCCC
296                 p1 := s.Prog(st)
297                 p1.From.Type = obj.TYPE_REG
298                 p1.From.Reg = r1
299                 p1.To.Type = obj.TYPE_MEM
300                 p1.To.Reg = r0
301                 // BNE retry
302                 p2 := s.Prog(ppc64.ABNE)
303                 p2.To.Type = obj.TYPE_BRANCH
304                 p2.To.SetTarget(p)
305                 // ISYNC
306                 pisync := s.Prog(ppc64.AISYNC)
307                 pisync.To.Type = obj.TYPE_NONE
308
309         case ssa.OpPPC64LoweredAtomicLoad8,
310                 ssa.OpPPC64LoweredAtomicLoad32,
311                 ssa.OpPPC64LoweredAtomicLoad64,
312                 ssa.OpPPC64LoweredAtomicLoadPtr:
313                 // SYNC
314                 // MOVB/MOVD/MOVW (Rarg0), Rout
315                 // CMP Rout,Rout
316                 // BNE 1(PC)
317                 // ISYNC
318                 ld := ppc64.AMOVD
319                 cmp := ppc64.ACMP
320                 switch v.Op {
321                 case ssa.OpPPC64LoweredAtomicLoad8:
322                         ld = ppc64.AMOVBZ
323                 case ssa.OpPPC64LoweredAtomicLoad32:
324                         ld = ppc64.AMOVWZ
325                         cmp = ppc64.ACMPW
326                 }
327                 arg0 := v.Args[0].Reg()
328                 out := v.Reg0()
329                 // SYNC when AuxInt == 1; otherwise, load-acquire
330                 if v.AuxInt == 1 {
331                         psync := s.Prog(ppc64.ASYNC)
332                         psync.To.Type = obj.TYPE_NONE
333                 }
334                 // Load
335                 p := s.Prog(ld)
336                 p.From.Type = obj.TYPE_MEM
337                 p.From.Reg = arg0
338                 p.To.Type = obj.TYPE_REG
339                 p.To.Reg = out
340                 // CMP
341                 p1 := s.Prog(cmp)
342                 p1.From.Type = obj.TYPE_REG
343                 p1.From.Reg = out
344                 p1.To.Type = obj.TYPE_REG
345                 p1.To.Reg = out
346                 // BNE
347                 p2 := s.Prog(ppc64.ABNE)
348                 p2.To.Type = obj.TYPE_BRANCH
349                 // ISYNC
350                 pisync := s.Prog(ppc64.AISYNC)
351                 pisync.To.Type = obj.TYPE_NONE
352                 p2.To.SetTarget(pisync)
353
354         case ssa.OpPPC64LoweredAtomicStore8,
355                 ssa.OpPPC64LoweredAtomicStore32,
356                 ssa.OpPPC64LoweredAtomicStore64:
357                 // SYNC or LWSYNC
358                 // MOVB/MOVW/MOVD arg1,(arg0)
359                 st := ppc64.AMOVD
360                 switch v.Op {
361                 case ssa.OpPPC64LoweredAtomicStore8:
362                         st = ppc64.AMOVB
363                 case ssa.OpPPC64LoweredAtomicStore32:
364                         st = ppc64.AMOVW
365                 }
366                 arg0 := v.Args[0].Reg()
367                 arg1 := v.Args[1].Reg()
368                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
369                 // SYNC
370                 syncOp := ppc64.ASYNC
371                 if v.AuxInt == 0 {
372                         syncOp = ppc64.ALWSYNC
373                 }
374                 psync := s.Prog(syncOp)
375                 psync.To.Type = obj.TYPE_NONE
376                 // Store
377                 p := s.Prog(st)
378                 p.To.Type = obj.TYPE_MEM
379                 p.To.Reg = arg0
380                 p.From.Type = obj.TYPE_REG
381                 p.From.Reg = arg1
382
383         case ssa.OpPPC64LoweredAtomicCas64,
384                 ssa.OpPPC64LoweredAtomicCas32:
385                 // LWSYNC
386                 // loop:
387                 // LDAR        (Rarg0), MutexHint, Rtmp
388                 // CMP         Rarg1, Rtmp
389                 // BNE         fail
390                 // STDCCC      Rarg2, (Rarg0)
391                 // BNE         loop
392                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
393                 // MOVD        $1, Rout
394                 // BR          end
395                 // fail:
396                 // MOVD        $0, Rout
397                 // end:
398                 ld := ppc64.ALDAR
399                 st := ppc64.ASTDCCC
400                 cmp := ppc64.ACMP
401                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
402                         ld = ppc64.ALWAR
403                         st = ppc64.ASTWCCC
404                         cmp = ppc64.ACMPW
405                 }
406                 r0 := v.Args[0].Reg()
407                 r1 := v.Args[1].Reg()
408                 r2 := v.Args[2].Reg()
409                 out := v.Reg0()
410                 // LWSYNC - Assuming shared data not write-through-required nor
411                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
412                 plwsync1 := s.Prog(ppc64.ALWSYNC)
413                 plwsync1.To.Type = obj.TYPE_NONE
414                 // LDAR or LWAR
415                 p := s.Prog(ld)
416                 p.From.Type = obj.TYPE_MEM
417                 p.From.Reg = r0
418                 p.To.Type = obj.TYPE_REG
419                 p.To.Reg = ppc64.REGTMP
420                 // If it is a Compare-and-Swap-Release operation, set the EH field with
421                 // the release hint.
422                 if v.AuxInt == 0 {
423                         p.SetFrom3Const(0)
424                 }
425                 // CMP reg1,reg2
426                 p1 := s.Prog(cmp)
427                 p1.From.Type = obj.TYPE_REG
428                 p1.From.Reg = r1
429                 p1.To.Reg = ppc64.REGTMP
430                 p1.To.Type = obj.TYPE_REG
431                 // BNE cas_fail
432                 p2 := s.Prog(ppc64.ABNE)
433                 p2.To.Type = obj.TYPE_BRANCH
434                 // STDCCC or STWCCC
435                 p3 := s.Prog(st)
436                 p3.From.Type = obj.TYPE_REG
437                 p3.From.Reg = r2
438                 p3.To.Type = obj.TYPE_MEM
439                 p3.To.Reg = r0
440                 // BNE retry
441                 p4 := s.Prog(ppc64.ABNE)
442                 p4.To.Type = obj.TYPE_BRANCH
443                 p4.To.SetTarget(p)
444                 // LWSYNC - Assuming shared data not write-through-required nor
445                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
446                 // If the operation is a CAS-Release, then synchronization is not necessary.
447                 if v.AuxInt != 0 {
448                         plwsync2 := s.Prog(ppc64.ALWSYNC)
449                         plwsync2.To.Type = obj.TYPE_NONE
450                 }
451                 // return true
452                 p5 := s.Prog(ppc64.AMOVD)
453                 p5.From.Type = obj.TYPE_CONST
454                 p5.From.Offset = 1
455                 p5.To.Type = obj.TYPE_REG
456                 p5.To.Reg = out
457                 // BR done
458                 p6 := s.Prog(obj.AJMP)
459                 p6.To.Type = obj.TYPE_BRANCH
460                 // return false
461                 p7 := s.Prog(ppc64.AMOVD)
462                 p7.From.Type = obj.TYPE_CONST
463                 p7.From.Offset = 0
464                 p7.To.Type = obj.TYPE_REG
465                 p7.To.Reg = out
466                 p2.To.SetTarget(p7)
467                 // done (label)
468                 p8 := s.Prog(obj.ANOP)
469                 p6.To.SetTarget(p8)
470
471         case ssa.OpPPC64LoweredGetClosurePtr:
472                 // Closure pointer is R11 (already)
473                 ssagen.CheckLoweredGetClosurePtr(v)
474
475         case ssa.OpPPC64LoweredGetCallerSP:
476                 // caller's SP is FixedFrameSize below the address of the first arg
477                 p := s.Prog(ppc64.AMOVD)
478                 p.From.Type = obj.TYPE_ADDR
479                 p.From.Offset = -base.Ctxt.FixedFrameSize()
480                 p.From.Name = obj.NAME_PARAM
481                 p.To.Type = obj.TYPE_REG
482                 p.To.Reg = v.Reg()
483
484         case ssa.OpPPC64LoweredGetCallerPC:
485                 p := s.Prog(obj.AGETCALLERPC)
486                 p.To.Type = obj.TYPE_REG
487                 p.To.Reg = v.Reg()
488
489         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
490                 // input is already rounded
491
492         case ssa.OpLoadReg:
493                 loadOp := loadByType(v.Type)
494                 p := s.Prog(loadOp)
495                 ssagen.AddrAuto(&p.From, v.Args[0])
496                 p.To.Type = obj.TYPE_REG
497                 p.To.Reg = v.Reg()
498
499         case ssa.OpStoreReg:
500                 storeOp := storeByType(v.Type)
501                 p := s.Prog(storeOp)
502                 p.From.Type = obj.TYPE_REG
503                 p.From.Reg = v.Args[0].Reg()
504                 ssagen.AddrAuto(&p.To, v)
505
506         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
507                 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
508                 // The loop only runs once.
509                 for _, a := range v.Block.Func.RegArgs {
510                         // Pass the spill/unspill information along to the assembler, offset by size of
511                         // the saved LR slot.
512                         addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.FixedFrameSize())
513                         s.FuncInfo().AddSpill(
514                                 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
515                 }
516                 v.Block.Func.RegArgs = nil
517
518                 ssagen.CheckArgReg(v)
519
520         case ssa.OpPPC64DIVD:
521                 // For now,
522                 //
523                 // cmp arg1, -1
524                 // be  ahead
525                 // v = arg0 / arg1
526                 // b over
527                 // ahead: v = - arg0
528                 // over: nop
529                 r := v.Reg()
530                 r0 := v.Args[0].Reg()
531                 r1 := v.Args[1].Reg()
532
533                 p := s.Prog(ppc64.ACMP)
534                 p.From.Type = obj.TYPE_REG
535                 p.From.Reg = r1
536                 p.To.Type = obj.TYPE_CONST
537                 p.To.Offset = -1
538
539                 pbahead := s.Prog(ppc64.ABEQ)
540                 pbahead.To.Type = obj.TYPE_BRANCH
541
542                 p = s.Prog(v.Op.Asm())
543                 p.From.Type = obj.TYPE_REG
544                 p.From.Reg = r1
545                 p.Reg = r0
546                 p.To.Type = obj.TYPE_REG
547                 p.To.Reg = r
548
549                 pbover := s.Prog(obj.AJMP)
550                 pbover.To.Type = obj.TYPE_BRANCH
551
552                 p = s.Prog(ppc64.ANEG)
553                 p.To.Type = obj.TYPE_REG
554                 p.To.Reg = r
555                 p.From.Type = obj.TYPE_REG
556                 p.From.Reg = r0
557                 pbahead.To.SetTarget(p)
558
559                 p = s.Prog(obj.ANOP)
560                 pbover.To.SetTarget(p)
561
562         case ssa.OpPPC64DIVW:
563                 // word-width version of above
564                 r := v.Reg()
565                 r0 := v.Args[0].Reg()
566                 r1 := v.Args[1].Reg()
567
568                 p := s.Prog(ppc64.ACMPW)
569                 p.From.Type = obj.TYPE_REG
570                 p.From.Reg = r1
571                 p.To.Type = obj.TYPE_CONST
572                 p.To.Offset = -1
573
574                 pbahead := s.Prog(ppc64.ABEQ)
575                 pbahead.To.Type = obj.TYPE_BRANCH
576
577                 p = s.Prog(v.Op.Asm())
578                 p.From.Type = obj.TYPE_REG
579                 p.From.Reg = r1
580                 p.Reg = r0
581                 p.To.Type = obj.TYPE_REG
582                 p.To.Reg = r
583
584                 pbover := s.Prog(obj.AJMP)
585                 pbover.To.Type = obj.TYPE_BRANCH
586
587                 p = s.Prog(ppc64.ANEG)
588                 p.To.Type = obj.TYPE_REG
589                 p.To.Reg = r
590                 p.From.Type = obj.TYPE_REG
591                 p.From.Reg = r0
592                 pbahead.To.SetTarget(p)
593
594                 p = s.Prog(obj.ANOP)
595                 pbover.To.SetTarget(p)
596
597         case ssa.OpPPC64CLRLSLWI:
598                 r := v.Reg()
599                 r1 := v.Args[0].Reg()
600                 shifts := v.AuxInt
601                 p := s.Prog(v.Op.Asm())
602                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
603                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
604                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
605                 p.Reg = r1
606                 p.To.Type = obj.TYPE_REG
607                 p.To.Reg = r
608
609         case ssa.OpPPC64CLRLSLDI:
610                 r := v.Reg()
611                 r1 := v.Args[0].Reg()
612                 shifts := v.AuxInt
613                 p := s.Prog(v.Op.Asm())
614                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
615                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
616                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
617                 p.Reg = r1
618                 p.To.Type = obj.TYPE_REG
619                 p.To.Reg = r
620
621                 // Mask has been set as sh
622         case ssa.OpPPC64RLDICL:
623                 r := v.Reg()
624                 r1 := v.Args[0].Reg()
625                 shifts := v.AuxInt
626                 p := s.Prog(v.Op.Asm())
627                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
628                 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
629                 p.Reg = r1
630                 p.To.Type = obj.TYPE_REG
631                 p.To.Reg = r
632
633         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
634                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
635                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
636                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
637                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
638                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
639                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
640                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
641                 r := v.Reg()
642                 r1 := v.Args[0].Reg()
643                 r2 := v.Args[1].Reg()
644                 p := s.Prog(v.Op.Asm())
645                 p.From.Type = obj.TYPE_REG
646                 p.From.Reg = r2
647                 p.Reg = r1
648                 p.To.Type = obj.TYPE_REG
649                 p.To.Reg = r
650
651         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
652                 r1 := v.Args[0].Reg()
653                 r2 := v.Args[1].Reg()
654                 p := s.Prog(v.Op.Asm())
655                 p.From.Type = obj.TYPE_REG
656                 p.From.Reg = r2
657                 p.Reg = r1
658                 p.To.Type = obj.TYPE_REG
659                 p.To.Reg = ppc64.REGTMP // result is not needed
660
661         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
662                 p := s.Prog(v.Op.Asm())
663                 p.From.Type = obj.TYPE_CONST
664                 p.From.Offset = v.AuxInt
665                 p.Reg = v.Args[0].Reg()
666                 p.To.Type = obj.TYPE_REG
667                 p.To.Reg = v.Reg()
668
669                 // Auxint holds encoded rotate + mask
670         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
671                 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
672                 p := s.Prog(v.Op.Asm())
673                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
674                 p.Reg = v.Args[0].Reg()
675                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
676                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
677
678                 // Auxint holds mask
679         case ssa.OpPPC64RLWNM:
680                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
681                 p := s.Prog(v.Op.Asm())
682                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
683                 p.Reg = v.Args[0].Reg()
684                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
685                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
686
687         case ssa.OpPPC64MADDLD:
688                 r := v.Reg()
689                 r1 := v.Args[0].Reg()
690                 r2 := v.Args[1].Reg()
691                 r3 := v.Args[2].Reg()
692                 // r = r1*r2 ± r3
693                 p := s.Prog(v.Op.Asm())
694                 p.From.Type = obj.TYPE_REG
695                 p.From.Reg = r1
696                 p.Reg = r2
697                 p.SetFrom3Reg(r3)
698                 p.To.Type = obj.TYPE_REG
699                 p.To.Reg = r
700
701         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
702                 r := v.Reg()
703                 r1 := v.Args[0].Reg()
704                 r2 := v.Args[1].Reg()
705                 r3 := v.Args[2].Reg()
706                 // r = r1*r2 ± r3
707                 p := s.Prog(v.Op.Asm())
708                 p.From.Type = obj.TYPE_REG
709                 p.From.Reg = r1
710                 p.Reg = r3
711                 p.SetFrom3Reg(r2)
712                 p.To.Type = obj.TYPE_REG
713                 p.To.Reg = r
714
715         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
716                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
717                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
718                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
719                 r := v.Reg()
720                 p := s.Prog(v.Op.Asm())
721                 p.To.Type = obj.TYPE_REG
722                 p.To.Reg = r
723                 p.From.Type = obj.TYPE_REG
724                 p.From.Reg = v.Args[0].Reg()
725
726         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
727                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
728                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
729                 p := s.Prog(v.Op.Asm())
730                 p.Reg = v.Args[0].Reg()
731                 p.From.Type = obj.TYPE_CONST
732                 p.From.Offset = v.AuxInt
733                 p.To.Type = obj.TYPE_REG
734                 p.To.Reg = v.Reg()
735
736         case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
737                 r := v.Reg0() // CA is the first, implied argument.
738                 r1 := v.Args[0].Reg()
739                 r2 := v.Args[1].Reg()
740                 p := s.Prog(v.Op.Asm())
741                 p.From.Type = obj.TYPE_REG
742                 p.From.Reg = r2
743                 p.Reg = r1
744                 p.To.Type = obj.TYPE_REG
745                 p.To.Reg = r
746
747         case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
748                 p := s.Prog(v.Op.Asm())
749                 p.From.Type = obj.TYPE_REG
750                 p.From.Reg = ppc64.REG_R0
751                 p.To.Type = obj.TYPE_REG
752                 p.To.Reg = v.Reg()
753
754         case ssa.OpPPC64ADDCconst:
755                 p := s.Prog(v.Op.Asm())
756                 p.Reg = v.Args[0].Reg()
757                 p.From.Type = obj.TYPE_CONST
758                 p.From.Offset = v.AuxInt
759                 p.To.Type = obj.TYPE_REG
760                 // Output is a pair, the second is the CA, which is implied.
761                 p.To.Reg = v.Reg0()
762
763         case ssa.OpPPC64SUBCconst:
764                 p := s.Prog(v.Op.Asm())
765                 p.SetFrom3Const(v.AuxInt)
766                 p.From.Type = obj.TYPE_REG
767                 p.From.Reg = v.Args[0].Reg()
768                 p.To.Type = obj.TYPE_REG
769                 p.To.Reg = v.Reg0()
770
771         case ssa.OpPPC64SUBFCconst:
772                 p := s.Prog(v.Op.Asm())
773                 p.SetFrom3Const(v.AuxInt)
774                 p.From.Type = obj.TYPE_REG
775                 p.From.Reg = v.Args[0].Reg()
776                 p.To.Type = obj.TYPE_REG
777                 p.To.Reg = v.Reg()
778
779         case ssa.OpPPC64ANDCCconst:
780                 p := s.Prog(v.Op.Asm())
781                 p.Reg = v.Args[0].Reg()
782                 p.From.Type = obj.TYPE_CONST
783                 p.From.Offset = v.AuxInt
784                 p.To.Type = obj.TYPE_REG
785                 p.To.Reg = ppc64.REGTMP // discard result
786
787         case ssa.OpPPC64MOVDaddr:
788                 switch v.Aux.(type) {
789                 default:
790                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
791                 case nil:
792                         // If aux offset and aux int are both 0, and the same
793                         // input and output regs are used, no instruction
794                         // needs to be generated, since it would just be
795                         // addi rx, rx, 0.
796                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
797                                 p := s.Prog(ppc64.AMOVD)
798                                 p.From.Type = obj.TYPE_ADDR
799                                 p.From.Reg = v.Args[0].Reg()
800                                 p.From.Offset = v.AuxInt
801                                 p.To.Type = obj.TYPE_REG
802                                 p.To.Reg = v.Reg()
803                         }
804
805                 case *obj.LSym, ir.Node:
806                         p := s.Prog(ppc64.AMOVD)
807                         p.From.Type = obj.TYPE_ADDR
808                         p.From.Reg = v.Args[0].Reg()
809                         p.To.Type = obj.TYPE_REG
810                         p.To.Reg = v.Reg()
811                         ssagen.AddAux(&p.From, v)
812
813                 }
814
815         case ssa.OpPPC64MOVDconst:
816                 p := s.Prog(v.Op.Asm())
817                 p.From.Type = obj.TYPE_CONST
818                 p.From.Offset = v.AuxInt
819                 p.To.Type = obj.TYPE_REG
820                 p.To.Reg = v.Reg()
821
822         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
823                 p := s.Prog(v.Op.Asm())
824                 p.From.Type = obj.TYPE_FCONST
825                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
826                 p.To.Type = obj.TYPE_REG
827                 p.To.Reg = v.Reg()
828
829         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
830                 p := s.Prog(v.Op.Asm())
831                 p.From.Type = obj.TYPE_REG
832                 p.From.Reg = v.Args[0].Reg()
833                 p.To.Type = obj.TYPE_REG
834                 p.To.Reg = v.Args[1].Reg()
835
836         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
837                 p := s.Prog(v.Op.Asm())
838                 p.From.Type = obj.TYPE_REG
839                 p.From.Reg = v.Args[0].Reg()
840                 p.To.Type = obj.TYPE_CONST
841                 p.To.Offset = v.AuxInt
842
843         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
844                 // Shift in register to required size
845                 p := s.Prog(v.Op.Asm())
846                 p.From.Type = obj.TYPE_REG
847                 p.From.Reg = v.Args[0].Reg()
848                 p.To.Reg = v.Reg()
849                 p.To.Type = obj.TYPE_REG
850
851         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
852
853                 // MOVDload and MOVWload are DS form instructions that are restricted to
854                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
855                 // then the address of the symbol to be loaded is computed (base + offset)
856                 // and used as the new base register and the offset field in the instruction
857                 // can be set to zero.
858
859                 // This same problem can happen with gostrings since the final offset is not
860                 // known yet, but could be unaligned after the relocation is resolved.
861                 // So gostrings are handled the same way.
862
863                 // This allows the MOVDload and MOVWload to be generated in more cases and
864                 // eliminates some offset and alignment checking in the rules file.
865
866                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
867                 ssagen.AddAux(&fromAddr, v)
868
869                 genAddr := false
870
871                 switch fromAddr.Name {
872                 case obj.NAME_EXTERN, obj.NAME_STATIC:
873                         // Special case for a rule that combines the bytes of a gostring.
874                         // The v alignment might seem OK, but we don't want to load it
875                         // using an offset because relocation comes later.
876                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
877                 default:
878                         genAddr = fromAddr.Offset%4 != 0
879                 }
880                 if genAddr {
881                         // Load full address into the temp register.
882                         p := s.Prog(ppc64.AMOVD)
883                         p.From.Type = obj.TYPE_ADDR
884                         p.From.Reg = v.Args[0].Reg()
885                         ssagen.AddAux(&p.From, v)
886                         // Load target using temp as base register
887                         // and offset zero. Setting NAME_NONE
888                         // prevents any extra offsets from being
889                         // added.
890                         p.To.Type = obj.TYPE_REG
891                         p.To.Reg = ppc64.REGTMP
892                         fromAddr.Reg = ppc64.REGTMP
893                         // Clear the offset field and other
894                         // information that might be used
895                         // by the assembler to add to the
896                         // final offset value.
897                         fromAddr.Offset = 0
898                         fromAddr.Name = obj.NAME_NONE
899                         fromAddr.Sym = nil
900                 }
901                 p := s.Prog(v.Op.Asm())
902                 p.From = fromAddr
903                 p.To.Type = obj.TYPE_REG
904                 p.To.Reg = v.Reg()
905                 break
906
907         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
908                 p := s.Prog(v.Op.Asm())
909                 p.From.Type = obj.TYPE_MEM
910                 p.From.Reg = v.Args[0].Reg()
911                 ssagen.AddAux(&p.From, v)
912                 p.To.Type = obj.TYPE_REG
913                 p.To.Reg = v.Reg()
914
915         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
916                 p := s.Prog(v.Op.Asm())
917                 p.From.Type = obj.TYPE_MEM
918                 p.From.Reg = v.Args[0].Reg()
919                 p.To.Type = obj.TYPE_REG
920                 p.To.Reg = v.Reg()
921
922         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
923                 p := s.Prog(v.Op.Asm())
924                 p.To.Type = obj.TYPE_MEM
925                 p.To.Reg = v.Args[0].Reg()
926                 p.From.Type = obj.TYPE_REG
927                 p.From.Reg = v.Args[1].Reg()
928
929         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
930                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
931                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
932                 p := s.Prog(v.Op.Asm())
933                 p.From.Type = obj.TYPE_MEM
934                 p.From.Reg = v.Args[0].Reg()
935                 p.From.Index = v.Args[1].Reg()
936                 p.To.Type = obj.TYPE_REG
937                 p.To.Reg = v.Reg()
938
939         case ssa.OpPPC64DCBT:
940                 p := s.Prog(v.Op.Asm())
941                 p.From.Type = obj.TYPE_MEM
942                 p.From.Reg = v.Args[0].Reg()
943                 p.To.Type = obj.TYPE_CONST
944                 p.To.Offset = v.AuxInt
945
946         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
947                 p := s.Prog(v.Op.Asm())
948                 p.From.Type = obj.TYPE_REG
949                 p.From.Reg = ppc64.REGZERO
950                 p.To.Type = obj.TYPE_MEM
951                 p.To.Reg = v.Args[0].Reg()
952                 ssagen.AddAux(&p.To, v)
953
954         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
955
956                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
957                 // to offset values that are a multiple of 4. If the offset field is not a
958                 // multiple of 4, then the full address of the store target is computed (base +
959                 // offset) and used as the new base register and the offset in the instruction
960                 // is set to 0.
961
962                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
963                 // and prevents checking of the offset value and alignment in the rules.
964
965                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
966                 ssagen.AddAux(&toAddr, v)
967
968                 if toAddr.Offset%4 != 0 {
969                         p := s.Prog(ppc64.AMOVD)
970                         p.From.Type = obj.TYPE_ADDR
971                         p.From.Reg = v.Args[0].Reg()
972                         ssagen.AddAux(&p.From, v)
973                         p.To.Type = obj.TYPE_REG
974                         p.To.Reg = ppc64.REGTMP
975                         toAddr.Reg = ppc64.REGTMP
976                         // Clear the offset field and other
977                         // information that might be used
978                         // by the assembler to add to the
979                         // final offset value.
980                         toAddr.Offset = 0
981                         toAddr.Name = obj.NAME_NONE
982                         toAddr.Sym = nil
983                 }
984                 p := s.Prog(v.Op.Asm())
985                 p.To = toAddr
986                 p.From.Type = obj.TYPE_REG
987                 if v.Op == ssa.OpPPC64MOVDstorezero {
988                         p.From.Reg = ppc64.REGZERO
989                 } else {
990                         p.From.Reg = v.Args[1].Reg()
991                 }
992
993         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
994                 p := s.Prog(v.Op.Asm())
995                 p.From.Type = obj.TYPE_REG
996                 p.From.Reg = v.Args[1].Reg()
997                 p.To.Type = obj.TYPE_MEM
998                 p.To.Reg = v.Args[0].Reg()
999                 ssagen.AddAux(&p.To, v)
1000
1001         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
1002                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
1003                 ssa.OpPPC64MOVHBRstoreidx:
1004                 p := s.Prog(v.Op.Asm())
1005                 p.From.Type = obj.TYPE_REG
1006                 p.From.Reg = v.Args[2].Reg()
1007                 p.To.Index = v.Args[1].Reg()
1008                 p.To.Type = obj.TYPE_MEM
1009                 p.To.Reg = v.Args[0].Reg()
1010
1011         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
1012                 // ISEL, ISELB
1013                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
1014                 // ISEL only accepts 0, 1, 2 condition values but the others can be
1015                 // achieved by swapping operand order.
1016                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
1017                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
1018                 // ISELB is used when a boolean result is needed, returning 0 or 1
1019                 p := s.Prog(ppc64.AISEL)
1020                 p.To.Type = obj.TYPE_REG
1021                 p.To.Reg = v.Reg()
1022                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
1023                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
1024                 if v.Op == ssa.OpPPC64ISEL {
1025                         r.Reg = v.Args[1].Reg()
1026                 }
1027                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
1028                 if v.AuxInt > 3 {
1029                         p.Reg = r.Reg
1030                         p.SetFrom3Reg(v.Args[0].Reg())
1031                 } else {
1032                         p.Reg = v.Args[0].Reg()
1033                         p.SetFrom3(r)
1034                 }
1035                 p.From.Type = obj.TYPE_CONST
1036                 p.From.Offset = v.AuxInt & 3
1037
1038         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
1039                 // The LoweredQuad code generation
1040                 // generates STXV instructions on
1041                 // power9. The Short variation is used
1042                 // if no loop is generated.
1043
1044                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
1045
1046                 // Set up loop counter in CTR, used by BC
1047                 // XXLXOR clears VS32
1048                 //       XXLXOR VS32,VS32,VS32
1049                 //       MOVD len/64,REG_TMP
1050                 //       MOVD REG_TMP,CTR
1051                 //       loop:
1052                 //       STXV VS32,0(R20)
1053                 //       STXV VS32,16(R20)
1054                 //       STXV VS32,32(R20)
1055                 //       STXV VS32,48(R20)
1056                 //       ADD  $64,R20
1057                 //       BC   16, 0, loop
1058
1059                 // Number of 64-byte loop iterations
1060                 ctr := v.AuxInt / 64
1061
1062                 // Remainder bytes
1063                 rem := v.AuxInt % 64
1064
1065                 // Only generate a loop if there is more
1066                 // than 1 iteration.
1067                 if ctr > 1 {
1068                         // Set up VS32 (V0) to hold 0s
1069                         p := s.Prog(ppc64.AXXLXOR)
1070                         p.From.Type = obj.TYPE_REG
1071                         p.From.Reg = ppc64.REG_VS32
1072                         p.To.Type = obj.TYPE_REG
1073                         p.To.Reg = ppc64.REG_VS32
1074                         p.Reg = ppc64.REG_VS32
1075
1076                         // Set up CTR loop counter
1077                         p = s.Prog(ppc64.AMOVD)
1078                         p.From.Type = obj.TYPE_CONST
1079                         p.From.Offset = ctr
1080                         p.To.Type = obj.TYPE_REG
1081                         p.To.Reg = ppc64.REGTMP
1082
1083                         p = s.Prog(ppc64.AMOVD)
1084                         p.From.Type = obj.TYPE_REG
1085                         p.From.Reg = ppc64.REGTMP
1086                         p.To.Type = obj.TYPE_REG
1087                         p.To.Reg = ppc64.REG_CTR
1088
1089                         // Don't generate padding for
1090                         // loops with few iterations.
1091                         if ctr > 3 {
1092                                 p = s.Prog(obj.APCALIGN)
1093                                 p.From.Type = obj.TYPE_CONST
1094                                 p.From.Offset = 16
1095                         }
1096
1097                         // generate 4 STXVs to zero 64 bytes
1098                         var top *obj.Prog
1099
1100                         p = s.Prog(ppc64.ASTXV)
1101                         p.From.Type = obj.TYPE_REG
1102                         p.From.Reg = ppc64.REG_VS32
1103                         p.To.Type = obj.TYPE_MEM
1104                         p.To.Reg = v.Args[0].Reg()
1105
1106                         //  Save the top of loop
1107                         if top == nil {
1108                                 top = p
1109                         }
1110                         p = s.Prog(ppc64.ASTXV)
1111                         p.From.Type = obj.TYPE_REG
1112                         p.From.Reg = ppc64.REG_VS32
1113                         p.To.Type = obj.TYPE_MEM
1114                         p.To.Reg = v.Args[0].Reg()
1115                         p.To.Offset = 16
1116
1117                         p = s.Prog(ppc64.ASTXV)
1118                         p.From.Type = obj.TYPE_REG
1119                         p.From.Reg = ppc64.REG_VS32
1120                         p.To.Type = obj.TYPE_MEM
1121                         p.To.Reg = v.Args[0].Reg()
1122                         p.To.Offset = 32
1123
1124                         p = s.Prog(ppc64.ASTXV)
1125                         p.From.Type = obj.TYPE_REG
1126                         p.From.Reg = ppc64.REG_VS32
1127                         p.To.Type = obj.TYPE_MEM
1128                         p.To.Reg = v.Args[0].Reg()
1129                         p.To.Offset = 48
1130
1131                         // Increment address for the
1132                         // 64 bytes just zeroed.
1133                         p = s.Prog(ppc64.AADD)
1134                         p.Reg = v.Args[0].Reg()
1135                         p.From.Type = obj.TYPE_CONST
1136                         p.From.Offset = 64
1137                         p.To.Type = obj.TYPE_REG
1138                         p.To.Reg = v.Args[0].Reg()
1139
1140                         // Branch back to top of loop
1141                         // based on CTR
1142                         // BC with BO_BCTR generates bdnz
1143                         p = s.Prog(ppc64.ABC)
1144                         p.From.Type = obj.TYPE_CONST
1145                         p.From.Offset = ppc64.BO_BCTR
1146                         p.Reg = ppc64.REG_R0
1147                         p.To.Type = obj.TYPE_BRANCH
1148                         p.To.SetTarget(top)
1149                 }
1150                 // When ctr == 1 the loop was not generated but
1151                 // there are at least 64 bytes to clear, so add
1152                 // that to the remainder to generate the code
1153                 // to clear those doublewords
1154                 if ctr == 1 {
1155                         rem += 64
1156                 }
1157
1158                 // Clear the remainder starting at offset zero
1159                 offset := int64(0)
1160
1161                 if rem >= 16 && ctr <= 1 {
1162                         // If the XXLXOR hasn't already been
1163                         // generated, do it here to initialize
1164                         // VS32 (V0) to 0.
1165                         p := s.Prog(ppc64.AXXLXOR)
1166                         p.From.Type = obj.TYPE_REG
1167                         p.From.Reg = ppc64.REG_VS32
1168                         p.To.Type = obj.TYPE_REG
1169                         p.To.Reg = ppc64.REG_VS32
1170                         p.Reg = ppc64.REG_VS32
1171                 }
1172                 // Generate STXV for 32 or 64
1173                 // bytes.
1174                 for rem >= 32 {
1175                         p := s.Prog(ppc64.ASTXV)
1176                         p.From.Type = obj.TYPE_REG
1177                         p.From.Reg = ppc64.REG_VS32
1178                         p.To.Type = obj.TYPE_MEM
1179                         p.To.Reg = v.Args[0].Reg()
1180                         p.To.Offset = offset
1181
1182                         p = s.Prog(ppc64.ASTXV)
1183                         p.From.Type = obj.TYPE_REG
1184                         p.From.Reg = ppc64.REG_VS32
1185                         p.To.Type = obj.TYPE_MEM
1186                         p.To.Reg = v.Args[0].Reg()
1187                         p.To.Offset = offset + 16
1188                         offset += 32
1189                         rem -= 32
1190                 }
1191                 // Generate 16 bytes
1192                 if rem >= 16 {
1193                         p := s.Prog(ppc64.ASTXV)
1194                         p.From.Type = obj.TYPE_REG
1195                         p.From.Reg = ppc64.REG_VS32
1196                         p.To.Type = obj.TYPE_MEM
1197                         p.To.Reg = v.Args[0].Reg()
1198                         p.To.Offset = offset
1199                         offset += 16
1200                         rem -= 16
1201                 }
1202
1203                 // first clear as many doublewords as possible
1204                 // then clear remaining sizes as available
1205                 for rem > 0 {
1206                         op, size := ppc64.AMOVB, int64(1)
1207                         switch {
1208                         case rem >= 8:
1209                                 op, size = ppc64.AMOVD, 8
1210                         case rem >= 4:
1211                                 op, size = ppc64.AMOVW, 4
1212                         case rem >= 2:
1213                                 op, size = ppc64.AMOVH, 2
1214                         }
1215                         p := s.Prog(op)
1216                         p.From.Type = obj.TYPE_REG
1217                         p.From.Reg = ppc64.REG_R0
1218                         p.To.Type = obj.TYPE_MEM
1219                         p.To.Reg = v.Args[0].Reg()
1220                         p.To.Offset = offset
1221                         rem -= size
1222                         offset += size
1223                 }
1224
1225         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1226
1227                 // Unaligned data doesn't hurt performance
1228                 // for these instructions on power8.
1229
1230                 // For sizes >= 64 generate a loop as follows:
1231
1232                 // Set up loop counter in CTR, used by BC
1233                 //       XXLXOR VS32,VS32,VS32
1234                 //       MOVD len/32,REG_TMP
1235                 //       MOVD REG_TMP,CTR
1236                 //       MOVD $16,REG_TMP
1237                 //       loop:
1238                 //       STXVD2X VS32,(R0)(R20)
1239                 //       STXVD2X VS32,(R31)(R20)
1240                 //       ADD  $32,R20
1241                 //       BC   16, 0, loop
1242                 //
1243                 // any remainder is done as described below
1244
1245                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1246                 // then handle the remainder
1247                 //      MOVD R0,(R20)
1248                 //      MOVD R0,8(R20)
1249                 // .... etc.
1250                 //
1251                 // the remainder bytes are cleared using one or more
1252                 // of the following instructions with the appropriate
1253                 // offsets depending which instructions are needed
1254                 //
1255                 //      MOVW R0,n1(R20) 4 bytes
1256                 //      MOVH R0,n2(R20) 2 bytes
1257                 //      MOVB R0,n3(R20) 1 byte
1258                 //
1259                 // 7 bytes: MOVW, MOVH, MOVB
1260                 // 6 bytes: MOVW, MOVH
1261                 // 5 bytes: MOVW, MOVB
1262                 // 3 bytes: MOVH, MOVB
1263
1264                 // each loop iteration does 32 bytes
1265                 ctr := v.AuxInt / 32
1266
1267                 // remainder bytes
1268                 rem := v.AuxInt % 32
1269
1270                 // only generate a loop if there is more
1271                 // than 1 iteration.
1272                 if ctr > 1 {
1273                         // Set up VS32 (V0) to hold 0s
1274                         p := s.Prog(ppc64.AXXLXOR)
1275                         p.From.Type = obj.TYPE_REG
1276                         p.From.Reg = ppc64.REG_VS32
1277                         p.To.Type = obj.TYPE_REG
1278                         p.To.Reg = ppc64.REG_VS32
1279                         p.Reg = ppc64.REG_VS32
1280
1281                         // Set up CTR loop counter
1282                         p = s.Prog(ppc64.AMOVD)
1283                         p.From.Type = obj.TYPE_CONST
1284                         p.From.Offset = ctr
1285                         p.To.Type = obj.TYPE_REG
1286                         p.To.Reg = ppc64.REGTMP
1287
1288                         p = s.Prog(ppc64.AMOVD)
1289                         p.From.Type = obj.TYPE_REG
1290                         p.From.Reg = ppc64.REGTMP
1291                         p.To.Type = obj.TYPE_REG
1292                         p.To.Reg = ppc64.REG_CTR
1293
1294                         // Set up R31 to hold index value 16
1295                         p = s.Prog(ppc64.AMOVD)
1296                         p.From.Type = obj.TYPE_CONST
1297                         p.From.Offset = 16
1298                         p.To.Type = obj.TYPE_REG
1299                         p.To.Reg = ppc64.REGTMP
1300
1301                         // Don't add padding for alignment
1302                         // with few loop iterations.
1303                         if ctr > 3 {
1304                                 p = s.Prog(obj.APCALIGN)
1305                                 p.From.Type = obj.TYPE_CONST
1306                                 p.From.Offset = 16
1307                         }
1308
1309                         // generate 2 STXVD2Xs, each storing 16 bytes
1310                         // when this is a loop then the top must be saved
1311                         var top *obj.Prog
1312                         // This is the top of loop
1313
1314                         p = s.Prog(ppc64.ASTXVD2X)
1315                         p.From.Type = obj.TYPE_REG
1316                         p.From.Reg = ppc64.REG_VS32
1317                         p.To.Type = obj.TYPE_MEM
1318                         p.To.Reg = v.Args[0].Reg()
1319                         p.To.Index = ppc64.REGZERO
1320                         // Save the top of loop
1321                         if top == nil {
1322                                 top = p
1323                         }
1324                         p = s.Prog(ppc64.ASTXVD2X)
1325                         p.From.Type = obj.TYPE_REG
1326                         p.From.Reg = ppc64.REG_VS32
1327                         p.To.Type = obj.TYPE_MEM
1328                         p.To.Reg = v.Args[0].Reg()
1329                         p.To.Index = ppc64.REGTMP
1330
1331                         // Increment address for the
1332                         // 4 doublewords just zeroed.
1333                         p = s.Prog(ppc64.AADD)
1334                         p.Reg = v.Args[0].Reg()
1335                         p.From.Type = obj.TYPE_CONST
1336                         p.From.Offset = 32
1337                         p.To.Type = obj.TYPE_REG
1338                         p.To.Reg = v.Args[0].Reg()
1339
1340                         // Branch back to top of loop
1341                         // based on CTR
1342                         // BC with BO_BCTR generates bdnz
1343                         p = s.Prog(ppc64.ABC)
1344                         p.From.Type = obj.TYPE_CONST
1345                         p.From.Offset = ppc64.BO_BCTR
1346                         p.Reg = ppc64.REG_R0
1347                         p.To.Type = obj.TYPE_BRANCH
1348                         p.To.SetTarget(top)
1349                 }
1350
1351                 // when ctr == 1 the loop was not generated but
1352                 // there are at least 32 bytes to clear, so add
1353                 // that to the remainder to generate the code
1354                 // to clear those doublewords
1355                 if ctr == 1 {
1356                         rem += 32
1357                 }
1358
1359                 // clear the remainder starting at offset zero
1360                 offset := int64(0)
1361
1362                 // first clear as many doublewords as possible
1363                 // then clear remaining sizes as available
1364                 for rem > 0 {
1365                         op, size := ppc64.AMOVB, int64(1)
1366                         switch {
1367                         case rem >= 8:
1368                                 op, size = ppc64.AMOVD, 8
1369                         case rem >= 4:
1370                                 op, size = ppc64.AMOVW, 4
1371                         case rem >= 2:
1372                                 op, size = ppc64.AMOVH, 2
1373                         }
1374                         p := s.Prog(op)
1375                         p.From.Type = obj.TYPE_REG
1376                         p.From.Reg = ppc64.REG_R0
1377                         p.To.Type = obj.TYPE_MEM
1378                         p.To.Reg = v.Args[0].Reg()
1379                         p.To.Offset = offset
1380                         rem -= size
1381                         offset += size
1382                 }
1383
1384         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1385
1386                 bytesPerLoop := int64(32)
1387                 // This will be used when moving more
1388                 // than 8 bytes.  Moves start with
1389                 // as many 8 byte moves as possible, then
1390                 // 4, 2, or 1 byte(s) as remaining.  This will
1391                 // work and be efficient for power8 or later.
1392                 // If there are 64 or more bytes, then a
1393                 // loop is generated to move 32 bytes and
1394                 // update the src and dst addresses on each
1395                 // iteration. When < 64 bytes, the appropriate
1396                 // number of moves are generated based on the
1397                 // size.
1398                 // When moving >= 64 bytes a loop is used
1399                 //      MOVD len/32,REG_TMP
1400                 //      MOVD REG_TMP,CTR
1401                 //      MOVD $16,REG_TMP
1402                 // top:
1403                 //      LXVD2X (R0)(R21),VS32
1404                 //      LXVD2X (R31)(R21),VS33
1405                 //      ADD $32,R21
1406                 //      STXVD2X VS32,(R0)(R20)
1407                 //      STXVD2X VS33,(R31)(R20)
1408                 //      ADD $32,R20
1409                 //      BC 16,0,top
1410                 // Bytes not moved by this loop are moved
1411                 // with a combination of the following instructions,
1412                 // starting with the largest sizes and generating as
1413                 // many as needed, using the appropriate offset value.
1414                 //      MOVD  n(R21),R31
1415                 //      MOVD  R31,n(R20)
1416                 //      MOVW  n1(R21),R31
1417                 //      MOVW  R31,n1(R20)
1418                 //      MOVH  n2(R21),R31
1419                 //      MOVH  R31,n2(R20)
1420                 //      MOVB  n3(R21),R31
1421                 //      MOVB  R31,n3(R20)
1422
1423                 // Each loop iteration moves 32 bytes
1424                 ctr := v.AuxInt / bytesPerLoop
1425
1426                 // Remainder after the loop
1427                 rem := v.AuxInt % bytesPerLoop
1428
1429                 dstReg := v.Args[0].Reg()
1430                 srcReg := v.Args[1].Reg()
1431
1432                 // The set of registers used here, must match the clobbered reg list
1433                 // in PPC64Ops.go.
1434                 offset := int64(0)
1435
1436                 // top of the loop
1437                 var top *obj.Prog
1438                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1439                 if ctr > 1 {
1440                         // Set up the CTR
1441                         p := s.Prog(ppc64.AMOVD)
1442                         p.From.Type = obj.TYPE_CONST
1443                         p.From.Offset = ctr
1444                         p.To.Type = obj.TYPE_REG
1445                         p.To.Reg = ppc64.REGTMP
1446
1447                         p = s.Prog(ppc64.AMOVD)
1448                         p.From.Type = obj.TYPE_REG
1449                         p.From.Reg = ppc64.REGTMP
1450                         p.To.Type = obj.TYPE_REG
1451                         p.To.Reg = ppc64.REG_CTR
1452
1453                         // Use REGTMP as index reg
1454                         p = s.Prog(ppc64.AMOVD)
1455                         p.From.Type = obj.TYPE_CONST
1456                         p.From.Offset = 16
1457                         p.To.Type = obj.TYPE_REG
1458                         p.To.Reg = ppc64.REGTMP
1459
1460                         // Don't add padding for
1461                         // alignment with small iteration
1462                         // counts.
1463                         if ctr > 3 {
1464                                 p = s.Prog(obj.APCALIGN)
1465                                 p.From.Type = obj.TYPE_CONST
1466                                 p.From.Offset = 16
1467                         }
1468
1469                         // Generate 16 byte loads and stores.
1470                         // Use temp register for index (16)
1471                         // on the second one.
1472
1473                         p = s.Prog(ppc64.ALXVD2X)
1474                         p.From.Type = obj.TYPE_MEM
1475                         p.From.Reg = srcReg
1476                         p.From.Index = ppc64.REGZERO
1477                         p.To.Type = obj.TYPE_REG
1478                         p.To.Reg = ppc64.REG_VS32
1479                         if top == nil {
1480                                 top = p
1481                         }
1482                         p = s.Prog(ppc64.ALXVD2X)
1483                         p.From.Type = obj.TYPE_MEM
1484                         p.From.Reg = srcReg
1485                         p.From.Index = ppc64.REGTMP
1486                         p.To.Type = obj.TYPE_REG
1487                         p.To.Reg = ppc64.REG_VS33
1488
1489                         // increment the src reg for next iteration
1490                         p = s.Prog(ppc64.AADD)
1491                         p.Reg = srcReg
1492                         p.From.Type = obj.TYPE_CONST
1493                         p.From.Offset = bytesPerLoop
1494                         p.To.Type = obj.TYPE_REG
1495                         p.To.Reg = srcReg
1496
1497                         // generate 16 byte stores
1498                         p = s.Prog(ppc64.ASTXVD2X)
1499                         p.From.Type = obj.TYPE_REG
1500                         p.From.Reg = ppc64.REG_VS32
1501                         p.To.Type = obj.TYPE_MEM
1502                         p.To.Reg = dstReg
1503                         p.To.Index = ppc64.REGZERO
1504
1505                         p = s.Prog(ppc64.ASTXVD2X)
1506                         p.From.Type = obj.TYPE_REG
1507                         p.From.Reg = ppc64.REG_VS33
1508                         p.To.Type = obj.TYPE_MEM
1509                         p.To.Reg = dstReg
1510                         p.To.Index = ppc64.REGTMP
1511
1512                         // increment the dst reg for next iteration
1513                         p = s.Prog(ppc64.AADD)
1514                         p.Reg = dstReg
1515                         p.From.Type = obj.TYPE_CONST
1516                         p.From.Offset = bytesPerLoop
1517                         p.To.Type = obj.TYPE_REG
1518                         p.To.Reg = dstReg
1519
1520                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1521                         // to loop top.
1522                         p = s.Prog(ppc64.ABC)
1523                         p.From.Type = obj.TYPE_CONST
1524                         p.From.Offset = ppc64.BO_BCTR
1525                         p.Reg = ppc64.REG_R0
1526                         p.To.Type = obj.TYPE_BRANCH
1527                         p.To.SetTarget(top)
1528
1529                         // srcReg and dstReg were incremented in the loop, so
1530                         // later instructions start with offset 0.
1531                         offset = int64(0)
1532                 }
1533
1534                 // No loop was generated for one iteration, so
1535                 // add 32 bytes to the remainder to move those bytes.
1536                 if ctr == 1 {
1537                         rem += bytesPerLoop
1538                 }
1539
1540                 if rem >= 16 {
1541                         // Generate 16 byte loads and stores.
1542                         // Use temp register for index (value 16)
1543                         // on the second one.
1544                         p := s.Prog(ppc64.ALXVD2X)
1545                         p.From.Type = obj.TYPE_MEM
1546                         p.From.Reg = srcReg
1547                         p.From.Index = ppc64.REGZERO
1548                         p.To.Type = obj.TYPE_REG
1549                         p.To.Reg = ppc64.REG_VS32
1550
1551                         p = s.Prog(ppc64.ASTXVD2X)
1552                         p.From.Type = obj.TYPE_REG
1553                         p.From.Reg = ppc64.REG_VS32
1554                         p.To.Type = obj.TYPE_MEM
1555                         p.To.Reg = dstReg
1556                         p.To.Index = ppc64.REGZERO
1557
1558                         offset = 16
1559                         rem -= 16
1560
1561                         if rem >= 16 {
1562                                 // Use REGTMP as index reg
1563                                 p := s.Prog(ppc64.AMOVD)
1564                                 p.From.Type = obj.TYPE_CONST
1565                                 p.From.Offset = 16
1566                                 p.To.Type = obj.TYPE_REG
1567                                 p.To.Reg = ppc64.REGTMP
1568
1569                                 p = s.Prog(ppc64.ALXVD2X)
1570                                 p.From.Type = obj.TYPE_MEM
1571                                 p.From.Reg = srcReg
1572                                 p.From.Index = ppc64.REGTMP
1573                                 p.To.Type = obj.TYPE_REG
1574                                 p.To.Reg = ppc64.REG_VS32
1575
1576                                 p = s.Prog(ppc64.ASTXVD2X)
1577                                 p.From.Type = obj.TYPE_REG
1578                                 p.From.Reg = ppc64.REG_VS32
1579                                 p.To.Type = obj.TYPE_MEM
1580                                 p.To.Reg = dstReg
1581                                 p.To.Index = ppc64.REGTMP
1582
1583                                 offset = 32
1584                                 rem -= 16
1585                         }
1586                 }
1587
1588                 // Generate all the remaining load and store pairs, starting with
1589                 // as many 8 byte moves as possible, then 4, 2, 1.
1590                 for rem > 0 {
1591                         op, size := ppc64.AMOVB, int64(1)
1592                         switch {
1593                         case rem >= 8:
1594                                 op, size = ppc64.AMOVD, 8
1595                         case rem >= 4:
1596                                 op, size = ppc64.AMOVWZ, 4
1597                         case rem >= 2:
1598                                 op, size = ppc64.AMOVH, 2
1599                         }
1600                         // Load
1601                         p := s.Prog(op)
1602                         p.To.Type = obj.TYPE_REG
1603                         p.To.Reg = ppc64.REGTMP
1604                         p.From.Type = obj.TYPE_MEM
1605                         p.From.Reg = srcReg
1606                         p.From.Offset = offset
1607
1608                         // Store
1609                         p = s.Prog(op)
1610                         p.From.Type = obj.TYPE_REG
1611                         p.From.Reg = ppc64.REGTMP
1612                         p.To.Type = obj.TYPE_MEM
1613                         p.To.Reg = dstReg
1614                         p.To.Offset = offset
1615                         rem -= size
1616                         offset += size
1617                 }
1618
1619         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1620                 bytesPerLoop := int64(64)
1621                 // This is used when moving more
1622                 // than 8 bytes on power9.  Moves start with
1623                 // as many 8 byte moves as possible, then
1624                 // 4, 2, or 1 byte(s) as remaining.  This will
1625                 // work and be efficient for power8 or later.
1626                 // If there are 64 or more bytes, then a
1627                 // loop is generated to move 32 bytes and
1628                 // update the src and dst addresses on each
1629                 // iteration. When < 64 bytes, the appropriate
1630                 // number of moves are generated based on the
1631                 // size.
1632                 // When moving >= 64 bytes a loop is used
1633                 //      MOVD len/32,REG_TMP
1634                 //      MOVD REG_TMP,CTR
1635                 // top:
1636                 //      LXV 0(R21),VS32
1637                 //      LXV 16(R21),VS33
1638                 //      ADD $32,R21
1639                 //      STXV VS32,0(R20)
1640                 //      STXV VS33,16(R20)
1641                 //      ADD $32,R20
1642                 //      BC 16,0,top
1643                 // Bytes not moved by this loop are moved
1644                 // with a combination of the following instructions,
1645                 // starting with the largest sizes and generating as
1646                 // many as needed, using the appropriate offset value.
1647                 //      MOVD  n(R21),R31
1648                 //      MOVD  R31,n(R20)
1649                 //      MOVW  n1(R21),R31
1650                 //      MOVW  R31,n1(R20)
1651                 //      MOVH  n2(R21),R31
1652                 //      MOVH  R31,n2(R20)
1653                 //      MOVB  n3(R21),R31
1654                 //      MOVB  R31,n3(R20)
1655
1656                 // Each loop iteration moves 64 bytes
1657                 ctr := v.AuxInt / bytesPerLoop
1658
1659                 // Remainder after the loop
1660                 rem := v.AuxInt % bytesPerLoop
1661
1662                 dstReg := v.Args[0].Reg()
1663                 srcReg := v.Args[1].Reg()
1664
1665                 offset := int64(0)
1666
1667                 // top of the loop
1668                 var top *obj.Prog
1669
1670                 // Only generate looping code when loop counter is > 1 for >= 128 bytes
1671                 if ctr > 1 {
1672                         // Set up the CTR
1673                         p := s.Prog(ppc64.AMOVD)
1674                         p.From.Type = obj.TYPE_CONST
1675                         p.From.Offset = ctr
1676                         p.To.Type = obj.TYPE_REG
1677                         p.To.Reg = ppc64.REGTMP
1678
1679                         p = s.Prog(ppc64.AMOVD)
1680                         p.From.Type = obj.TYPE_REG
1681                         p.From.Reg = ppc64.REGTMP
1682                         p.To.Type = obj.TYPE_REG
1683                         p.To.Reg = ppc64.REG_CTR
1684
1685                         p = s.Prog(obj.APCALIGN)
1686                         p.From.Type = obj.TYPE_CONST
1687                         p.From.Offset = 16
1688
1689                         // Generate 16 byte loads and stores.
1690                         p = s.Prog(ppc64.ALXV)
1691                         p.From.Type = obj.TYPE_MEM
1692                         p.From.Reg = srcReg
1693                         p.From.Offset = offset
1694                         p.To.Type = obj.TYPE_REG
1695                         p.To.Reg = ppc64.REG_VS32
1696                         if top == nil {
1697                                 top = p
1698                         }
1699                         p = s.Prog(ppc64.ALXV)
1700                         p.From.Type = obj.TYPE_MEM
1701                         p.From.Reg = srcReg
1702                         p.From.Offset = offset + 16
1703                         p.To.Type = obj.TYPE_REG
1704                         p.To.Reg = ppc64.REG_VS33
1705
1706                         // generate 16 byte stores
1707                         p = s.Prog(ppc64.ASTXV)
1708                         p.From.Type = obj.TYPE_REG
1709                         p.From.Reg = ppc64.REG_VS32
1710                         p.To.Type = obj.TYPE_MEM
1711                         p.To.Reg = dstReg
1712                         p.To.Offset = offset
1713
1714                         p = s.Prog(ppc64.ASTXV)
1715                         p.From.Type = obj.TYPE_REG
1716                         p.From.Reg = ppc64.REG_VS33
1717                         p.To.Type = obj.TYPE_MEM
1718                         p.To.Reg = dstReg
1719                         p.To.Offset = offset + 16
1720
1721                         // Generate 16 byte loads and stores.
1722                         p = s.Prog(ppc64.ALXV)
1723                         p.From.Type = obj.TYPE_MEM
1724                         p.From.Reg = srcReg
1725                         p.From.Offset = offset + 32
1726                         p.To.Type = obj.TYPE_REG
1727                         p.To.Reg = ppc64.REG_VS32
1728
1729                         p = s.Prog(ppc64.ALXV)
1730                         p.From.Type = obj.TYPE_MEM
1731                         p.From.Reg = srcReg
1732                         p.From.Offset = offset + 48
1733                         p.To.Type = obj.TYPE_REG
1734                         p.To.Reg = ppc64.REG_VS33
1735
1736                         // generate 16 byte stores
1737                         p = s.Prog(ppc64.ASTXV)
1738                         p.From.Type = obj.TYPE_REG
1739                         p.From.Reg = ppc64.REG_VS32
1740                         p.To.Type = obj.TYPE_MEM
1741                         p.To.Reg = dstReg
1742                         p.To.Offset = offset + 32
1743
1744                         p = s.Prog(ppc64.ASTXV)
1745                         p.From.Type = obj.TYPE_REG
1746                         p.From.Reg = ppc64.REG_VS33
1747                         p.To.Type = obj.TYPE_MEM
1748                         p.To.Reg = dstReg
1749                         p.To.Offset = offset + 48
1750
1751                         // increment the src reg for next iteration
1752                         p = s.Prog(ppc64.AADD)
1753                         p.Reg = srcReg
1754                         p.From.Type = obj.TYPE_CONST
1755                         p.From.Offset = bytesPerLoop
1756                         p.To.Type = obj.TYPE_REG
1757                         p.To.Reg = srcReg
1758
1759                         // increment the dst reg for next iteration
1760                         p = s.Prog(ppc64.AADD)
1761                         p.Reg = dstReg
1762                         p.From.Type = obj.TYPE_CONST
1763                         p.From.Offset = bytesPerLoop
1764                         p.To.Type = obj.TYPE_REG
1765                         p.To.Reg = dstReg
1766
1767                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1768                         // to loop top.
1769                         p = s.Prog(ppc64.ABC)
1770                         p.From.Type = obj.TYPE_CONST
1771                         p.From.Offset = ppc64.BO_BCTR
1772                         p.Reg = ppc64.REG_R0
1773                         p.To.Type = obj.TYPE_BRANCH
1774                         p.To.SetTarget(top)
1775
1776                         // srcReg and dstReg were incremented in the loop, so
1777                         // later instructions start with offset 0.
1778                         offset = int64(0)
1779                 }
1780
1781                 // No loop was generated for one iteration, so
1782                 // add 64 bytes to the remainder to move those bytes.
1783                 if ctr == 1 {
1784                         rem += bytesPerLoop
1785                 }
1786                 if rem >= 32 {
1787                         p := s.Prog(ppc64.ALXV)
1788                         p.From.Type = obj.TYPE_MEM
1789                         p.From.Reg = srcReg
1790                         p.To.Type = obj.TYPE_REG
1791                         p.To.Reg = ppc64.REG_VS32
1792
1793                         p = s.Prog(ppc64.ALXV)
1794                         p.From.Type = obj.TYPE_MEM
1795                         p.From.Reg = srcReg
1796                         p.From.Offset = 16
1797                         p.To.Type = obj.TYPE_REG
1798                         p.To.Reg = ppc64.REG_VS33
1799
1800                         p = s.Prog(ppc64.ASTXV)
1801                         p.From.Type = obj.TYPE_REG
1802                         p.From.Reg = ppc64.REG_VS32
1803                         p.To.Type = obj.TYPE_MEM
1804                         p.To.Reg = dstReg
1805
1806                         p = s.Prog(ppc64.ASTXV)
1807                         p.From.Type = obj.TYPE_REG
1808                         p.From.Reg = ppc64.REG_VS33
1809                         p.To.Type = obj.TYPE_MEM
1810                         p.To.Reg = dstReg
1811                         p.To.Offset = 16
1812
1813                         offset = 32
1814                         rem -= 32
1815                 }
1816
1817                 if rem >= 16 {
1818                         // Generate 16 byte loads and stores.
1819                         p := s.Prog(ppc64.ALXV)
1820                         p.From.Type = obj.TYPE_MEM
1821                         p.From.Reg = srcReg
1822                         p.From.Offset = offset
1823                         p.To.Type = obj.TYPE_REG
1824                         p.To.Reg = ppc64.REG_VS32
1825
1826                         p = s.Prog(ppc64.ASTXV)
1827                         p.From.Type = obj.TYPE_REG
1828                         p.From.Reg = ppc64.REG_VS32
1829                         p.To.Type = obj.TYPE_MEM
1830                         p.To.Reg = dstReg
1831                         p.To.Offset = offset
1832
1833                         offset += 16
1834                         rem -= 16
1835
1836                         if rem >= 16 {
1837                                 p := s.Prog(ppc64.ALXV)
1838                                 p.From.Type = obj.TYPE_MEM
1839                                 p.From.Reg = srcReg
1840                                 p.From.Offset = offset
1841                                 p.To.Type = obj.TYPE_REG
1842                                 p.To.Reg = ppc64.REG_VS32
1843
1844                                 p = s.Prog(ppc64.ASTXV)
1845                                 p.From.Type = obj.TYPE_REG
1846                                 p.From.Reg = ppc64.REG_VS32
1847                                 p.To.Type = obj.TYPE_MEM
1848                                 p.To.Reg = dstReg
1849                                 p.To.Offset = offset
1850
1851                                 offset += 16
1852                                 rem -= 16
1853                         }
1854                 }
1855                 // Generate all the remaining load and store pairs, starting with
1856                 // as many 8 byte moves as possible, then 4, 2, 1.
1857                 for rem > 0 {
1858                         op, size := ppc64.AMOVB, int64(1)
1859                         switch {
1860                         case rem >= 8:
1861                                 op, size = ppc64.AMOVD, 8
1862                         case rem >= 4:
1863                                 op, size = ppc64.AMOVWZ, 4
1864                         case rem >= 2:
1865                                 op, size = ppc64.AMOVH, 2
1866                         }
1867                         // Load
1868                         p := s.Prog(op)
1869                         p.To.Type = obj.TYPE_REG
1870                         p.To.Reg = ppc64.REGTMP
1871                         p.From.Type = obj.TYPE_MEM
1872                         p.From.Reg = srcReg
1873                         p.From.Offset = offset
1874
1875                         // Store
1876                         p = s.Prog(op)
1877                         p.From.Type = obj.TYPE_REG
1878                         p.From.Reg = ppc64.REGTMP
1879                         p.To.Type = obj.TYPE_MEM
1880                         p.To.Reg = dstReg
1881                         p.To.Offset = offset
1882                         rem -= size
1883                         offset += size
1884                 }
1885
1886         case ssa.OpPPC64CALLstatic:
1887                 s.Call(v)
1888
1889         case ssa.OpPPC64CALLtail:
1890                 s.TailCall(v)
1891
1892         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1893                 p := s.Prog(ppc64.AMOVD)
1894                 p.From.Type = obj.TYPE_REG
1895                 p.From.Reg = v.Args[0].Reg()
1896                 p.To.Type = obj.TYPE_REG
1897                 p.To.Reg = ppc64.REG_LR
1898
1899                 if v.Args[0].Reg() != ppc64.REG_R12 {
1900                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1901                 }
1902
1903                 pp := s.Call(v)
1904                 pp.To.Reg = ppc64.REG_LR
1905
1906                 // Insert a hint this is not a subroutine return.
1907                 pp.SetFrom3Const(1)
1908
1909                 if base.Ctxt.Flag_shared {
1910                         // When compiling Go into PIC, the function we just
1911                         // called via pointer might have been implemented in
1912                         // a separate module and so overwritten the TOC
1913                         // pointer in R2; reload it.
1914                         q := s.Prog(ppc64.AMOVD)
1915                         q.From.Type = obj.TYPE_MEM
1916                         q.From.Offset = 24
1917                         q.From.Reg = ppc64.REGSP
1918                         q.To.Type = obj.TYPE_REG
1919                         q.To.Reg = ppc64.REG_R2
1920                 }
1921
1922         case ssa.OpPPC64LoweredWB:
1923                 p := s.Prog(obj.ACALL)
1924                 p.To.Type = obj.TYPE_MEM
1925                 p.To.Name = obj.NAME_EXTERN
1926                 p.To.Sym = v.Aux.(*obj.LSym)
1927
1928         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1929                 p := s.Prog(obj.ACALL)
1930                 p.To.Type = obj.TYPE_MEM
1931                 p.To.Name = obj.NAME_EXTERN
1932                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1933                 s.UseArgs(16) // space used in callee args area by assembly stubs
1934
1935         case ssa.OpPPC64LoweredNilCheck:
1936                 if buildcfg.GOOS == "aix" {
1937                         // CMP Rarg0, R0
1938                         // BNE 2(PC)
1939                         // STW R0, 0(R0)
1940                         // NOP (so the BNE has somewhere to land)
1941
1942                         // CMP Rarg0, R0
1943                         p := s.Prog(ppc64.ACMP)
1944                         p.From.Type = obj.TYPE_REG
1945                         p.From.Reg = v.Args[0].Reg()
1946                         p.To.Type = obj.TYPE_REG
1947                         p.To.Reg = ppc64.REG_R0
1948
1949                         // BNE 2(PC)
1950                         p2 := s.Prog(ppc64.ABNE)
1951                         p2.To.Type = obj.TYPE_BRANCH
1952
1953                         // STW R0, 0(R0)
1954                         // Write at 0 is forbidden and will trigger a SIGSEGV
1955                         p = s.Prog(ppc64.AMOVW)
1956                         p.From.Type = obj.TYPE_REG
1957                         p.From.Reg = ppc64.REG_R0
1958                         p.To.Type = obj.TYPE_MEM
1959                         p.To.Reg = ppc64.REG_R0
1960
1961                         // NOP (so the BNE has somewhere to land)
1962                         nop := s.Prog(obj.ANOP)
1963                         p2.To.SetTarget(nop)
1964
1965                 } else {
1966                         // Issue a load which will fault if arg is nil.
1967                         p := s.Prog(ppc64.AMOVBZ)
1968                         p.From.Type = obj.TYPE_MEM
1969                         p.From.Reg = v.Args[0].Reg()
1970                         ssagen.AddAux(&p.From, v)
1971                         p.To.Type = obj.TYPE_REG
1972                         p.To.Reg = ppc64.REGTMP
1973                 }
1974                 if logopt.Enabled() {
1975                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1976                 }
1977                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1978                         base.WarnfAt(v.Pos, "generated nil check")
1979                 }
1980
1981         // These should be resolved by rules and not make it here.
1982         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1983                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1984                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1985                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1986         case ssa.OpPPC64InvertFlags:
1987                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1988         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1989                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1990         case ssa.OpClobber, ssa.OpClobberReg:
1991                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1992         default:
1993                 v.Fatalf("genValue not implemented: %s", v.LongString())
1994         }
1995 }
1996
1997 var blockJump = [...]struct {
1998         asm, invasm     obj.As
1999         asmeq, invasmun bool
2000 }{
2001         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
2002         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
2003
2004         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
2005         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
2006         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
2007         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
2008
2009         // TODO: need to work FP comparisons into block jumps
2010         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
2011         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
2012         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
2013         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
2014 }
2015
2016 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
2017         switch b.Kind {
2018         case ssa.BlockDefer:
2019                 // defer returns in R3:
2020                 // 0 if we should continue executing
2021                 // 1 if we should jump to deferreturn call
2022                 p := s.Prog(ppc64.ACMP)
2023                 p.From.Type = obj.TYPE_REG
2024                 p.From.Reg = ppc64.REG_R3
2025                 p.To.Type = obj.TYPE_REG
2026                 p.To.Reg = ppc64.REG_R0
2027
2028                 p = s.Prog(ppc64.ABNE)
2029                 p.To.Type = obj.TYPE_BRANCH
2030                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
2031                 if b.Succs[0].Block() != next {
2032                         p := s.Prog(obj.AJMP)
2033                         p.To.Type = obj.TYPE_BRANCH
2034                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2035                 }
2036
2037         case ssa.BlockPlain:
2038                 if b.Succs[0].Block() != next {
2039                         p := s.Prog(obj.AJMP)
2040                         p.To.Type = obj.TYPE_BRANCH
2041                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2042                 }
2043         case ssa.BlockExit, ssa.BlockRetJmp:
2044         case ssa.BlockRet:
2045                 s.Prog(obj.ARET)
2046
2047         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
2048                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
2049                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
2050                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
2051                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
2052                 jmp := blockJump[b.Kind]
2053                 switch next {
2054                 case b.Succs[0].Block():
2055                         s.Br(jmp.invasm, b.Succs[1].Block())
2056                         if jmp.invasmun {
2057                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2058                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2059                         }
2060                 case b.Succs[1].Block():
2061                         s.Br(jmp.asm, b.Succs[0].Block())
2062                         if jmp.asmeq {
2063                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2064                         }
2065                 default:
2066                         if b.Likely != ssa.BranchUnlikely {
2067                                 s.Br(jmp.asm, b.Succs[0].Block())
2068                                 if jmp.asmeq {
2069                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2070                                 }
2071                                 s.Br(obj.AJMP, b.Succs[1].Block())
2072                         } else {
2073                                 s.Br(jmp.invasm, b.Succs[1].Block())
2074                                 if jmp.invasmun {
2075                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2076                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2077                                 }
2078                                 s.Br(obj.AJMP, b.Succs[0].Block())
2079                         }
2080                 }
2081         default:
2082                 b.Fatalf("branch not implemented: %s", b.LongString())
2083         }
2084 }
2085
2086 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2087         p := s.Prog(loadByType(t))
2088         p.From.Type = obj.TYPE_MEM
2089         p.From.Name = obj.NAME_AUTO
2090         p.From.Sym = n.Linksym()
2091         p.From.Offset = n.FrameOffset() + off
2092         p.To.Type = obj.TYPE_REG
2093         p.To.Reg = reg
2094         return p
2095 }
2096
2097 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2098         p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2099         p.To.Name = obj.NAME_PARAM
2100         p.To.Sym = n.Linksym()
2101         p.Pos = p.Pos.WithNotStmt()
2102         return p
2103 }