]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
[dev.regabi] cmd/compile: split out package objw [generated]
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/gc"
10         "cmd/compile/internal/ir"
11         "cmd/compile/internal/logopt"
12         "cmd/compile/internal/ssa"
13         "cmd/compile/internal/types"
14         "cmd/internal/obj"
15         "cmd/internal/obj/ppc64"
16         "cmd/internal/objabi"
17         "math"
18         "strings"
19 )
20
21 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
22 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
23         //      flive := b.FlagsLiveAtEnd
24         //      if b.Control != nil && b.Control.Type.IsFlags() {
25         //              flive = true
26         //      }
27         //      for i := len(b.Values) - 1; i >= 0; i-- {
28         //              v := b.Values[i]
29         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
30         //                      // The "mark" is any non-nil Aux value.
31         //                      v.Aux = v
32         //              }
33         //              if v.Type.IsFlags() {
34         //                      flive = false
35         //              }
36         //              for _, a := range v.Args {
37         //                      if a.Type.IsFlags() {
38         //                              flive = true
39         //                      }
40         //              }
41         //      }
42 }
43
44 // loadByType returns the load instruction of the given type.
45 func loadByType(t *types.Type) obj.As {
46         if t.IsFloat() {
47                 switch t.Size() {
48                 case 4:
49                         return ppc64.AFMOVS
50                 case 8:
51                         return ppc64.AFMOVD
52                 }
53         } else {
54                 switch t.Size() {
55                 case 1:
56                         if t.IsSigned() {
57                                 return ppc64.AMOVB
58                         } else {
59                                 return ppc64.AMOVBZ
60                         }
61                 case 2:
62                         if t.IsSigned() {
63                                 return ppc64.AMOVH
64                         } else {
65                                 return ppc64.AMOVHZ
66                         }
67                 case 4:
68                         if t.IsSigned() {
69                                 return ppc64.AMOVW
70                         } else {
71                                 return ppc64.AMOVWZ
72                         }
73                 case 8:
74                         return ppc64.AMOVD
75                 }
76         }
77         panic("bad load type")
78 }
79
80 // storeByType returns the store instruction of the given type.
81 func storeByType(t *types.Type) obj.As {
82         if t.IsFloat() {
83                 switch t.Size() {
84                 case 4:
85                         return ppc64.AFMOVS
86                 case 8:
87                         return ppc64.AFMOVD
88                 }
89         } else {
90                 switch t.Size() {
91                 case 1:
92                         return ppc64.AMOVB
93                 case 2:
94                         return ppc64.AMOVH
95                 case 4:
96                         return ppc64.AMOVW
97                 case 8:
98                         return ppc64.AMOVD
99                 }
100         }
101         panic("bad store type")
102 }
103
104 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
105         switch v.Op {
106         case ssa.OpCopy:
107                 t := v.Type
108                 if t.IsMemory() {
109                         return
110                 }
111                 x := v.Args[0].Reg()
112                 y := v.Reg()
113                 if x != y {
114                         rt := obj.TYPE_REG
115                         op := ppc64.AMOVD
116
117                         if t.IsFloat() {
118                                 op = ppc64.AFMOVD
119                         }
120                         p := s.Prog(op)
121                         p.From.Type = rt
122                         p.From.Reg = x
123                         p.To.Type = rt
124                         p.To.Reg = y
125                 }
126
127         case ssa.OpPPC64LoweredMuluhilo:
128                 // MULHDU       Rarg1, Rarg0, Reg0
129                 // MULLD        Rarg1, Rarg0, Reg1
130                 r0 := v.Args[0].Reg()
131                 r1 := v.Args[1].Reg()
132                 p := s.Prog(ppc64.AMULHDU)
133                 p.From.Type = obj.TYPE_REG
134                 p.From.Reg = r1
135                 p.Reg = r0
136                 p.To.Type = obj.TYPE_REG
137                 p.To.Reg = v.Reg0()
138                 p1 := s.Prog(ppc64.AMULLD)
139                 p1.From.Type = obj.TYPE_REG
140                 p1.From.Reg = r1
141                 p1.Reg = r0
142                 p1.To.Type = obj.TYPE_REG
143                 p1.To.Reg = v.Reg1()
144
145         case ssa.OpPPC64LoweredAdd64Carry:
146                 // ADDC         Rarg2, -1, Rtmp
147                 // ADDE         Rarg1, Rarg0, Reg0
148                 // ADDZE        Rzero, Reg1
149                 r0 := v.Args[0].Reg()
150                 r1 := v.Args[1].Reg()
151                 r2 := v.Args[2].Reg()
152                 p := s.Prog(ppc64.AADDC)
153                 p.From.Type = obj.TYPE_CONST
154                 p.From.Offset = -1
155                 p.Reg = r2
156                 p.To.Type = obj.TYPE_REG
157                 p.To.Reg = ppc64.REGTMP
158                 p1 := s.Prog(ppc64.AADDE)
159                 p1.From.Type = obj.TYPE_REG
160                 p1.From.Reg = r1
161                 p1.Reg = r0
162                 p1.To.Type = obj.TYPE_REG
163                 p1.To.Reg = v.Reg0()
164                 p2 := s.Prog(ppc64.AADDZE)
165                 p2.From.Type = obj.TYPE_REG
166                 p2.From.Reg = ppc64.REGZERO
167                 p2.To.Type = obj.TYPE_REG
168                 p2.To.Reg = v.Reg1()
169
170         case ssa.OpPPC64LoweredAtomicAnd8,
171                 ssa.OpPPC64LoweredAtomicAnd32,
172                 ssa.OpPPC64LoweredAtomicOr8,
173                 ssa.OpPPC64LoweredAtomicOr32:
174                 // LWSYNC
175                 // LBAR/LWAR    (Rarg0), Rtmp
176                 // AND/OR       Rarg1, Rtmp
177                 // STBCCC/STWCCC Rtmp, (Rarg0)
178                 // BNE          -3(PC)
179                 ld := ppc64.ALBAR
180                 st := ppc64.ASTBCCC
181                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
182                         ld = ppc64.ALWAR
183                         st = ppc64.ASTWCCC
184                 }
185                 r0 := v.Args[0].Reg()
186                 r1 := v.Args[1].Reg()
187                 // LWSYNC - Assuming shared data not write-through-required nor
188                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
189                 plwsync := s.Prog(ppc64.ALWSYNC)
190                 plwsync.To.Type = obj.TYPE_NONE
191                 // LBAR or LWAR
192                 p := s.Prog(ld)
193                 p.From.Type = obj.TYPE_MEM
194                 p.From.Reg = r0
195                 p.To.Type = obj.TYPE_REG
196                 p.To.Reg = ppc64.REGTMP
197                 // AND/OR reg1,out
198                 p1 := s.Prog(v.Op.Asm())
199                 p1.From.Type = obj.TYPE_REG
200                 p1.From.Reg = r1
201                 p1.To.Type = obj.TYPE_REG
202                 p1.To.Reg = ppc64.REGTMP
203                 // STBCCC or STWCCC
204                 p2 := s.Prog(st)
205                 p2.From.Type = obj.TYPE_REG
206                 p2.From.Reg = ppc64.REGTMP
207                 p2.To.Type = obj.TYPE_MEM
208                 p2.To.Reg = r0
209                 p2.RegTo2 = ppc64.REGTMP
210                 // BNE retry
211                 p3 := s.Prog(ppc64.ABNE)
212                 p3.To.Type = obj.TYPE_BRANCH
213                 p3.To.SetTarget(p)
214
215         case ssa.OpPPC64LoweredAtomicAdd32,
216                 ssa.OpPPC64LoweredAtomicAdd64:
217                 // LWSYNC
218                 // LDAR/LWAR    (Rarg0), Rout
219                 // ADD          Rarg1, Rout
220                 // STDCCC/STWCCC Rout, (Rarg0)
221                 // BNE         -3(PC)
222                 // MOVW         Rout,Rout (if Add32)
223                 ld := ppc64.ALDAR
224                 st := ppc64.ASTDCCC
225                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
226                         ld = ppc64.ALWAR
227                         st = ppc64.ASTWCCC
228                 }
229                 r0 := v.Args[0].Reg()
230                 r1 := v.Args[1].Reg()
231                 out := v.Reg0()
232                 // LWSYNC - Assuming shared data not write-through-required nor
233                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
234                 plwsync := s.Prog(ppc64.ALWSYNC)
235                 plwsync.To.Type = obj.TYPE_NONE
236                 // LDAR or LWAR
237                 p := s.Prog(ld)
238                 p.From.Type = obj.TYPE_MEM
239                 p.From.Reg = r0
240                 p.To.Type = obj.TYPE_REG
241                 p.To.Reg = out
242                 // ADD reg1,out
243                 p1 := s.Prog(ppc64.AADD)
244                 p1.From.Type = obj.TYPE_REG
245                 p1.From.Reg = r1
246                 p1.To.Reg = out
247                 p1.To.Type = obj.TYPE_REG
248                 // STDCCC or STWCCC
249                 p3 := s.Prog(st)
250                 p3.From.Type = obj.TYPE_REG
251                 p3.From.Reg = out
252                 p3.To.Type = obj.TYPE_MEM
253                 p3.To.Reg = r0
254                 // BNE retry
255                 p4 := s.Prog(ppc64.ABNE)
256                 p4.To.Type = obj.TYPE_BRANCH
257                 p4.To.SetTarget(p)
258
259                 // Ensure a 32 bit result
260                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
261                         p5 := s.Prog(ppc64.AMOVWZ)
262                         p5.To.Type = obj.TYPE_REG
263                         p5.To.Reg = out
264                         p5.From.Type = obj.TYPE_REG
265                         p5.From.Reg = out
266                 }
267
268         case ssa.OpPPC64LoweredAtomicExchange32,
269                 ssa.OpPPC64LoweredAtomicExchange64:
270                 // LWSYNC
271                 // LDAR/LWAR    (Rarg0), Rout
272                 // STDCCC/STWCCC Rout, (Rarg0)
273                 // BNE         -2(PC)
274                 // ISYNC
275                 ld := ppc64.ALDAR
276                 st := ppc64.ASTDCCC
277                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
278                         ld = ppc64.ALWAR
279                         st = ppc64.ASTWCCC
280                 }
281                 r0 := v.Args[0].Reg()
282                 r1 := v.Args[1].Reg()
283                 out := v.Reg0()
284                 // LWSYNC - Assuming shared data not write-through-required nor
285                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
286                 plwsync := s.Prog(ppc64.ALWSYNC)
287                 plwsync.To.Type = obj.TYPE_NONE
288                 // LDAR or LWAR
289                 p := s.Prog(ld)
290                 p.From.Type = obj.TYPE_MEM
291                 p.From.Reg = r0
292                 p.To.Type = obj.TYPE_REG
293                 p.To.Reg = out
294                 // STDCCC or STWCCC
295                 p1 := s.Prog(st)
296                 p1.From.Type = obj.TYPE_REG
297                 p1.From.Reg = r1
298                 p1.To.Type = obj.TYPE_MEM
299                 p1.To.Reg = r0
300                 // BNE retry
301                 p2 := s.Prog(ppc64.ABNE)
302                 p2.To.Type = obj.TYPE_BRANCH
303                 p2.To.SetTarget(p)
304                 // ISYNC
305                 pisync := s.Prog(ppc64.AISYNC)
306                 pisync.To.Type = obj.TYPE_NONE
307
308         case ssa.OpPPC64LoweredAtomicLoad8,
309                 ssa.OpPPC64LoweredAtomicLoad32,
310                 ssa.OpPPC64LoweredAtomicLoad64,
311                 ssa.OpPPC64LoweredAtomicLoadPtr:
312                 // SYNC
313                 // MOVB/MOVD/MOVW (Rarg0), Rout
314                 // CMP Rout,Rout
315                 // BNE 1(PC)
316                 // ISYNC
317                 ld := ppc64.AMOVD
318                 cmp := ppc64.ACMP
319                 switch v.Op {
320                 case ssa.OpPPC64LoweredAtomicLoad8:
321                         ld = ppc64.AMOVBZ
322                 case ssa.OpPPC64LoweredAtomicLoad32:
323                         ld = ppc64.AMOVWZ
324                         cmp = ppc64.ACMPW
325                 }
326                 arg0 := v.Args[0].Reg()
327                 out := v.Reg0()
328                 // SYNC when AuxInt == 1; otherwise, load-acquire
329                 if v.AuxInt == 1 {
330                         psync := s.Prog(ppc64.ASYNC)
331                         psync.To.Type = obj.TYPE_NONE
332                 }
333                 // Load
334                 p := s.Prog(ld)
335                 p.From.Type = obj.TYPE_MEM
336                 p.From.Reg = arg0
337                 p.To.Type = obj.TYPE_REG
338                 p.To.Reg = out
339                 // CMP
340                 p1 := s.Prog(cmp)
341                 p1.From.Type = obj.TYPE_REG
342                 p1.From.Reg = out
343                 p1.To.Type = obj.TYPE_REG
344                 p1.To.Reg = out
345                 // BNE
346                 p2 := s.Prog(ppc64.ABNE)
347                 p2.To.Type = obj.TYPE_BRANCH
348                 // ISYNC
349                 pisync := s.Prog(ppc64.AISYNC)
350                 pisync.To.Type = obj.TYPE_NONE
351                 p2.To.SetTarget(pisync)
352
353         case ssa.OpPPC64LoweredAtomicStore8,
354                 ssa.OpPPC64LoweredAtomicStore32,
355                 ssa.OpPPC64LoweredAtomicStore64:
356                 // SYNC or LWSYNC
357                 // MOVB/MOVW/MOVD arg1,(arg0)
358                 st := ppc64.AMOVD
359                 switch v.Op {
360                 case ssa.OpPPC64LoweredAtomicStore8:
361                         st = ppc64.AMOVB
362                 case ssa.OpPPC64LoweredAtomicStore32:
363                         st = ppc64.AMOVW
364                 }
365                 arg0 := v.Args[0].Reg()
366                 arg1 := v.Args[1].Reg()
367                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
368                 // SYNC
369                 syncOp := ppc64.ASYNC
370                 if v.AuxInt == 0 {
371                         syncOp = ppc64.ALWSYNC
372                 }
373                 psync := s.Prog(syncOp)
374                 psync.To.Type = obj.TYPE_NONE
375                 // Store
376                 p := s.Prog(st)
377                 p.To.Type = obj.TYPE_MEM
378                 p.To.Reg = arg0
379                 p.From.Type = obj.TYPE_REG
380                 p.From.Reg = arg1
381
382         case ssa.OpPPC64LoweredAtomicCas64,
383                 ssa.OpPPC64LoweredAtomicCas32:
384                 // LWSYNC
385                 // loop:
386                 // LDAR        (Rarg0), MutexHint, Rtmp
387                 // CMP         Rarg1, Rtmp
388                 // BNE         fail
389                 // STDCCC      Rarg2, (Rarg0)
390                 // BNE         loop
391                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
392                 // MOVD        $1, Rout
393                 // BR          end
394                 // fail:
395                 // MOVD        $0, Rout
396                 // end:
397                 ld := ppc64.ALDAR
398                 st := ppc64.ASTDCCC
399                 cmp := ppc64.ACMP
400                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
401                         ld = ppc64.ALWAR
402                         st = ppc64.ASTWCCC
403                         cmp = ppc64.ACMPW
404                 }
405                 r0 := v.Args[0].Reg()
406                 r1 := v.Args[1].Reg()
407                 r2 := v.Args[2].Reg()
408                 out := v.Reg0()
409                 // LWSYNC - Assuming shared data not write-through-required nor
410                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
411                 plwsync1 := s.Prog(ppc64.ALWSYNC)
412                 plwsync1.To.Type = obj.TYPE_NONE
413                 // LDAR or LWAR
414                 p := s.Prog(ld)
415                 p.From.Type = obj.TYPE_MEM
416                 p.From.Reg = r0
417                 p.To.Type = obj.TYPE_REG
418                 p.To.Reg = ppc64.REGTMP
419                 // If it is a Compare-and-Swap-Release operation, set the EH field with
420                 // the release hint.
421                 if v.AuxInt == 0 {
422                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
423                 }
424                 // CMP reg1,reg2
425                 p1 := s.Prog(cmp)
426                 p1.From.Type = obj.TYPE_REG
427                 p1.From.Reg = r1
428                 p1.To.Reg = ppc64.REGTMP
429                 p1.To.Type = obj.TYPE_REG
430                 // BNE cas_fail
431                 p2 := s.Prog(ppc64.ABNE)
432                 p2.To.Type = obj.TYPE_BRANCH
433                 // STDCCC or STWCCC
434                 p3 := s.Prog(st)
435                 p3.From.Type = obj.TYPE_REG
436                 p3.From.Reg = r2
437                 p3.To.Type = obj.TYPE_MEM
438                 p3.To.Reg = r0
439                 // BNE retry
440                 p4 := s.Prog(ppc64.ABNE)
441                 p4.To.Type = obj.TYPE_BRANCH
442                 p4.To.SetTarget(p)
443                 // LWSYNC - Assuming shared data not write-through-required nor
444                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
445                 // If the operation is a CAS-Release, then synchronization is not necessary.
446                 if v.AuxInt != 0 {
447                         plwsync2 := s.Prog(ppc64.ALWSYNC)
448                         plwsync2.To.Type = obj.TYPE_NONE
449                 }
450                 // return true
451                 p5 := s.Prog(ppc64.AMOVD)
452                 p5.From.Type = obj.TYPE_CONST
453                 p5.From.Offset = 1
454                 p5.To.Type = obj.TYPE_REG
455                 p5.To.Reg = out
456                 // BR done
457                 p6 := s.Prog(obj.AJMP)
458                 p6.To.Type = obj.TYPE_BRANCH
459                 // return false
460                 p7 := s.Prog(ppc64.AMOVD)
461                 p7.From.Type = obj.TYPE_CONST
462                 p7.From.Offset = 0
463                 p7.To.Type = obj.TYPE_REG
464                 p7.To.Reg = out
465                 p2.To.SetTarget(p7)
466                 // done (label)
467                 p8 := s.Prog(obj.ANOP)
468                 p6.To.SetTarget(p8)
469
470         case ssa.OpPPC64LoweredGetClosurePtr:
471                 // Closure pointer is R11 (already)
472                 gc.CheckLoweredGetClosurePtr(v)
473
474         case ssa.OpPPC64LoweredGetCallerSP:
475                 // caller's SP is FixedFrameSize below the address of the first arg
476                 p := s.Prog(ppc64.AMOVD)
477                 p.From.Type = obj.TYPE_ADDR
478                 p.From.Offset = -base.Ctxt.FixedFrameSize()
479                 p.From.Name = obj.NAME_PARAM
480                 p.To.Type = obj.TYPE_REG
481                 p.To.Reg = v.Reg()
482
483         case ssa.OpPPC64LoweredGetCallerPC:
484                 p := s.Prog(obj.AGETCALLERPC)
485                 p.To.Type = obj.TYPE_REG
486                 p.To.Reg = v.Reg()
487
488         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
489                 // input is already rounded
490
491         case ssa.OpLoadReg:
492                 loadOp := loadByType(v.Type)
493                 p := s.Prog(loadOp)
494                 gc.AddrAuto(&p.From, v.Args[0])
495                 p.To.Type = obj.TYPE_REG
496                 p.To.Reg = v.Reg()
497
498         case ssa.OpStoreReg:
499                 storeOp := storeByType(v.Type)
500                 p := s.Prog(storeOp)
501                 p.From.Type = obj.TYPE_REG
502                 p.From.Reg = v.Args[0].Reg()
503                 gc.AddrAuto(&p.To, v)
504
505         case ssa.OpPPC64DIVD:
506                 // For now,
507                 //
508                 // cmp arg1, -1
509                 // be  ahead
510                 // v = arg0 / arg1
511                 // b over
512                 // ahead: v = - arg0
513                 // over: nop
514                 r := v.Reg()
515                 r0 := v.Args[0].Reg()
516                 r1 := v.Args[1].Reg()
517
518                 p := s.Prog(ppc64.ACMP)
519                 p.From.Type = obj.TYPE_REG
520                 p.From.Reg = r1
521                 p.To.Type = obj.TYPE_CONST
522                 p.To.Offset = -1
523
524                 pbahead := s.Prog(ppc64.ABEQ)
525                 pbahead.To.Type = obj.TYPE_BRANCH
526
527                 p = s.Prog(v.Op.Asm())
528                 p.From.Type = obj.TYPE_REG
529                 p.From.Reg = r1
530                 p.Reg = r0
531                 p.To.Type = obj.TYPE_REG
532                 p.To.Reg = r
533
534                 pbover := s.Prog(obj.AJMP)
535                 pbover.To.Type = obj.TYPE_BRANCH
536
537                 p = s.Prog(ppc64.ANEG)
538                 p.To.Type = obj.TYPE_REG
539                 p.To.Reg = r
540                 p.From.Type = obj.TYPE_REG
541                 p.From.Reg = r0
542                 pbahead.To.SetTarget(p)
543
544                 p = s.Prog(obj.ANOP)
545                 pbover.To.SetTarget(p)
546
547         case ssa.OpPPC64DIVW:
548                 // word-width version of above
549                 r := v.Reg()
550                 r0 := v.Args[0].Reg()
551                 r1 := v.Args[1].Reg()
552
553                 p := s.Prog(ppc64.ACMPW)
554                 p.From.Type = obj.TYPE_REG
555                 p.From.Reg = r1
556                 p.To.Type = obj.TYPE_CONST
557                 p.To.Offset = -1
558
559                 pbahead := s.Prog(ppc64.ABEQ)
560                 pbahead.To.Type = obj.TYPE_BRANCH
561
562                 p = s.Prog(v.Op.Asm())
563                 p.From.Type = obj.TYPE_REG
564                 p.From.Reg = r1
565                 p.Reg = r0
566                 p.To.Type = obj.TYPE_REG
567                 p.To.Reg = r
568
569                 pbover := s.Prog(obj.AJMP)
570                 pbover.To.Type = obj.TYPE_BRANCH
571
572                 p = s.Prog(ppc64.ANEG)
573                 p.To.Type = obj.TYPE_REG
574                 p.To.Reg = r
575                 p.From.Type = obj.TYPE_REG
576                 p.From.Reg = r0
577                 pbahead.To.SetTarget(p)
578
579                 p = s.Prog(obj.ANOP)
580                 pbover.To.SetTarget(p)
581
582         case ssa.OpPPC64CLRLSLWI:
583                 r := v.Reg()
584                 r1 := v.Args[0].Reg()
585                 shifts := v.AuxInt
586                 p := s.Prog(v.Op.Asm())
587                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
588                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
589                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
590                 p.Reg = r1
591                 p.To.Type = obj.TYPE_REG
592                 p.To.Reg = r
593
594         case ssa.OpPPC64CLRLSLDI:
595                 r := v.Reg()
596                 r1 := v.Args[0].Reg()
597                 shifts := v.AuxInt
598                 p := s.Prog(v.Op.Asm())
599                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
600                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
601                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
602                 p.Reg = r1
603                 p.To.Type = obj.TYPE_REG
604                 p.To.Reg = r
605
606                 // Mask has been set as sh
607         case ssa.OpPPC64RLDICL:
608                 r := v.Reg()
609                 r1 := v.Args[0].Reg()
610                 shifts := v.AuxInt
611                 p := s.Prog(v.Op.Asm())
612                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
613                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)})
614                 p.Reg = r1
615                 p.To.Type = obj.TYPE_REG
616                 p.To.Reg = r
617
618         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
619                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
620                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
621                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
622                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
623                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
624                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
625                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
626                 r := v.Reg()
627                 r1 := v.Args[0].Reg()
628                 r2 := v.Args[1].Reg()
629                 p := s.Prog(v.Op.Asm())
630                 p.From.Type = obj.TYPE_REG
631                 p.From.Reg = r2
632                 p.Reg = r1
633                 p.To.Type = obj.TYPE_REG
634                 p.To.Reg = r
635
636         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
637                 r1 := v.Args[0].Reg()
638                 r2 := v.Args[1].Reg()
639                 p := s.Prog(v.Op.Asm())
640                 p.From.Type = obj.TYPE_REG
641                 p.From.Reg = r2
642                 p.Reg = r1
643                 p.To.Type = obj.TYPE_REG
644                 p.To.Reg = ppc64.REGTMP // result is not needed
645
646         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
647                 p := s.Prog(v.Op.Asm())
648                 p.From.Type = obj.TYPE_CONST
649                 p.From.Offset = v.AuxInt
650                 p.Reg = v.Args[0].Reg()
651                 p.To.Type = obj.TYPE_REG
652                 p.To.Reg = v.Reg()
653
654                 // Auxint holds encoded rotate + mask
655         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
656                 rot, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
657                 p := s.Prog(v.Op.Asm())
658                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
659                 p.Reg = v.Args[0].Reg()
660                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
661                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
662
663                 // Auxint holds mask
664         case ssa.OpPPC64RLWNM:
665                 _, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
666                 p := s.Prog(v.Op.Asm())
667                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
668                 p.Reg = v.Args[0].Reg()
669                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
670                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
671
672         case ssa.OpPPC64MADDLD:
673                 r := v.Reg()
674                 r1 := v.Args[0].Reg()
675                 r2 := v.Args[1].Reg()
676                 r3 := v.Args[2].Reg()
677                 // r = r1*r2 ± r3
678                 p := s.Prog(v.Op.Asm())
679                 p.From.Type = obj.TYPE_REG
680                 p.From.Reg = r1
681                 p.Reg = r2
682                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
683                 p.To.Type = obj.TYPE_REG
684                 p.To.Reg = r
685
686         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
687                 r := v.Reg()
688                 r1 := v.Args[0].Reg()
689                 r2 := v.Args[1].Reg()
690                 r3 := v.Args[2].Reg()
691                 // r = r1*r2 ± r3
692                 p := s.Prog(v.Op.Asm())
693                 p.From.Type = obj.TYPE_REG
694                 p.From.Reg = r1
695                 p.Reg = r3
696                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
697                 p.To.Type = obj.TYPE_REG
698                 p.To.Reg = r
699
700         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
701                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
702                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
703                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
704                 r := v.Reg()
705                 p := s.Prog(v.Op.Asm())
706                 p.To.Type = obj.TYPE_REG
707                 p.To.Reg = r
708                 p.From.Type = obj.TYPE_REG
709                 p.From.Reg = v.Args[0].Reg()
710
711         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
712                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
713                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
714                 p := s.Prog(v.Op.Asm())
715                 p.Reg = v.Args[0].Reg()
716                 p.From.Type = obj.TYPE_CONST
717                 p.From.Offset = v.AuxInt
718                 p.To.Type = obj.TYPE_REG
719                 p.To.Reg = v.Reg()
720
721         case ssa.OpPPC64SUBFCconst:
722                 p := s.Prog(v.Op.Asm())
723                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
724                 p.From.Type = obj.TYPE_REG
725                 p.From.Reg = v.Args[0].Reg()
726                 p.To.Type = obj.TYPE_REG
727                 p.To.Reg = v.Reg()
728
729         case ssa.OpPPC64ANDCCconst:
730                 p := s.Prog(v.Op.Asm())
731                 p.Reg = v.Args[0].Reg()
732                 p.From.Type = obj.TYPE_CONST
733                 p.From.Offset = v.AuxInt
734                 p.To.Type = obj.TYPE_REG
735                 p.To.Reg = ppc64.REGTMP // discard result
736
737         case ssa.OpPPC64MOVDaddr:
738                 switch v.Aux.(type) {
739                 default:
740                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
741                 case nil:
742                         // If aux offset and aux int are both 0, and the same
743                         // input and output regs are used, no instruction
744                         // needs to be generated, since it would just be
745                         // addi rx, rx, 0.
746                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
747                                 p := s.Prog(ppc64.AMOVD)
748                                 p.From.Type = obj.TYPE_ADDR
749                                 p.From.Reg = v.Args[0].Reg()
750                                 p.From.Offset = v.AuxInt
751                                 p.To.Type = obj.TYPE_REG
752                                 p.To.Reg = v.Reg()
753                         }
754
755                 case *obj.LSym, ir.Node:
756                         p := s.Prog(ppc64.AMOVD)
757                         p.From.Type = obj.TYPE_ADDR
758                         p.From.Reg = v.Args[0].Reg()
759                         p.To.Type = obj.TYPE_REG
760                         p.To.Reg = v.Reg()
761                         gc.AddAux(&p.From, v)
762
763                 }
764
765         case ssa.OpPPC64MOVDconst:
766                 p := s.Prog(v.Op.Asm())
767                 p.From.Type = obj.TYPE_CONST
768                 p.From.Offset = v.AuxInt
769                 p.To.Type = obj.TYPE_REG
770                 p.To.Reg = v.Reg()
771
772         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
773                 p := s.Prog(v.Op.Asm())
774                 p.From.Type = obj.TYPE_FCONST
775                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
776                 p.To.Type = obj.TYPE_REG
777                 p.To.Reg = v.Reg()
778
779         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
780                 p := s.Prog(v.Op.Asm())
781                 p.From.Type = obj.TYPE_REG
782                 p.From.Reg = v.Args[0].Reg()
783                 p.To.Type = obj.TYPE_REG
784                 p.To.Reg = v.Args[1].Reg()
785
786         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
787                 p := s.Prog(v.Op.Asm())
788                 p.From.Type = obj.TYPE_REG
789                 p.From.Reg = v.Args[0].Reg()
790                 p.To.Type = obj.TYPE_CONST
791                 p.To.Offset = v.AuxInt
792
793         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
794                 // Shift in register to required size
795                 p := s.Prog(v.Op.Asm())
796                 p.From.Type = obj.TYPE_REG
797                 p.From.Reg = v.Args[0].Reg()
798                 p.To.Reg = v.Reg()
799                 p.To.Type = obj.TYPE_REG
800
801         case ssa.OpPPC64MOVDload:
802
803                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
804                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
805                 // the offset is not known until link time. If the load of a go.string uses relocation for the
806                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
807                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
808                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
809                 // go.string types because other types will have proper alignment.
810
811                 gostring := false
812                 switch n := v.Aux.(type) {
813                 case *obj.LSym:
814                         gostring = strings.HasPrefix(n.Name, "go.string.")
815                 }
816                 if gostring {
817                         // Generate full addr of the go.string const
818                         // including AuxInt
819                         p := s.Prog(ppc64.AMOVD)
820                         p.From.Type = obj.TYPE_ADDR
821                         p.From.Reg = v.Args[0].Reg()
822                         gc.AddAux(&p.From, v)
823                         p.To.Type = obj.TYPE_REG
824                         p.To.Reg = v.Reg()
825                         // Load go.string using 0 offset
826                         p = s.Prog(v.Op.Asm())
827                         p.From.Type = obj.TYPE_MEM
828                         p.From.Reg = v.Reg()
829                         p.To.Type = obj.TYPE_REG
830                         p.To.Reg = v.Reg()
831                         break
832                 }
833                 // Not a go.string, generate a normal load
834                 fallthrough
835
836         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
837                 p := s.Prog(v.Op.Asm())
838                 p.From.Type = obj.TYPE_MEM
839                 p.From.Reg = v.Args[0].Reg()
840                 gc.AddAux(&p.From, v)
841                 p.To.Type = obj.TYPE_REG
842                 p.To.Reg = v.Reg()
843
844         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
845                 p := s.Prog(v.Op.Asm())
846                 p.From.Type = obj.TYPE_MEM
847                 p.From.Reg = v.Args[0].Reg()
848                 p.To.Type = obj.TYPE_REG
849                 p.To.Reg = v.Reg()
850
851         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
852                 p := s.Prog(v.Op.Asm())
853                 p.To.Type = obj.TYPE_MEM
854                 p.To.Reg = v.Args[0].Reg()
855                 p.From.Type = obj.TYPE_REG
856                 p.From.Reg = v.Args[1].Reg()
857
858         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
859                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
860                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
861                 p := s.Prog(v.Op.Asm())
862                 p.From.Type = obj.TYPE_MEM
863                 p.From.Reg = v.Args[0].Reg()
864                 p.From.Index = v.Args[1].Reg()
865                 p.To.Type = obj.TYPE_REG
866                 p.To.Reg = v.Reg()
867
868         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
869                 p := s.Prog(v.Op.Asm())
870                 p.From.Type = obj.TYPE_REG
871                 p.From.Reg = ppc64.REGZERO
872                 p.To.Type = obj.TYPE_MEM
873                 p.To.Reg = v.Args[0].Reg()
874                 gc.AddAux(&p.To, v)
875
876         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
877                 p := s.Prog(v.Op.Asm())
878                 p.From.Type = obj.TYPE_REG
879                 p.From.Reg = v.Args[1].Reg()
880                 p.To.Type = obj.TYPE_MEM
881                 p.To.Reg = v.Args[0].Reg()
882                 gc.AddAux(&p.To, v)
883
884         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
885                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
886                 ssa.OpPPC64MOVHBRstoreidx:
887                 p := s.Prog(v.Op.Asm())
888                 p.From.Type = obj.TYPE_REG
889                 p.From.Reg = v.Args[2].Reg()
890                 p.To.Index = v.Args[1].Reg()
891                 p.To.Type = obj.TYPE_MEM
892                 p.To.Reg = v.Args[0].Reg()
893
894         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
895                 // ISEL, ISELB
896                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
897                 // ISEL only accepts 0, 1, 2 condition values but the others can be
898                 // achieved by swapping operand order.
899                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
900                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
901                 // ISELB is used when a boolean result is needed, returning 0 or 1
902                 p := s.Prog(ppc64.AISEL)
903                 p.To.Type = obj.TYPE_REG
904                 p.To.Reg = v.Reg()
905                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
906                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
907                 if v.Op == ssa.OpPPC64ISEL {
908                         r.Reg = v.Args[1].Reg()
909                 }
910                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
911                 if v.AuxInt > 3 {
912                         p.Reg = r.Reg
913                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
914                 } else {
915                         p.Reg = v.Args[0].Reg()
916                         p.SetFrom3(r)
917                 }
918                 p.From.Type = obj.TYPE_CONST
919                 p.From.Offset = v.AuxInt & 3
920
921         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
922                 // The LoweredQuad code generation
923                 // generates STXV instructions on
924                 // power9. The Short variation is used
925                 // if no loop is generated.
926
927                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
928
929                 // Set up loop counter in CTR, used by BC
930                 // XXLXOR clears VS32
931                 //       XXLXOR VS32,VS32,VS32
932                 //       MOVD len/64,REG_TMP
933                 //       MOVD REG_TMP,CTR
934                 //       loop:
935                 //       STXV VS32,0(R20)
936                 //       STXV VS32,16(R20)
937                 //       STXV VS32,32(R20)
938                 //       STXV VS32,48(R20)
939                 //       ADD  $64,R20
940                 //       BC   16, 0, loop
941
942                 // Number of 64-byte loop iterations
943                 ctr := v.AuxInt / 64
944
945                 // Remainder bytes
946                 rem := v.AuxInt % 64
947
948                 // Only generate a loop if there is more
949                 // than 1 iteration.
950                 if ctr > 1 {
951                         // Set up VS32 (V0) to hold 0s
952                         p := s.Prog(ppc64.AXXLXOR)
953                         p.From.Type = obj.TYPE_REG
954                         p.From.Reg = ppc64.REG_VS32
955                         p.To.Type = obj.TYPE_REG
956                         p.To.Reg = ppc64.REG_VS32
957                         p.Reg = ppc64.REG_VS32
958
959                         // Set up CTR loop counter
960                         p = s.Prog(ppc64.AMOVD)
961                         p.From.Type = obj.TYPE_CONST
962                         p.From.Offset = ctr
963                         p.To.Type = obj.TYPE_REG
964                         p.To.Reg = ppc64.REGTMP
965
966                         p = s.Prog(ppc64.AMOVD)
967                         p.From.Type = obj.TYPE_REG
968                         p.From.Reg = ppc64.REGTMP
969                         p.To.Type = obj.TYPE_REG
970                         p.To.Reg = ppc64.REG_CTR
971
972                         // Don't generate padding for
973                         // loops with few iterations.
974                         if ctr > 3 {
975                                 p = s.Prog(obj.APCALIGN)
976                                 p.From.Type = obj.TYPE_CONST
977                                 p.From.Offset = 16
978                         }
979
980                         // generate 4 STXVs to zero 64 bytes
981                         var top *obj.Prog
982
983                         p = s.Prog(ppc64.ASTXV)
984                         p.From.Type = obj.TYPE_REG
985                         p.From.Reg = ppc64.REG_VS32
986                         p.To.Type = obj.TYPE_MEM
987                         p.To.Reg = v.Args[0].Reg()
988
989                         //  Save the top of loop
990                         if top == nil {
991                                 top = p
992                         }
993                         p = s.Prog(ppc64.ASTXV)
994                         p.From.Type = obj.TYPE_REG
995                         p.From.Reg = ppc64.REG_VS32
996                         p.To.Type = obj.TYPE_MEM
997                         p.To.Reg = v.Args[0].Reg()
998                         p.To.Offset = 16
999
1000                         p = s.Prog(ppc64.ASTXV)
1001                         p.From.Type = obj.TYPE_REG
1002                         p.From.Reg = ppc64.REG_VS32
1003                         p.To.Type = obj.TYPE_MEM
1004                         p.To.Reg = v.Args[0].Reg()
1005                         p.To.Offset = 32
1006
1007                         p = s.Prog(ppc64.ASTXV)
1008                         p.From.Type = obj.TYPE_REG
1009                         p.From.Reg = ppc64.REG_VS32
1010                         p.To.Type = obj.TYPE_MEM
1011                         p.To.Reg = v.Args[0].Reg()
1012                         p.To.Offset = 48
1013
1014                         // Increment address for the
1015                         // 64 bytes just zeroed.
1016                         p = s.Prog(ppc64.AADD)
1017                         p.Reg = v.Args[0].Reg()
1018                         p.From.Type = obj.TYPE_CONST
1019                         p.From.Offset = 64
1020                         p.To.Type = obj.TYPE_REG
1021                         p.To.Reg = v.Args[0].Reg()
1022
1023                         // Branch back to top of loop
1024                         // based on CTR
1025                         // BC with BO_BCTR generates bdnz
1026                         p = s.Prog(ppc64.ABC)
1027                         p.From.Type = obj.TYPE_CONST
1028                         p.From.Offset = ppc64.BO_BCTR
1029                         p.Reg = ppc64.REG_R0
1030                         p.To.Type = obj.TYPE_BRANCH
1031                         p.To.SetTarget(top)
1032                 }
1033                 // When ctr == 1 the loop was not generated but
1034                 // there are at least 64 bytes to clear, so add
1035                 // that to the remainder to generate the code
1036                 // to clear those doublewords
1037                 if ctr == 1 {
1038                         rem += 64
1039                 }
1040
1041                 // Clear the remainder starting at offset zero
1042                 offset := int64(0)
1043
1044                 if rem >= 16 && ctr <= 1 {
1045                         // If the XXLXOR hasn't already been
1046                         // generated, do it here to initialize
1047                         // VS32 (V0) to 0.
1048                         p := s.Prog(ppc64.AXXLXOR)
1049                         p.From.Type = obj.TYPE_REG
1050                         p.From.Reg = ppc64.REG_VS32
1051                         p.To.Type = obj.TYPE_REG
1052                         p.To.Reg = ppc64.REG_VS32
1053                         p.Reg = ppc64.REG_VS32
1054                 }
1055                 // Generate STXV for 32 or 64
1056                 // bytes.
1057                 for rem >= 32 {
1058                         p := s.Prog(ppc64.ASTXV)
1059                         p.From.Type = obj.TYPE_REG
1060                         p.From.Reg = ppc64.REG_VS32
1061                         p.To.Type = obj.TYPE_MEM
1062                         p.To.Reg = v.Args[0].Reg()
1063                         p.To.Offset = offset
1064
1065                         p = s.Prog(ppc64.ASTXV)
1066                         p.From.Type = obj.TYPE_REG
1067                         p.From.Reg = ppc64.REG_VS32
1068                         p.To.Type = obj.TYPE_MEM
1069                         p.To.Reg = v.Args[0].Reg()
1070                         p.To.Offset = offset + 16
1071                         offset += 32
1072                         rem -= 32
1073                 }
1074                 // Generate 16 bytes
1075                 if rem >= 16 {
1076                         p := s.Prog(ppc64.ASTXV)
1077                         p.From.Type = obj.TYPE_REG
1078                         p.From.Reg = ppc64.REG_VS32
1079                         p.To.Type = obj.TYPE_MEM
1080                         p.To.Reg = v.Args[0].Reg()
1081                         p.To.Offset = offset
1082                         offset += 16
1083                         rem -= 16
1084                 }
1085
1086                 // first clear as many doublewords as possible
1087                 // then clear remaining sizes as available
1088                 for rem > 0 {
1089                         op, size := ppc64.AMOVB, int64(1)
1090                         switch {
1091                         case rem >= 8:
1092                                 op, size = ppc64.AMOVD, 8
1093                         case rem >= 4:
1094                                 op, size = ppc64.AMOVW, 4
1095                         case rem >= 2:
1096                                 op, size = ppc64.AMOVH, 2
1097                         }
1098                         p := s.Prog(op)
1099                         p.From.Type = obj.TYPE_REG
1100                         p.From.Reg = ppc64.REG_R0
1101                         p.To.Type = obj.TYPE_MEM
1102                         p.To.Reg = v.Args[0].Reg()
1103                         p.To.Offset = offset
1104                         rem -= size
1105                         offset += size
1106                 }
1107
1108         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1109
1110                 // Unaligned data doesn't hurt performance
1111                 // for these instructions on power8.
1112
1113                 // For sizes >= 64 generate a loop as follows:
1114
1115                 // Set up loop counter in CTR, used by BC
1116                 //       XXLXOR VS32,VS32,VS32
1117                 //       MOVD len/32,REG_TMP
1118                 //       MOVD REG_TMP,CTR
1119                 //       MOVD $16,REG_TMP
1120                 //       loop:
1121                 //       STXVD2X VS32,(R0)(R20)
1122                 //       STXVD2X VS32,(R31)(R20)
1123                 //       ADD  $32,R20
1124                 //       BC   16, 0, loop
1125                 //
1126                 // any remainder is done as described below
1127
1128                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1129                 // then handle the remainder
1130                 //      MOVD R0,(R20)
1131                 //      MOVD R0,8(R20)
1132                 // .... etc.
1133                 //
1134                 // the remainder bytes are cleared using one or more
1135                 // of the following instructions with the appropriate
1136                 // offsets depending which instructions are needed
1137                 //
1138                 //      MOVW R0,n1(R20) 4 bytes
1139                 //      MOVH R0,n2(R20) 2 bytes
1140                 //      MOVB R0,n3(R20) 1 byte
1141                 //
1142                 // 7 bytes: MOVW, MOVH, MOVB
1143                 // 6 bytes: MOVW, MOVH
1144                 // 5 bytes: MOVW, MOVB
1145                 // 3 bytes: MOVH, MOVB
1146
1147                 // each loop iteration does 32 bytes
1148                 ctr := v.AuxInt / 32
1149
1150                 // remainder bytes
1151                 rem := v.AuxInt % 32
1152
1153                 // only generate a loop if there is more
1154                 // than 1 iteration.
1155                 if ctr > 1 {
1156                         // Set up VS32 (V0) to hold 0s
1157                         p := s.Prog(ppc64.AXXLXOR)
1158                         p.From.Type = obj.TYPE_REG
1159                         p.From.Reg = ppc64.REG_VS32
1160                         p.To.Type = obj.TYPE_REG
1161                         p.To.Reg = ppc64.REG_VS32
1162                         p.Reg = ppc64.REG_VS32
1163
1164                         // Set up CTR loop counter
1165                         p = s.Prog(ppc64.AMOVD)
1166                         p.From.Type = obj.TYPE_CONST
1167                         p.From.Offset = ctr
1168                         p.To.Type = obj.TYPE_REG
1169                         p.To.Reg = ppc64.REGTMP
1170
1171                         p = s.Prog(ppc64.AMOVD)
1172                         p.From.Type = obj.TYPE_REG
1173                         p.From.Reg = ppc64.REGTMP
1174                         p.To.Type = obj.TYPE_REG
1175                         p.To.Reg = ppc64.REG_CTR
1176
1177                         // Set up REGTMP (R31) to hold index value 16
1178                         p = s.Prog(ppc64.AMOVD)
1179                         p.From.Type = obj.TYPE_CONST
1180                         p.From.Offset = 16
1181                         p.To.Type = obj.TYPE_REG
1182                         p.To.Reg = ppc64.REGTMP
1183
1184                         // Don't add padding for alignment
1185                         // with few loop iterations.
1186                         if ctr > 3 {
1187                                 p = s.Prog(obj.APCALIGN)
1188                                 p.From.Type = obj.TYPE_CONST
1189                                 p.From.Offset = 16
1190                         }
1191
1192                         // generate 2 STXVD2Xs, each storing 16 bytes (32 bytes per iteration)
1193                         // when this is a loop then the top must be saved
1194                         var top *obj.Prog
1195                         // This is the top of loop
1196
1197                         p = s.Prog(ppc64.ASTXVD2X)
1198                         p.From.Type = obj.TYPE_REG
1199                         p.From.Reg = ppc64.REG_VS32
1200                         p.To.Type = obj.TYPE_MEM
1201                         p.To.Reg = v.Args[0].Reg()
1202                         p.To.Index = ppc64.REGZERO
1203                         // Save the top of loop
1204                         if top == nil {
1205                                 top = p
1206                         }
1207                         p = s.Prog(ppc64.ASTXVD2X)
1208                         p.From.Type = obj.TYPE_REG
1209                         p.From.Reg = ppc64.REG_VS32
1210                         p.To.Type = obj.TYPE_MEM
1211                         p.To.Reg = v.Args[0].Reg()
1212                         p.To.Index = ppc64.REGTMP
1213
1214                         // Increment address for the
1215                         // 4 doublewords just zeroed.
1216                         p = s.Prog(ppc64.AADD)
1217                         p.Reg = v.Args[0].Reg()
1218                         p.From.Type = obj.TYPE_CONST
1219                         p.From.Offset = 32
1220                         p.To.Type = obj.TYPE_REG
1221                         p.To.Reg = v.Args[0].Reg()
1222
1223                         // Branch back to top of loop
1224                         // based on CTR
1225                         // BC with BO_BCTR generates bdnz
1226                         p = s.Prog(ppc64.ABC)
1227                         p.From.Type = obj.TYPE_CONST
1228                         p.From.Offset = ppc64.BO_BCTR
1229                         p.Reg = ppc64.REG_R0
1230                         p.To.Type = obj.TYPE_BRANCH
1231                         p.To.SetTarget(top)
1232                 }
1233
1234                 // when ctr == 1 the loop was not generated but
1235                 // there are at least 32 bytes to clear, so add
1236                 // that to the remainder to generate the code
1237                 // to clear those doublewords
1238                 if ctr == 1 {
1239                         rem += 32
1240                 }
1241
1242                 // clear the remainder starting at offset zero
1243                 offset := int64(0)
1244
1245                 // first clear as many doublewords as possible
1246                 // then clear remaining sizes as available
1247                 for rem > 0 {
1248                         op, size := ppc64.AMOVB, int64(1)
1249                         switch {
1250                         case rem >= 8:
1251                                 op, size = ppc64.AMOVD, 8
1252                         case rem >= 4:
1253                                 op, size = ppc64.AMOVW, 4
1254                         case rem >= 2:
1255                                 op, size = ppc64.AMOVH, 2
1256                         }
1257                         p := s.Prog(op)
1258                         p.From.Type = obj.TYPE_REG
1259                         p.From.Reg = ppc64.REG_R0
1260                         p.To.Type = obj.TYPE_MEM
1261                         p.To.Reg = v.Args[0].Reg()
1262                         p.To.Offset = offset
1263                         rem -= size
1264                         offset += size
1265                 }
1266
1267         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1268
1269                 bytesPerLoop := int64(32)
1270                 // This will be used when moving more
1271                 // than 8 bytes.  Moves start with
1272                 // as many 8 byte moves as possible, then
1273                 // 4, 2, or 1 byte(s) as remaining.  This will
1274                 // work and be efficient for power8 or later.
1275                 // If there are 64 or more bytes, then a
1276                 // loop is generated to move 32 bytes and
1277                 // update the src and dst addresses on each
1278                 // iteration. When < 64 bytes, the appropriate
1279                 // number of moves are generated based on the
1280                 // size.
1281                 // When moving >= 64 bytes a loop is used
1282                 //      MOVD len/32,REG_TMP
1283                 //      MOVD REG_TMP,CTR
1284                 //      MOVD $16,REG_TMP
1285                 // top:
1286                 //      LXVD2X (R0)(R21),VS32
1287                 //      LXVD2X (R31)(R21),VS33
1288                 //      ADD $32,R21
1289                 //      STXVD2X VS32,(R0)(R20)
1290                 //      STXVD2X VS33,(R31)(R20)
1291                 //      ADD $32,R20
1292                 //      BC 16,0,top
1293                 // Bytes not moved by this loop are moved
1294                 // with a combination of the following instructions,
1295                 // starting with the largest sizes and generating as
1296                 // many as needed, using the appropriate offset value.
1297                 //      MOVD  n(R21),R31
1298                 //      MOVD  R31,n(R20)
1299                 //      MOVW  n1(R21),R31
1300                 //      MOVW  R31,n1(R20)
1301                 //      MOVH  n2(R21),R31
1302                 //      MOVH  R31,n2(R20)
1303                 //      MOVB  n3(R21),R31
1304                 //      MOVB  R31,n3(R20)
1305
1306                 // Each loop iteration moves 32 bytes
1307                 ctr := v.AuxInt / bytesPerLoop
1308
1309                 // Remainder after the loop
1310                 rem := v.AuxInt % bytesPerLoop
1311
1312                 dstReg := v.Args[0].Reg()
1313                 srcReg := v.Args[1].Reg()
1314
1315                 // The set of registers used here, must match the clobbered reg list
1316                 // in PPC64Ops.go.
1317                 offset := int64(0)
1318
1319                 // top of the loop
1320                 var top *obj.Prog
1321                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1322                 if ctr > 1 {
1323                         // Set up the CTR
1324                         p := s.Prog(ppc64.AMOVD)
1325                         p.From.Type = obj.TYPE_CONST
1326                         p.From.Offset = ctr
1327                         p.To.Type = obj.TYPE_REG
1328                         p.To.Reg = ppc64.REGTMP
1329
1330                         p = s.Prog(ppc64.AMOVD)
1331                         p.From.Type = obj.TYPE_REG
1332                         p.From.Reg = ppc64.REGTMP
1333                         p.To.Type = obj.TYPE_REG
1334                         p.To.Reg = ppc64.REG_CTR
1335
1336                         // Use REGTMP as index reg
1337                         p = s.Prog(ppc64.AMOVD)
1338                         p.From.Type = obj.TYPE_CONST
1339                         p.From.Offset = 16
1340                         p.To.Type = obj.TYPE_REG
1341                         p.To.Reg = ppc64.REGTMP
1342
1343                         // Don't add padding for
1344                         // alignment with small iteration
1345                         // counts.
1346                         if ctr > 3 {
1347                                 p = s.Prog(obj.APCALIGN)
1348                                 p.From.Type = obj.TYPE_CONST
1349                                 p.From.Offset = 16
1350                         }
1351
1352                         // Generate 16 byte loads and stores.
1353                         // Use temp register for index (16)
1354                         // on the second one.
1355
1356                         p = s.Prog(ppc64.ALXVD2X)
1357                         p.From.Type = obj.TYPE_MEM
1358                         p.From.Reg = srcReg
1359                         p.From.Index = ppc64.REGZERO
1360                         p.To.Type = obj.TYPE_REG
1361                         p.To.Reg = ppc64.REG_VS32
1362                         if top == nil {
1363                                 top = p
1364                         }
1365                         p = s.Prog(ppc64.ALXVD2X)
1366                         p.From.Type = obj.TYPE_MEM
1367                         p.From.Reg = srcReg
1368                         p.From.Index = ppc64.REGTMP
1369                         p.To.Type = obj.TYPE_REG
1370                         p.To.Reg = ppc64.REG_VS33
1371
1372                         // increment the src reg for next iteration
1373                         p = s.Prog(ppc64.AADD)
1374                         p.Reg = srcReg
1375                         p.From.Type = obj.TYPE_CONST
1376                         p.From.Offset = bytesPerLoop
1377                         p.To.Type = obj.TYPE_REG
1378                         p.To.Reg = srcReg
1379
1380                         // generate 16 byte stores
1381                         p = s.Prog(ppc64.ASTXVD2X)
1382                         p.From.Type = obj.TYPE_REG
1383                         p.From.Reg = ppc64.REG_VS32
1384                         p.To.Type = obj.TYPE_MEM
1385                         p.To.Reg = dstReg
1386                         p.To.Index = ppc64.REGZERO
1387
1388                         p = s.Prog(ppc64.ASTXVD2X)
1389                         p.From.Type = obj.TYPE_REG
1390                         p.From.Reg = ppc64.REG_VS33
1391                         p.To.Type = obj.TYPE_MEM
1392                         p.To.Reg = dstReg
1393                         p.To.Index = ppc64.REGTMP
1394
1395                         // increment the dst reg for next iteration
1396                         p = s.Prog(ppc64.AADD)
1397                         p.Reg = dstReg
1398                         p.From.Type = obj.TYPE_CONST
1399                         p.From.Offset = bytesPerLoop
1400                         p.To.Type = obj.TYPE_REG
1401                         p.To.Reg = dstReg
1402
1403                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1404                         // to loop top.
1405                         p = s.Prog(ppc64.ABC)
1406                         p.From.Type = obj.TYPE_CONST
1407                         p.From.Offset = ppc64.BO_BCTR
1408                         p.Reg = ppc64.REG_R0
1409                         p.To.Type = obj.TYPE_BRANCH
1410                         p.To.SetTarget(top)
1411
1412                         // srcReg and dstReg were incremented in the loop, so
1413                         // later instructions start with offset 0.
1414                         offset = int64(0)
1415                 }
1416
1417                 // No loop was generated for one iteration, so
1418                 // add 32 bytes to the remainder to move those bytes.
1419                 if ctr == 1 {
1420                         rem += bytesPerLoop
1421                 }
1422
1423                 if rem >= 16 {
1424                         // Generate 16 byte loads and stores.
1425                         // Use temp register for index (value 16)
1426                         // on the second one.
1427                         p := s.Prog(ppc64.ALXVD2X)
1428                         p.From.Type = obj.TYPE_MEM
1429                         p.From.Reg = srcReg
1430                         p.From.Index = ppc64.REGZERO
1431                         p.To.Type = obj.TYPE_REG
1432                         p.To.Reg = ppc64.REG_VS32
1433
1434                         p = s.Prog(ppc64.ASTXVD2X)
1435                         p.From.Type = obj.TYPE_REG
1436                         p.From.Reg = ppc64.REG_VS32
1437                         p.To.Type = obj.TYPE_MEM
1438                         p.To.Reg = dstReg
1439                         p.To.Index = ppc64.REGZERO
1440
1441                         offset = 16
1442                         rem -= 16
1443
1444                         if rem >= 16 {
1445                                 // Use REGTMP as index reg
1446                                 p := s.Prog(ppc64.AMOVD)
1447                                 p.From.Type = obj.TYPE_CONST
1448                                 p.From.Offset = 16
1449                                 p.To.Type = obj.TYPE_REG
1450                                 p.To.Reg = ppc64.REGTMP
1451
1452                                 p = s.Prog(ppc64.ALXVD2X)
1453                                 p.From.Type = obj.TYPE_MEM
1454                                 p.From.Reg = srcReg
1455                                 p.From.Index = ppc64.REGTMP
1456                                 p.To.Type = obj.TYPE_REG
1457                                 p.To.Reg = ppc64.REG_VS32
1458
1459                                 p = s.Prog(ppc64.ASTXVD2X)
1460                                 p.From.Type = obj.TYPE_REG
1461                                 p.From.Reg = ppc64.REG_VS32
1462                                 p.To.Type = obj.TYPE_MEM
1463                                 p.To.Reg = dstReg
1464                                 p.To.Index = ppc64.REGTMP
1465
1466                                 offset = 32
1467                                 rem -= 16
1468                         }
1469                 }
1470
1471                 // Generate all the remaining load and store pairs, starting with
1472                 // as many 8 byte moves as possible, then 4, 2, 1.
1473                 for rem > 0 {
1474                         op, size := ppc64.AMOVB, int64(1)
1475                         switch {
1476                         case rem >= 8:
1477                                 op, size = ppc64.AMOVD, 8
1478                         case rem >= 4:
1479                                 op, size = ppc64.AMOVW, 4
1480                         case rem >= 2:
1481                                 op, size = ppc64.AMOVH, 2
1482                         }
1483                         // Load
1484                         p := s.Prog(op)
1485                         p.To.Type = obj.TYPE_REG
1486                         p.To.Reg = ppc64.REGTMP
1487                         p.From.Type = obj.TYPE_MEM
1488                         p.From.Reg = srcReg
1489                         p.From.Offset = offset
1490
1491                         // Store
1492                         p = s.Prog(op)
1493                         p.From.Type = obj.TYPE_REG
1494                         p.From.Reg = ppc64.REGTMP
1495                         p.To.Type = obj.TYPE_MEM
1496                         p.To.Reg = dstReg
1497                         p.To.Offset = offset
1498                         rem -= size
1499                         offset += size
1500                 }
1501
1502         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1503                 bytesPerLoop := int64(64)
1504                 // This is used when moving more
1505                 // than 8 bytes on power9.  Moves start with
1506                 // as many 8 byte moves as possible, then
1507                 // 4, 2, or 1 byte(s) as remaining.  This will
1508                 // work and be efficient for power8 or later.
1509                 // If there are 64 or more bytes, then a
1510                 // loop is generated to move 64 bytes and
1511                 // update the src and dst addresses on each
1512                 // iteration. When < 64 bytes, the appropriate
1513                 // number of moves are generated based on the
1514                 // size.
1515                 // When moving >= 64 bytes a loop is used
1516                 //      MOVD len/32,REG_TMP
1517                 //      MOVD REG_TMP,CTR
1518                 // top:
1519                 //      LXV 0(R21),VS32
1520                 //      LXV 16(R21),VS33
1521                 //      ADD $32,R21
1522                 //      STXV VS32,0(R20)
1523                 //      STXV VS33,16(R20)
1524                 //      ADD $32,R20
1525                 //      BC 16,0,top
1526                 // Bytes not moved by this loop are moved
1527                 // with a combination of the following instructions,
1528                 // starting with the largest sizes and generating as
1529                 // many as needed, using the appropriate offset value.
1530                 //      MOVD  n(R21),R31
1531                 //      MOVD  R31,n(R20)
1532                 //      MOVW  n1(R21),R31
1533                 //      MOVW  R31,n1(R20)
1534                 //      MOVH  n2(R21),R31
1535                 //      MOVH  R31,n2(R20)
1536                 //      MOVB  n3(R21),R31
1537                 //      MOVB  R31,n3(R20)
1538
1539                 // Each loop iteration moves 64 bytes
1540                 ctr := v.AuxInt / bytesPerLoop
1541
1542                 // Remainder after the loop
1543                 rem := v.AuxInt % bytesPerLoop
1544
1545                 dstReg := v.Args[0].Reg()
1546                 srcReg := v.Args[1].Reg()
1547
1548                 offset := int64(0)
1549
1550                 // top of the loop
1551                 var top *obj.Prog
1552
1553                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1554                 if ctr > 1 {
1555                         // Set up the CTR
1556                         p := s.Prog(ppc64.AMOVD)
1557                         p.From.Type = obj.TYPE_CONST
1558                         p.From.Offset = ctr
1559                         p.To.Type = obj.TYPE_REG
1560                         p.To.Reg = ppc64.REGTMP
1561
1562                         p = s.Prog(ppc64.AMOVD)
1563                         p.From.Type = obj.TYPE_REG
1564                         p.From.Reg = ppc64.REGTMP
1565                         p.To.Type = obj.TYPE_REG
1566                         p.To.Reg = ppc64.REG_CTR
1567
1568                         p = s.Prog(obj.APCALIGN)
1569                         p.From.Type = obj.TYPE_CONST
1570                         p.From.Offset = 16
1571
1572                         // Generate 16 byte loads and stores.
1573                         p = s.Prog(ppc64.ALXV)
1574                         p.From.Type = obj.TYPE_MEM
1575                         p.From.Reg = srcReg
1576                         p.From.Offset = offset
1577                         p.To.Type = obj.TYPE_REG
1578                         p.To.Reg = ppc64.REG_VS32
1579                         if top == nil {
1580                                 top = p
1581                         }
1582                         p = s.Prog(ppc64.ALXV)
1583                         p.From.Type = obj.TYPE_MEM
1584                         p.From.Reg = srcReg
1585                         p.From.Offset = offset + 16
1586                         p.To.Type = obj.TYPE_REG
1587                         p.To.Reg = ppc64.REG_VS33
1588
1589                         // generate 16 byte stores
1590                         p = s.Prog(ppc64.ASTXV)
1591                         p.From.Type = obj.TYPE_REG
1592                         p.From.Reg = ppc64.REG_VS32
1593                         p.To.Type = obj.TYPE_MEM
1594                         p.To.Reg = dstReg
1595                         p.To.Offset = offset
1596
1597                         p = s.Prog(ppc64.ASTXV)
1598                         p.From.Type = obj.TYPE_REG
1599                         p.From.Reg = ppc64.REG_VS33
1600                         p.To.Type = obj.TYPE_MEM
1601                         p.To.Reg = dstReg
1602                         p.To.Offset = offset + 16
1603
1604                         // Generate 16 byte loads and stores.
1605                         p = s.Prog(ppc64.ALXV)
1606                         p.From.Type = obj.TYPE_MEM
1607                         p.From.Reg = srcReg
1608                         p.From.Offset = offset + 32
1609                         p.To.Type = obj.TYPE_REG
1610                         p.To.Reg = ppc64.REG_VS32
1611
1612                         p = s.Prog(ppc64.ALXV)
1613                         p.From.Type = obj.TYPE_MEM
1614                         p.From.Reg = srcReg
1615                         p.From.Offset = offset + 48
1616                         p.To.Type = obj.TYPE_REG
1617                         p.To.Reg = ppc64.REG_VS33
1618
1619                         // generate 16 byte stores
1620                         p = s.Prog(ppc64.ASTXV)
1621                         p.From.Type = obj.TYPE_REG
1622                         p.From.Reg = ppc64.REG_VS32
1623                         p.To.Type = obj.TYPE_MEM
1624                         p.To.Reg = dstReg
1625                         p.To.Offset = offset + 32
1626
1627                         p = s.Prog(ppc64.ASTXV)
1628                         p.From.Type = obj.TYPE_REG
1629                         p.From.Reg = ppc64.REG_VS33
1630                         p.To.Type = obj.TYPE_MEM
1631                         p.To.Reg = dstReg
1632                         p.To.Offset = offset + 48
1633
1634                         // increment the src reg for next iteration
1635                         p = s.Prog(ppc64.AADD)
1636                         p.Reg = srcReg
1637                         p.From.Type = obj.TYPE_CONST
1638                         p.From.Offset = bytesPerLoop
1639                         p.To.Type = obj.TYPE_REG
1640                         p.To.Reg = srcReg
1641
1642                         // increment the dst reg for next iteration
1643                         p = s.Prog(ppc64.AADD)
1644                         p.Reg = dstReg
1645                         p.From.Type = obj.TYPE_CONST
1646                         p.From.Offset = bytesPerLoop
1647                         p.To.Type = obj.TYPE_REG
1648                         p.To.Reg = dstReg
1649
1650                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1651                         // to loop top.
1652                         p = s.Prog(ppc64.ABC)
1653                         p.From.Type = obj.TYPE_CONST
1654                         p.From.Offset = ppc64.BO_BCTR
1655                         p.Reg = ppc64.REG_R0
1656                         p.To.Type = obj.TYPE_BRANCH
1657                         p.To.SetTarget(top)
1658
1659                         // srcReg and dstReg were incremented in the loop, so
1660                         // later instructions start with offset 0.
1661                         offset = int64(0)
1662                 }
1663
1664                 // No loop was generated for one iteration, so
1665                 // add 64 bytes to the remainder to move those bytes.
1666                 if ctr == 1 {
1667                         rem += bytesPerLoop
1668                 }
1669                 if rem >= 32 {
1670                         p := s.Prog(ppc64.ALXV)
1671                         p.From.Type = obj.TYPE_MEM
1672                         p.From.Reg = srcReg
1673                         p.To.Type = obj.TYPE_REG
1674                         p.To.Reg = ppc64.REG_VS32
1675
1676                         p = s.Prog(ppc64.ALXV)
1677                         p.From.Type = obj.TYPE_MEM
1678                         p.From.Reg = srcReg
1679                         p.From.Offset = 16
1680                         p.To.Type = obj.TYPE_REG
1681                         p.To.Reg = ppc64.REG_VS33
1682
1683                         p = s.Prog(ppc64.ASTXV)
1684                         p.From.Type = obj.TYPE_REG
1685                         p.From.Reg = ppc64.REG_VS32
1686                         p.To.Type = obj.TYPE_MEM
1687                         p.To.Reg = dstReg
1688
1689                         p = s.Prog(ppc64.ASTXV)
1690                         p.From.Type = obj.TYPE_REG
1691                         p.From.Reg = ppc64.REG_VS33
1692                         p.To.Type = obj.TYPE_MEM
1693                         p.To.Reg = dstReg
1694                         p.To.Offset = 16
1695
1696                         offset = 32
1697                         rem -= 32
1698                 }
1699
1700                 if rem >= 16 {
1701                         // Generate 16 byte loads and stores.
1702                         p := s.Prog(ppc64.ALXV)
1703                         p.From.Type = obj.TYPE_MEM
1704                         p.From.Reg = srcReg
1705                         p.From.Offset = offset
1706                         p.To.Type = obj.TYPE_REG
1707                         p.To.Reg = ppc64.REG_VS32
1708
1709                         p = s.Prog(ppc64.ASTXV)
1710                         p.From.Type = obj.TYPE_REG
1711                         p.From.Reg = ppc64.REG_VS32
1712                         p.To.Type = obj.TYPE_MEM
1713                         p.To.Reg = dstReg
1714                         p.To.Offset = offset
1715
1716                         offset += 16
1717                         rem -= 16
1718
1719                         if rem >= 16 {
1720                                 p := s.Prog(ppc64.ALXV)
1721                                 p.From.Type = obj.TYPE_MEM
1722                                 p.From.Reg = srcReg
1723                                 p.From.Offset = offset
1724                                 p.To.Type = obj.TYPE_REG
1725                                 p.To.Reg = ppc64.REG_VS32
1726
1727                                 p = s.Prog(ppc64.ASTXV)
1728                                 p.From.Type = obj.TYPE_REG
1729                                 p.From.Reg = ppc64.REG_VS32
1730                                 p.To.Type = obj.TYPE_MEM
1731                                 p.To.Reg = dstReg
1732                                 p.To.Offset = offset
1733
1734                                 offset += 16
1735                                 rem -= 16
1736                         }
1737                 }
1738                 // Generate all the remaining load and store pairs, starting with
1739                 // as many 8 byte moves as possible, then 4, 2, 1.
1740                 for rem > 0 {
1741                         op, size := ppc64.AMOVB, int64(1)
1742                         switch {
1743                         case rem >= 8:
1744                                 op, size = ppc64.AMOVD, 8
1745                         case rem >= 4:
1746                                 op, size = ppc64.AMOVW, 4
1747                         case rem >= 2:
1748                                 op, size = ppc64.AMOVH, 2
1749                         }
1750                         // Load
1751                         p := s.Prog(op)
1752                         p.To.Type = obj.TYPE_REG
1753                         p.To.Reg = ppc64.REGTMP
1754                         p.From.Type = obj.TYPE_MEM
1755                         p.From.Reg = srcReg
1756                         p.From.Offset = offset
1757
1758                         // Store
1759                         p = s.Prog(op)
1760                         p.From.Type = obj.TYPE_REG
1761                         p.From.Reg = ppc64.REGTMP
1762                         p.To.Type = obj.TYPE_MEM
1763                         p.To.Reg = dstReg
1764                         p.To.Offset = offset
1765                         rem -= size
1766                         offset += size
1767                 }
1768
1769         case ssa.OpPPC64CALLstatic:
1770                 s.Call(v)
1771
1772         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1773                 p := s.Prog(ppc64.AMOVD)
1774                 p.From.Type = obj.TYPE_REG
1775                 p.From.Reg = v.Args[0].Reg()
1776                 p.To.Type = obj.TYPE_REG
1777                 p.To.Reg = ppc64.REG_LR
1778
1779                 if v.Args[0].Reg() != ppc64.REG_R12 {
1780                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1781                 }
1782
1783                 pp := s.Call(v)
1784                 pp.To.Reg = ppc64.REG_LR
1785
1786                 // Insert a hint this is not a subroutine return.
1787                 pp.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 1})
1788
1789                 if base.Ctxt.Flag_shared {
1790                         // When compiling Go into PIC, the function we just
1791                         // called via pointer might have been implemented in
1792                         // a separate module and so overwritten the TOC
1793                         // pointer in R2; reload it.
1794                         q := s.Prog(ppc64.AMOVD)
1795                         q.From.Type = obj.TYPE_MEM
1796                         q.From.Offset = 24
1797                         q.From.Reg = ppc64.REGSP
1798                         q.To.Type = obj.TYPE_REG
1799                         q.To.Reg = ppc64.REG_R2
1800                 }
1801
1802         case ssa.OpPPC64LoweredWB:
1803                 p := s.Prog(obj.ACALL)
1804                 p.To.Type = obj.TYPE_MEM
1805                 p.To.Name = obj.NAME_EXTERN
1806                 p.To.Sym = v.Aux.(*obj.LSym)
1807
1808         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1809                 p := s.Prog(obj.ACALL)
1810                 p.To.Type = obj.TYPE_MEM
1811                 p.To.Name = obj.NAME_EXTERN
1812                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1813                 s.UseArgs(16) // space used in callee args area by assembly stubs
1814
1815         case ssa.OpPPC64LoweredNilCheck:
1816                 if objabi.GOOS == "aix" {
1817                         // CMP Rarg0, R0
1818                         // BNE 2(PC)
1819                         // STW R0, 0(R0)
1820                         // NOP (so the BNE has somewhere to land)
1821
1822                         // CMP Rarg0, R0
1823                         p := s.Prog(ppc64.ACMP)
1824                         p.From.Type = obj.TYPE_REG
1825                         p.From.Reg = v.Args[0].Reg()
1826                         p.To.Type = obj.TYPE_REG
1827                         p.To.Reg = ppc64.REG_R0
1828
1829                         // BNE 2(PC)
1830                         p2 := s.Prog(ppc64.ABNE)
1831                         p2.To.Type = obj.TYPE_BRANCH
1832
1833                         // STW R0, 0(R0)
1834                         // Write at 0 is forbidden and will trigger a SIGSEGV
1835                         p = s.Prog(ppc64.AMOVW)
1836                         p.From.Type = obj.TYPE_REG
1837                         p.From.Reg = ppc64.REG_R0
1838                         p.To.Type = obj.TYPE_MEM
1839                         p.To.Reg = ppc64.REG_R0
1840
1841                         // NOP (so the BNE has somewhere to land)
1842                         nop := s.Prog(obj.ANOP)
1843                         p2.To.SetTarget(nop)
1844
1845                 } else {
1846                         // Issue a load which will fault if arg is nil.
1847                         p := s.Prog(ppc64.AMOVBZ)
1848                         p.From.Type = obj.TYPE_MEM
1849                         p.From.Reg = v.Args[0].Reg()
1850                         gc.AddAux(&p.From, v)
1851                         p.To.Type = obj.TYPE_REG
1852                         p.To.Reg = ppc64.REGTMP
1853                 }
1854                 if logopt.Enabled() {
1855                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1856                 }
1857                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1858                         base.WarnfAt(v.Pos, "generated nil check")
1859                 }
1860
1861         // These should be resolved by rules and not make it here.
1862         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1863                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1864                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1865                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1866         case ssa.OpPPC64InvertFlags:
1867                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1868         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1869                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1870         case ssa.OpClobber:
1871                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1872         default:
1873                 v.Fatalf("genValue not implemented: %s", v.LongString())
1874         }
1875 }
1876
1877 var blockJump = [...]struct {
1878         asm, invasm     obj.As
1879         asmeq, invasmun bool
1880 }{
1881         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1882         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1883
1884         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1885         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1886         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1887         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1888
1889         // TODO: need to work FP comparisons into block jumps
1890         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1891         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1892         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1893         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1894 }
1895
1896 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1897         switch b.Kind {
1898         case ssa.BlockDefer:
1899                 // defer returns in R3:
1900                 // 0 if we should continue executing
1901                 // 1 if we should jump to deferreturn call
1902                 p := s.Prog(ppc64.ACMP)
1903                 p.From.Type = obj.TYPE_REG
1904                 p.From.Reg = ppc64.REG_R3
1905                 p.To.Type = obj.TYPE_REG
1906                 p.To.Reg = ppc64.REG_R0
1907
1908                 p = s.Prog(ppc64.ABNE)
1909                 p.To.Type = obj.TYPE_BRANCH
1910                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1911                 if b.Succs[0].Block() != next {
1912                         p := s.Prog(obj.AJMP)
1913                         p.To.Type = obj.TYPE_BRANCH
1914                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1915                 }
1916
1917         case ssa.BlockPlain:
1918                 if b.Succs[0].Block() != next {
1919                         p := s.Prog(obj.AJMP)
1920                         p.To.Type = obj.TYPE_BRANCH
1921                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1922                 }
1923         case ssa.BlockExit:
1924         case ssa.BlockRet:
1925                 s.Prog(obj.ARET)
1926         case ssa.BlockRetJmp:
1927                 p := s.Prog(obj.AJMP)
1928                 p.To.Type = obj.TYPE_MEM
1929                 p.To.Name = obj.NAME_EXTERN
1930                 p.To.Sym = b.Aux.(*obj.LSym)
1931
1932         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1933                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1934                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1935                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1936                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1937                 jmp := blockJump[b.Kind]
1938                 switch next {
1939                 case b.Succs[0].Block():
1940                         s.Br(jmp.invasm, b.Succs[1].Block())
1941                         if jmp.invasmun {
1942                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1943                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1944                         }
1945                 case b.Succs[1].Block():
1946                         s.Br(jmp.asm, b.Succs[0].Block())
1947                         if jmp.asmeq {
1948                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1949                         }
1950                 default:
1951                         if b.Likely != ssa.BranchUnlikely {
1952                                 s.Br(jmp.asm, b.Succs[0].Block())
1953                                 if jmp.asmeq {
1954                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1955                                 }
1956                                 s.Br(obj.AJMP, b.Succs[1].Block())
1957                         } else {
1958                                 s.Br(jmp.invasm, b.Succs[1].Block())
1959                                 if jmp.invasmun {
1960                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1961                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1962                                 }
1963                                 s.Br(obj.AJMP, b.Succs[0].Block())
1964                         }
1965                 }
1966         default:
1967                 b.Fatalf("branch not implemented: %s", b.LongString())
1968         }
1969 }