]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
cmd/compile/internal: add ABI register information for ppc64
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/ssa"
12         "cmd/compile/internal/ssagen"
13         "cmd/compile/internal/types"
14         "cmd/internal/obj"
15         "cmd/internal/obj/ppc64"
16         "internal/buildcfg"
17         "math"
18         "strings"
19 )
20
21 // ssaMarkMoves is meant to mark any MOVDconst ops that need to avoid clobbering flags; the implementation below is commented out, so it currently does nothing.
22 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
23         //      flive := b.FlagsLiveAtEnd
24         //      if b.Control != nil && b.Control.Type.IsFlags() {
25         //              flive = true
26         //      }
27         //      for i := len(b.Values) - 1; i >= 0; i-- {
28         //              v := b.Values[i]
29         //              if flive && (v.Op == ssa.OpPPC64MOVDconst) {
30         //                      // The "mark" is any non-nil Aux value.
31         //                      v.Aux = v
32         //              }
33         //              if v.Type.IsFlags() {
34         //                      flive = false
35         //              }
36         //              for _, a := range v.Args {
37         //                      if a.Type.IsFlags() {
38         //                              flive = true
39         //                      }
40         //              }
41         //      }
42 }
43
44 // loadByType returns the load instruction of the given type.
45 func loadByType(t *types.Type) obj.As {
46         if t.IsFloat() {
47                 switch t.Size() {
48                 case 4:
49                         return ppc64.AFMOVS
50                 case 8:
51                         return ppc64.AFMOVD
52                 }
53         } else {
54                 switch t.Size() {
55                 case 1:
56                         if t.IsSigned() {
57                                 return ppc64.AMOVB
58                         } else {
59                                 return ppc64.AMOVBZ
60                         }
61                 case 2:
62                         if t.IsSigned() {
63                                 return ppc64.AMOVH
64                         } else {
65                                 return ppc64.AMOVHZ
66                         }
67                 case 4:
68                         if t.IsSigned() {
69                                 return ppc64.AMOVW
70                         } else {
71                                 return ppc64.AMOVWZ
72                         }
73                 case 8:
74                         return ppc64.AMOVD
75                 }
76         }
77         panic("bad load type")
78 }
79
80 // storeByType returns the store instruction of the given type.
81 func storeByType(t *types.Type) obj.As {
82         if t.IsFloat() {
83                 switch t.Size() {
84                 case 4:
85                         return ppc64.AFMOVS
86                 case 8:
87                         return ppc64.AFMOVD
88                 }
89         } else {
90                 switch t.Size() {
91                 case 1:
92                         return ppc64.AMOVB
93                 case 2:
94                         return ppc64.AMOVH
95                 case 4:
96                         return ppc64.AMOVW
97                 case 8:
98                         return ppc64.AMOVD
99                 }
100         }
101         panic("bad store type")
102 }
103
104 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
105         switch v.Op {
106         case ssa.OpCopy:
107                 t := v.Type
108                 if t.IsMemory() {
109                         return
110                 }
111                 x := v.Args[0].Reg()
112                 y := v.Reg()
113                 if x != y {
114                         rt := obj.TYPE_REG
115                         op := ppc64.AMOVD
116
117                         if t.IsFloat() {
118                                 op = ppc64.AFMOVD
119                         }
120                         p := s.Prog(op)
121                         p.From.Type = rt
122                         p.From.Reg = x
123                         p.To.Type = rt
124                         p.To.Reg = y
125                 }
126
127         case ssa.OpPPC64LoweredMuluhilo:
128                 // MULHDU       Rarg1, Rarg0, Reg0
129                 // MULLD        Rarg1, Rarg0, Reg1
130                 r0 := v.Args[0].Reg()
131                 r1 := v.Args[1].Reg()
132                 p := s.Prog(ppc64.AMULHDU)
133                 p.From.Type = obj.TYPE_REG
134                 p.From.Reg = r1
135                 p.Reg = r0
136                 p.To.Type = obj.TYPE_REG
137                 p.To.Reg = v.Reg0()
138                 p1 := s.Prog(ppc64.AMULLD)
139                 p1.From.Type = obj.TYPE_REG
140                 p1.From.Reg = r1
141                 p1.Reg = r0
142                 p1.To.Type = obj.TYPE_REG
143                 p1.To.Reg = v.Reg1()
144
145         case ssa.OpPPC64LoweredAdd64Carry:
146                 // ADDC         Rarg2, -1, Rtmp
147                 // ADDE         Rarg1, Rarg0, Reg0
148                 // ADDZE        Rzero, Reg1
149                 r0 := v.Args[0].Reg()
150                 r1 := v.Args[1].Reg()
151                 r2 := v.Args[2].Reg()
152                 p := s.Prog(ppc64.AADDC)
153                 p.From.Type = obj.TYPE_CONST
154                 p.From.Offset = -1
155                 p.Reg = r2
156                 p.To.Type = obj.TYPE_REG
157                 p.To.Reg = ppc64.REGTMP
158                 p1 := s.Prog(ppc64.AADDE)
159                 p1.From.Type = obj.TYPE_REG
160                 p1.From.Reg = r1
161                 p1.Reg = r0
162                 p1.To.Type = obj.TYPE_REG
163                 p1.To.Reg = v.Reg0()
164                 p2 := s.Prog(ppc64.AADDZE)
165                 p2.From.Type = obj.TYPE_REG
166                 p2.From.Reg = ppc64.REGZERO
167                 p2.To.Type = obj.TYPE_REG
168                 p2.To.Reg = v.Reg1()
169
170         case ssa.OpPPC64LoweredAtomicAnd8,
171                 ssa.OpPPC64LoweredAtomicAnd32,
172                 ssa.OpPPC64LoweredAtomicOr8,
173                 ssa.OpPPC64LoweredAtomicOr32:
174                 // LWSYNC
175                 // LBAR/LWAR    (Rarg0), Rtmp
176                 // AND/OR       Rarg1, Rtmp
177                 // STBCCC/STWCCC Rtmp, (Rarg0)
178                 // BNE          -3(PC)
179                 ld := ppc64.ALBAR
180                 st := ppc64.ASTBCCC
181                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
182                         ld = ppc64.ALWAR
183                         st = ppc64.ASTWCCC
184                 }
185                 r0 := v.Args[0].Reg()
186                 r1 := v.Args[1].Reg()
187                 // LWSYNC - Assuming shared data not write-through-required nor
188                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
189                 plwsync := s.Prog(ppc64.ALWSYNC)
190                 plwsync.To.Type = obj.TYPE_NONE
191                 // LBAR or LWAR
192                 p := s.Prog(ld)
193                 p.From.Type = obj.TYPE_MEM
194                 p.From.Reg = r0
195                 p.To.Type = obj.TYPE_REG
196                 p.To.Reg = ppc64.REGTMP
197                 // AND/OR reg1,out
198                 p1 := s.Prog(v.Op.Asm())
199                 p1.From.Type = obj.TYPE_REG
200                 p1.From.Reg = r1
201                 p1.To.Type = obj.TYPE_REG
202                 p1.To.Reg = ppc64.REGTMP
203                 // STBCCC or STWCCC
204                 p2 := s.Prog(st)
205                 p2.From.Type = obj.TYPE_REG
206                 p2.From.Reg = ppc64.REGTMP
207                 p2.To.Type = obj.TYPE_MEM
208                 p2.To.Reg = r0
209                 p2.RegTo2 = ppc64.REGTMP
210                 // BNE retry
211                 p3 := s.Prog(ppc64.ABNE)
212                 p3.To.Type = obj.TYPE_BRANCH
213                 p3.To.SetTarget(p)
214
215         case ssa.OpPPC64LoweredAtomicAdd32,
216                 ssa.OpPPC64LoweredAtomicAdd64:
217                 // LWSYNC
218                 // LDAR/LWAR    (Rarg0), Rout
219                 // ADD          Rarg1, Rout
220                 // STDCCC/STWCCC Rout, (Rarg0)
221                 // BNE         -3(PC)
222                 // MOVW         Rout,Rout (if Add32)
223                 ld := ppc64.ALDAR
224                 st := ppc64.ASTDCCC
225                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
226                         ld = ppc64.ALWAR
227                         st = ppc64.ASTWCCC
228                 }
229                 r0 := v.Args[0].Reg()
230                 r1 := v.Args[1].Reg()
231                 out := v.Reg0()
232                 // LWSYNC - Assuming shared data not write-through-required nor
233                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
234                 plwsync := s.Prog(ppc64.ALWSYNC)
235                 plwsync.To.Type = obj.TYPE_NONE
236                 // LDAR or LWAR
237                 p := s.Prog(ld)
238                 p.From.Type = obj.TYPE_MEM
239                 p.From.Reg = r0
240                 p.To.Type = obj.TYPE_REG
241                 p.To.Reg = out
242                 // ADD reg1,out
243                 p1 := s.Prog(ppc64.AADD)
244                 p1.From.Type = obj.TYPE_REG
245                 p1.From.Reg = r1
246                 p1.To.Reg = out
247                 p1.To.Type = obj.TYPE_REG
248                 // STDCCC or STWCCC
249                 p3 := s.Prog(st)
250                 p3.From.Type = obj.TYPE_REG
251                 p3.From.Reg = out
252                 p3.To.Type = obj.TYPE_MEM
253                 p3.To.Reg = r0
254                 // BNE retry
255                 p4 := s.Prog(ppc64.ABNE)
256                 p4.To.Type = obj.TYPE_BRANCH
257                 p4.To.SetTarget(p)
258
259                 // Ensure a 32 bit result
260                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
261                         p5 := s.Prog(ppc64.AMOVWZ)
262                         p5.To.Type = obj.TYPE_REG
263                         p5.To.Reg = out
264                         p5.From.Type = obj.TYPE_REG
265                         p5.From.Reg = out
266                 }
267
268         case ssa.OpPPC64LoweredAtomicExchange32,
269                 ssa.OpPPC64LoweredAtomicExchange64:
270                 // LWSYNC
271                 // LDAR/LWAR    (Rarg0), Rout
272                 // STDCCC/STWCCC Rout, (Rarg0)
273                 // BNE         -2(PC)
274                 // ISYNC
275                 ld := ppc64.ALDAR
276                 st := ppc64.ASTDCCC
277                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
278                         ld = ppc64.ALWAR
279                         st = ppc64.ASTWCCC
280                 }
281                 r0 := v.Args[0].Reg()
282                 r1 := v.Args[1].Reg()
283                 out := v.Reg0()
284                 // LWSYNC - Assuming shared data not write-through-required nor
285                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
286                 plwsync := s.Prog(ppc64.ALWSYNC)
287                 plwsync.To.Type = obj.TYPE_NONE
288                 // LDAR or LWAR
289                 p := s.Prog(ld)
290                 p.From.Type = obj.TYPE_MEM
291                 p.From.Reg = r0
292                 p.To.Type = obj.TYPE_REG
293                 p.To.Reg = out
294                 // STDCCC or STWCCC
295                 p1 := s.Prog(st)
296                 p1.From.Type = obj.TYPE_REG
297                 p1.From.Reg = r1
298                 p1.To.Type = obj.TYPE_MEM
299                 p1.To.Reg = r0
300                 // BNE retry
301                 p2 := s.Prog(ppc64.ABNE)
302                 p2.To.Type = obj.TYPE_BRANCH
303                 p2.To.SetTarget(p)
304                 // ISYNC
305                 pisync := s.Prog(ppc64.AISYNC)
306                 pisync.To.Type = obj.TYPE_NONE
307
308         case ssa.OpPPC64LoweredAtomicLoad8,
309                 ssa.OpPPC64LoweredAtomicLoad32,
310                 ssa.OpPPC64LoweredAtomicLoad64,
311                 ssa.OpPPC64LoweredAtomicLoadPtr:
312                 // SYNC
313                 // MOVB/MOVD/MOVW (Rarg0), Rout
314                 // CMP Rout,Rout
315                 // BNE 1(PC)
316                 // ISYNC
317                 ld := ppc64.AMOVD
318                 cmp := ppc64.ACMP
319                 switch v.Op {
320                 case ssa.OpPPC64LoweredAtomicLoad8:
321                         ld = ppc64.AMOVBZ
322                 case ssa.OpPPC64LoweredAtomicLoad32:
323                         ld = ppc64.AMOVWZ
324                         cmp = ppc64.ACMPW
325                 }
326                 arg0 := v.Args[0].Reg()
327                 out := v.Reg0()
328                 // SYNC when AuxInt == 1; otherwise, load-acquire
329                 if v.AuxInt == 1 {
330                         psync := s.Prog(ppc64.ASYNC)
331                         psync.To.Type = obj.TYPE_NONE
332                 }
333                 // Load
334                 p := s.Prog(ld)
335                 p.From.Type = obj.TYPE_MEM
336                 p.From.Reg = arg0
337                 p.To.Type = obj.TYPE_REG
338                 p.To.Reg = out
339                 // CMP
340                 p1 := s.Prog(cmp)
341                 p1.From.Type = obj.TYPE_REG
342                 p1.From.Reg = out
343                 p1.To.Type = obj.TYPE_REG
344                 p1.To.Reg = out
345                 // BNE
346                 p2 := s.Prog(ppc64.ABNE)
347                 p2.To.Type = obj.TYPE_BRANCH
348                 // ISYNC
349                 pisync := s.Prog(ppc64.AISYNC)
350                 pisync.To.Type = obj.TYPE_NONE
351                 p2.To.SetTarget(pisync)
352
353         case ssa.OpPPC64LoweredAtomicStore8,
354                 ssa.OpPPC64LoweredAtomicStore32,
355                 ssa.OpPPC64LoweredAtomicStore64:
356                 // SYNC or LWSYNC
357                 // MOVB/MOVW/MOVD arg1,(arg0)
358                 st := ppc64.AMOVD
359                 switch v.Op {
360                 case ssa.OpPPC64LoweredAtomicStore8:
361                         st = ppc64.AMOVB
362                 case ssa.OpPPC64LoweredAtomicStore32:
363                         st = ppc64.AMOVW
364                 }
365                 arg0 := v.Args[0].Reg()
366                 arg1 := v.Args[1].Reg()
367                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
368                 // SYNC
369                 syncOp := ppc64.ASYNC
370                 if v.AuxInt == 0 {
371                         syncOp = ppc64.ALWSYNC
372                 }
373                 psync := s.Prog(syncOp)
374                 psync.To.Type = obj.TYPE_NONE
375                 // Store
376                 p := s.Prog(st)
377                 p.To.Type = obj.TYPE_MEM
378                 p.To.Reg = arg0
379                 p.From.Type = obj.TYPE_REG
380                 p.From.Reg = arg1
381
382         case ssa.OpPPC64LoweredAtomicCas64,
383                 ssa.OpPPC64LoweredAtomicCas32:
384                 // LWSYNC
385                 // loop:
386                 // LDAR        (Rarg0), MutexHint, Rtmp
387                 // CMP         Rarg1, Rtmp
388                 // BNE         fail
389                 // STDCCC      Rarg2, (Rarg0)
390                 // BNE         loop
391                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
392                 // MOVD        $1, Rout
393                 // BR          end
394                 // fail:
395                 // MOVD        $0, Rout
396                 // end:
397                 ld := ppc64.ALDAR
398                 st := ppc64.ASTDCCC
399                 cmp := ppc64.ACMP
400                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
401                         ld = ppc64.ALWAR
402                         st = ppc64.ASTWCCC
403                         cmp = ppc64.ACMPW
404                 }
405                 r0 := v.Args[0].Reg()
406                 r1 := v.Args[1].Reg()
407                 r2 := v.Args[2].Reg()
408                 out := v.Reg0()
409                 // LWSYNC - Assuming shared data not write-through-required nor
410                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
411                 plwsync1 := s.Prog(ppc64.ALWSYNC)
412                 plwsync1.To.Type = obj.TYPE_NONE
413                 // LDAR or LWAR
414                 p := s.Prog(ld)
415                 p.From.Type = obj.TYPE_MEM
416                 p.From.Reg = r0
417                 p.To.Type = obj.TYPE_REG
418                 p.To.Reg = ppc64.REGTMP
419                 // If it is a Compare-and-Swap-Release operation, set the EH field with
420                 // the release hint.
421                 if v.AuxInt == 0 {
422                         p.SetFrom3Const(0)
423                 }
424                 // CMP reg1,reg2
425                 p1 := s.Prog(cmp)
426                 p1.From.Type = obj.TYPE_REG
427                 p1.From.Reg = r1
428                 p1.To.Reg = ppc64.REGTMP
429                 p1.To.Type = obj.TYPE_REG
430                 // BNE cas_fail
431                 p2 := s.Prog(ppc64.ABNE)
432                 p2.To.Type = obj.TYPE_BRANCH
433                 // STDCCC or STWCCC
434                 p3 := s.Prog(st)
435                 p3.From.Type = obj.TYPE_REG
436                 p3.From.Reg = r2
437                 p3.To.Type = obj.TYPE_MEM
438                 p3.To.Reg = r0
439                 // BNE retry
440                 p4 := s.Prog(ppc64.ABNE)
441                 p4.To.Type = obj.TYPE_BRANCH
442                 p4.To.SetTarget(p)
443                 // LWSYNC - Assuming shared data not write-through-required nor
444                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
445                 // If the operation is a CAS-Release, then synchronization is not necessary.
446                 if v.AuxInt != 0 {
447                         plwsync2 := s.Prog(ppc64.ALWSYNC)
448                         plwsync2.To.Type = obj.TYPE_NONE
449                 }
450                 // return true
451                 p5 := s.Prog(ppc64.AMOVD)
452                 p5.From.Type = obj.TYPE_CONST
453                 p5.From.Offset = 1
454                 p5.To.Type = obj.TYPE_REG
455                 p5.To.Reg = out
456                 // BR done
457                 p6 := s.Prog(obj.AJMP)
458                 p6.To.Type = obj.TYPE_BRANCH
459                 // return false
460                 p7 := s.Prog(ppc64.AMOVD)
461                 p7.From.Type = obj.TYPE_CONST
462                 p7.From.Offset = 0
463                 p7.To.Type = obj.TYPE_REG
464                 p7.To.Reg = out
465                 p2.To.SetTarget(p7)
466                 // done (label)
467                 p8 := s.Prog(obj.ANOP)
468                 p6.To.SetTarget(p8)
469
470         case ssa.OpPPC64LoweredGetClosurePtr:
471                 // Closure pointer is R11 (already)
472                 ssagen.CheckLoweredGetClosurePtr(v)
473
474         case ssa.OpPPC64LoweredGetCallerSP:
475                 // caller's SP is FixedFrameSize below the address of the first arg
476                 p := s.Prog(ppc64.AMOVD)
477                 p.From.Type = obj.TYPE_ADDR
478                 p.From.Offset = -base.Ctxt.FixedFrameSize()
479                 p.From.Name = obj.NAME_PARAM
480                 p.To.Type = obj.TYPE_REG
481                 p.To.Reg = v.Reg()
482
483         case ssa.OpPPC64LoweredGetCallerPC:
484                 p := s.Prog(obj.AGETCALLERPC)
485                 p.To.Type = obj.TYPE_REG
486                 p.To.Reg = v.Reg()
487
488         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
489                 // input is already rounded
490
491         case ssa.OpLoadReg:
492                 loadOp := loadByType(v.Type)
493                 p := s.Prog(loadOp)
494                 ssagen.AddrAuto(&p.From, v.Args[0])
495                 p.To.Type = obj.TYPE_REG
496                 p.To.Reg = v.Reg()
497
498         case ssa.OpStoreReg:
499                 storeOp := storeByType(v.Type)
500                 p := s.Prog(storeOp)
501                 p.From.Type = obj.TYPE_REG
502                 p.From.Reg = v.Args[0].Reg()
503                 ssagen.AddrAuto(&p.To, v)
504
505         case ssa.OpArgIntReg, ssa.OpArgFloatReg:
506                 ssagen.CheckArgReg(v)
507
508         case ssa.OpPPC64DIVD:
509                 // For now,
510                 //
511                 // cmp arg1, -1
512                 // be  ahead
513                 // v = arg0 / arg1
514                 // b over
515                 // ahead: v = - arg0
516                 // over: nop
517                 r := v.Reg()
518                 r0 := v.Args[0].Reg()
519                 r1 := v.Args[1].Reg()
520
521                 p := s.Prog(ppc64.ACMP)
522                 p.From.Type = obj.TYPE_REG
523                 p.From.Reg = r1
524                 p.To.Type = obj.TYPE_CONST
525                 p.To.Offset = -1
526
527                 pbahead := s.Prog(ppc64.ABEQ)
528                 pbahead.To.Type = obj.TYPE_BRANCH
529
530                 p = s.Prog(v.Op.Asm())
531                 p.From.Type = obj.TYPE_REG
532                 p.From.Reg = r1
533                 p.Reg = r0
534                 p.To.Type = obj.TYPE_REG
535                 p.To.Reg = r
536
537                 pbover := s.Prog(obj.AJMP)
538                 pbover.To.Type = obj.TYPE_BRANCH
539
540                 p = s.Prog(ppc64.ANEG)
541                 p.To.Type = obj.TYPE_REG
542                 p.To.Reg = r
543                 p.From.Type = obj.TYPE_REG
544                 p.From.Reg = r0
545                 pbahead.To.SetTarget(p)
546
547                 p = s.Prog(obj.ANOP)
548                 pbover.To.SetTarget(p)
549
550         case ssa.OpPPC64DIVW:
551                 // word-width version of above
552                 r := v.Reg()
553                 r0 := v.Args[0].Reg()
554                 r1 := v.Args[1].Reg()
555
556                 p := s.Prog(ppc64.ACMPW)
557                 p.From.Type = obj.TYPE_REG
558                 p.From.Reg = r1
559                 p.To.Type = obj.TYPE_CONST
560                 p.To.Offset = -1
561
562                 pbahead := s.Prog(ppc64.ABEQ)
563                 pbahead.To.Type = obj.TYPE_BRANCH
564
565                 p = s.Prog(v.Op.Asm())
566                 p.From.Type = obj.TYPE_REG
567                 p.From.Reg = r1
568                 p.Reg = r0
569                 p.To.Type = obj.TYPE_REG
570                 p.To.Reg = r
571
572                 pbover := s.Prog(obj.AJMP)
573                 pbover.To.Type = obj.TYPE_BRANCH
574
575                 p = s.Prog(ppc64.ANEG)
576                 p.To.Type = obj.TYPE_REG
577                 p.To.Reg = r
578                 p.From.Type = obj.TYPE_REG
579                 p.From.Reg = r0
580                 pbahead.To.SetTarget(p)
581
582                 p = s.Prog(obj.ANOP)
583                 pbover.To.SetTarget(p)
584
585         case ssa.OpPPC64CLRLSLWI:
586                 r := v.Reg()
587                 r1 := v.Args[0].Reg()
588                 shifts := v.AuxInt
589                 p := s.Prog(v.Op.Asm())
590                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
591                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
592                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
593                 p.Reg = r1
594                 p.To.Type = obj.TYPE_REG
595                 p.To.Reg = r
596
597         case ssa.OpPPC64CLRLSLDI:
598                 r := v.Reg()
599                 r1 := v.Args[0].Reg()
600                 shifts := v.AuxInt
601                 p := s.Prog(v.Op.Asm())
602                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
603                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
604                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
605                 p.Reg = r1
606                 p.To.Type = obj.TYPE_REG
607                 p.To.Reg = r
608
609                 // Mask has been set as sh
610         case ssa.OpPPC64RLDICL:
611                 r := v.Reg()
612                 r1 := v.Args[0].Reg()
613                 shifts := v.AuxInt
614                 p := s.Prog(v.Op.Asm())
615                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
616                 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
617                 p.Reg = r1
618                 p.To.Type = obj.TYPE_REG
619                 p.To.Reg = r
620
621         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
622                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
623                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
624                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
625                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
626                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
627                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
628                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
629                 r := v.Reg()
630                 r1 := v.Args[0].Reg()
631                 r2 := v.Args[1].Reg()
632                 p := s.Prog(v.Op.Asm())
633                 p.From.Type = obj.TYPE_REG
634                 p.From.Reg = r2
635                 p.Reg = r1
636                 p.To.Type = obj.TYPE_REG
637                 p.To.Reg = r
638
639         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
640                 r1 := v.Args[0].Reg()
641                 r2 := v.Args[1].Reg()
642                 p := s.Prog(v.Op.Asm())
643                 p.From.Type = obj.TYPE_REG
644                 p.From.Reg = r2
645                 p.Reg = r1
646                 p.To.Type = obj.TYPE_REG
647                 p.To.Reg = ppc64.REGTMP // result is not needed
648
649         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
650                 p := s.Prog(v.Op.Asm())
651                 p.From.Type = obj.TYPE_CONST
652                 p.From.Offset = v.AuxInt
653                 p.Reg = v.Args[0].Reg()
654                 p.To.Type = obj.TYPE_REG
655                 p.To.Reg = v.Reg()
656
657                 // Auxint holds encoded rotate + mask
658         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
659                 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
660                 p := s.Prog(v.Op.Asm())
661                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
662                 p.Reg = v.Args[0].Reg()
663                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
664                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
665
666                 // Auxint holds mask
667         case ssa.OpPPC64RLWNM:
668                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
669                 p := s.Prog(v.Op.Asm())
670                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
671                 p.Reg = v.Args[0].Reg()
672                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
673                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
674
675         case ssa.OpPPC64MADDLD:
676                 r := v.Reg()
677                 r1 := v.Args[0].Reg()
678                 r2 := v.Args[1].Reg()
679                 r3 := v.Args[2].Reg()
680                 // r = r1*r2 ± r3
681                 p := s.Prog(v.Op.Asm())
682                 p.From.Type = obj.TYPE_REG
683                 p.From.Reg = r1
684                 p.Reg = r2
685                 p.SetFrom3Reg(r3)
686                 p.To.Type = obj.TYPE_REG
687                 p.To.Reg = r
688
689         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
690                 r := v.Reg()
691                 r1 := v.Args[0].Reg()
692                 r2 := v.Args[1].Reg()
693                 r3 := v.Args[2].Reg()
694                 // r = r1*r2 ± r3
695                 p := s.Prog(v.Op.Asm())
696                 p.From.Type = obj.TYPE_REG
697                 p.From.Reg = r1
698                 p.Reg = r3
699                 p.SetFrom3Reg(r2)
700                 p.To.Type = obj.TYPE_REG
701                 p.To.Reg = r
702
703         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
704                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
705                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
706                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
707                 r := v.Reg()
708                 p := s.Prog(v.Op.Asm())
709                 p.To.Type = obj.TYPE_REG
710                 p.To.Reg = r
711                 p.From.Type = obj.TYPE_REG
712                 p.From.Reg = v.Args[0].Reg()
713
714         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
715                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
716                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
717                 p := s.Prog(v.Op.Asm())
718                 p.Reg = v.Args[0].Reg()
719                 p.From.Type = obj.TYPE_CONST
720                 p.From.Offset = v.AuxInt
721                 p.To.Type = obj.TYPE_REG
722                 p.To.Reg = v.Reg()
723
724         case ssa.OpPPC64SUBFCconst:
725                 p := s.Prog(v.Op.Asm())
726                 p.SetFrom3Const(v.AuxInt)
727                 p.From.Type = obj.TYPE_REG
728                 p.From.Reg = v.Args[0].Reg()
729                 p.To.Type = obj.TYPE_REG
730                 p.To.Reg = v.Reg()
731
732         case ssa.OpPPC64ANDCCconst:
733                 p := s.Prog(v.Op.Asm())
734                 p.Reg = v.Args[0].Reg()
735                 p.From.Type = obj.TYPE_CONST
736                 p.From.Offset = v.AuxInt
737                 p.To.Type = obj.TYPE_REG
738                 p.To.Reg = ppc64.REGTMP // discard result
739
740         case ssa.OpPPC64MOVDaddr:
741                 switch v.Aux.(type) {
742                 default:
743                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
744                 case nil:
745                         // If aux offset and aux int are both 0, and the same
746                         // input and output regs are used, no instruction
747                         // needs to be generated, since it would just be
748                         // addi rx, rx, 0.
749                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
750                                 p := s.Prog(ppc64.AMOVD)
751                                 p.From.Type = obj.TYPE_ADDR
752                                 p.From.Reg = v.Args[0].Reg()
753                                 p.From.Offset = v.AuxInt
754                                 p.To.Type = obj.TYPE_REG
755                                 p.To.Reg = v.Reg()
756                         }
757
758                 case *obj.LSym, ir.Node:
759                         p := s.Prog(ppc64.AMOVD)
760                         p.From.Type = obj.TYPE_ADDR
761                         p.From.Reg = v.Args[0].Reg()
762                         p.To.Type = obj.TYPE_REG
763                         p.To.Reg = v.Reg()
764                         ssagen.AddAux(&p.From, v)
765
766                 }
767
768         case ssa.OpPPC64MOVDconst:
769                 p := s.Prog(v.Op.Asm())
770                 p.From.Type = obj.TYPE_CONST
771                 p.From.Offset = v.AuxInt
772                 p.To.Type = obj.TYPE_REG
773                 p.To.Reg = v.Reg()
774
775         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
776                 p := s.Prog(v.Op.Asm())
777                 p.From.Type = obj.TYPE_FCONST
778                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
779                 p.To.Type = obj.TYPE_REG
780                 p.To.Reg = v.Reg()
781
782         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
783                 p := s.Prog(v.Op.Asm())
784                 p.From.Type = obj.TYPE_REG
785                 p.From.Reg = v.Args[0].Reg()
786                 p.To.Type = obj.TYPE_REG
787                 p.To.Reg = v.Args[1].Reg()
788
789         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
790                 p := s.Prog(v.Op.Asm())
791                 p.From.Type = obj.TYPE_REG
792                 p.From.Reg = v.Args[0].Reg()
793                 p.To.Type = obj.TYPE_CONST
794                 p.To.Offset = v.AuxInt
795
796         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
797                 // Shift in register to required size
798                 p := s.Prog(v.Op.Asm())
799                 p.From.Type = obj.TYPE_REG
800                 p.From.Reg = v.Args[0].Reg()
801                 p.To.Reg = v.Reg()
802                 p.To.Type = obj.TYPE_REG
803
804         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
805
806                 // MOVDload and MOVWload are DS form instructions that are restricted to
807                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
808                 // then the address of the symbol to be loaded is computed (base + offset)
809                 // and used as the new base register and the offset field in the instruction
810                 // can be set to zero.
811
812                 // This same problem can happen with gostrings since the final offset is not
813                 // known yet, but could be unaligned after the relocation is resolved.
814                 // So gostrings are handled the same way.
815
816                 // This allows the MOVDload and MOVWload to be generated in more cases and
817                 // eliminates some offset and alignment checking in the rules file.
818
819                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
820                 ssagen.AddAux(&fromAddr, v)
821
822                 genAddr := false
823
824                 switch fromAddr.Name {
825                 case obj.NAME_EXTERN, obj.NAME_STATIC:
826                         // Special case for a rule that combines the bytes of a gostring.
827                         // The v alignment might seem OK, but we don't want to load it
828                         // using an offset because relocation comes later.
829                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
830                 default:
831                         genAddr = fromAddr.Offset%4 != 0
832                 }
833                 if genAddr {
834                         // Load full address into the temp register.
835                         p := s.Prog(ppc64.AMOVD)
836                         p.From.Type = obj.TYPE_ADDR
837                         p.From.Reg = v.Args[0].Reg()
838                         ssagen.AddAux(&p.From, v)
839                         // Load target using temp as base register
840                         // and offset zero. Setting NAME_NONE
841                         // prevents any extra offsets from being
842                         // added.
843                         p.To.Type = obj.TYPE_REG
844                         p.To.Reg = ppc64.REGTMP
845                         fromAddr.Reg = ppc64.REGTMP
846                         // Clear the offset field and other
847                         // information that might be used
848                         // by the assembler to add to the
849                         // final offset value.
850                         fromAddr.Offset = 0
851                         fromAddr.Name = obj.NAME_NONE
852                         fromAddr.Sym = nil
853                 }
854                 p := s.Prog(v.Op.Asm())
855                 p.From = fromAddr
856                 p.To.Type = obj.TYPE_REG
857                 p.To.Reg = v.Reg()
858                 break
859
860         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
861                 p := s.Prog(v.Op.Asm())
862                 p.From.Type = obj.TYPE_MEM
863                 p.From.Reg = v.Args[0].Reg()
864                 ssagen.AddAux(&p.From, v)
865                 p.To.Type = obj.TYPE_REG
866                 p.To.Reg = v.Reg()
867
868         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
869                 p := s.Prog(v.Op.Asm())
870                 p.From.Type = obj.TYPE_MEM
871                 p.From.Reg = v.Args[0].Reg()
872                 p.To.Type = obj.TYPE_REG
873                 p.To.Reg = v.Reg()
874
875         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
876                 p := s.Prog(v.Op.Asm())
877                 p.To.Type = obj.TYPE_MEM
878                 p.To.Reg = v.Args[0].Reg()
879                 p.From.Type = obj.TYPE_REG
880                 p.From.Reg = v.Args[1].Reg()
881
882         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
883                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
884                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
885                 p := s.Prog(v.Op.Asm())
886                 p.From.Type = obj.TYPE_MEM
887                 p.From.Reg = v.Args[0].Reg()
888                 p.From.Index = v.Args[1].Reg()
889                 p.To.Type = obj.TYPE_REG
890                 p.To.Reg = v.Reg()
891
892         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
893                 p := s.Prog(v.Op.Asm())
894                 p.From.Type = obj.TYPE_REG
895                 p.From.Reg = ppc64.REGZERO
896                 p.To.Type = obj.TYPE_MEM
897                 p.To.Reg = v.Args[0].Reg()
898                 ssagen.AddAux(&p.To, v)
899
900         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
901
902                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
903                 // to offset values that are a multiple of 4. If the offset field is not a
904                 // multiple of 4, then the full address of the store target is computed (base +
905                 // offset) and used as the new base register and the offset in the instruction
906                 // is set to 0.
907
908                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
909                 // and prevents checking of the offset value and alignment in the rules.
910
911                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
912                 ssagen.AddAux(&toAddr, v)
913
914                 if toAddr.Offset%4 != 0 {
915                         p := s.Prog(ppc64.AMOVD)
916                         p.From.Type = obj.TYPE_ADDR
917                         p.From.Reg = v.Args[0].Reg()
918                         ssagen.AddAux(&p.From, v)
919                         p.To.Type = obj.TYPE_REG
920                         p.To.Reg = ppc64.REGTMP
921                         toAddr.Reg = ppc64.REGTMP
922                         // Clear the offset field and other
923                         // information that might be used
924                         // by the assembler to add to the
925                         // final offset value.
926                         toAddr.Offset = 0
927                         toAddr.Name = obj.NAME_NONE
928                         toAddr.Sym = nil
929                 }
930                 p := s.Prog(v.Op.Asm())
931                 p.To = toAddr
932                 p.From.Type = obj.TYPE_REG
933                 if v.Op == ssa.OpPPC64MOVDstorezero {
934                         p.From.Reg = ppc64.REGZERO
935                 } else {
936                         p.From.Reg = v.Args[1].Reg()
937                 }
938
939         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
940                 p := s.Prog(v.Op.Asm())
941                 p.From.Type = obj.TYPE_REG
942                 p.From.Reg = v.Args[1].Reg()
943                 p.To.Type = obj.TYPE_MEM
944                 p.To.Reg = v.Args[0].Reg()
945                 ssagen.AddAux(&p.To, v)
946
947         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
948                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
949                 ssa.OpPPC64MOVHBRstoreidx:
950                 p := s.Prog(v.Op.Asm())
951                 p.From.Type = obj.TYPE_REG
952                 p.From.Reg = v.Args[2].Reg()
953                 p.To.Index = v.Args[1].Reg()
954                 p.To.Type = obj.TYPE_MEM
955                 p.To.Reg = v.Args[0].Reg()
956
957         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
958                 // ISEL, ISELB
959                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
960                 // ISEL only accepts 0, 1, 2 condition values but the others can be
961                 // achieved by swapping operand order.
962                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
963                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
964                 // ISELB is used when a boolean result is needed, returning 0 or 1
965                 p := s.Prog(ppc64.AISEL)
966                 p.To.Type = obj.TYPE_REG
967                 p.To.Reg = v.Reg()
968                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
969                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
970                 if v.Op == ssa.OpPPC64ISEL {
971                         r.Reg = v.Args[1].Reg()
972                 }
973                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
974                 if v.AuxInt > 3 {
975                         p.Reg = r.Reg
976                         p.SetFrom3Reg(v.Args[0].Reg())
977                 } else {
978                         p.Reg = v.Args[0].Reg()
979                         p.SetFrom3(r)
980                 }
981                 p.From.Type = obj.TYPE_CONST
982                 p.From.Offset = v.AuxInt & 3
983
984         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
985                 // The LoweredQuad code generation
986                 // generates STXV instructions on
987                 // power9. The Short variation is used
988                 // if no loop is generated.
989
990                 // sizes >= 128 generate a loop as follows:
991
992                 // Set up loop counter in CTR, used by BC
993                 // XXLXOR clears VS32
994                 //       XXLXOR VS32,VS32,VS32
995                 //       MOVD len/64,REG_TMP
996                 //       MOVD REG_TMP,CTR
997                 //       loop:
998                 //       STXV VS32,0(R20)
999                 //       STXV VS32,16(R20)
1000                 //       STXV VS32,32(R20)
1001                 //       STXV VS32,48(R20)
1002                 //       ADD  $64,R20
1003                 //       BC   16, 0, loop
1004
1005                 // Number of loop iterations (each zeroes 64 bytes)
1006                 ctr := v.AuxInt / 64
1007
1008                 // Remainder bytes
1009                 rem := v.AuxInt % 64
1010
1011                 // Only generate a loop if there is more
1012                 // than 1 iteration.
1013                 if ctr > 1 {
1014                         // Set up VS32 (V0) to hold 0s
1015                         p := s.Prog(ppc64.AXXLXOR)
1016                         p.From.Type = obj.TYPE_REG
1017                         p.From.Reg = ppc64.REG_VS32
1018                         p.To.Type = obj.TYPE_REG
1019                         p.To.Reg = ppc64.REG_VS32
1020                         p.Reg = ppc64.REG_VS32
1021
1022                         // Set up CTR loop counter
1023                         p = s.Prog(ppc64.AMOVD)
1024                         p.From.Type = obj.TYPE_CONST
1025                         p.From.Offset = ctr
1026                         p.To.Type = obj.TYPE_REG
1027                         p.To.Reg = ppc64.REGTMP
1028
1029                         p = s.Prog(ppc64.AMOVD)
1030                         p.From.Type = obj.TYPE_REG
1031                         p.From.Reg = ppc64.REGTMP
1032                         p.To.Type = obj.TYPE_REG
1033                         p.To.Reg = ppc64.REG_CTR
1034
1035                         // Don't generate padding for
1036                         // loops with few iterations.
1037                         if ctr > 3 {
1038                                 p = s.Prog(obj.APCALIGN)
1039                                 p.From.Type = obj.TYPE_CONST
1040                                 p.From.Offset = 16
1041                         }
1042
1043                         // generate 4 STXVs to zero 64 bytes
1044                         var top *obj.Prog
1045
1046                         p = s.Prog(ppc64.ASTXV)
1047                         p.From.Type = obj.TYPE_REG
1048                         p.From.Reg = ppc64.REG_VS32
1049                         p.To.Type = obj.TYPE_MEM
1050                         p.To.Reg = v.Args[0].Reg()
1051
1052                         //  Save the top of loop
1053                         if top == nil {
1054                                 top = p
1055                         }
1056                         p = s.Prog(ppc64.ASTXV)
1057                         p.From.Type = obj.TYPE_REG
1058                         p.From.Reg = ppc64.REG_VS32
1059                         p.To.Type = obj.TYPE_MEM
1060                         p.To.Reg = v.Args[0].Reg()
1061                         p.To.Offset = 16
1062
1063                         p = s.Prog(ppc64.ASTXV)
1064                         p.From.Type = obj.TYPE_REG
1065                         p.From.Reg = ppc64.REG_VS32
1066                         p.To.Type = obj.TYPE_MEM
1067                         p.To.Reg = v.Args[0].Reg()
1068                         p.To.Offset = 32
1069
1070                         p = s.Prog(ppc64.ASTXV)
1071                         p.From.Type = obj.TYPE_REG
1072                         p.From.Reg = ppc64.REG_VS32
1073                         p.To.Type = obj.TYPE_MEM
1074                         p.To.Reg = v.Args[0].Reg()
1075                         p.To.Offset = 48
1076
1077                         // Increment address for the
1078                         // 64 bytes just zeroed.
1079                         p = s.Prog(ppc64.AADD)
1080                         p.Reg = v.Args[0].Reg()
1081                         p.From.Type = obj.TYPE_CONST
1082                         p.From.Offset = 64
1083                         p.To.Type = obj.TYPE_REG
1084                         p.To.Reg = v.Args[0].Reg()
1085
1086                         // Branch back to top of loop
1087                         // based on CTR
1088                         // BC with BO_BCTR generates bdnz
1089                         p = s.Prog(ppc64.ABC)
1090                         p.From.Type = obj.TYPE_CONST
1091                         p.From.Offset = ppc64.BO_BCTR
1092                         p.Reg = ppc64.REG_R0
1093                         p.To.Type = obj.TYPE_BRANCH
1094                         p.To.SetTarget(top)
1095                 }
1096                 // When ctr == 1 the loop was not generated but
1097                 // there are at least 64 bytes to clear, so add
1098                 // that to the remainder to generate the code
1099                 // to clear those doublewords
1100                 if ctr == 1 {
1101                         rem += 64
1102                 }
1103
1104                 // Clear the remainder starting at offset zero
1105                 offset := int64(0)
1106
1107                 if rem >= 16 && ctr <= 1 {
1108                         // If the XXLXOR hasn't already been
1109                         // generated, do it here to initialize
1110                         // VS32 (V0) to 0.
1111                         p := s.Prog(ppc64.AXXLXOR)
1112                         p.From.Type = obj.TYPE_REG
1113                         p.From.Reg = ppc64.REG_VS32
1114                         p.To.Type = obj.TYPE_REG
1115                         p.To.Reg = ppc64.REG_VS32
1116                         p.Reg = ppc64.REG_VS32
1117                 }
1118                 // Generate STXV for 32 or 64
1119                 // bytes.
1120                 for rem >= 32 {
1121                         p := s.Prog(ppc64.ASTXV)
1122                         p.From.Type = obj.TYPE_REG
1123                         p.From.Reg = ppc64.REG_VS32
1124                         p.To.Type = obj.TYPE_MEM
1125                         p.To.Reg = v.Args[0].Reg()
1126                         p.To.Offset = offset
1127
1128                         p = s.Prog(ppc64.ASTXV)
1129                         p.From.Type = obj.TYPE_REG
1130                         p.From.Reg = ppc64.REG_VS32
1131                         p.To.Type = obj.TYPE_MEM
1132                         p.To.Reg = v.Args[0].Reg()
1133                         p.To.Offset = offset + 16
1134                         offset += 32
1135                         rem -= 32
1136                 }
1137                 // Generate 16 bytes
1138                 if rem >= 16 {
1139                         p := s.Prog(ppc64.ASTXV)
1140                         p.From.Type = obj.TYPE_REG
1141                         p.From.Reg = ppc64.REG_VS32
1142                         p.To.Type = obj.TYPE_MEM
1143                         p.To.Reg = v.Args[0].Reg()
1144                         p.To.Offset = offset
1145                         offset += 16
1146                         rem -= 16
1147                 }
1148
1149                 // first clear as many doublewords as possible
1150                 // then clear remaining sizes as available
1151                 for rem > 0 {
1152                         op, size := ppc64.AMOVB, int64(1)
1153                         switch {
1154                         case rem >= 8:
1155                                 op, size = ppc64.AMOVD, 8
1156                         case rem >= 4:
1157                                 op, size = ppc64.AMOVW, 4
1158                         case rem >= 2:
1159                                 op, size = ppc64.AMOVH, 2
1160                         }
1161                         p := s.Prog(op)
1162                         p.From.Type = obj.TYPE_REG
1163                         p.From.Reg = ppc64.REG_R0
1164                         p.To.Type = obj.TYPE_MEM
1165                         p.To.Reg = v.Args[0].Reg()
1166                         p.To.Offset = offset
1167                         rem -= size
1168                         offset += size
1169                 }
1170
1171         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1172
1173                 // Unaligned data doesn't hurt performance
1174                 // for these instructions on power8.
1175
1176                 // For sizes >= 64 generate a loop as follows:
1177
1178                 // Set up loop counter in CTR, used by BC
1179                 //       XXLXOR VS32,VS32,VS32
1180                 //       MOVD len/32,REG_TMP
1181                 //       MOVD REG_TMP,CTR
1182                 //       MOVD $16,REG_TMP
1183                 //       loop:
1184                 //       STXVD2X VS32,(R0)(R20)
1185                 //       STXVD2X VS32,(R31)(R20)
1186                 //       ADD  $32,R20
1187                 //       BC   16, 0, loop
1188                 //
1189                 // any remainder is done as described below
1190
1191                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1192                 // then handle the remainder
1193                 //      MOVD R0,(R20)
1194                 //      MOVD R0,8(R20)
1195                 // .... etc.
1196                 //
1197                 // the remainder bytes are cleared using one or more
1198                 // of the following instructions with the appropriate
1199                 // offsets depending which instructions are needed
1200                 //
1201                 //      MOVW R0,n1(R20) 4 bytes
1202                 //      MOVH R0,n2(R20) 2 bytes
1203                 //      MOVB R0,n3(R20) 1 byte
1204                 //
1205                 // 7 bytes: MOVW, MOVH, MOVB
1206                 // 6 bytes: MOVW, MOVH
1207                 // 5 bytes: MOVW, MOVB
1208                 // 3 bytes: MOVH, MOVB
1209
1210                 // each loop iteration does 32 bytes
1211                 ctr := v.AuxInt / 32
1212
1213                 // remainder bytes
1214                 rem := v.AuxInt % 32
1215
1216                 // only generate a loop if there is more
1217                 // than 1 iteration.
1218                 if ctr > 1 {
1219                         // Set up VS32 (V0) to hold 0s
1220                         p := s.Prog(ppc64.AXXLXOR)
1221                         p.From.Type = obj.TYPE_REG
1222                         p.From.Reg = ppc64.REG_VS32
1223                         p.To.Type = obj.TYPE_REG
1224                         p.To.Reg = ppc64.REG_VS32
1225                         p.Reg = ppc64.REG_VS32
1226
1227                         // Set up CTR loop counter
1228                         p = s.Prog(ppc64.AMOVD)
1229                         p.From.Type = obj.TYPE_CONST
1230                         p.From.Offset = ctr
1231                         p.To.Type = obj.TYPE_REG
1232                         p.To.Reg = ppc64.REGTMP
1233
1234                         p = s.Prog(ppc64.AMOVD)
1235                         p.From.Type = obj.TYPE_REG
1236                         p.From.Reg = ppc64.REGTMP
1237                         p.To.Type = obj.TYPE_REG
1238                         p.To.Reg = ppc64.REG_CTR
1239
1240                         // Set up R31 to hold index value 16
1241                         p = s.Prog(ppc64.AMOVD)
1242                         p.From.Type = obj.TYPE_CONST
1243                         p.From.Offset = 16
1244                         p.To.Type = obj.TYPE_REG
1245                         p.To.Reg = ppc64.REGTMP
1246
1247                         // Don't add padding for alignment
1248                         // with few loop iterations.
1249                         if ctr > 3 {
1250                                 p = s.Prog(obj.APCALIGN)
1251                                 p.From.Type = obj.TYPE_CONST
1252                                 p.From.Offset = 16
1253                         }
1254
1255                         // generate 2 STXVD2Xs to store 16 bytes each;
1256                         // when this is a loop then the top must be saved
1257                         var top *obj.Prog
1258                         // This is the top of loop
1259
1260                         p = s.Prog(ppc64.ASTXVD2X)
1261                         p.From.Type = obj.TYPE_REG
1262                         p.From.Reg = ppc64.REG_VS32
1263                         p.To.Type = obj.TYPE_MEM
1264                         p.To.Reg = v.Args[0].Reg()
1265                         p.To.Index = ppc64.REGZERO
1266                         // Save the top of loop
1267                         if top == nil {
1268                                 top = p
1269                         }
1270                         p = s.Prog(ppc64.ASTXVD2X)
1271                         p.From.Type = obj.TYPE_REG
1272                         p.From.Reg = ppc64.REG_VS32
1273                         p.To.Type = obj.TYPE_MEM
1274                         p.To.Reg = v.Args[0].Reg()
1275                         p.To.Index = ppc64.REGTMP
1276
1277                         // Increment address for the
1278                         // 4 doublewords just zeroed.
1279                         p = s.Prog(ppc64.AADD)
1280                         p.Reg = v.Args[0].Reg()
1281                         p.From.Type = obj.TYPE_CONST
1282                         p.From.Offset = 32
1283                         p.To.Type = obj.TYPE_REG
1284                         p.To.Reg = v.Args[0].Reg()
1285
1286                         // Branch back to top of loop
1287                         // based on CTR
1288                         // BC with BO_BCTR generates bdnz
1289                         p = s.Prog(ppc64.ABC)
1290                         p.From.Type = obj.TYPE_CONST
1291                         p.From.Offset = ppc64.BO_BCTR
1292                         p.Reg = ppc64.REG_R0
1293                         p.To.Type = obj.TYPE_BRANCH
1294                         p.To.SetTarget(top)
1295                 }
1296
1297                 // when ctr == 1 the loop was not generated but
1298                 // there are at least 32 bytes to clear, so add
1299                 // that to the remainder to generate the code
1300                 // to clear those doublewords
1301                 if ctr == 1 {
1302                         rem += 32
1303                 }
1304
1305                 // clear the remainder starting at offset zero
1306                 offset := int64(0)
1307
1308                 // first clear as many doublewords as possible
1309                 // then clear remaining sizes as available
1310                 for rem > 0 {
1311                         op, size := ppc64.AMOVB, int64(1)
1312                         switch {
1313                         case rem >= 8:
1314                                 op, size = ppc64.AMOVD, 8
1315                         case rem >= 4:
1316                                 op, size = ppc64.AMOVW, 4
1317                         case rem >= 2:
1318                                 op, size = ppc64.AMOVH, 2
1319                         }
1320                         p := s.Prog(op)
1321                         p.From.Type = obj.TYPE_REG
1322                         p.From.Reg = ppc64.REG_R0
1323                         p.To.Type = obj.TYPE_MEM
1324                         p.To.Reg = v.Args[0].Reg()
1325                         p.To.Offset = offset
1326                         rem -= size
1327                         offset += size
1328                 }
1329
1330         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1331
1332                 bytesPerLoop := int64(32)
1333                 // This will be used when moving more
1334                 // than 8 bytes.  Moves start with
1335                 // as many 8 byte moves as possible, then
1336                 // 4, 2, or 1 byte(s) as remaining.  This will
1337                 // work and be efficient for power8 or later.
1338                 // If there are 64 or more bytes, then a
1339                 // loop is generated to move 32 bytes and
1340                 // update the src and dst addresses on each
1341                 // iteration. When < 64 bytes, the appropriate
1342                 // number of moves are generated based on the
1343                 // size.
1344                 // When moving >= 64 bytes a loop is used
1345                 //      MOVD len/32,REG_TMP
1346                 //      MOVD REG_TMP,CTR
1347                 //      MOVD $16,REG_TMP
1348                 // top:
1349                 //      LXVD2X (R0)(R21),VS32
1350                 //      LXVD2X (R31)(R21),VS33
1351                 //      ADD $32,R21
1352                 //      STXVD2X VS32,(R0)(R20)
1353                 //      STXVD2X VS33,(R31)(R20)
1354                 //      ADD $32,R20
1355                 //      BC 16,0,top
1356                 // Bytes not moved by this loop are moved
1357                 // with a combination of the following instructions,
1358                 // starting with the largest sizes and generating as
1359                 // many as needed, using the appropriate offset value.
1360                 //      MOVD  n(R21),R31
1361                 //      MOVD  R31,n(R20)
1362                 //      MOVW  n1(R21),R31
1363                 //      MOVW  R31,n1(R20)
1364                 //      MOVH  n2(R21),R31
1365                 //      MOVH  R31,n2(R20)
1366                 //      MOVB  n3(R21),R31
1367                 //      MOVB  R31,n3(R20)
1368
1369                 // Each loop iteration moves 32 bytes
1370                 ctr := v.AuxInt / bytesPerLoop
1371
1372                 // Remainder after the loop
1373                 rem := v.AuxInt % bytesPerLoop
1374
1375                 dstReg := v.Args[0].Reg()
1376                 srcReg := v.Args[1].Reg()
1377
1378                 // The set of registers used here, must match the clobbered reg list
1379                 // in PPC64Ops.go.
1380                 offset := int64(0)
1381
1382                 // top of the loop
1383                 var top *obj.Prog
1384                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1385                 if ctr > 1 {
1386                         // Set up the CTR
1387                         p := s.Prog(ppc64.AMOVD)
1388                         p.From.Type = obj.TYPE_CONST
1389                         p.From.Offset = ctr
1390                         p.To.Type = obj.TYPE_REG
1391                         p.To.Reg = ppc64.REGTMP
1392
1393                         p = s.Prog(ppc64.AMOVD)
1394                         p.From.Type = obj.TYPE_REG
1395                         p.From.Reg = ppc64.REGTMP
1396                         p.To.Type = obj.TYPE_REG
1397                         p.To.Reg = ppc64.REG_CTR
1398
1399                         // Use REGTMP as index reg
1400                         p = s.Prog(ppc64.AMOVD)
1401                         p.From.Type = obj.TYPE_CONST
1402                         p.From.Offset = 16
1403                         p.To.Type = obj.TYPE_REG
1404                         p.To.Reg = ppc64.REGTMP
1405
1406                         // Don't add padding for
1407                         // alignment with small iteration
1408                         // counts.
1409                         if ctr > 3 {
1410                                 p = s.Prog(obj.APCALIGN)
1411                                 p.From.Type = obj.TYPE_CONST
1412                                 p.From.Offset = 16
1413                         }
1414
1415                         // Generate 16 byte loads and stores.
1416                         // Use temp register for index (16)
1417                         // on the second one.
1418
1419                         p = s.Prog(ppc64.ALXVD2X)
1420                         p.From.Type = obj.TYPE_MEM
1421                         p.From.Reg = srcReg
1422                         p.From.Index = ppc64.REGZERO
1423                         p.To.Type = obj.TYPE_REG
1424                         p.To.Reg = ppc64.REG_VS32
1425                         if top == nil {
1426                                 top = p
1427                         }
1428                         p = s.Prog(ppc64.ALXVD2X)
1429                         p.From.Type = obj.TYPE_MEM
1430                         p.From.Reg = srcReg
1431                         p.From.Index = ppc64.REGTMP
1432                         p.To.Type = obj.TYPE_REG
1433                         p.To.Reg = ppc64.REG_VS33
1434
1435                         // increment the src reg for next iteration
1436                         p = s.Prog(ppc64.AADD)
1437                         p.Reg = srcReg
1438                         p.From.Type = obj.TYPE_CONST
1439                         p.From.Offset = bytesPerLoop
1440                         p.To.Type = obj.TYPE_REG
1441                         p.To.Reg = srcReg
1442
1443                         // generate 16 byte stores
1444                         p = s.Prog(ppc64.ASTXVD2X)
1445                         p.From.Type = obj.TYPE_REG
1446                         p.From.Reg = ppc64.REG_VS32
1447                         p.To.Type = obj.TYPE_MEM
1448                         p.To.Reg = dstReg
1449                         p.To.Index = ppc64.REGZERO
1450
1451                         p = s.Prog(ppc64.ASTXVD2X)
1452                         p.From.Type = obj.TYPE_REG
1453                         p.From.Reg = ppc64.REG_VS33
1454                         p.To.Type = obj.TYPE_MEM
1455                         p.To.Reg = dstReg
1456                         p.To.Index = ppc64.REGTMP
1457
1458                         // increment the dst reg for next iteration
1459                         p = s.Prog(ppc64.AADD)
1460                         p.Reg = dstReg
1461                         p.From.Type = obj.TYPE_CONST
1462                         p.From.Offset = bytesPerLoop
1463                         p.To.Type = obj.TYPE_REG
1464                         p.To.Reg = dstReg
1465
1466                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1467                         // to loop top.
1468                         p = s.Prog(ppc64.ABC)
1469                         p.From.Type = obj.TYPE_CONST
1470                         p.From.Offset = ppc64.BO_BCTR
1471                         p.Reg = ppc64.REG_R0
1472                         p.To.Type = obj.TYPE_BRANCH
1473                         p.To.SetTarget(top)
1474
1475                         // srcReg and dstReg were incremented in the loop, so
1476                         // later instructions start with offset 0.
1477                         offset = int64(0)
1478                 }
1479
1480                 // No loop was generated for one iteration, so
1481                 // add 32 bytes to the remainder to move those bytes.
1482                 if ctr == 1 {
1483                         rem += bytesPerLoop
1484                 }
1485
1486                 if rem >= 16 {
1487                         // Generate 16 byte loads and stores.
1488                         // Use temp register for index (value 16)
1489                         // on the second one.
1490                         p := s.Prog(ppc64.ALXVD2X)
1491                         p.From.Type = obj.TYPE_MEM
1492                         p.From.Reg = srcReg
1493                         p.From.Index = ppc64.REGZERO
1494                         p.To.Type = obj.TYPE_REG
1495                         p.To.Reg = ppc64.REG_VS32
1496
1497                         p = s.Prog(ppc64.ASTXVD2X)
1498                         p.From.Type = obj.TYPE_REG
1499                         p.From.Reg = ppc64.REG_VS32
1500                         p.To.Type = obj.TYPE_MEM
1501                         p.To.Reg = dstReg
1502                         p.To.Index = ppc64.REGZERO
1503
1504                         offset = 16
1505                         rem -= 16
1506
1507                         if rem >= 16 {
1508                                 // Use REGTMP as index reg
1509                                 p := s.Prog(ppc64.AMOVD)
1510                                 p.From.Type = obj.TYPE_CONST
1511                                 p.From.Offset = 16
1512                                 p.To.Type = obj.TYPE_REG
1513                                 p.To.Reg = ppc64.REGTMP
1514
1515                                 p = s.Prog(ppc64.ALXVD2X)
1516                                 p.From.Type = obj.TYPE_MEM
1517                                 p.From.Reg = srcReg
1518                                 p.From.Index = ppc64.REGTMP
1519                                 p.To.Type = obj.TYPE_REG
1520                                 p.To.Reg = ppc64.REG_VS32
1521
1522                                 p = s.Prog(ppc64.ASTXVD2X)
1523                                 p.From.Type = obj.TYPE_REG
1524                                 p.From.Reg = ppc64.REG_VS32
1525                                 p.To.Type = obj.TYPE_MEM
1526                                 p.To.Reg = dstReg
1527                                 p.To.Index = ppc64.REGTMP
1528
1529                                 offset = 32
1530                                 rem -= 16
1531                         }
1532                 }
1533
1534                 // Generate all the remaining load and store pairs, starting with
1535                 // as many 8 byte moves as possible, then 4, 2, 1.
1536                 for rem > 0 {
1537                         op, size := ppc64.AMOVB, int64(1)
1538                         switch {
1539                         case rem >= 8:
1540                                 op, size = ppc64.AMOVD, 8
1541                         case rem >= 4:
1542                                 op, size = ppc64.AMOVWZ, 4
1543                         case rem >= 2:
1544                                 op, size = ppc64.AMOVH, 2
1545                         }
1546                         // Load
1547                         p := s.Prog(op)
1548                         p.To.Type = obj.TYPE_REG
1549                         p.To.Reg = ppc64.REGTMP
1550                         p.From.Type = obj.TYPE_MEM
1551                         p.From.Reg = srcReg
1552                         p.From.Offset = offset
1553
1554                         // Store
1555                         p = s.Prog(op)
1556                         p.From.Type = obj.TYPE_REG
1557                         p.From.Reg = ppc64.REGTMP
1558                         p.To.Type = obj.TYPE_MEM
1559                         p.To.Reg = dstReg
1560                         p.To.Offset = offset
1561                         rem -= size
1562                         offset += size
1563                 }
1564
1565         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1566                 bytesPerLoop := int64(64)
1567                 // This is used when moving more than 8 bytes on power9.
1568                 // Moves start with as many 8 byte moves as possible, then
1569                 // 4, 2, or 1 byte(s) as remaining.  This will work and be
1570                 // efficient for power8 or later.  If there are 128 or more
1571                 // bytes, then a loop is generated to move 64 bytes and update
1572                 // the src and dst addresses on each iteration. When < 128
1573                 // bytes, the appropriate number of moves are generated based
1574                 // on the size.  When moving >= 128 bytes a loop is used
1575                 //      MOVD len/64,REG_TMP
1576                 //      MOVD REG_TMP,CTR
1577                 // top:
1578                 //      LXV 0(R21),VS32
1579                 //      LXV 16(R21),VS33
1580                 //      STXV VS32,0(R20)
1581                 //      STXV VS33,16(R20)
1582                 //      LXV 32(R21),VS32
1583                 //      LXV 48(R21),VS33
1584                 //      STXV VS32,32(R20)
1585                 //      STXV VS33,48(R20)
1586                 //      ADD $64,R21
1587                 //      ADD $64,R20
1588                 //      BC 16,0,top
1589                 // Bytes not moved by this loop are moved
1590                 // with a combination of the following instructions,
1591                 // starting with the largest sizes and generating as
1592                 // many as needed, using the appropriate offset value.
1593                 //      MOVD  n(R21),R31
1594                 //      MOVD  R31,n(R20)
1595                 //      MOVW  n1(R21),R31
1596                 //      MOVW  R31,n1(R20)
1597                 //      MOVH  n2(R21),R31
1598                 //      MOVH  R31,n2(R20)
1599                 //      MOVB  n3(R21),R31
1600                 //      MOVB  R31,n3(R20)
1601
1602                 // Each loop iteration moves 64 bytes
1603                 ctr := v.AuxInt / bytesPerLoop
1604
1605                 // Remainder after the loop
1606                 rem := v.AuxInt % bytesPerLoop
1607
1608                 dstReg := v.Args[0].Reg()
1609                 srcReg := v.Args[1].Reg()
1610
1611                 offset := int64(0)
1612
1613                 // top of the loop
1614                 var top *obj.Prog
1615
1616                 // Only generate looping code when loop counter is > 1 for >= 128 bytes
1617                 if ctr > 1 {
1618                         // Set up the CTR
1619                         p := s.Prog(ppc64.AMOVD)
1620                         p.From.Type = obj.TYPE_CONST
1621                         p.From.Offset = ctr
1622                         p.To.Type = obj.TYPE_REG
1623                         p.To.Reg = ppc64.REGTMP
1624
1625                         p = s.Prog(ppc64.AMOVD)
1626                         p.From.Type = obj.TYPE_REG
1627                         p.From.Reg = ppc64.REGTMP
1628                         p.To.Type = obj.TYPE_REG
1629                         p.To.Reg = ppc64.REG_CTR
1630
1631                         p = s.Prog(obj.APCALIGN)
1632                         p.From.Type = obj.TYPE_CONST
1633                         p.From.Offset = 16
1634
1635                         // Generate 16 byte loads and stores.
1636                         p = s.Prog(ppc64.ALXV)
1637                         p.From.Type = obj.TYPE_MEM
1638                         p.From.Reg = srcReg
1639                         p.From.Offset = offset
1640                         p.To.Type = obj.TYPE_REG
1641                         p.To.Reg = ppc64.REG_VS32
1642                         if top == nil {
1643                                 top = p
1644                         }
1645                         p = s.Prog(ppc64.ALXV)
1646                         p.From.Type = obj.TYPE_MEM
1647                         p.From.Reg = srcReg
1648                         p.From.Offset = offset + 16
1649                         p.To.Type = obj.TYPE_REG
1650                         p.To.Reg = ppc64.REG_VS33
1651
1652                         // generate 16 byte stores
1653                         p = s.Prog(ppc64.ASTXV)
1654                         p.From.Type = obj.TYPE_REG
1655                         p.From.Reg = ppc64.REG_VS32
1656                         p.To.Type = obj.TYPE_MEM
1657                         p.To.Reg = dstReg
1658                         p.To.Offset = offset
1659
1660                         p = s.Prog(ppc64.ASTXV)
1661                         p.From.Type = obj.TYPE_REG
1662                         p.From.Reg = ppc64.REG_VS33
1663                         p.To.Type = obj.TYPE_MEM
1664                         p.To.Reg = dstReg
1665                         p.To.Offset = offset + 16
1666
1667                         // Generate 16 byte loads and stores.
1668                         p = s.Prog(ppc64.ALXV)
1669                         p.From.Type = obj.TYPE_MEM
1670                         p.From.Reg = srcReg
1671                         p.From.Offset = offset + 32
1672                         p.To.Type = obj.TYPE_REG
1673                         p.To.Reg = ppc64.REG_VS32
1674
1675                         p = s.Prog(ppc64.ALXV)
1676                         p.From.Type = obj.TYPE_MEM
1677                         p.From.Reg = srcReg
1678                         p.From.Offset = offset + 48
1679                         p.To.Type = obj.TYPE_REG
1680                         p.To.Reg = ppc64.REG_VS33
1681
1682                         // generate 16 byte stores
1683                         p = s.Prog(ppc64.ASTXV)
1684                         p.From.Type = obj.TYPE_REG
1685                         p.From.Reg = ppc64.REG_VS32
1686                         p.To.Type = obj.TYPE_MEM
1687                         p.To.Reg = dstReg
1688                         p.To.Offset = offset + 32
1689
1690                         p = s.Prog(ppc64.ASTXV)
1691                         p.From.Type = obj.TYPE_REG
1692                         p.From.Reg = ppc64.REG_VS33
1693                         p.To.Type = obj.TYPE_MEM
1694                         p.To.Reg = dstReg
1695                         p.To.Offset = offset + 48
1696
1697                         // increment the src reg for next iteration
1698                         p = s.Prog(ppc64.AADD)
1699                         p.Reg = srcReg
1700                         p.From.Type = obj.TYPE_CONST
1701                         p.From.Offset = bytesPerLoop
1702                         p.To.Type = obj.TYPE_REG
1703                         p.To.Reg = srcReg
1704
1705                         // increment the dst reg for next iteration
1706                         p = s.Prog(ppc64.AADD)
1707                         p.Reg = dstReg
1708                         p.From.Type = obj.TYPE_CONST
1709                         p.From.Offset = bytesPerLoop
1710                         p.To.Type = obj.TYPE_REG
1711                         p.To.Reg = dstReg
1712
1713                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1714                         // to loop top.
1715                         p = s.Prog(ppc64.ABC)
1716                         p.From.Type = obj.TYPE_CONST
1717                         p.From.Offset = ppc64.BO_BCTR
1718                         p.Reg = ppc64.REG_R0
1719                         p.To.Type = obj.TYPE_BRANCH
1720                         p.To.SetTarget(top)
1721
1722                         // srcReg and dstReg were incremented in the loop, so
1723                         // later instructions start with offset 0.
1724                         offset = int64(0)
1725                 }
1726
1727                 // No loop was generated for one iteration, so
1728                 // add 64 bytes to the remainder to move those bytes.
1729                 if ctr == 1 {
1730                         rem += bytesPerLoop
1731                 }
1732                 if rem >= 32 {
1733                         p := s.Prog(ppc64.ALXV)
1734                         p.From.Type = obj.TYPE_MEM
1735                         p.From.Reg = srcReg
1736                         p.To.Type = obj.TYPE_REG
1737                         p.To.Reg = ppc64.REG_VS32
1738
1739                         p = s.Prog(ppc64.ALXV)
1740                         p.From.Type = obj.TYPE_MEM
1741                         p.From.Reg = srcReg
1742                         p.From.Offset = 16
1743                         p.To.Type = obj.TYPE_REG
1744                         p.To.Reg = ppc64.REG_VS33
1745
1746                         p = s.Prog(ppc64.ASTXV)
1747                         p.From.Type = obj.TYPE_REG
1748                         p.From.Reg = ppc64.REG_VS32
1749                         p.To.Type = obj.TYPE_MEM
1750                         p.To.Reg = dstReg
1751
1752                         p = s.Prog(ppc64.ASTXV)
1753                         p.From.Type = obj.TYPE_REG
1754                         p.From.Reg = ppc64.REG_VS33
1755                         p.To.Type = obj.TYPE_MEM
1756                         p.To.Reg = dstReg
1757                         p.To.Offset = 16
1758
1759                         offset = 32
1760                         rem -= 32
1761                 }
1762
1763                 if rem >= 16 {
1764                         // Generate 16 byte loads and stores.
1765                         p := s.Prog(ppc64.ALXV)
1766                         p.From.Type = obj.TYPE_MEM
1767                         p.From.Reg = srcReg
1768                         p.From.Offset = offset
1769                         p.To.Type = obj.TYPE_REG
1770                         p.To.Reg = ppc64.REG_VS32
1771
1772                         p = s.Prog(ppc64.ASTXV)
1773                         p.From.Type = obj.TYPE_REG
1774                         p.From.Reg = ppc64.REG_VS32
1775                         p.To.Type = obj.TYPE_MEM
1776                         p.To.Reg = dstReg
1777                         p.To.Offset = offset
1778
1779                         offset += 16
1780                         rem -= 16
1781
1782                         if rem >= 16 {
1783                                 p := s.Prog(ppc64.ALXV)
1784                                 p.From.Type = obj.TYPE_MEM
1785                                 p.From.Reg = srcReg
1786                                 p.From.Offset = offset
1787                                 p.To.Type = obj.TYPE_REG
1788                                 p.To.Reg = ppc64.REG_VS32
1789
1790                                 p = s.Prog(ppc64.ASTXV)
1791                                 p.From.Type = obj.TYPE_REG
1792                                 p.From.Reg = ppc64.REG_VS32
1793                                 p.To.Type = obj.TYPE_MEM
1794                                 p.To.Reg = dstReg
1795                                 p.To.Offset = offset
1796
1797                                 offset += 16
1798                                 rem -= 16
1799                         }
1800                 }
1801                 // Generate all the remaining load and store pairs, starting with
1802                 // as many 8 byte moves as possible, then 4, 2, 1.
1803                 for rem > 0 {
1804                         op, size := ppc64.AMOVB, int64(1)
1805                         switch {
1806                         case rem >= 8:
1807                                 op, size = ppc64.AMOVD, 8
1808                         case rem >= 4:
1809                                 op, size = ppc64.AMOVWZ, 4
1810                         case rem >= 2:
1811                                 op, size = ppc64.AMOVH, 2
1812                         }
1813                         // Load
1814                         p := s.Prog(op)
1815                         p.To.Type = obj.TYPE_REG
1816                         p.To.Reg = ppc64.REGTMP
1817                         p.From.Type = obj.TYPE_MEM
1818                         p.From.Reg = srcReg
1819                         p.From.Offset = offset
1820
1821                         // Store
1822                         p = s.Prog(op)
1823                         p.From.Type = obj.TYPE_REG
1824                         p.From.Reg = ppc64.REGTMP
1825                         p.To.Type = obj.TYPE_MEM
1826                         p.To.Reg = dstReg
1827                         p.To.Offset = offset
1828                         rem -= size
1829                         offset += size
1830                 }
1831
1832         case ssa.OpPPC64CALLstatic:
1833                 s.Call(v)
1834
1835         case ssa.OpPPC64CALLtail:
1836                 s.TailCall(v)
1837
1838         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1839                 p := s.Prog(ppc64.AMOVD)
1840                 p.From.Type = obj.TYPE_REG
1841                 p.From.Reg = v.Args[0].Reg()
1842                 p.To.Type = obj.TYPE_REG
1843                 p.To.Reg = ppc64.REG_LR
1844
1845                 if v.Args[0].Reg() != ppc64.REG_R12 {
1846                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1847                 }
1848
1849                 pp := s.Call(v)
1850                 pp.To.Reg = ppc64.REG_LR
1851
1852                 // Insert a hint this is not a subroutine return.
1853                 pp.SetFrom3Const(1)
1854
1855                 if base.Ctxt.Flag_shared {
1856                         // When compiling Go into PIC, the function we just
1857                         // called via pointer might have been implemented in
1858                         // a separate module and so overwritten the TOC
1859                         // pointer in R2; reload it.
1860                         q := s.Prog(ppc64.AMOVD)
1861                         q.From.Type = obj.TYPE_MEM
1862                         q.From.Offset = 24
1863                         q.From.Reg = ppc64.REGSP
1864                         q.To.Type = obj.TYPE_REG
1865                         q.To.Reg = ppc64.REG_R2
1866                 }
1867
1868         case ssa.OpPPC64LoweredWB:
1869                 p := s.Prog(obj.ACALL)
1870                 p.To.Type = obj.TYPE_MEM
1871                 p.To.Name = obj.NAME_EXTERN
1872                 p.To.Sym = v.Aux.(*obj.LSym)
1873
1874         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1875                 p := s.Prog(obj.ACALL)
1876                 p.To.Type = obj.TYPE_MEM
1877                 p.To.Name = obj.NAME_EXTERN
1878                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1879                 s.UseArgs(16) // space used in callee args area by assembly stubs
1880
1881         case ssa.OpPPC64LoweredNilCheck:
1882                 if buildcfg.GOOS == "aix" {
1883                         // CMP Rarg0, R0
1884                         // BNE 2(PC)
1885                         // STW R0, 0(R0)
1886                         // NOP (so the BNE has somewhere to land)
1887
1888                         // CMP Rarg0, R0
1889                         p := s.Prog(ppc64.ACMP)
1890                         p.From.Type = obj.TYPE_REG
1891                         p.From.Reg = v.Args[0].Reg()
1892                         p.To.Type = obj.TYPE_REG
1893                         p.To.Reg = ppc64.REG_R0
1894
1895                         // BNE 2(PC)
1896                         p2 := s.Prog(ppc64.ABNE)
1897                         p2.To.Type = obj.TYPE_BRANCH
1898
1899                         // STW R0, 0(R0)
1900                         // Write at 0 is forbidden and will trigger a SIGSEGV
1901                         p = s.Prog(ppc64.AMOVW)
1902                         p.From.Type = obj.TYPE_REG
1903                         p.From.Reg = ppc64.REG_R0
1904                         p.To.Type = obj.TYPE_MEM
1905                         p.To.Reg = ppc64.REG_R0
1906
1907                         // NOP (so the BNE has somewhere to land)
1908                         nop := s.Prog(obj.ANOP)
1909                         p2.To.SetTarget(nop)
1910
1911                 } else {
1912                         // Issue a load which will fault if arg is nil.
1913                         p := s.Prog(ppc64.AMOVBZ)
1914                         p.From.Type = obj.TYPE_MEM
1915                         p.From.Reg = v.Args[0].Reg()
1916                         ssagen.AddAux(&p.From, v)
1917                         p.To.Type = obj.TYPE_REG
1918                         p.To.Reg = ppc64.REGTMP
1919                 }
1920                 if logopt.Enabled() {
1921                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1922                 }
1923                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1924                         base.WarnfAt(v.Pos, "generated nil check")
1925                 }
1926
1927         // These should be resolved by rules and not make it here.
1928         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1929                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1930                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1931                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1932         case ssa.OpPPC64InvertFlags:
1933                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1934         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1935                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1936         case ssa.OpClobber, ssa.OpClobberReg:
1937                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1938         default:
1939                 v.Fatalf("genValue not implemented: %s", v.LongString())
1940         }
1941 }
1942
1943 var blockJump = [...]struct {
1944         asm, invasm     obj.As
1945         asmeq, invasmun bool
1946 }{
1947         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1948         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1949
1950         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1951         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1952         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1953         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1954
1955         // TODO: need to work FP comparisons into block jumps
1956         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1957         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1958         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1959         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1960 }
1961
1962 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1963         switch b.Kind {
1964         case ssa.BlockDefer:
1965                 // defer returns in R3:
1966                 // 0 if we should continue executing
1967                 // 1 if we should jump to deferreturn call
1968                 p := s.Prog(ppc64.ACMP)
1969                 p.From.Type = obj.TYPE_REG
1970                 p.From.Reg = ppc64.REG_R3
1971                 p.To.Type = obj.TYPE_REG
1972                 p.To.Reg = ppc64.REG_R0
1973
1974                 p = s.Prog(ppc64.ABNE)
1975                 p.To.Type = obj.TYPE_BRANCH
1976                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1977                 if b.Succs[0].Block() != next {
1978                         p := s.Prog(obj.AJMP)
1979                         p.To.Type = obj.TYPE_BRANCH
1980                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1981                 }
1982
1983         case ssa.BlockPlain:
1984                 if b.Succs[0].Block() != next {
1985                         p := s.Prog(obj.AJMP)
1986                         p.To.Type = obj.TYPE_BRANCH
1987                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1988                 }
1989         case ssa.BlockExit, ssa.BlockRetJmp:
1990         case ssa.BlockRet:
1991                 s.Prog(obj.ARET)
1992
1993         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1994                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1995                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1996                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1997                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1998                 jmp := blockJump[b.Kind]
1999                 switch next {
2000                 case b.Succs[0].Block():
2001                         s.Br(jmp.invasm, b.Succs[1].Block())
2002                         if jmp.invasmun {
2003                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2004                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2005                         }
2006                 case b.Succs[1].Block():
2007                         s.Br(jmp.asm, b.Succs[0].Block())
2008                         if jmp.asmeq {
2009                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2010                         }
2011                 default:
2012                         if b.Likely != ssa.BranchUnlikely {
2013                                 s.Br(jmp.asm, b.Succs[0].Block())
2014                                 if jmp.asmeq {
2015                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2016                                 }
2017                                 s.Br(obj.AJMP, b.Succs[1].Block())
2018                         } else {
2019                                 s.Br(jmp.invasm, b.Succs[1].Block())
2020                                 if jmp.invasmun {
2021                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2022                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2023                                 }
2024                                 s.Br(obj.AJMP, b.Succs[0].Block())
2025                         }
2026                 }
2027         default:
2028                 b.Fatalf("branch not implemented: %s", b.LongString())
2029         }
2030 }