]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
[dev.typeparams] all: merge master (9b84814) into dev.typeparams
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/ir"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/ssa"
12         "cmd/compile/internal/ssagen"
13         "cmd/compile/internal/types"
14         "cmd/internal/obj"
15         "cmd/internal/obj/ppc64"
16         "internal/buildcfg"
17         "math"
18         "strings"
19 )
20
21 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
22 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
23         //      flive := b.FlagsLiveAtEnd
24         //      if b.Control != nil && b.Control.Type.IsFlags() {
25         //              flive = true
26         //      }
27         //      for i := len(b.Values) - 1; i >= 0; i-- {
28         //              v := b.Values[i]
29         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
30         //                      // The "mark" is any non-nil Aux value.
31         //                      v.Aux = v
32         //              }
33         //              if v.Type.IsFlags() {
34         //                      flive = false
35         //              }
36         //              for _, a := range v.Args {
37         //                      if a.Type.IsFlags() {
38         //                              flive = true
39         //                      }
40         //              }
41         //      }
42 }
43
44 // loadByType returns the load instruction of the given type.
45 func loadByType(t *types.Type) obj.As {
46         if t.IsFloat() {
47                 switch t.Size() {
48                 case 4:
49                         return ppc64.AFMOVS
50                 case 8:
51                         return ppc64.AFMOVD
52                 }
53         } else {
54                 switch t.Size() {
55                 case 1:
56                         if t.IsSigned() {
57                                 return ppc64.AMOVB
58                         } else {
59                                 return ppc64.AMOVBZ
60                         }
61                 case 2:
62                         if t.IsSigned() {
63                                 return ppc64.AMOVH
64                         } else {
65                                 return ppc64.AMOVHZ
66                         }
67                 case 4:
68                         if t.IsSigned() {
69                                 return ppc64.AMOVW
70                         } else {
71                                 return ppc64.AMOVWZ
72                         }
73                 case 8:
74                         return ppc64.AMOVD
75                 }
76         }
77         panic("bad load type")
78 }
79
80 // storeByType returns the store instruction of the given type.
81 func storeByType(t *types.Type) obj.As {
82         if t.IsFloat() {
83                 switch t.Size() {
84                 case 4:
85                         return ppc64.AFMOVS
86                 case 8:
87                         return ppc64.AFMOVD
88                 }
89         } else {
90                 switch t.Size() {
91                 case 1:
92                         return ppc64.AMOVB
93                 case 2:
94                         return ppc64.AMOVH
95                 case 4:
96                         return ppc64.AMOVW
97                 case 8:
98                         return ppc64.AMOVD
99                 }
100         }
101         panic("bad store type")
102 }
103
104 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
105         switch v.Op {
106         case ssa.OpCopy:
107                 t := v.Type
108                 if t.IsMemory() {
109                         return
110                 }
111                 x := v.Args[0].Reg()
112                 y := v.Reg()
113                 if x != y {
114                         rt := obj.TYPE_REG
115                         op := ppc64.AMOVD
116
117                         if t.IsFloat() {
118                                 op = ppc64.AFMOVD
119                         }
120                         p := s.Prog(op)
121                         p.From.Type = rt
122                         p.From.Reg = x
123                         p.To.Type = rt
124                         p.To.Reg = y
125                 }
126
127         case ssa.OpPPC64LoweredMuluhilo:
128                 // MULHDU       Rarg1, Rarg0, Reg0
129                 // MULLD        Rarg1, Rarg0, Reg1
130                 r0 := v.Args[0].Reg()
131                 r1 := v.Args[1].Reg()
132                 p := s.Prog(ppc64.AMULHDU)
133                 p.From.Type = obj.TYPE_REG
134                 p.From.Reg = r1
135                 p.Reg = r0
136                 p.To.Type = obj.TYPE_REG
137                 p.To.Reg = v.Reg0()
138                 p1 := s.Prog(ppc64.AMULLD)
139                 p1.From.Type = obj.TYPE_REG
140                 p1.From.Reg = r1
141                 p1.Reg = r0
142                 p1.To.Type = obj.TYPE_REG
143                 p1.To.Reg = v.Reg1()
144
145         case ssa.OpPPC64LoweredAdd64Carry:
146                 // ADDC         Rarg2, -1, Rtmp
147                 // ADDE         Rarg1, Rarg0, Reg0
148                 // ADDZE        Rzero, Reg1
149                 r0 := v.Args[0].Reg()
150                 r1 := v.Args[1].Reg()
151                 r2 := v.Args[2].Reg()
152                 p := s.Prog(ppc64.AADDC)
153                 p.From.Type = obj.TYPE_CONST
154                 p.From.Offset = -1
155                 p.Reg = r2
156                 p.To.Type = obj.TYPE_REG
157                 p.To.Reg = ppc64.REGTMP
158                 p1 := s.Prog(ppc64.AADDE)
159                 p1.From.Type = obj.TYPE_REG
160                 p1.From.Reg = r1
161                 p1.Reg = r0
162                 p1.To.Type = obj.TYPE_REG
163                 p1.To.Reg = v.Reg0()
164                 p2 := s.Prog(ppc64.AADDZE)
165                 p2.From.Type = obj.TYPE_REG
166                 p2.From.Reg = ppc64.REGZERO
167                 p2.To.Type = obj.TYPE_REG
168                 p2.To.Reg = v.Reg1()
169
170         case ssa.OpPPC64LoweredAtomicAnd8,
171                 ssa.OpPPC64LoweredAtomicAnd32,
172                 ssa.OpPPC64LoweredAtomicOr8,
173                 ssa.OpPPC64LoweredAtomicOr32:
174                 // LWSYNC
175                 // LBAR/LWAR    (Rarg0), Rtmp
176                 // AND/OR       Rarg1, Rtmp
177                 // STBCCC/STWCCC Rtmp, (Rarg0)
178                 // BNE          -3(PC)
179                 ld := ppc64.ALBAR
180                 st := ppc64.ASTBCCC
181                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
182                         ld = ppc64.ALWAR
183                         st = ppc64.ASTWCCC
184                 }
185                 r0 := v.Args[0].Reg()
186                 r1 := v.Args[1].Reg()
187                 // LWSYNC - Assuming shared data not write-through-required nor
188                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
189                 plwsync := s.Prog(ppc64.ALWSYNC)
190                 plwsync.To.Type = obj.TYPE_NONE
191                 // LBAR or LWAR
192                 p := s.Prog(ld)
193                 p.From.Type = obj.TYPE_MEM
194                 p.From.Reg = r0
195                 p.To.Type = obj.TYPE_REG
196                 p.To.Reg = ppc64.REGTMP
197                 // AND/OR reg1,out
198                 p1 := s.Prog(v.Op.Asm())
199                 p1.From.Type = obj.TYPE_REG
200                 p1.From.Reg = r1
201                 p1.To.Type = obj.TYPE_REG
202                 p1.To.Reg = ppc64.REGTMP
203                 // STBCCC or STWCCC
204                 p2 := s.Prog(st)
205                 p2.From.Type = obj.TYPE_REG
206                 p2.From.Reg = ppc64.REGTMP
207                 p2.To.Type = obj.TYPE_MEM
208                 p2.To.Reg = r0
209                 p2.RegTo2 = ppc64.REGTMP
210                 // BNE retry
211                 p3 := s.Prog(ppc64.ABNE)
212                 p3.To.Type = obj.TYPE_BRANCH
213                 p3.To.SetTarget(p)
214
215         case ssa.OpPPC64LoweredAtomicAdd32,
216                 ssa.OpPPC64LoweredAtomicAdd64:
217                 // LWSYNC
218                 // LDAR/LWAR    (Rarg0), Rout
219                 // ADD          Rarg1, Rout
220                 // STDCCC/STWCCC Rout, (Rarg0)
221                 // BNE         -3(PC)
222                 // MOVWZ        Rout,Rout (if Add32)
223                 ld := ppc64.ALDAR
224                 st := ppc64.ASTDCCC
225                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
226                         ld = ppc64.ALWAR
227                         st = ppc64.ASTWCCC
228                 }
229                 r0 := v.Args[0].Reg()
230                 r1 := v.Args[1].Reg()
231                 out := v.Reg0()
232                 // LWSYNC - Assuming shared data not write-through-required nor
233                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
234                 plwsync := s.Prog(ppc64.ALWSYNC)
235                 plwsync.To.Type = obj.TYPE_NONE
236                 // LDAR or LWAR
237                 p := s.Prog(ld)
238                 p.From.Type = obj.TYPE_MEM
239                 p.From.Reg = r0
240                 p.To.Type = obj.TYPE_REG
241                 p.To.Reg = out
242                 // ADD reg1,out
243                 p1 := s.Prog(ppc64.AADD)
244                 p1.From.Type = obj.TYPE_REG
245                 p1.From.Reg = r1
246                 p1.To.Reg = out
247                 p1.To.Type = obj.TYPE_REG
248                 // STDCCC or STWCCC
249                 p3 := s.Prog(st)
250                 p3.From.Type = obj.TYPE_REG
251                 p3.From.Reg = out
252                 p3.To.Type = obj.TYPE_MEM
253                 p3.To.Reg = r0
254                 // BNE retry
255                 p4 := s.Prog(ppc64.ABNE)
256                 p4.To.Type = obj.TYPE_BRANCH
257                 p4.To.SetTarget(p)
258
259                 // Ensure a 32 bit result
260                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
261                         p5 := s.Prog(ppc64.AMOVWZ)
262                         p5.To.Type = obj.TYPE_REG
263                         p5.To.Reg = out
264                         p5.From.Type = obj.TYPE_REG
265                         p5.From.Reg = out
266                 }
267
268         case ssa.OpPPC64LoweredAtomicExchange32,
269                 ssa.OpPPC64LoweredAtomicExchange64:
270                 // LWSYNC
271                 // LDAR/LWAR    (Rarg0), Rout
272                 // STDCCC/STWCCC Rout, (Rarg0)
273                 // BNE         -2(PC)
274                 // ISYNC
275                 ld := ppc64.ALDAR
276                 st := ppc64.ASTDCCC
277                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
278                         ld = ppc64.ALWAR
279                         st = ppc64.ASTWCCC
280                 }
281                 r0 := v.Args[0].Reg()
282                 r1 := v.Args[1].Reg()
283                 out := v.Reg0()
284                 // LWSYNC - Assuming shared data not write-through-required nor
285                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
286                 plwsync := s.Prog(ppc64.ALWSYNC)
287                 plwsync.To.Type = obj.TYPE_NONE
288                 // LDAR or LWAR
289                 p := s.Prog(ld)
290                 p.From.Type = obj.TYPE_MEM
291                 p.From.Reg = r0
292                 p.To.Type = obj.TYPE_REG
293                 p.To.Reg = out
294                 // STDCCC or STWCCC
295                 p1 := s.Prog(st)
296                 p1.From.Type = obj.TYPE_REG
297                 p1.From.Reg = r1
298                 p1.To.Type = obj.TYPE_MEM
299                 p1.To.Reg = r0
300                 // BNE retry
301                 p2 := s.Prog(ppc64.ABNE)
302                 p2.To.Type = obj.TYPE_BRANCH
303                 p2.To.SetTarget(p)
304                 // ISYNC
305                 pisync := s.Prog(ppc64.AISYNC)
306                 pisync.To.Type = obj.TYPE_NONE
307
308         case ssa.OpPPC64LoweredAtomicLoad8,
309                 ssa.OpPPC64LoweredAtomicLoad32,
310                 ssa.OpPPC64LoweredAtomicLoad64,
311                 ssa.OpPPC64LoweredAtomicLoadPtr:
312                 // SYNC
313                 // MOVBZ/MOVWZ/MOVD (Rarg0), Rout
314                 // CMP Rout,Rout
315                 // BNE 1(PC)
316                 // ISYNC
317                 ld := ppc64.AMOVD
318                 cmp := ppc64.ACMP
319                 switch v.Op {
320                 case ssa.OpPPC64LoweredAtomicLoad8:
321                         ld = ppc64.AMOVBZ
322                 case ssa.OpPPC64LoweredAtomicLoad32:
323                         ld = ppc64.AMOVWZ
324                         cmp = ppc64.ACMPW
325                 }
326                 arg0 := v.Args[0].Reg()
327                 out := v.Reg0()
328                 // SYNC when AuxInt == 1; otherwise, load-acquire
329                 if v.AuxInt == 1 {
330                         psync := s.Prog(ppc64.ASYNC)
331                         psync.To.Type = obj.TYPE_NONE
332                 }
333                 // Load
334                 p := s.Prog(ld)
335                 p.From.Type = obj.TYPE_MEM
336                 p.From.Reg = arg0
337                 p.To.Type = obj.TYPE_REG
338                 p.To.Reg = out
339                 // CMP
340                 p1 := s.Prog(cmp)
341                 p1.From.Type = obj.TYPE_REG
342                 p1.From.Reg = out
343                 p1.To.Type = obj.TYPE_REG
344                 p1.To.Reg = out
345                 // BNE
346                 p2 := s.Prog(ppc64.ABNE)
347                 p2.To.Type = obj.TYPE_BRANCH
348                 // ISYNC
349                 pisync := s.Prog(ppc64.AISYNC)
350                 pisync.To.Type = obj.TYPE_NONE
351                 p2.To.SetTarget(pisync)
352
353         case ssa.OpPPC64LoweredAtomicStore8,
354                 ssa.OpPPC64LoweredAtomicStore32,
355                 ssa.OpPPC64LoweredAtomicStore64:
356                 // SYNC or LWSYNC
357                 // MOVB/MOVW/MOVD arg1,(arg0)
358                 st := ppc64.AMOVD
359                 switch v.Op {
360                 case ssa.OpPPC64LoweredAtomicStore8:
361                         st = ppc64.AMOVB
362                 case ssa.OpPPC64LoweredAtomicStore32:
363                         st = ppc64.AMOVW
364                 }
365                 arg0 := v.Args[0].Reg()
366                 arg1 := v.Args[1].Reg()
367                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
368                 // SYNC
369                 syncOp := ppc64.ASYNC
370                 if v.AuxInt == 0 {
371                         syncOp = ppc64.ALWSYNC
372                 }
373                 psync := s.Prog(syncOp)
374                 psync.To.Type = obj.TYPE_NONE
375                 // Store
376                 p := s.Prog(st)
377                 p.To.Type = obj.TYPE_MEM
378                 p.To.Reg = arg0
379                 p.From.Type = obj.TYPE_REG
380                 p.From.Reg = arg1
381
382         case ssa.OpPPC64LoweredAtomicCas64,
383                 ssa.OpPPC64LoweredAtomicCas32:
384                 // LWSYNC
385                 // loop:
386                 // LDAR        (Rarg0), MutexHint, Rtmp
387                 // CMP         Rarg1, Rtmp
388                 // BNE         fail
389                 // STDCCC      Rarg2, (Rarg0)
390                 // BNE         loop
391                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
392                 // MOVD        $1, Rout
393                 // BR          end
394                 // fail:
395                 // MOVD        $0, Rout
396                 // end:
397                 ld := ppc64.ALDAR
398                 st := ppc64.ASTDCCC
399                 cmp := ppc64.ACMP
400                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
401                         ld = ppc64.ALWAR
402                         st = ppc64.ASTWCCC
403                         cmp = ppc64.ACMPW
404                 }
405                 r0 := v.Args[0].Reg()
406                 r1 := v.Args[1].Reg()
407                 r2 := v.Args[2].Reg()
408                 out := v.Reg0()
409                 // LWSYNC - Assuming shared data not write-through-required nor
410                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
411                 plwsync1 := s.Prog(ppc64.ALWSYNC)
412                 plwsync1.To.Type = obj.TYPE_NONE
413                 // LDAR or LWAR
414                 p := s.Prog(ld)
415                 p.From.Type = obj.TYPE_MEM
416                 p.From.Reg = r0
417                 p.To.Type = obj.TYPE_REG
418                 p.To.Reg = ppc64.REGTMP
419                 // If it is a Compare-and-Swap-Release operation, set the EH field with
420                 // the release hint.
421                 if v.AuxInt == 0 {
422                         p.SetFrom3Const(0)
423                 }
424                 // CMP reg1,reg2
425                 p1 := s.Prog(cmp)
426                 p1.From.Type = obj.TYPE_REG
427                 p1.From.Reg = r1
428                 p1.To.Reg = ppc64.REGTMP
429                 p1.To.Type = obj.TYPE_REG
430                 // BNE cas_fail
431                 p2 := s.Prog(ppc64.ABNE)
432                 p2.To.Type = obj.TYPE_BRANCH
433                 // STDCCC or STWCCC
434                 p3 := s.Prog(st)
435                 p3.From.Type = obj.TYPE_REG
436                 p3.From.Reg = r2
437                 p3.To.Type = obj.TYPE_MEM
438                 p3.To.Reg = r0
439                 // BNE retry
440                 p4 := s.Prog(ppc64.ABNE)
441                 p4.To.Type = obj.TYPE_BRANCH
442                 p4.To.SetTarget(p)
443                 // LWSYNC - Assuming shared data not write-through-required nor
444                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
445                 // If the operation is a CAS-Release, then synchronization is not necessary.
446                 if v.AuxInt != 0 {
447                         plwsync2 := s.Prog(ppc64.ALWSYNC)
448                         plwsync2.To.Type = obj.TYPE_NONE
449                 }
450                 // return true
451                 p5 := s.Prog(ppc64.AMOVD)
452                 p5.From.Type = obj.TYPE_CONST
453                 p5.From.Offset = 1
454                 p5.To.Type = obj.TYPE_REG
455                 p5.To.Reg = out
456                 // BR done
457                 p6 := s.Prog(obj.AJMP)
458                 p6.To.Type = obj.TYPE_BRANCH
459                 // return false
460                 p7 := s.Prog(ppc64.AMOVD)
461                 p7.From.Type = obj.TYPE_CONST
462                 p7.From.Offset = 0
463                 p7.To.Type = obj.TYPE_REG
464                 p7.To.Reg = out
465                 p2.To.SetTarget(p7)
466                 // done (label)
467                 p8 := s.Prog(obj.ANOP)
468                 p6.To.SetTarget(p8)
469
470         case ssa.OpPPC64LoweredGetClosurePtr:
471                 // Closure pointer is R11 (already)
472                 ssagen.CheckLoweredGetClosurePtr(v)
473
474         case ssa.OpPPC64LoweredGetCallerSP:
475                 // caller's SP is FixedFrameSize below the address of the first arg
476                 p := s.Prog(ppc64.AMOVD)
477                 p.From.Type = obj.TYPE_ADDR
478                 p.From.Offset = -base.Ctxt.FixedFrameSize()
479                 p.From.Name = obj.NAME_PARAM
480                 p.To.Type = obj.TYPE_REG
481                 p.To.Reg = v.Reg()
482
483         case ssa.OpPPC64LoweredGetCallerPC:
484                 p := s.Prog(obj.AGETCALLERPC)
485                 p.To.Type = obj.TYPE_REG
486                 p.To.Reg = v.Reg()
487
488         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
489                 // input is already rounded
490
491         case ssa.OpLoadReg:
492                 loadOp := loadByType(v.Type)
493                 p := s.Prog(loadOp)
494                 ssagen.AddrAuto(&p.From, v.Args[0])
495                 p.To.Type = obj.TYPE_REG
496                 p.To.Reg = v.Reg()
497
498         case ssa.OpStoreReg:
499                 storeOp := storeByType(v.Type)
500                 p := s.Prog(storeOp)
501                 p.From.Type = obj.TYPE_REG
502                 p.From.Reg = v.Args[0].Reg()
503                 ssagen.AddrAuto(&p.To, v)
504
505         case ssa.OpPPC64DIVD:
506                 // For now,
507                 //
508                 // cmp arg1, -1
509                 // be  ahead
510                 // v = arg0 / arg1
511                 // b over
512                 // ahead: v = - arg0
513                 // over: nop
514                 r := v.Reg()
515                 r0 := v.Args[0].Reg()
516                 r1 := v.Args[1].Reg()
517
518                 p := s.Prog(ppc64.ACMP)
519                 p.From.Type = obj.TYPE_REG
520                 p.From.Reg = r1
521                 p.To.Type = obj.TYPE_CONST
522                 p.To.Offset = -1
523
524                 pbahead := s.Prog(ppc64.ABEQ)
525                 pbahead.To.Type = obj.TYPE_BRANCH
526
527                 p = s.Prog(v.Op.Asm())
528                 p.From.Type = obj.TYPE_REG
529                 p.From.Reg = r1
530                 p.Reg = r0
531                 p.To.Type = obj.TYPE_REG
532                 p.To.Reg = r
533
534                 pbover := s.Prog(obj.AJMP)
535                 pbover.To.Type = obj.TYPE_BRANCH
536
537                 p = s.Prog(ppc64.ANEG)
538                 p.To.Type = obj.TYPE_REG
539                 p.To.Reg = r
540                 p.From.Type = obj.TYPE_REG
541                 p.From.Reg = r0
542                 pbahead.To.SetTarget(p)
543
544                 p = s.Prog(obj.ANOP)
545                 pbover.To.SetTarget(p)
546
547         case ssa.OpPPC64DIVW:
548                 // word-width version of above
549                 r := v.Reg()
550                 r0 := v.Args[0].Reg()
551                 r1 := v.Args[1].Reg()
552
553                 p := s.Prog(ppc64.ACMPW)
554                 p.From.Type = obj.TYPE_REG
555                 p.From.Reg = r1
556                 p.To.Type = obj.TYPE_CONST
557                 p.To.Offset = -1
558
559                 pbahead := s.Prog(ppc64.ABEQ)
560                 pbahead.To.Type = obj.TYPE_BRANCH
561
562                 p = s.Prog(v.Op.Asm())
563                 p.From.Type = obj.TYPE_REG
564                 p.From.Reg = r1
565                 p.Reg = r0
566                 p.To.Type = obj.TYPE_REG
567                 p.To.Reg = r
568
569                 pbover := s.Prog(obj.AJMP)
570                 pbover.To.Type = obj.TYPE_BRANCH
571
572                 p = s.Prog(ppc64.ANEG)
573                 p.To.Type = obj.TYPE_REG
574                 p.To.Reg = r
575                 p.From.Type = obj.TYPE_REG
576                 p.From.Reg = r0
577                 pbahead.To.SetTarget(p)
578
579                 p = s.Prog(obj.ANOP)
580                 pbover.To.SetTarget(p)
581
582         case ssa.OpPPC64CLRLSLWI:
583                 r := v.Reg()
584                 r1 := v.Args[0].Reg()
585                 shifts := v.AuxInt
586                 p := s.Prog(v.Op.Asm())
587                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
588                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
589                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
590                 p.Reg = r1
591                 p.To.Type = obj.TYPE_REG
592                 p.To.Reg = r
593
594         case ssa.OpPPC64CLRLSLDI:
595                 r := v.Reg()
596                 r1 := v.Args[0].Reg()
597                 shifts := v.AuxInt
598                 p := s.Prog(v.Op.Asm())
599                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
600                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
601                 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts))
602                 p.Reg = r1
603                 p.To.Type = obj.TYPE_REG
604                 p.To.Reg = r
605
606                 // Mask has been set as sh
607         case ssa.OpPPC64RLDICL:
608                 r := v.Reg()
609                 r1 := v.Args[0].Reg()
610                 shifts := v.AuxInt
611                 p := s.Prog(v.Op.Asm())
612                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
613                 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts))
614                 p.Reg = r1
615                 p.To.Type = obj.TYPE_REG
616                 p.To.Reg = r
617
618         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
619                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
620                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
621                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
622                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
623                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
624                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
625                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
626                 r := v.Reg()
627                 r1 := v.Args[0].Reg()
628                 r2 := v.Args[1].Reg()
629                 p := s.Prog(v.Op.Asm())
630                 p.From.Type = obj.TYPE_REG
631                 p.From.Reg = r2
632                 p.Reg = r1
633                 p.To.Type = obj.TYPE_REG
634                 p.To.Reg = r
635
636         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
637                 r1 := v.Args[0].Reg()
638                 r2 := v.Args[1].Reg()
639                 p := s.Prog(v.Op.Asm())
640                 p.From.Type = obj.TYPE_REG
641                 p.From.Reg = r2
642                 p.Reg = r1
643                 p.To.Type = obj.TYPE_REG
644                 p.To.Reg = ppc64.REGTMP // result is not needed
645
646         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
647                 p := s.Prog(v.Op.Asm())
648                 p.From.Type = obj.TYPE_CONST
649                 p.From.Offset = v.AuxInt
650                 p.Reg = v.Args[0].Reg()
651                 p.To.Type = obj.TYPE_REG
652                 p.To.Reg = v.Reg()
653
654                 // Auxint holds encoded rotate + mask
655         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
656                 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
657                 p := s.Prog(v.Op.Asm())
658                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
659                 p.Reg = v.Args[0].Reg()
660                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
661                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
662
663                 // Auxint holds mask
664         case ssa.OpPPC64RLWNM:
665                 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
666                 p := s.Prog(v.Op.Asm())
667                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
668                 p.Reg = v.Args[0].Reg()
669                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
670                 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
671
672         case ssa.OpPPC64MADDLD:
673                 r := v.Reg()
674                 r1 := v.Args[0].Reg()
675                 r2 := v.Args[1].Reg()
676                 r3 := v.Args[2].Reg()
677                 // r = r1*r2 ± r3
678                 p := s.Prog(v.Op.Asm())
679                 p.From.Type = obj.TYPE_REG
680                 p.From.Reg = r1
681                 p.Reg = r2
682                 p.SetFrom3Reg(r3)
683                 p.To.Type = obj.TYPE_REG
684                 p.To.Reg = r
685
686         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
687                 r := v.Reg()
688                 r1 := v.Args[0].Reg()
689                 r2 := v.Args[1].Reg()
690                 r3 := v.Args[2].Reg()
691                 // r = r1*r2 ± r3
692                 p := s.Prog(v.Op.Asm())
693                 p.From.Type = obj.TYPE_REG
694                 p.From.Reg = r1
695                 p.Reg = r3
696                 p.SetFrom3Reg(r2)
697                 p.To.Type = obj.TYPE_REG
698                 p.To.Reg = r
699
700         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
701                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
702                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
703                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
704                 r := v.Reg()
705                 p := s.Prog(v.Op.Asm())
706                 p.To.Type = obj.TYPE_REG
707                 p.To.Reg = r
708                 p.From.Type = obj.TYPE_REG
709                 p.From.Reg = v.Args[0].Reg()
710
711         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
712                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
713                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
714                 p := s.Prog(v.Op.Asm())
715                 p.Reg = v.Args[0].Reg()
716                 p.From.Type = obj.TYPE_CONST
717                 p.From.Offset = v.AuxInt
718                 p.To.Type = obj.TYPE_REG
719                 p.To.Reg = v.Reg()
720
721         case ssa.OpPPC64SUBFCconst:
722                 p := s.Prog(v.Op.Asm())
723                 p.SetFrom3Const(v.AuxInt)
724                 p.From.Type = obj.TYPE_REG
725                 p.From.Reg = v.Args[0].Reg()
726                 p.To.Type = obj.TYPE_REG
727                 p.To.Reg = v.Reg()
728
729         case ssa.OpPPC64ANDCCconst:
730                 p := s.Prog(v.Op.Asm())
731                 p.Reg = v.Args[0].Reg()
732                 p.From.Type = obj.TYPE_CONST
733                 p.From.Offset = v.AuxInt
734                 p.To.Type = obj.TYPE_REG
735                 p.To.Reg = ppc64.REGTMP // discard result
736
737         case ssa.OpPPC64MOVDaddr:
738                 switch v.Aux.(type) {
739                 default:
740                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
741                 case nil:
742                         // If aux offset and aux int are both 0, and the same
743                         // input and output regs are used, no instruction
744                         // needs to be generated, since it would just be
745                         // addi rx, rx, 0.
746                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
747                                 p := s.Prog(ppc64.AMOVD)
748                                 p.From.Type = obj.TYPE_ADDR
749                                 p.From.Reg = v.Args[0].Reg()
750                                 p.From.Offset = v.AuxInt
751                                 p.To.Type = obj.TYPE_REG
752                                 p.To.Reg = v.Reg()
753                         }
754
755                 case *obj.LSym, ir.Node:
756                         p := s.Prog(ppc64.AMOVD)
757                         p.From.Type = obj.TYPE_ADDR
758                         p.From.Reg = v.Args[0].Reg()
759                         p.To.Type = obj.TYPE_REG
760                         p.To.Reg = v.Reg()
761                         ssagen.AddAux(&p.From, v)
762
763                 }
764
765         case ssa.OpPPC64MOVDconst:
766                 p := s.Prog(v.Op.Asm())
767                 p.From.Type = obj.TYPE_CONST
768                 p.From.Offset = v.AuxInt
769                 p.To.Type = obj.TYPE_REG
770                 p.To.Reg = v.Reg()
771
772         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
773                 p := s.Prog(v.Op.Asm())
774                 p.From.Type = obj.TYPE_FCONST
775                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
776                 p.To.Type = obj.TYPE_REG
777                 p.To.Reg = v.Reg()
778
779         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
780                 p := s.Prog(v.Op.Asm())
781                 p.From.Type = obj.TYPE_REG
782                 p.From.Reg = v.Args[0].Reg()
783                 p.To.Type = obj.TYPE_REG
784                 p.To.Reg = v.Args[1].Reg()
785
786         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
787                 p := s.Prog(v.Op.Asm())
788                 p.From.Type = obj.TYPE_REG
789                 p.From.Reg = v.Args[0].Reg()
790                 p.To.Type = obj.TYPE_CONST
791                 p.To.Offset = v.AuxInt
792
793         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
794                 // Shift in register to required size
795                 p := s.Prog(v.Op.Asm())
796                 p.From.Type = obj.TYPE_REG
797                 p.From.Reg = v.Args[0].Reg()
798                 p.To.Reg = v.Reg()
799                 p.To.Type = obj.TYPE_REG
800
801         case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
802
803                 // MOVDload and MOVWload are DS form instructions that are restricted to
804                 // offsets that are a multiple of 4. If the offset is not a multiple of 4,
805                 // then the address of the symbol to be loaded is computed (base + offset)
806                 // and used as the new base register and the offset field in the instruction
807                 // can be set to zero.
808
809                 // This same problem can happen with gostrings since the final offset is not
810                 // known yet, but could be unaligned after the relocation is resolved.
811                 // So gostrings are handled the same way.
812
813                 // This allows the MOVDload and MOVWload to be generated in more cases and
814                 // eliminates some offset and alignment checking in the rules file.
815
816                 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
817                 ssagen.AddAux(&fromAddr, v)
818
819                 genAddr := false
820
821                 switch fromAddr.Name {
822                 case obj.NAME_EXTERN, obj.NAME_STATIC:
823                         // Special case for a rule that combines the bytes of a gostring.
824                         // The v alignment might seem OK, but we don't want to load it
825                         // using an offset because relocation comes later.
826                         genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
827                 default:
828                         genAddr = fromAddr.Offset%4 != 0
829                 }
830                 if genAddr {
831                         // Load full address into the temp register.
832                         p := s.Prog(ppc64.AMOVD)
833                         p.From.Type = obj.TYPE_ADDR
834                         p.From.Reg = v.Args[0].Reg()
835                         ssagen.AddAux(&p.From, v)
836                         // Load target using temp as base register
837                         // and offset zero. Setting NAME_NONE
838                         // prevents any extra offsets from being
839                         // added.
840                         p.To.Type = obj.TYPE_REG
841                         p.To.Reg = ppc64.REGTMP
842                         fromAddr.Reg = ppc64.REGTMP
843                         // Clear the offset field and other
844                         // information that might be used
845                         // by the assembler to add to the
846                         // final offset value.
847                         fromAddr.Offset = 0
848                         fromAddr.Name = obj.NAME_NONE
849                         fromAddr.Sym = nil
850                 }
851                 p := s.Prog(v.Op.Asm())
852                 p.From = fromAddr
853                 p.To.Type = obj.TYPE_REG
854                 p.To.Reg = v.Reg()
855                 break
856
857         case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
858                 p := s.Prog(v.Op.Asm())
859                 p.From.Type = obj.TYPE_MEM
860                 p.From.Reg = v.Args[0].Reg()
861                 ssagen.AddAux(&p.From, v)
862                 p.To.Type = obj.TYPE_REG
863                 p.To.Reg = v.Reg()
864
865         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
866                 p := s.Prog(v.Op.Asm())
867                 p.From.Type = obj.TYPE_MEM
868                 p.From.Reg = v.Args[0].Reg()
869                 p.To.Type = obj.TYPE_REG
870                 p.To.Reg = v.Reg()
871
872         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
873                 p := s.Prog(v.Op.Asm())
874                 p.To.Type = obj.TYPE_MEM
875                 p.To.Reg = v.Args[0].Reg()
876                 p.From.Type = obj.TYPE_REG
877                 p.From.Reg = v.Args[1].Reg()
878
879         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
880                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
881                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
882                 p := s.Prog(v.Op.Asm())
883                 p.From.Type = obj.TYPE_MEM
884                 p.From.Reg = v.Args[0].Reg()
885                 p.From.Index = v.Args[1].Reg()
886                 p.To.Type = obj.TYPE_REG
887                 p.To.Reg = v.Reg()
888
889         case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
890                 p := s.Prog(v.Op.Asm())
891                 p.From.Type = obj.TYPE_REG
892                 p.From.Reg = ppc64.REGZERO
893                 p.To.Type = obj.TYPE_MEM
894                 p.To.Reg = v.Args[0].Reg()
895                 ssagen.AddAux(&p.To, v)
896
897         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
898
899                 // MOVDstore and MOVDstorezero become DS form instructions that are restricted
900                 // to offset values that are a multiple of 4. If the offset field is not a
901                 // multiple of 4, then the full address of the store target is computed (base +
902                 // offset) and used as the new base register and the offset in the instruction
903                 // is set to 0.
904
905                 // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
906                 // and prevents checking of the offset value and alignment in the rules.
907
908                 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
909                 ssagen.AddAux(&toAddr, v)
910
911                 if toAddr.Offset%4 != 0 {
912                         p := s.Prog(ppc64.AMOVD)
913                         p.From.Type = obj.TYPE_ADDR
914                         p.From.Reg = v.Args[0].Reg()
915                         ssagen.AddAux(&p.From, v)
916                         p.To.Type = obj.TYPE_REG
917                         p.To.Reg = ppc64.REGTMP
918                         toAddr.Reg = ppc64.REGTMP
919                         // Clear the offset field and other
920                         // information that might be used
921                         // by the assembler to add to the
922                         // final offset value.
923                         toAddr.Offset = 0
924                         toAddr.Name = obj.NAME_NONE
925                         toAddr.Sym = nil
926                 }
927                 p := s.Prog(v.Op.Asm())
928                 p.To = toAddr
929                 p.From.Type = obj.TYPE_REG
930                 if v.Op == ssa.OpPPC64MOVDstorezero {
931                         p.From.Reg = ppc64.REGZERO
932                 } else {
933                         p.From.Reg = v.Args[1].Reg()
934                 }
935
936         case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
937                 p := s.Prog(v.Op.Asm())
938                 p.From.Type = obj.TYPE_REG
939                 p.From.Reg = v.Args[1].Reg()
940                 p.To.Type = obj.TYPE_MEM
941                 p.To.Reg = v.Args[0].Reg()
942                 ssagen.AddAux(&p.To, v)
943
944         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
945                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
946                 ssa.OpPPC64MOVHBRstoreidx:
947                 p := s.Prog(v.Op.Asm())
948                 p.From.Type = obj.TYPE_REG
949                 p.From.Reg = v.Args[2].Reg()
950                 p.To.Index = v.Args[1].Reg()
951                 p.To.Type = obj.TYPE_MEM
952                 p.To.Reg = v.Args[0].Reg()
953
954         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
955                 // ISEL, ISELB
956                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
957                 // ISEL only accepts 0, 1, 2 condition values but the others can be
958                 // achieved by swapping operand order.
959                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
960                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
961                 // ISELB is used when a boolean result is needed, returning 0 or 1
962                 p := s.Prog(ppc64.AISEL)
963                 p.To.Type = obj.TYPE_REG
964                 p.To.Reg = v.Reg()
965                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
966                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
967                 if v.Op == ssa.OpPPC64ISEL {
968                         r.Reg = v.Args[1].Reg()
969                 }
970                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
971                 if v.AuxInt > 3 {
972                         p.Reg = r.Reg
973                         p.SetFrom3Reg(v.Args[0].Reg())
974                 } else {
975                         p.Reg = v.Args[0].Reg()
976                         p.SetFrom3(r)
977                 }
978                 p.From.Type = obj.TYPE_CONST
979                 p.From.Offset = v.AuxInt & 3
980
981         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
982                 // The LoweredQuad code generation
983                 // generates STXV instructions on
984                 // power9. The Short variation is used
985                 // if no loop is generated.
986
987                 // sizes >= 64 generate a loop as follows:
988
989                 // Set up loop counter in CTR, used by BC
990                 // XXLXOR clears VS32
991                 //       XXLXOR VS32,VS32,VS32
992                 //       MOVD len/64,REG_TMP
993                 //       MOVD REG_TMP,CTR
994                 //       loop:
995                 //       STXV VS32,0(R20)
996                 //       STXV VS32,16(R20)
997                 //       STXV VS32,32(R20)
998                 //       STXV VS32,48(R20)
999                 //       ADD  $64,R20
1000                 //       BC   16, 0, loop
1001
1002                 // Number of 64-byte loop iterations
1003                 ctr := v.AuxInt / 64
1004
1005                 // Remainder bytes
1006                 rem := v.AuxInt % 64
1007
1008                 // Only generate a loop if there is more
1009                 // than 1 iteration.
1010                 if ctr > 1 {
1011                         // Set up VS32 (V0) to hold 0s
1012                         p := s.Prog(ppc64.AXXLXOR)
1013                         p.From.Type = obj.TYPE_REG
1014                         p.From.Reg = ppc64.REG_VS32
1015                         p.To.Type = obj.TYPE_REG
1016                         p.To.Reg = ppc64.REG_VS32
1017                         p.Reg = ppc64.REG_VS32
1018
1019                         // Set up CTR loop counter
1020                         p = s.Prog(ppc64.AMOVD)
1021                         p.From.Type = obj.TYPE_CONST
1022                         p.From.Offset = ctr
1023                         p.To.Type = obj.TYPE_REG
1024                         p.To.Reg = ppc64.REGTMP
1025
1026                         p = s.Prog(ppc64.AMOVD)
1027                         p.From.Type = obj.TYPE_REG
1028                         p.From.Reg = ppc64.REGTMP
1029                         p.To.Type = obj.TYPE_REG
1030                         p.To.Reg = ppc64.REG_CTR
1031
1032                         // Don't generate padding for
1033                         // loops with few iterations.
1034                         if ctr > 3 {
1035                                 p = s.Prog(obj.APCALIGN)
1036                                 p.From.Type = obj.TYPE_CONST
1037                                 p.From.Offset = 16
1038                         }
1039
1040                         // generate 4 STXVs to zero 64 bytes
1041                         var top *obj.Prog
1042
1043                         p = s.Prog(ppc64.ASTXV)
1044                         p.From.Type = obj.TYPE_REG
1045                         p.From.Reg = ppc64.REG_VS32
1046                         p.To.Type = obj.TYPE_MEM
1047                         p.To.Reg = v.Args[0].Reg()
1048
1049                         //  Save the top of loop
1050                         if top == nil {
1051                                 top = p
1052                         }
1053                         p = s.Prog(ppc64.ASTXV)
1054                         p.From.Type = obj.TYPE_REG
1055                         p.From.Reg = ppc64.REG_VS32
1056                         p.To.Type = obj.TYPE_MEM
1057                         p.To.Reg = v.Args[0].Reg()
1058                         p.To.Offset = 16
1059
1060                         p = s.Prog(ppc64.ASTXV)
1061                         p.From.Type = obj.TYPE_REG
1062                         p.From.Reg = ppc64.REG_VS32
1063                         p.To.Type = obj.TYPE_MEM
1064                         p.To.Reg = v.Args[0].Reg()
1065                         p.To.Offset = 32
1066
1067                         p = s.Prog(ppc64.ASTXV)
1068                         p.From.Type = obj.TYPE_REG
1069                         p.From.Reg = ppc64.REG_VS32
1070                         p.To.Type = obj.TYPE_MEM
1071                         p.To.Reg = v.Args[0].Reg()
1072                         p.To.Offset = 48
1073
1074                         // Increment address for the
1075                         // 64 bytes just zeroed.
1076                         p = s.Prog(ppc64.AADD)
1077                         p.Reg = v.Args[0].Reg()
1078                         p.From.Type = obj.TYPE_CONST
1079                         p.From.Offset = 64
1080                         p.To.Type = obj.TYPE_REG
1081                         p.To.Reg = v.Args[0].Reg()
1082
1083                         // Branch back to top of loop
1084                         // based on CTR
1085                         // BC with BO_BCTR generates bdnz
1086                         p = s.Prog(ppc64.ABC)
1087                         p.From.Type = obj.TYPE_CONST
1088                         p.From.Offset = ppc64.BO_BCTR
1089                         p.Reg = ppc64.REG_R0
1090                         p.To.Type = obj.TYPE_BRANCH
1091                         p.To.SetTarget(top)
1092                 }
1093                 // When ctr == 1 the loop was not generated but
1094                 // there are at least 64 bytes to clear, so add
1095                 // that to the remainder to generate the code
1096                 // to clear those doublewords
1097                 if ctr == 1 {
1098                         rem += 64
1099                 }
1100
1101                 // Clear the remainder starting at offset zero
1102                 offset := int64(0)
1103
1104                 if rem >= 16 && ctr <= 1 {
1105                         // If the XXLXOR hasn't already been
1106                         // generated, do it here to initialize
1107                         // VS32 (V0) to 0.
1108                         p := s.Prog(ppc64.AXXLXOR)
1109                         p.From.Type = obj.TYPE_REG
1110                         p.From.Reg = ppc64.REG_VS32
1111                         p.To.Type = obj.TYPE_REG
1112                         p.To.Reg = ppc64.REG_VS32
1113                         p.Reg = ppc64.REG_VS32
1114                 }
1115                 // Generate STXV for 32 or 64
1116                 // bytes.
1117                 for rem >= 32 {
1118                         p := s.Prog(ppc64.ASTXV)
1119                         p.From.Type = obj.TYPE_REG
1120                         p.From.Reg = ppc64.REG_VS32
1121                         p.To.Type = obj.TYPE_MEM
1122                         p.To.Reg = v.Args[0].Reg()
1123                         p.To.Offset = offset
1124
1125                         p = s.Prog(ppc64.ASTXV)
1126                         p.From.Type = obj.TYPE_REG
1127                         p.From.Reg = ppc64.REG_VS32
1128                         p.To.Type = obj.TYPE_MEM
1129                         p.To.Reg = v.Args[0].Reg()
1130                         p.To.Offset = offset + 16
1131                         offset += 32
1132                         rem -= 32
1133                 }
1134                 // Generate 16 bytes
1135                 if rem >= 16 {
1136                         p := s.Prog(ppc64.ASTXV)
1137                         p.From.Type = obj.TYPE_REG
1138                         p.From.Reg = ppc64.REG_VS32
1139                         p.To.Type = obj.TYPE_MEM
1140                         p.To.Reg = v.Args[0].Reg()
1141                         p.To.Offset = offset
1142                         offset += 16
1143                         rem -= 16
1144                 }
1145
1146                 // first clear as many doublewords as possible
1147                 // then clear remaining sizes as available
1148                 for rem > 0 {
1149                         op, size := ppc64.AMOVB, int64(1)
1150                         switch {
1151                         case rem >= 8:
1152                                 op, size = ppc64.AMOVD, 8
1153                         case rem >= 4:
1154                                 op, size = ppc64.AMOVW, 4
1155                         case rem >= 2:
1156                                 op, size = ppc64.AMOVH, 2
1157                         }
1158                         p := s.Prog(op)
1159                         p.From.Type = obj.TYPE_REG
1160                         p.From.Reg = ppc64.REG_R0
1161                         p.To.Type = obj.TYPE_MEM
1162                         p.To.Reg = v.Args[0].Reg()
1163                         p.To.Offset = offset
1164                         rem -= size
1165                         offset += size
1166                 }
1167
1168         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1169
1170                 // Unaligned data doesn't hurt performance
1171                 // for these instructions on power8.
1172
1173                 // For sizes >= 64 generate a loop as follows:
1174
1175                 // Set up loop counter in CTR, used by BC
1176                 //       XXLXOR VS32,VS32,VS32
1177                 //       MOVD len/32,REG_TMP
1178                 //       MOVD REG_TMP,CTR
1179                 //       MOVD $16,REG_TMP
1180                 //       loop:
1181                 //       STXVD2X VS32,(R0)(R20)
1182                 //       STXVD2X VS32,(R31)(R20)
1183                 //       ADD  $32,R20
1184                 //       BC   16, 0, loop
1185                 //
1186                 // any remainder is done as described below
1187
1188                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1189                 // then handle the remainder
1190                 //      MOVD R0,(R20)
1191                 //      MOVD R0,8(R20)
1192                 // .... etc.
1193                 //
1194                 // the remainder bytes are cleared using one or more
1195                 // of the following instructions with the appropriate
1196                 // offsets depending which instructions are needed
1197                 //
1198                 //      MOVW R0,n1(R20) 4 bytes
1199                 //      MOVH R0,n2(R20) 2 bytes
1200                 //      MOVB R0,n3(R20) 1 byte
1201                 //
1202                 // 7 bytes: MOVW, MOVH, MOVB
1203                 // 6 bytes: MOVW, MOVH
1204                 // 5 bytes: MOVW, MOVB
1205                 // 3 bytes: MOVH, MOVB
1206
1207                 // each loop iteration does 32 bytes
1208                 ctr := v.AuxInt / 32
1209
1210                 // remainder bytes
1211                 rem := v.AuxInt % 32
1212
1213                 // only generate a loop if there is more
1214                 // than 1 iteration.
1215                 if ctr > 1 {
1216                         // Set up VS32 (V0) to hold 0s
1217                         p := s.Prog(ppc64.AXXLXOR)
1218                         p.From.Type = obj.TYPE_REG
1219                         p.From.Reg = ppc64.REG_VS32
1220                         p.To.Type = obj.TYPE_REG
1221                         p.To.Reg = ppc64.REG_VS32
1222                         p.Reg = ppc64.REG_VS32
1223
1224                         // Set up CTR loop counter
1225                         p = s.Prog(ppc64.AMOVD)
1226                         p.From.Type = obj.TYPE_CONST
1227                         p.From.Offset = ctr
1228                         p.To.Type = obj.TYPE_REG
1229                         p.To.Reg = ppc64.REGTMP
1230
1231                         p = s.Prog(ppc64.AMOVD)
1232                         p.From.Type = obj.TYPE_REG
1233                         p.From.Reg = ppc64.REGTMP
1234                         p.To.Type = obj.TYPE_REG
1235                         p.To.Reg = ppc64.REG_CTR
1236
1237                         // Set up R31 to hold index value 16
1238                         p = s.Prog(ppc64.AMOVD)
1239                         p.From.Type = obj.TYPE_CONST
1240                         p.From.Offset = 16
1241                         p.To.Type = obj.TYPE_REG
1242                         p.To.Reg = ppc64.REGTMP
1243
1244                         // Don't add padding for alignment
1245                         // with few loop iterations.
1246                         if ctr > 3 {
1247                                 p = s.Prog(obj.APCALIGN)
1248                                 p.From.Type = obj.TYPE_CONST
1249                                 p.From.Offset = 16
1250                         }
1251
1252                         // generate 2 STXVD2Xs to store 16 bytes
1253                         // when this is a loop then the top must be saved
1254                         var top *obj.Prog
1255                         // This is the top of loop
1256
1257                         p = s.Prog(ppc64.ASTXVD2X)
1258                         p.From.Type = obj.TYPE_REG
1259                         p.From.Reg = ppc64.REG_VS32
1260                         p.To.Type = obj.TYPE_MEM
1261                         p.To.Reg = v.Args[0].Reg()
1262                         p.To.Index = ppc64.REGZERO
1263                         // Save the top of loop
1264                         if top == nil {
1265                                 top = p
1266                         }
1267                         p = s.Prog(ppc64.ASTXVD2X)
1268                         p.From.Type = obj.TYPE_REG
1269                         p.From.Reg = ppc64.REG_VS32
1270                         p.To.Type = obj.TYPE_MEM
1271                         p.To.Reg = v.Args[0].Reg()
1272                         p.To.Index = ppc64.REGTMP
1273
1274                         // Increment address for the
1275                         // 4 doublewords just zeroed.
1276                         p = s.Prog(ppc64.AADD)
1277                         p.Reg = v.Args[0].Reg()
1278                         p.From.Type = obj.TYPE_CONST
1279                         p.From.Offset = 32
1280                         p.To.Type = obj.TYPE_REG
1281                         p.To.Reg = v.Args[0].Reg()
1282
1283                         // Branch back to top of loop
1284                         // based on CTR
1285                         // BC with BO_BCTR generates bdnz
1286                         p = s.Prog(ppc64.ABC)
1287                         p.From.Type = obj.TYPE_CONST
1288                         p.From.Offset = ppc64.BO_BCTR
1289                         p.Reg = ppc64.REG_R0
1290                         p.To.Type = obj.TYPE_BRANCH
1291                         p.To.SetTarget(top)
1292                 }
1293
1294                 // when ctr == 1 the loop was not generated but
1295                 // there are at least 32 bytes to clear, so add
1296                 // that to the remainder to generate the code
1297                 // to clear those doublewords
1298                 if ctr == 1 {
1299                         rem += 32
1300                 }
1301
1302                 // clear the remainder starting at offset zero
1303                 offset := int64(0)
1304
1305                 // first clear as many doublewords as possible
1306                 // then clear remaining sizes as available
1307                 for rem > 0 {
1308                         op, size := ppc64.AMOVB, int64(1)
1309                         switch {
1310                         case rem >= 8:
1311                                 op, size = ppc64.AMOVD, 8
1312                         case rem >= 4:
1313                                 op, size = ppc64.AMOVW, 4
1314                         case rem >= 2:
1315                                 op, size = ppc64.AMOVH, 2
1316                         }
1317                         p := s.Prog(op)
1318                         p.From.Type = obj.TYPE_REG
1319                         p.From.Reg = ppc64.REG_R0
1320                         p.To.Type = obj.TYPE_MEM
1321                         p.To.Reg = v.Args[0].Reg()
1322                         p.To.Offset = offset
1323                         rem -= size
1324                         offset += size
1325                 }
1326
1327         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1328
1329                 bytesPerLoop := int64(32)
1330                 // This will be used when moving more
1331                 // than 8 bytes.  Moves start with
1332                 // as many 8 byte moves as possible, then
1333                 // 4, 2, or 1 byte(s) as remaining.  This will
1334                 // work and be efficient for power8 or later.
1335                 // If there are 64 or more bytes, then a
1336                 // loop is generated to move 32 bytes and
1337                 // update the src and dst addresses on each
1338                 // iteration. When < 64 bytes, the appropriate
1339                 // number of moves are generated based on the
1340                 // size.
1341                 // When moving >= 64 bytes a loop is used
1342                 //      MOVD len/32,REG_TMP
1343                 //      MOVD REG_TMP,CTR
1344                 //      MOVD $16,REG_TMP
1345                 // top:
1346                 //      LXVD2X (R0)(R21),VS32
1347                 //      LXVD2X (R31)(R21),VS33
1348                 //      ADD $32,R21
1349                 //      STXVD2X VS32,(R0)(R20)
1350                 //      STXVD2X VS33,(R31)(R20)
1351                 //      ADD $32,R20
1352                 //      BC 16,0,top
1353                 // Bytes not moved by this loop are moved
1354                 // with a combination of the following instructions,
1355                 // starting with the largest sizes and generating as
1356                 // many as needed, using the appropriate offset value.
1357                 //      MOVD  n(R21),R31
1358                 //      MOVD  R31,n(R20)
1359                 //      MOVW  n1(R21),R31
1360                 //      MOVW  R31,n1(R20)
1361                 //      MOVH  n2(R21),R31
1362                 //      MOVH  R31,n2(R20)
1363                 //      MOVB  n3(R21),R31
1364                 //      MOVB  R31,n3(R20)
1365
1366                 // Each loop iteration moves 32 bytes
1367                 ctr := v.AuxInt / bytesPerLoop
1368
1369                 // Remainder after the loop
1370                 rem := v.AuxInt % bytesPerLoop
1371
1372                 dstReg := v.Args[0].Reg()
1373                 srcReg := v.Args[1].Reg()
1374
1375                 // The set of registers used here, must match the clobbered reg list
1376                 // in PPC64Ops.go.
1377                 offset := int64(0)
1378
1379                 // top of the loop
1380                 var top *obj.Prog
1381                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1382                 if ctr > 1 {
1383                         // Set up the CTR
1384                         p := s.Prog(ppc64.AMOVD)
1385                         p.From.Type = obj.TYPE_CONST
1386                         p.From.Offset = ctr
1387                         p.To.Type = obj.TYPE_REG
1388                         p.To.Reg = ppc64.REGTMP
1389
1390                         p = s.Prog(ppc64.AMOVD)
1391                         p.From.Type = obj.TYPE_REG
1392                         p.From.Reg = ppc64.REGTMP
1393                         p.To.Type = obj.TYPE_REG
1394                         p.To.Reg = ppc64.REG_CTR
1395
1396                         // Use REGTMP as index reg
1397                         p = s.Prog(ppc64.AMOVD)
1398                         p.From.Type = obj.TYPE_CONST
1399                         p.From.Offset = 16
1400                         p.To.Type = obj.TYPE_REG
1401                         p.To.Reg = ppc64.REGTMP
1402
1403                         // Don't add padding for
1404                         // alignment with small iteration
1405                         // counts.
1406                         if ctr > 3 {
1407                                 p = s.Prog(obj.APCALIGN)
1408                                 p.From.Type = obj.TYPE_CONST
1409                                 p.From.Offset = 16
1410                         }
1411
1412                         // Generate 16 byte loads and stores.
1413                         // Use temp register for index (16)
1414                         // on the second one.
1415
1416                         p = s.Prog(ppc64.ALXVD2X)
1417                         p.From.Type = obj.TYPE_MEM
1418                         p.From.Reg = srcReg
1419                         p.From.Index = ppc64.REGZERO
1420                         p.To.Type = obj.TYPE_REG
1421                         p.To.Reg = ppc64.REG_VS32
1422                         if top == nil {
1423                                 top = p
1424                         }
1425                         p = s.Prog(ppc64.ALXVD2X)
1426                         p.From.Type = obj.TYPE_MEM
1427                         p.From.Reg = srcReg
1428                         p.From.Index = ppc64.REGTMP
1429                         p.To.Type = obj.TYPE_REG
1430                         p.To.Reg = ppc64.REG_VS33
1431
1432                         // increment the src reg for next iteration
1433                         p = s.Prog(ppc64.AADD)
1434                         p.Reg = srcReg
1435                         p.From.Type = obj.TYPE_CONST
1436                         p.From.Offset = bytesPerLoop
1437                         p.To.Type = obj.TYPE_REG
1438                         p.To.Reg = srcReg
1439
1440                         // generate 16 byte stores
1441                         p = s.Prog(ppc64.ASTXVD2X)
1442                         p.From.Type = obj.TYPE_REG
1443                         p.From.Reg = ppc64.REG_VS32
1444                         p.To.Type = obj.TYPE_MEM
1445                         p.To.Reg = dstReg
1446                         p.To.Index = ppc64.REGZERO
1447
1448                         p = s.Prog(ppc64.ASTXVD2X)
1449                         p.From.Type = obj.TYPE_REG
1450                         p.From.Reg = ppc64.REG_VS33
1451                         p.To.Type = obj.TYPE_MEM
1452                         p.To.Reg = dstReg
1453                         p.To.Index = ppc64.REGTMP
1454
1455                         // increment the dst reg for next iteration
1456                         p = s.Prog(ppc64.AADD)
1457                         p.Reg = dstReg
1458                         p.From.Type = obj.TYPE_CONST
1459                         p.From.Offset = bytesPerLoop
1460                         p.To.Type = obj.TYPE_REG
1461                         p.To.Reg = dstReg
1462
1463                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1464                         // to loop top.
1465                         p = s.Prog(ppc64.ABC)
1466                         p.From.Type = obj.TYPE_CONST
1467                         p.From.Offset = ppc64.BO_BCTR
1468                         p.Reg = ppc64.REG_R0
1469                         p.To.Type = obj.TYPE_BRANCH
1470                         p.To.SetTarget(top)
1471
1472                         // srcReg and dstReg were incremented in the loop, so
1473                         // later instructions start with offset 0.
1474                         offset = int64(0)
1475                 }
1476
1477                 // No loop was generated for one iteration, so
1478                 // add 32 bytes to the remainder to move those bytes.
1479                 if ctr == 1 {
1480                         rem += bytesPerLoop
1481                 }
1482
1483                 if rem >= 16 {
1484                         // Generate 16 byte loads and stores.
1485                         // Use temp register for index (value 16)
1486                         // on the second one.
1487                         p := s.Prog(ppc64.ALXVD2X)
1488                         p.From.Type = obj.TYPE_MEM
1489                         p.From.Reg = srcReg
1490                         p.From.Index = ppc64.REGZERO
1491                         p.To.Type = obj.TYPE_REG
1492                         p.To.Reg = ppc64.REG_VS32
1493
1494                         p = s.Prog(ppc64.ASTXVD2X)
1495                         p.From.Type = obj.TYPE_REG
1496                         p.From.Reg = ppc64.REG_VS32
1497                         p.To.Type = obj.TYPE_MEM
1498                         p.To.Reg = dstReg
1499                         p.To.Index = ppc64.REGZERO
1500
1501                         offset = 16
1502                         rem -= 16
1503
1504                         if rem >= 16 {
1505                                 // Use REGTMP as index reg
1506                                 p := s.Prog(ppc64.AMOVD)
1507                                 p.From.Type = obj.TYPE_CONST
1508                                 p.From.Offset = 16
1509                                 p.To.Type = obj.TYPE_REG
1510                                 p.To.Reg = ppc64.REGTMP
1511
1512                                 p = s.Prog(ppc64.ALXVD2X)
1513                                 p.From.Type = obj.TYPE_MEM
1514                                 p.From.Reg = srcReg
1515                                 p.From.Index = ppc64.REGTMP
1516                                 p.To.Type = obj.TYPE_REG
1517                                 p.To.Reg = ppc64.REG_VS32
1518
1519                                 p = s.Prog(ppc64.ASTXVD2X)
1520                                 p.From.Type = obj.TYPE_REG
1521                                 p.From.Reg = ppc64.REG_VS32
1522                                 p.To.Type = obj.TYPE_MEM
1523                                 p.To.Reg = dstReg
1524                                 p.To.Index = ppc64.REGTMP
1525
1526                                 offset = 32
1527                                 rem -= 16
1528                         }
1529                 }
1530
1531                 // Generate all the remaining load and store pairs, starting with
1532                 // as many 8 byte moves as possible, then 4, 2, 1.
1533                 for rem > 0 {
1534                         op, size := ppc64.AMOVB, int64(1)
1535                         switch {
1536                         case rem >= 8:
1537                                 op, size = ppc64.AMOVD, 8
1538                         case rem >= 4:
1539                                 op, size = ppc64.AMOVWZ, 4
1540                         case rem >= 2:
1541                                 op, size = ppc64.AMOVH, 2
1542                         }
1543                         // Load
1544                         p := s.Prog(op)
1545                         p.To.Type = obj.TYPE_REG
1546                         p.To.Reg = ppc64.REGTMP
1547                         p.From.Type = obj.TYPE_MEM
1548                         p.From.Reg = srcReg
1549                         p.From.Offset = offset
1550
1551                         // Store
1552                         p = s.Prog(op)
1553                         p.From.Type = obj.TYPE_REG
1554                         p.From.Reg = ppc64.REGTMP
1555                         p.To.Type = obj.TYPE_MEM
1556                         p.To.Reg = dstReg
1557                         p.To.Offset = offset
1558                         rem -= size
1559                         offset += size
1560                 }
1561
1562         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1563                 bytesPerLoop := int64(64)
1564                 // This is used when moving more
1565                 // than 8 bytes on power9.  Moves start with
1566                 // as many 8 byte moves as possible, then
1567                 // 4, 2, or 1 byte(s) as remaining.  This will
1568                 // work and be efficient for power8 or later.
1569                 // If there are 128 or more bytes, then a
1570                 // loop is generated to move 64 bytes and
1571                 // update the src and dst addresses on each
1572                 // iteration. When < 128 bytes, the appropriate
1573                 // number of moves are generated based on the
1574                 // size.
1575                 // When moving >= 128 bytes a loop is used
1576                 //      MOVD len/64,REG_TMP
1577                 //      MOVD REG_TMP,CTR
1578                 // top:
1579                 //      LXV 0(R21),VS32;   LXV 16(R21),VS33
1580                 //      STXV VS32,0(R20);  STXV VS33,16(R20)
1581                 //      LXV 32(R21),VS32;  LXV 48(R21),VS33
1582                 //      STXV VS32,32(R20); STXV VS33,48(R20)
1583                 //      ADD $64,R21
1584                 //      ADD $64,R20
1585                 //      BC 16,0,top
1586                 // Bytes not moved by this loop are moved
1587                 // with a combination of the following instructions,
1588                 // starting with the largest sizes and generating as
1589                 // many as needed, using the appropriate offset value.
1590                 //      MOVD  n(R21),R31
1591                 //      MOVD  R31,n(R20)
1592                 //      MOVW  n1(R21),R31
1593                 //      MOVW  R31,n1(R20)
1594                 //      MOVH  n2(R21),R31
1595                 //      MOVH  R31,n2(R20)
1596                 //      MOVB  n3(R21),R31
1597                 //      MOVB  R31,n3(R20)
1598
1599                 // Each loop iteration moves 64 bytes
1600                 ctr := v.AuxInt / bytesPerLoop
1601
1602                 // Remainder after the loop
1603                 rem := v.AuxInt % bytesPerLoop
1604
1605                 dstReg := v.Args[0].Reg()
1606                 srcReg := v.Args[1].Reg()
1607
1608                 offset := int64(0)
1609
1610                 // top of the loop
1611                 var top *obj.Prog
1612
1613                 // Only generate looping code when loop counter is > 1 for >= 128 bytes
1614                 if ctr > 1 {
1615                         // Set up the CTR
1616                         p := s.Prog(ppc64.AMOVD)
1617                         p.From.Type = obj.TYPE_CONST
1618                         p.From.Offset = ctr
1619                         p.To.Type = obj.TYPE_REG
1620                         p.To.Reg = ppc64.REGTMP
1621
1622                         p = s.Prog(ppc64.AMOVD)
1623                         p.From.Type = obj.TYPE_REG
1624                         p.From.Reg = ppc64.REGTMP
1625                         p.To.Type = obj.TYPE_REG
1626                         p.To.Reg = ppc64.REG_CTR
1627
1628                         p = s.Prog(obj.APCALIGN)
1629                         p.From.Type = obj.TYPE_CONST
1630                         p.From.Offset = 16
1631
1632                         // Generate 16 byte loads and stores.
1633                         p = s.Prog(ppc64.ALXV)
1634                         p.From.Type = obj.TYPE_MEM
1635                         p.From.Reg = srcReg
1636                         p.From.Offset = offset
1637                         p.To.Type = obj.TYPE_REG
1638                         p.To.Reg = ppc64.REG_VS32
1639                         if top == nil {
1640                                 top = p
1641                         }
1642                         p = s.Prog(ppc64.ALXV)
1643                         p.From.Type = obj.TYPE_MEM
1644                         p.From.Reg = srcReg
1645                         p.From.Offset = offset + 16
1646                         p.To.Type = obj.TYPE_REG
1647                         p.To.Reg = ppc64.REG_VS33
1648
1649                         // generate 16 byte stores
1650                         p = s.Prog(ppc64.ASTXV)
1651                         p.From.Type = obj.TYPE_REG
1652                         p.From.Reg = ppc64.REG_VS32
1653                         p.To.Type = obj.TYPE_MEM
1654                         p.To.Reg = dstReg
1655                         p.To.Offset = offset
1656
1657                         p = s.Prog(ppc64.ASTXV)
1658                         p.From.Type = obj.TYPE_REG
1659                         p.From.Reg = ppc64.REG_VS33
1660                         p.To.Type = obj.TYPE_MEM
1661                         p.To.Reg = dstReg
1662                         p.To.Offset = offset + 16
1663
1664                         // Generate 16 byte loads and stores.
1665                         p = s.Prog(ppc64.ALXV)
1666                         p.From.Type = obj.TYPE_MEM
1667                         p.From.Reg = srcReg
1668                         p.From.Offset = offset + 32
1669                         p.To.Type = obj.TYPE_REG
1670                         p.To.Reg = ppc64.REG_VS32
1671
1672                         p = s.Prog(ppc64.ALXV)
1673                         p.From.Type = obj.TYPE_MEM
1674                         p.From.Reg = srcReg
1675                         p.From.Offset = offset + 48
1676                         p.To.Type = obj.TYPE_REG
1677                         p.To.Reg = ppc64.REG_VS33
1678
1679                         // generate 16 byte stores
1680                         p = s.Prog(ppc64.ASTXV)
1681                         p.From.Type = obj.TYPE_REG
1682                         p.From.Reg = ppc64.REG_VS32
1683                         p.To.Type = obj.TYPE_MEM
1684                         p.To.Reg = dstReg
1685                         p.To.Offset = offset + 32
1686
1687                         p = s.Prog(ppc64.ASTXV)
1688                         p.From.Type = obj.TYPE_REG
1689                         p.From.Reg = ppc64.REG_VS33
1690                         p.To.Type = obj.TYPE_MEM
1691                         p.To.Reg = dstReg
1692                         p.To.Offset = offset + 48
1693
1694                         // increment the src reg for next iteration
1695                         p = s.Prog(ppc64.AADD)
1696                         p.Reg = srcReg
1697                         p.From.Type = obj.TYPE_CONST
1698                         p.From.Offset = bytesPerLoop
1699                         p.To.Type = obj.TYPE_REG
1700                         p.To.Reg = srcReg
1701
1702                         // increment the dst reg for next iteration
1703                         p = s.Prog(ppc64.AADD)
1704                         p.Reg = dstReg
1705                         p.From.Type = obj.TYPE_CONST
1706                         p.From.Offset = bytesPerLoop
1707                         p.To.Type = obj.TYPE_REG
1708                         p.To.Reg = dstReg
1709
1710                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1711                         // to loop top.
1712                         p = s.Prog(ppc64.ABC)
1713                         p.From.Type = obj.TYPE_CONST
1714                         p.From.Offset = ppc64.BO_BCTR
1715                         p.Reg = ppc64.REG_R0
1716                         p.To.Type = obj.TYPE_BRANCH
1717                         p.To.SetTarget(top)
1718
1719                         // srcReg and dstReg were incremented in the loop, so
1720                         // later instructions start with offset 0.
1721                         offset = int64(0)
1722                 }
1723
1724                 // No loop was generated for one iteration, so
1725                 // add 64 bytes to the remainder to move those bytes.
1726                 if ctr == 1 {
1727                         rem += bytesPerLoop
1728                 }
1729                 if rem >= 32 {
1730                         p := s.Prog(ppc64.ALXV)
1731                         p.From.Type = obj.TYPE_MEM
1732                         p.From.Reg = srcReg
1733                         p.To.Type = obj.TYPE_REG
1734                         p.To.Reg = ppc64.REG_VS32
1735
1736                         p = s.Prog(ppc64.ALXV)
1737                         p.From.Type = obj.TYPE_MEM
1738                         p.From.Reg = srcReg
1739                         p.From.Offset = 16
1740                         p.To.Type = obj.TYPE_REG
1741                         p.To.Reg = ppc64.REG_VS33
1742
1743                         p = s.Prog(ppc64.ASTXV)
1744                         p.From.Type = obj.TYPE_REG
1745                         p.From.Reg = ppc64.REG_VS32
1746                         p.To.Type = obj.TYPE_MEM
1747                         p.To.Reg = dstReg
1748
1749                         p = s.Prog(ppc64.ASTXV)
1750                         p.From.Type = obj.TYPE_REG
1751                         p.From.Reg = ppc64.REG_VS33
1752                         p.To.Type = obj.TYPE_MEM
1753                         p.To.Reg = dstReg
1754                         p.To.Offset = 16
1755
1756                         offset = 32
1757                         rem -= 32
1758                 }
1759
1760                 if rem >= 16 {
1761                         // Generate 16 byte loads and stores.
1762                         p := s.Prog(ppc64.ALXV)
1763                         p.From.Type = obj.TYPE_MEM
1764                         p.From.Reg = srcReg
1765                         p.From.Offset = offset
1766                         p.To.Type = obj.TYPE_REG
1767                         p.To.Reg = ppc64.REG_VS32
1768
1769                         p = s.Prog(ppc64.ASTXV)
1770                         p.From.Type = obj.TYPE_REG
1771                         p.From.Reg = ppc64.REG_VS32
1772                         p.To.Type = obj.TYPE_MEM
1773                         p.To.Reg = dstReg
1774                         p.To.Offset = offset
1775
1776                         offset += 16
1777                         rem -= 16
1778
1779                         if rem >= 16 {
1780                                 p := s.Prog(ppc64.ALXV)
1781                                 p.From.Type = obj.TYPE_MEM
1782                                 p.From.Reg = srcReg
1783                                 p.From.Offset = offset
1784                                 p.To.Type = obj.TYPE_REG
1785                                 p.To.Reg = ppc64.REG_VS32
1786
1787                                 p = s.Prog(ppc64.ASTXV)
1788                                 p.From.Type = obj.TYPE_REG
1789                                 p.From.Reg = ppc64.REG_VS32
1790                                 p.To.Type = obj.TYPE_MEM
1791                                 p.To.Reg = dstReg
1792                                 p.To.Offset = offset
1793
1794                                 offset += 16
1795                                 rem -= 16
1796                         }
1797                 }
1798                 // Generate all the remaining load and store pairs, starting with
1799                 // as many 8 byte moves as possible, then 4, 2, 1.
1800                 for rem > 0 {
1801                         op, size := ppc64.AMOVB, int64(1)
1802                         switch {
1803                         case rem >= 8:
1804                                 op, size = ppc64.AMOVD, 8
1805                         case rem >= 4:
1806                                 op, size = ppc64.AMOVWZ, 4
1807                         case rem >= 2:
1808                                 op, size = ppc64.AMOVH, 2
1809                         }
1810                         // Load
1811                         p := s.Prog(op)
1812                         p.To.Type = obj.TYPE_REG
1813                         p.To.Reg = ppc64.REGTMP
1814                         p.From.Type = obj.TYPE_MEM
1815                         p.From.Reg = srcReg
1816                         p.From.Offset = offset
1817
1818                         // Store
1819                         p = s.Prog(op)
1820                         p.From.Type = obj.TYPE_REG
1821                         p.From.Reg = ppc64.REGTMP
1822                         p.To.Type = obj.TYPE_MEM
1823                         p.To.Reg = dstReg
1824                         p.To.Offset = offset
1825                         rem -= size
1826                         offset += size
1827                 }
1828
1829         case ssa.OpPPC64CALLstatic:
1830                 s.Call(v)
1831
1832         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1833                 p := s.Prog(ppc64.AMOVD)
1834                 p.From.Type = obj.TYPE_REG
1835                 p.From.Reg = v.Args[0].Reg()
1836                 p.To.Type = obj.TYPE_REG
1837                 p.To.Reg = ppc64.REG_LR
1838
1839                 if v.Args[0].Reg() != ppc64.REG_R12 {
1840                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1841                 }
1842
1843                 pp := s.Call(v)
1844                 pp.To.Reg = ppc64.REG_LR
1845
1846                 // Insert a hint this is not a subroutine return.
1847                 pp.SetFrom3Const(1)
1848
1849                 if base.Ctxt.Flag_shared {
1850                         // When compiling Go into PIC, the function we just
1851                         // called via pointer might have been implemented in
1852                         // a separate module and so overwritten the TOC
1853                         // pointer in R2; reload it.
1854                         q := s.Prog(ppc64.AMOVD)
1855                         q.From.Type = obj.TYPE_MEM
1856                         q.From.Offset = 24
1857                         q.From.Reg = ppc64.REGSP
1858                         q.To.Type = obj.TYPE_REG
1859                         q.To.Reg = ppc64.REG_R2
1860                 }
1861
1862         case ssa.OpPPC64LoweredWB:
1863                 p := s.Prog(obj.ACALL)
1864                 p.To.Type = obj.TYPE_MEM
1865                 p.To.Name = obj.NAME_EXTERN
1866                 p.To.Sym = v.Aux.(*obj.LSym)
1867
1868         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1869                 p := s.Prog(obj.ACALL)
1870                 p.To.Type = obj.TYPE_MEM
1871                 p.To.Name = obj.NAME_EXTERN
1872                 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
1873                 s.UseArgs(16) // space used in callee args area by assembly stubs
1874
1875         case ssa.OpPPC64LoweredNilCheck:
1876                 if buildcfg.GOOS == "aix" {
1877                         // CMP Rarg0, R0
1878                         // BNE 2(PC)
1879                         // STW R0, 0(R0)
1880                         // NOP (so the BNE has somewhere to land)
1881
1882                         // CMP Rarg0, R0
1883                         p := s.Prog(ppc64.ACMP)
1884                         p.From.Type = obj.TYPE_REG
1885                         p.From.Reg = v.Args[0].Reg()
1886                         p.To.Type = obj.TYPE_REG
1887                         p.To.Reg = ppc64.REG_R0
1888
1889                         // BNE 2(PC)
1890                         p2 := s.Prog(ppc64.ABNE)
1891                         p2.To.Type = obj.TYPE_BRANCH
1892
1893                         // STW R0, 0(R0)
1894                         // Write at 0 is forbidden and will trigger a SIGSEGV
1895                         p = s.Prog(ppc64.AMOVW)
1896                         p.From.Type = obj.TYPE_REG
1897                         p.From.Reg = ppc64.REG_R0
1898                         p.To.Type = obj.TYPE_MEM
1899                         p.To.Reg = ppc64.REG_R0
1900
1901                         // NOP (so the BNE has somewhere to land)
1902                         nop := s.Prog(obj.ANOP)
1903                         p2.To.SetTarget(nop)
1904
1905                 } else {
1906                         // Issue a load which will fault if arg is nil.
1907                         p := s.Prog(ppc64.AMOVBZ)
1908                         p.From.Type = obj.TYPE_MEM
1909                         p.From.Reg = v.Args[0].Reg()
1910                         ssagen.AddAux(&p.From, v)
1911                         p.To.Type = obj.TYPE_REG
1912                         p.To.Reg = ppc64.REGTMP
1913                 }
1914                 if logopt.Enabled() {
1915                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1916                 }
1917                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1918                         base.WarnfAt(v.Pos, "generated nil check")
1919                 }
1920
1921         // These should be resolved by rules and not make it here.
1922         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1923                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1924                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1925                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1926         case ssa.OpPPC64InvertFlags:
1927                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1928         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1929                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1930         case ssa.OpClobber, ssa.OpClobberReg:
1931                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1932         default:
1933                 v.Fatalf("genValue not implemented: %s", v.LongString())
1934         }
1935 }
1936
1937 var blockJump = [...]struct {
1938         asm, invasm     obj.As
1939         asmeq, invasmun bool
1940 }{
1941         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1942         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1943
1944         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1945         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1946         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1947         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1948
1949         // TODO: need to work FP comparisons into block jumps
1950         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1951         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1952         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1953         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1954 }
1955
1956 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
1957         switch b.Kind {
1958         case ssa.BlockDefer:
1959                 // defer returns in R3:
1960                 // 0 if we should continue executing
1961                 // 1 if we should jump to deferreturn call
1962                 p := s.Prog(ppc64.ACMP)
1963                 p.From.Type = obj.TYPE_REG
1964                 p.From.Reg = ppc64.REG_R3
1965                 p.To.Type = obj.TYPE_REG
1966                 p.To.Reg = ppc64.REG_R0
1967
1968                 p = s.Prog(ppc64.ABNE)
1969                 p.To.Type = obj.TYPE_BRANCH
1970                 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
1971                 if b.Succs[0].Block() != next {
1972                         p := s.Prog(obj.AJMP)
1973                         p.To.Type = obj.TYPE_BRANCH
1974                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1975                 }
1976
1977         case ssa.BlockPlain:
1978                 if b.Succs[0].Block() != next {
1979                         p := s.Prog(obj.AJMP)
1980                         p.To.Type = obj.TYPE_BRANCH
1981                         s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
1982                 }
1983         case ssa.BlockExit:
1984         case ssa.BlockRet:
1985                 s.Prog(obj.ARET)
1986         case ssa.BlockRetJmp:
1987                 p := s.Prog(obj.AJMP)
1988                 p.To.Type = obj.TYPE_MEM
1989                 p.To.Name = obj.NAME_EXTERN
1990                 p.To.Sym = b.Aux.(*obj.LSym)
1991
1992         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1993                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1994                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1995                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1996                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1997                 jmp := blockJump[b.Kind]
1998                 switch next {
1999                 case b.Succs[0].Block():
2000                         s.Br(jmp.invasm, b.Succs[1].Block())
2001                         if jmp.invasmun {
2002                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2003                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
2004                         }
2005                 case b.Succs[1].Block():
2006                         s.Br(jmp.asm, b.Succs[0].Block())
2007                         if jmp.asmeq {
2008                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
2009                         }
2010                 default:
2011                         if b.Likely != ssa.BranchUnlikely {
2012                                 s.Br(jmp.asm, b.Succs[0].Block())
2013                                 if jmp.asmeq {
2014                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
2015                                 }
2016                                 s.Br(obj.AJMP, b.Succs[1].Block())
2017                         } else {
2018                                 s.Br(jmp.invasm, b.Succs[1].Block())
2019                                 if jmp.invasmun {
2020                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
2021                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
2022                                 }
2023                                 s.Br(obj.AJMP, b.Succs[0].Block())
2024                         }
2025                 }
2026         default:
2027                 b.Fatalf("branch not implemented: %s", b.LongString())
2028         }
2029 }