]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
[dev.boringcrypto] crypto/hmac: merge up to 2a206c7 and skip test
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/gc"
9         "cmd/compile/internal/logopt"
10         "cmd/compile/internal/ssa"
11         "cmd/compile/internal/types"
12         "cmd/internal/obj"
13         "cmd/internal/obj/ppc64"
14         "cmd/internal/objabi"
15         "math"
16         "strings"
17 )
18
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21         //      flive := b.FlagsLiveAtEnd
22         //      if b.Control != nil && b.Control.Type.IsFlags() {
23         //              flive = true
24         //      }
25         //      for i := len(b.Values) - 1; i >= 0; i-- {
26         //              v := b.Values[i]
27         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
28         //                      // The "mark" is any non-nil Aux value.
29         //                      v.Aux = v
30         //              }
31         //              if v.Type.IsFlags() {
32         //                      flive = false
33         //              }
34         //              for _, a := range v.Args {
35         //                      if a.Type.IsFlags() {
36         //                              flive = true
37         //                      }
38         //              }
39         //      }
40 }
41
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
44         if t.IsFloat() {
45                 switch t.Size() {
46                 case 4:
47                         return ppc64.AFMOVS
48                 case 8:
49                         return ppc64.AFMOVD
50                 }
51         } else {
52                 switch t.Size() {
53                 case 1:
54                         if t.IsSigned() {
55                                 return ppc64.AMOVB
56                         } else {
57                                 return ppc64.AMOVBZ
58                         }
59                 case 2:
60                         if t.IsSigned() {
61                                 return ppc64.AMOVH
62                         } else {
63                                 return ppc64.AMOVHZ
64                         }
65                 case 4:
66                         if t.IsSigned() {
67                                 return ppc64.AMOVW
68                         } else {
69                                 return ppc64.AMOVWZ
70                         }
71                 case 8:
72                         return ppc64.AMOVD
73                 }
74         }
75         panic("bad load type")
76 }
77
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
80         if t.IsFloat() {
81                 switch t.Size() {
82                 case 4:
83                         return ppc64.AFMOVS
84                 case 8:
85                         return ppc64.AFMOVD
86                 }
87         } else {
88                 switch t.Size() {
89                 case 1:
90                         return ppc64.AMOVB
91                 case 2:
92                         return ppc64.AMOVH
93                 case 4:
94                         return ppc64.AMOVW
95                 case 8:
96                         return ppc64.AMOVD
97                 }
98         }
99         panic("bad store type")
100 }
101
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
103         switch v.Op {
104         case ssa.OpCopy:
105                 t := v.Type
106                 if t.IsMemory() {
107                         return
108                 }
109                 x := v.Args[0].Reg()
110                 y := v.Reg()
111                 if x != y {
112                         rt := obj.TYPE_REG
113                         op := ppc64.AMOVD
114
115                         if t.IsFloat() {
116                                 op = ppc64.AFMOVD
117                         }
118                         p := s.Prog(op)
119                         p.From.Type = rt
120                         p.From.Reg = x
121                         p.To.Type = rt
122                         p.To.Reg = y
123                 }
124
125         case ssa.OpPPC64LoweredMuluhilo:
126                 // MULHDU       Rarg1, Rarg0, Reg0
127                 // MULLD        Rarg1, Rarg0, Reg1
128                 r0 := v.Args[0].Reg()
129                 r1 := v.Args[1].Reg()
130                 p := s.Prog(ppc64.AMULHDU)
131                 p.From.Type = obj.TYPE_REG
132                 p.From.Reg = r1
133                 p.Reg = r0
134                 p.To.Type = obj.TYPE_REG
135                 p.To.Reg = v.Reg0()
136                 p1 := s.Prog(ppc64.AMULLD)
137                 p1.From.Type = obj.TYPE_REG
138                 p1.From.Reg = r1
139                 p1.Reg = r0
140                 p1.To.Type = obj.TYPE_REG
141                 p1.To.Reg = v.Reg1()
142
143         case ssa.OpPPC64LoweredAdd64Carry:
144                 // ADDC         Rarg2, -1, Rtmp
145                 // ADDE         Rarg1, Rarg0, Reg0
146                 // ADDZE        Rzero, Reg1
147                 r0 := v.Args[0].Reg()
148                 r1 := v.Args[1].Reg()
149                 r2 := v.Args[2].Reg()
150                 p := s.Prog(ppc64.AADDC)
151                 p.From.Type = obj.TYPE_CONST
152                 p.From.Offset = -1
153                 p.Reg = r2
154                 p.To.Type = obj.TYPE_REG
155                 p.To.Reg = ppc64.REGTMP
156                 p1 := s.Prog(ppc64.AADDE)
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.Reg = r0
160                 p1.To.Type = obj.TYPE_REG
161                 p1.To.Reg = v.Reg0()
162                 p2 := s.Prog(ppc64.AADDZE)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGZERO
165                 p2.To.Type = obj.TYPE_REG
166                 p2.To.Reg = v.Reg1()
167
168         case ssa.OpPPC64LoweredAtomicAnd8,
169                 ssa.OpPPC64LoweredAtomicOr8:
170                 // LWSYNC
171                 // LBAR         (Rarg0), Rtmp
172                 // AND/OR       Rarg1, Rtmp
173                 // STBCCC       Rtmp, (Rarg0)
174                 // BNE          -3(PC)
175                 r0 := v.Args[0].Reg()
176                 r1 := v.Args[1].Reg()
177                 // LWSYNC - Assuming shared data not write-through-required nor
178                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179                 plwsync := s.Prog(ppc64.ALWSYNC)
180                 plwsync.To.Type = obj.TYPE_NONE
181                 p := s.Prog(ppc64.ALBAR)
182                 p.From.Type = obj.TYPE_MEM
183                 p.From.Reg = r0
184                 p.To.Type = obj.TYPE_REG
185                 p.To.Reg = ppc64.REGTMP
186                 p1 := s.Prog(v.Op.Asm())
187                 p1.From.Type = obj.TYPE_REG
188                 p1.From.Reg = r1
189                 p1.To.Type = obj.TYPE_REG
190                 p1.To.Reg = ppc64.REGTMP
191                 p2 := s.Prog(ppc64.ASTBCCC)
192                 p2.From.Type = obj.TYPE_REG
193                 p2.From.Reg = ppc64.REGTMP
194                 p2.To.Type = obj.TYPE_MEM
195                 p2.To.Reg = r0
196                 p2.RegTo2 = ppc64.REGTMP
197                 p3 := s.Prog(ppc64.ABNE)
198                 p3.To.Type = obj.TYPE_BRANCH
199                 gc.Patch(p3, p)
200
201         case ssa.OpPPC64LoweredAtomicAdd32,
202                 ssa.OpPPC64LoweredAtomicAdd64:
203                 // LWSYNC
204                 // LDAR/LWAR    (Rarg0), Rout
205                 // ADD          Rarg1, Rout
206                 // STDCCC/STWCCC Rout, (Rarg0)
207                 // BNE         -3(PC)
208                 // MOVW         Rout,Rout (if Add32)
209                 ld := ppc64.ALDAR
210                 st := ppc64.ASTDCCC
211                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
212                         ld = ppc64.ALWAR
213                         st = ppc64.ASTWCCC
214                 }
215                 r0 := v.Args[0].Reg()
216                 r1 := v.Args[1].Reg()
217                 out := v.Reg0()
218                 // LWSYNC - Assuming shared data not write-through-required nor
219                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220                 plwsync := s.Prog(ppc64.ALWSYNC)
221                 plwsync.To.Type = obj.TYPE_NONE
222                 // LDAR or LWAR
223                 p := s.Prog(ld)
224                 p.From.Type = obj.TYPE_MEM
225                 p.From.Reg = r0
226                 p.To.Type = obj.TYPE_REG
227                 p.To.Reg = out
228                 // ADD reg1,out
229                 p1 := s.Prog(ppc64.AADD)
230                 p1.From.Type = obj.TYPE_REG
231                 p1.From.Reg = r1
232                 p1.To.Reg = out
233                 p1.To.Type = obj.TYPE_REG
234                 // STDCCC or STWCCC
235                 p3 := s.Prog(st)
236                 p3.From.Type = obj.TYPE_REG
237                 p3.From.Reg = out
238                 p3.To.Type = obj.TYPE_MEM
239                 p3.To.Reg = r0
240                 // BNE retry
241                 p4 := s.Prog(ppc64.ABNE)
242                 p4.To.Type = obj.TYPE_BRANCH
243                 gc.Patch(p4, p)
244
245                 // Ensure a 32 bit result
246                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247                         p5 := s.Prog(ppc64.AMOVWZ)
248                         p5.To.Type = obj.TYPE_REG
249                         p5.To.Reg = out
250                         p5.From.Type = obj.TYPE_REG
251                         p5.From.Reg = out
252                 }
253
254         case ssa.OpPPC64LoweredAtomicExchange32,
255                 ssa.OpPPC64LoweredAtomicExchange64:
256                 // LWSYNC
257                 // LDAR/LWAR    (Rarg0), Rout
258                 // STDCCC/STWCCC Rout, (Rarg0)
259                 // BNE         -2(PC)
260                 // ISYNC
261                 ld := ppc64.ALDAR
262                 st := ppc64.ASTDCCC
263                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
264                         ld = ppc64.ALWAR
265                         st = ppc64.ASTWCCC
266                 }
267                 r0 := v.Args[0].Reg()
268                 r1 := v.Args[1].Reg()
269                 out := v.Reg0()
270                 // LWSYNC - Assuming shared data not write-through-required nor
271                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272                 plwsync := s.Prog(ppc64.ALWSYNC)
273                 plwsync.To.Type = obj.TYPE_NONE
274                 // LDAR or LWAR
275                 p := s.Prog(ld)
276                 p.From.Type = obj.TYPE_MEM
277                 p.From.Reg = r0
278                 p.To.Type = obj.TYPE_REG
279                 p.To.Reg = out
280                 // STDCCC or STWCCC
281                 p1 := s.Prog(st)
282                 p1.From.Type = obj.TYPE_REG
283                 p1.From.Reg = r1
284                 p1.To.Type = obj.TYPE_MEM
285                 p1.To.Reg = r0
286                 // BNE retry
287                 p2 := s.Prog(ppc64.ABNE)
288                 p2.To.Type = obj.TYPE_BRANCH
289                 gc.Patch(p2, p)
290                 // ISYNC
291                 pisync := s.Prog(ppc64.AISYNC)
292                 pisync.To.Type = obj.TYPE_NONE
293
294         case ssa.OpPPC64LoweredAtomicLoad8,
295                 ssa.OpPPC64LoweredAtomicLoad32,
296                 ssa.OpPPC64LoweredAtomicLoad64,
297                 ssa.OpPPC64LoweredAtomicLoadPtr:
298                 // SYNC
299                 // MOVB/MOVD/MOVW (Rarg0), Rout
300                 // CMP Rout,Rout
301                 // BNE 1(PC)
302                 // ISYNC
303                 ld := ppc64.AMOVD
304                 cmp := ppc64.ACMP
305                 switch v.Op {
306                 case ssa.OpPPC64LoweredAtomicLoad8:
307                         ld = ppc64.AMOVBZ
308                 case ssa.OpPPC64LoweredAtomicLoad32:
309                         ld = ppc64.AMOVWZ
310                         cmp = ppc64.ACMPW
311                 }
312                 arg0 := v.Args[0].Reg()
313                 out := v.Reg0()
314                 // SYNC when AuxInt == 1; otherwise, load-acquire
315                 if v.AuxInt == 1 {
316                         psync := s.Prog(ppc64.ASYNC)
317                         psync.To.Type = obj.TYPE_NONE
318                 }
319                 // Load
320                 p := s.Prog(ld)
321                 p.From.Type = obj.TYPE_MEM
322                 p.From.Reg = arg0
323                 p.To.Type = obj.TYPE_REG
324                 p.To.Reg = out
325                 // CMP
326                 p1 := s.Prog(cmp)
327                 p1.From.Type = obj.TYPE_REG
328                 p1.From.Reg = out
329                 p1.To.Type = obj.TYPE_REG
330                 p1.To.Reg = out
331                 // BNE
332                 p2 := s.Prog(ppc64.ABNE)
333                 p2.To.Type = obj.TYPE_BRANCH
334                 // ISYNC
335                 pisync := s.Prog(ppc64.AISYNC)
336                 pisync.To.Type = obj.TYPE_NONE
337                 gc.Patch(p2, pisync)
338
339         case ssa.OpPPC64LoweredAtomicStore8,
340                 ssa.OpPPC64LoweredAtomicStore32,
341                 ssa.OpPPC64LoweredAtomicStore64:
342                 // SYNC or LWSYNC
343                 // MOVB/MOVW/MOVD arg1,(arg0)
344                 st := ppc64.AMOVD
345                 switch v.Op {
346                 case ssa.OpPPC64LoweredAtomicStore8:
347                         st = ppc64.AMOVB
348                 case ssa.OpPPC64LoweredAtomicStore32:
349                         st = ppc64.AMOVW
350                 }
351                 arg0 := v.Args[0].Reg()
352                 arg1 := v.Args[1].Reg()
353                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
354                 // SYNC
355                 syncOp := ppc64.ASYNC
356                 if v.AuxInt == 0 {
357                         syncOp = ppc64.ALWSYNC
358                 }
359                 psync := s.Prog(syncOp)
360                 psync.To.Type = obj.TYPE_NONE
361                 // Store
362                 p := s.Prog(st)
363                 p.To.Type = obj.TYPE_MEM
364                 p.To.Reg = arg0
365                 p.From.Type = obj.TYPE_REG
366                 p.From.Reg = arg1
367
368         case ssa.OpPPC64LoweredAtomicCas64,
369                 ssa.OpPPC64LoweredAtomicCas32:
370                 // LWSYNC
371                 // loop:
372                 // LDAR        (Rarg0), MutexHint, Rtmp
373                 // CMP         Rarg1, Rtmp
374                 // BNE         fail
375                 // STDCCC      Rarg2, (Rarg0)
376                 // BNE         loop
377                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
378                 // MOVD        $1, Rout
379                 // BR          end
380                 // fail:
381                 // MOVD        $0, Rout
382                 // end:
383                 ld := ppc64.ALDAR
384                 st := ppc64.ASTDCCC
385                 cmp := ppc64.ACMP
386                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
387                         ld = ppc64.ALWAR
388                         st = ppc64.ASTWCCC
389                         cmp = ppc64.ACMPW
390                 }
391                 r0 := v.Args[0].Reg()
392                 r1 := v.Args[1].Reg()
393                 r2 := v.Args[2].Reg()
394                 out := v.Reg0()
395                 // LWSYNC - Assuming shared data not write-through-required nor
396                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397                 plwsync1 := s.Prog(ppc64.ALWSYNC)
398                 plwsync1.To.Type = obj.TYPE_NONE
399                 // LDAR or LWAR
400                 p := s.Prog(ld)
401                 p.From.Type = obj.TYPE_MEM
402                 p.From.Reg = r0
403                 p.To.Type = obj.TYPE_REG
404                 p.To.Reg = ppc64.REGTMP
405                 // If it is a Compare-and-Swap-Release operation, set the EH field with
406                 // the release hint.
407                 if v.AuxInt == 0 {
408                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
409                 }
410                 // CMP reg1,reg2
411                 p1 := s.Prog(cmp)
412                 p1.From.Type = obj.TYPE_REG
413                 p1.From.Reg = r1
414                 p1.To.Reg = ppc64.REGTMP
415                 p1.To.Type = obj.TYPE_REG
416                 // BNE cas_fail
417                 p2 := s.Prog(ppc64.ABNE)
418                 p2.To.Type = obj.TYPE_BRANCH
419                 // STDCCC or STWCCC
420                 p3 := s.Prog(st)
421                 p3.From.Type = obj.TYPE_REG
422                 p3.From.Reg = r2
423                 p3.To.Type = obj.TYPE_MEM
424                 p3.To.Reg = r0
425                 // BNE retry
426                 p4 := s.Prog(ppc64.ABNE)
427                 p4.To.Type = obj.TYPE_BRANCH
428                 gc.Patch(p4, p)
429                 // LWSYNC - Assuming shared data not write-through-required nor
430                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431                 // If the operation is a CAS-Release, then synchronization is not necessary.
432                 if v.AuxInt != 0 {
433                         plwsync2 := s.Prog(ppc64.ALWSYNC)
434                         plwsync2.To.Type = obj.TYPE_NONE
435                 }
436                 // return true
437                 p5 := s.Prog(ppc64.AMOVD)
438                 p5.From.Type = obj.TYPE_CONST
439                 p5.From.Offset = 1
440                 p5.To.Type = obj.TYPE_REG
441                 p5.To.Reg = out
442                 // BR done
443                 p6 := s.Prog(obj.AJMP)
444                 p6.To.Type = obj.TYPE_BRANCH
445                 // return false
446                 p7 := s.Prog(ppc64.AMOVD)
447                 p7.From.Type = obj.TYPE_CONST
448                 p7.From.Offset = 0
449                 p7.To.Type = obj.TYPE_REG
450                 p7.To.Reg = out
451                 gc.Patch(p2, p7)
452                 // done (label)
453                 p8 := s.Prog(obj.ANOP)
454                 gc.Patch(p6, p8)
455
456         case ssa.OpPPC64LoweredGetClosurePtr:
457                 // Closure pointer is R11 (already)
458                 gc.CheckLoweredGetClosurePtr(v)
459
460         case ssa.OpPPC64LoweredGetCallerSP:
461                 // caller's SP is FixedFrameSize below the address of the first arg
462                 p := s.Prog(ppc64.AMOVD)
463                 p.From.Type = obj.TYPE_ADDR
464                 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465                 p.From.Name = obj.NAME_PARAM
466                 p.To.Type = obj.TYPE_REG
467                 p.To.Reg = v.Reg()
468
469         case ssa.OpPPC64LoweredGetCallerPC:
470                 p := s.Prog(obj.AGETCALLERPC)
471                 p.To.Type = obj.TYPE_REG
472                 p.To.Reg = v.Reg()
473
474         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475                 // input is already rounded
476
477         case ssa.OpLoadReg:
478                 loadOp := loadByType(v.Type)
479                 p := s.Prog(loadOp)
480                 gc.AddrAuto(&p.From, v.Args[0])
481                 p.To.Type = obj.TYPE_REG
482                 p.To.Reg = v.Reg()
483
484         case ssa.OpStoreReg:
485                 storeOp := storeByType(v.Type)
486                 p := s.Prog(storeOp)
487                 p.From.Type = obj.TYPE_REG
488                 p.From.Reg = v.Args[0].Reg()
489                 gc.AddrAuto(&p.To, v)
490
491         case ssa.OpPPC64DIVD:
492                 // For now,
493                 //
494                 // cmp arg1, -1
495                 // be  ahead
496                 // v = arg0 / arg1
497                 // b over
498                 // ahead: v = - arg0
499                 // over: nop
500                 r := v.Reg()
501                 r0 := v.Args[0].Reg()
502                 r1 := v.Args[1].Reg()
503
504                 p := s.Prog(ppc64.ACMP)
505                 p.From.Type = obj.TYPE_REG
506                 p.From.Reg = r1
507                 p.To.Type = obj.TYPE_CONST
508                 p.To.Offset = -1
509
510                 pbahead := s.Prog(ppc64.ABEQ)
511                 pbahead.To.Type = obj.TYPE_BRANCH
512
513                 p = s.Prog(v.Op.Asm())
514                 p.From.Type = obj.TYPE_REG
515                 p.From.Reg = r1
516                 p.Reg = r0
517                 p.To.Type = obj.TYPE_REG
518                 p.To.Reg = r
519
520                 pbover := s.Prog(obj.AJMP)
521                 pbover.To.Type = obj.TYPE_BRANCH
522
523                 p = s.Prog(ppc64.ANEG)
524                 p.To.Type = obj.TYPE_REG
525                 p.To.Reg = r
526                 p.From.Type = obj.TYPE_REG
527                 p.From.Reg = r0
528                 gc.Patch(pbahead, p)
529
530                 p = s.Prog(obj.ANOP)
531                 gc.Patch(pbover, p)
532
533         case ssa.OpPPC64DIVW:
534                 // word-width version of above
535                 r := v.Reg()
536                 r0 := v.Args[0].Reg()
537                 r1 := v.Args[1].Reg()
538
539                 p := s.Prog(ppc64.ACMPW)
540                 p.From.Type = obj.TYPE_REG
541                 p.From.Reg = r1
542                 p.To.Type = obj.TYPE_CONST
543                 p.To.Offset = -1
544
545                 pbahead := s.Prog(ppc64.ABEQ)
546                 pbahead.To.Type = obj.TYPE_BRANCH
547
548                 p = s.Prog(v.Op.Asm())
549                 p.From.Type = obj.TYPE_REG
550                 p.From.Reg = r1
551                 p.Reg = r0
552                 p.To.Type = obj.TYPE_REG
553                 p.To.Reg = r
554
555                 pbover := s.Prog(obj.AJMP)
556                 pbover.To.Type = obj.TYPE_BRANCH
557
558                 p = s.Prog(ppc64.ANEG)
559                 p.To.Type = obj.TYPE_REG
560                 p.To.Reg = r
561                 p.From.Type = obj.TYPE_REG
562                 p.From.Reg = r0
563                 gc.Patch(pbahead, p)
564
565                 p = s.Prog(obj.ANOP)
566                 gc.Patch(pbover, p)
567
568         case ssa.OpPPC64CLRLSLWI:
569                 r := v.Reg()
570                 r1 := v.Args[0].Reg()
571                 shifts := v.AuxInt
572                 p := s.Prog(v.Op.Asm())
573                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
574                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
575                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
576                 p.Reg = r1
577                 p.To.Type = obj.TYPE_REG
578                 p.To.Reg = r
579
580         case ssa.OpPPC64CLRLSLDI:
581                 r := v.Reg()
582                 r1 := v.Args[0].Reg()
583                 shifts := v.AuxInt
584                 p := s.Prog(v.Op.Asm())
585                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
586                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
587                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
588                 p.Reg = r1
589                 p.To.Type = obj.TYPE_REG
590                 p.To.Reg = r
591
592                 // Mask has been set as sh
593         case ssa.OpPPC64RLDICL:
594                 r := v.Reg()
595                 r1 := v.Args[0].Reg()
596                 shifts := v.AuxInt
597                 p := s.Prog(v.Op.Asm())
598                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
599                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)})
600                 p.Reg = r1
601                 p.To.Type = obj.TYPE_REG
602                 p.To.Reg = r
603
604         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
605                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
606                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
607                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
608                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
609                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
610                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
611                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
612                 r := v.Reg()
613                 r1 := v.Args[0].Reg()
614                 r2 := v.Args[1].Reg()
615                 p := s.Prog(v.Op.Asm())
616                 p.From.Type = obj.TYPE_REG
617                 p.From.Reg = r2
618                 p.Reg = r1
619                 p.To.Type = obj.TYPE_REG
620                 p.To.Reg = r
621
622         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
623                 r1 := v.Args[0].Reg()
624                 r2 := v.Args[1].Reg()
625                 p := s.Prog(v.Op.Asm())
626                 p.From.Type = obj.TYPE_REG
627                 p.From.Reg = r2
628                 p.Reg = r1
629                 p.To.Type = obj.TYPE_REG
630                 p.To.Reg = ppc64.REGTMP // result is not needed
631
632         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
633                 p := s.Prog(v.Op.Asm())
634                 p.From.Type = obj.TYPE_CONST
635                 p.From.Offset = v.AuxInt
636                 p.Reg = v.Args[0].Reg()
637                 p.To.Type = obj.TYPE_REG
638                 p.To.Reg = v.Reg()
639
640         case ssa.OpPPC64MADDLD:
641                 r := v.Reg()
642                 r1 := v.Args[0].Reg()
643                 r2 := v.Args[1].Reg()
644                 r3 := v.Args[2].Reg()
645                 // r = r1*r2 ± r3
646                 p := s.Prog(v.Op.Asm())
647                 p.From.Type = obj.TYPE_REG
648                 p.From.Reg = r1
649                 p.Reg = r2
650                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
651                 p.To.Type = obj.TYPE_REG
652                 p.To.Reg = r
653
654         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
655                 r := v.Reg()
656                 r1 := v.Args[0].Reg()
657                 r2 := v.Args[1].Reg()
658                 r3 := v.Args[2].Reg()
659                 // r = r1*r2 ± r3
660                 p := s.Prog(v.Op.Asm())
661                 p.From.Type = obj.TYPE_REG
662                 p.From.Reg = r1
663                 p.Reg = r3
664                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
665                 p.To.Type = obj.TYPE_REG
666                 p.To.Reg = r
667
668         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
669                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
670                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
671                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
672                 r := v.Reg()
673                 p := s.Prog(v.Op.Asm())
674                 p.To.Type = obj.TYPE_REG
675                 p.To.Reg = r
676                 p.From.Type = obj.TYPE_REG
677                 p.From.Reg = v.Args[0].Reg()
678
679         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
680                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
681                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
682                 p := s.Prog(v.Op.Asm())
683                 p.Reg = v.Args[0].Reg()
684                 p.From.Type = obj.TYPE_CONST
685                 p.From.Offset = v.AuxInt
686                 p.To.Type = obj.TYPE_REG
687                 p.To.Reg = v.Reg()
688
689         case ssa.OpPPC64SUBFCconst:
690                 p := s.Prog(v.Op.Asm())
691                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
692                 p.From.Type = obj.TYPE_REG
693                 p.From.Reg = v.Args[0].Reg()
694                 p.To.Type = obj.TYPE_REG
695                 p.To.Reg = v.Reg()
696
697         case ssa.OpPPC64ANDCCconst:
698                 p := s.Prog(v.Op.Asm())
699                 p.Reg = v.Args[0].Reg()
700                 p.From.Type = obj.TYPE_CONST
701                 p.From.Offset = v.AuxInt
702                 p.To.Type = obj.TYPE_REG
703                 p.To.Reg = ppc64.REGTMP // discard result
704
705         case ssa.OpPPC64MOVDaddr:
706                 switch v.Aux.(type) {
707                 default:
708                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
709                 case nil:
710                         // If aux offset and aux int are both 0, and the same
711                         // input and output regs are used, no instruction
712                         // needs to be generated, since it would just be
713                         // addi rx, rx, 0.
714                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
715                                 p := s.Prog(ppc64.AMOVD)
716                                 p.From.Type = obj.TYPE_ADDR
717                                 p.From.Reg = v.Args[0].Reg()
718                                 p.From.Offset = v.AuxInt
719                                 p.To.Type = obj.TYPE_REG
720                                 p.To.Reg = v.Reg()
721                         }
722
723                 case *obj.LSym, *gc.Node:
724                         p := s.Prog(ppc64.AMOVD)
725                         p.From.Type = obj.TYPE_ADDR
726                         p.From.Reg = v.Args[0].Reg()
727                         p.To.Type = obj.TYPE_REG
728                         p.To.Reg = v.Reg()
729                         gc.AddAux(&p.From, v)
730
731                 }
732
733         case ssa.OpPPC64MOVDconst:
734                 p := s.Prog(v.Op.Asm())
735                 p.From.Type = obj.TYPE_CONST
736                 p.From.Offset = v.AuxInt
737                 p.To.Type = obj.TYPE_REG
738                 p.To.Reg = v.Reg()
739
740         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
741                 p := s.Prog(v.Op.Asm())
742                 p.From.Type = obj.TYPE_FCONST
743                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
744                 p.To.Type = obj.TYPE_REG
745                 p.To.Reg = v.Reg()
746
747         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
748                 p := s.Prog(v.Op.Asm())
749                 p.From.Type = obj.TYPE_REG
750                 p.From.Reg = v.Args[0].Reg()
751                 p.To.Type = obj.TYPE_REG
752                 p.To.Reg = v.Args[1].Reg()
753
754         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
755                 p := s.Prog(v.Op.Asm())
756                 p.From.Type = obj.TYPE_REG
757                 p.From.Reg = v.Args[0].Reg()
758                 p.To.Type = obj.TYPE_CONST
759                 p.To.Offset = v.AuxInt
760
761         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
762                 // Shift in register to required size
763                 p := s.Prog(v.Op.Asm())
764                 p.From.Type = obj.TYPE_REG
765                 p.From.Reg = v.Args[0].Reg()
766                 p.To.Reg = v.Reg()
767                 p.To.Type = obj.TYPE_REG
768
769         case ssa.OpPPC64MOVDload:
770
771                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
772                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
773                 // the offset is not known until link time. If the load of a go.string uses relocation for the
774                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
775                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
776                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
777                 // go.string types because other types will have proper alignment.
778
779                 gostring := false
780                 switch n := v.Aux.(type) {
781                 case *obj.LSym:
782                         gostring = strings.HasPrefix(n.Name, "go.string.")
783                 }
784                 if gostring {
785                         // Generate full addr of the go.string const
786                         // including AuxInt
787                         p := s.Prog(ppc64.AMOVD)
788                         p.From.Type = obj.TYPE_ADDR
789                         p.From.Reg = v.Args[0].Reg()
790                         gc.AddAux(&p.From, v)
791                         p.To.Type = obj.TYPE_REG
792                         p.To.Reg = v.Reg()
793                         // Load go.string using 0 offset
794                         p = s.Prog(v.Op.Asm())
795                         p.From.Type = obj.TYPE_MEM
796                         p.From.Reg = v.Reg()
797                         p.To.Type = obj.TYPE_REG
798                         p.To.Reg = v.Reg()
799                         break
800                 }
801                 // Not a go.string, generate a normal load
802                 fallthrough
803
804         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
805                 p := s.Prog(v.Op.Asm())
806                 p.From.Type = obj.TYPE_MEM
807                 p.From.Reg = v.Args[0].Reg()
808                 gc.AddAux(&p.From, v)
809                 p.To.Type = obj.TYPE_REG
810                 p.To.Reg = v.Reg()
811
812         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
813                 p := s.Prog(v.Op.Asm())
814                 p.From.Type = obj.TYPE_MEM
815                 p.From.Reg = v.Args[0].Reg()
816                 p.To.Type = obj.TYPE_REG
817                 p.To.Reg = v.Reg()
818
819         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
820                 p := s.Prog(v.Op.Asm())
821                 p.To.Type = obj.TYPE_MEM
822                 p.To.Reg = v.Args[0].Reg()
823                 p.From.Type = obj.TYPE_REG
824                 p.From.Reg = v.Args[1].Reg()
825
826         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
827                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
828                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
829                 p := s.Prog(v.Op.Asm())
830                 p.From.Type = obj.TYPE_MEM
831                 p.From.Reg = v.Args[0].Reg()
832                 p.From.Index = v.Args[1].Reg()
833                 p.To.Type = obj.TYPE_REG
834                 p.To.Reg = v.Reg()
835
836         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
837                 p := s.Prog(v.Op.Asm())
838                 p.From.Type = obj.TYPE_REG
839                 p.From.Reg = ppc64.REGZERO
840                 p.To.Type = obj.TYPE_MEM
841                 p.To.Reg = v.Args[0].Reg()
842                 gc.AddAux(&p.To, v)
843
844         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
845                 p := s.Prog(v.Op.Asm())
846                 p.From.Type = obj.TYPE_REG
847                 p.From.Reg = v.Args[1].Reg()
848                 p.To.Type = obj.TYPE_MEM
849                 p.To.Reg = v.Args[0].Reg()
850                 gc.AddAux(&p.To, v)
851
852         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
853                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
854                 ssa.OpPPC64MOVHBRstoreidx:
855                 p := s.Prog(v.Op.Asm())
856                 p.From.Type = obj.TYPE_REG
857                 p.From.Reg = v.Args[2].Reg()
858                 p.To.Index = v.Args[1].Reg()
859                 p.To.Type = obj.TYPE_MEM
860                 p.To.Reg = v.Args[0].Reg()
861
862         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
863                 // ISEL, ISELB
864                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
865                 // ISEL only accepts 0, 1, 2 condition values but the others can be
866                 // achieved by swapping operand order.
867                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
868                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
869                 // ISELB is used when a boolean result is needed, returning 0 or 1
870                 p := s.Prog(ppc64.AISEL)
871                 p.To.Type = obj.TYPE_REG
872                 p.To.Reg = v.Reg()
873                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
874                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
875                 if v.Op == ssa.OpPPC64ISEL {
876                         r.Reg = v.Args[1].Reg()
877                 }
878                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
879                 if v.AuxInt > 3 {
880                         p.Reg = r.Reg
881                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
882                 } else {
883                         p.Reg = v.Args[0].Reg()
884                         p.SetFrom3(r)
885                 }
886                 p.From.Type = obj.TYPE_CONST
887                 p.From.Offset = v.AuxInt & 3
888
889         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
890                 // The LoweredQuad code generation
891                 // generates STXV instructions on
892                 // power9. The Short variation is used
893                 // if no loop is generated.
894
895                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
896
897                 // Set up loop counter in CTR, used by BC
898                 // XXLXOR clears VS32
899                 //       XXLXOR VS32,VS32,VS32
900                 //       MOVD len/64,REG_TMP
901                 //       MOVD REG_TMP,CTR
902                 //       loop:
903                 //       STXV VS32,0(R20)
904                 //       STXV VS32,16(R20)
905                 //       STXV VS32,32(R20)
906                 //       STXV VS32,48(R20)
907                 //       ADD  $64,R20
908                 //       BC   16, 0, loop
909
910                 // Number of 64-byte loop iterations
911                 ctr := v.AuxInt / 64
912
913                 // Remainder bytes
914                 rem := v.AuxInt % 64
915
916                 // Only generate a loop if there is more
917                 // than 1 iteration.
918                 if ctr > 1 {
919                         // Set up VS32 (V0) to hold 0s
920                         p := s.Prog(ppc64.AXXLXOR)
921                         p.From.Type = obj.TYPE_REG
922                         p.From.Reg = ppc64.REG_VS32
923                         p.To.Type = obj.TYPE_REG
924                         p.To.Reg = ppc64.REG_VS32
925                         p.Reg = ppc64.REG_VS32
926
927                         // Set up CTR loop counter
928                         p = s.Prog(ppc64.AMOVD)
929                         p.From.Type = obj.TYPE_CONST
930                         p.From.Offset = ctr
931                         p.To.Type = obj.TYPE_REG
932                         p.To.Reg = ppc64.REGTMP
933
934                         p = s.Prog(ppc64.AMOVD)
935                         p.From.Type = obj.TYPE_REG
936                         p.From.Reg = ppc64.REGTMP
937                         p.To.Type = obj.TYPE_REG
938                         p.To.Reg = ppc64.REG_CTR
939
940                         // Don't generate padding for
941                         // loops with few iterations.
942                         if ctr > 3 {
943                                 p = s.Prog(obj.APCALIGN)
944                                 p.From.Type = obj.TYPE_CONST
945                                 p.From.Offset = 16
946                         }
947
948                         // generate 4 STXVs to zero 64 bytes
949                         var top *obj.Prog
950
951                         p = s.Prog(ppc64.ASTXV)
952                         p.From.Type = obj.TYPE_REG
953                         p.From.Reg = ppc64.REG_VS32
954                         p.To.Type = obj.TYPE_MEM
955                         p.To.Reg = v.Args[0].Reg()
956
957                         //  Save the top of loop
958                         if top == nil {
959                                 top = p
960                         }
961                         p = s.Prog(ppc64.ASTXV)
962                         p.From.Type = obj.TYPE_REG
963                         p.From.Reg = ppc64.REG_VS32
964                         p.To.Type = obj.TYPE_MEM
965                         p.To.Reg = v.Args[0].Reg()
966                         p.To.Offset = 16
967
968                         p = s.Prog(ppc64.ASTXV)
969                         p.From.Type = obj.TYPE_REG
970                         p.From.Reg = ppc64.REG_VS32
971                         p.To.Type = obj.TYPE_MEM
972                         p.To.Reg = v.Args[0].Reg()
973                         p.To.Offset = 32
974
975                         p = s.Prog(ppc64.ASTXV)
976                         p.From.Type = obj.TYPE_REG
977                         p.From.Reg = ppc64.REG_VS32
978                         p.To.Type = obj.TYPE_MEM
979                         p.To.Reg = v.Args[0].Reg()
980                         p.To.Offset = 48
981
982                         // Increment address for the
983                         // 64 bytes just zeroed.
984                         p = s.Prog(ppc64.AADD)
985                         p.Reg = v.Args[0].Reg()
986                         p.From.Type = obj.TYPE_CONST
987                         p.From.Offset = 64
988                         p.To.Type = obj.TYPE_REG
989                         p.To.Reg = v.Args[0].Reg()
990
991                         // Branch back to top of loop
992                         // based on CTR
993                         // BC with BO_BCTR generates bdnz
994                         p = s.Prog(ppc64.ABC)
995                         p.From.Type = obj.TYPE_CONST
996                         p.From.Offset = ppc64.BO_BCTR
997                         p.Reg = ppc64.REG_R0
998                         p.To.Type = obj.TYPE_BRANCH
999                         gc.Patch(p, top)
1000                 }
1001                 // When ctr == 1 the loop was not generated but
1002                 // there are at least 64 bytes to clear, so add
1003                 // that to the remainder to generate the code
1004                 // to clear those doublewords
1005                 if ctr == 1 {
1006                         rem += 64
1007                 }
1008
1009                 // Clear the remainder starting at offset zero
1010                 offset := int64(0)
1011
1012                 if rem >= 16 && ctr <= 1 {
1013                         // If the XXLXOR hasn't already been
1014                         // generated, do it here to initialize
1015                         // VS32 (V0) to 0.
1016                         p := s.Prog(ppc64.AXXLXOR)
1017                         p.From.Type = obj.TYPE_REG
1018                         p.From.Reg = ppc64.REG_VS32
1019                         p.To.Type = obj.TYPE_REG
1020                         p.To.Reg = ppc64.REG_VS32
1021                         p.Reg = ppc64.REG_VS32
1022                 }
1023                 // Generate STXV for 32 or 64
1024                 // bytes.
1025                 for rem >= 32 {
1026                         p := s.Prog(ppc64.ASTXV)
1027                         p.From.Type = obj.TYPE_REG
1028                         p.From.Reg = ppc64.REG_VS32
1029                         p.To.Type = obj.TYPE_MEM
1030                         p.To.Reg = v.Args[0].Reg()
1031                         p.To.Offset = offset
1032
1033                         p = s.Prog(ppc64.ASTXV)
1034                         p.From.Type = obj.TYPE_REG
1035                         p.From.Reg = ppc64.REG_VS32
1036                         p.To.Type = obj.TYPE_MEM
1037                         p.To.Reg = v.Args[0].Reg()
1038                         p.To.Offset = offset + 16
1039                         offset += 32
1040                         rem -= 32
1041                 }
1042                 // Generate 16 bytes
1043                 if rem >= 16 {
1044                         p := s.Prog(ppc64.ASTXV)
1045                         p.From.Type = obj.TYPE_REG
1046                         p.From.Reg = ppc64.REG_VS32
1047                         p.To.Type = obj.TYPE_MEM
1048                         p.To.Reg = v.Args[0].Reg()
1049                         p.To.Offset = offset
1050                         offset += 16
1051                         rem -= 16
1052                 }
1053
1054                 // first clear as many doublewords as possible
1055                 // then clear remaining sizes as available
1056                 for rem > 0 {
1057                         op, size := ppc64.AMOVB, int64(1)
1058                         switch {
1059                         case rem >= 8:
1060                                 op, size = ppc64.AMOVD, 8
1061                         case rem >= 4:
1062                                 op, size = ppc64.AMOVW, 4
1063                         case rem >= 2:
1064                                 op, size = ppc64.AMOVH, 2
1065                         }
1066                         p := s.Prog(op)
1067                         p.From.Type = obj.TYPE_REG
1068                         p.From.Reg = ppc64.REG_R0
1069                         p.To.Type = obj.TYPE_MEM
1070                         p.To.Reg = v.Args[0].Reg()
1071                         p.To.Offset = offset
1072                         rem -= size
1073                         offset += size
1074                 }
1075
1076         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1077
1078                 // Unaligned data doesn't hurt performance
1079                 // for these instructions on power8.
1080
1081                 // For sizes >= 64 generate a loop as follows:
1082
1083                 // Set up loop counter in CTR, used by BC
1084                 //       XXLXOR VS32,VS32,VS32
1085                 //       MOVD len/32,REG_TMP
1086                 //       MOVD REG_TMP,CTR
1087                 //       MOVD $16,REG_TMP
1088                 //       loop:
1089                 //       STXVD2X VS32,(R0)(R20)
1090                 //       STXVD2X VS32,(R31)(R20)
1091                 //       ADD  $32,R20
1092                 //       BC   16, 0, loop
1093                 //
1094                 // any remainder is done as described below
1095
1096                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1097                 // then handle the remainder
1098                 //      MOVD R0,(R20)
1099                 //      MOVD R0,8(R20)
1100                 // .... etc.
1101                 //
1102                 // the remainder bytes are cleared using one or more
1103                 // of the following instructions with the appropriate
1104                 // offsets depending which instructions are needed
1105                 //
1106                 //      MOVW R0,n1(R20) 4 bytes
1107                 //      MOVH R0,n2(R20) 2 bytes
1108                 //      MOVB R0,n3(R20) 1 byte
1109                 //
1110                 // 7 bytes: MOVW, MOVH, MOVB
1111                 // 6 bytes: MOVW, MOVH
1112                 // 5 bytes: MOVW, MOVB
1113                 // 3 bytes: MOVH, MOVB
1114
1115                 // each loop iteration does 32 bytes
1116                 ctr := v.AuxInt / 32
1117
1118                 // remainder bytes
1119                 rem := v.AuxInt % 32
1120
1121                 // only generate a loop if there is more
1122                 // than 1 iteration.
1123                 if ctr > 1 {
1124                         // Set up VS32 (V0) to hold 0s
1125                         p := s.Prog(ppc64.AXXLXOR)
1126                         p.From.Type = obj.TYPE_REG
1127                         p.From.Reg = ppc64.REG_VS32
1128                         p.To.Type = obj.TYPE_REG
1129                         p.To.Reg = ppc64.REG_VS32
1130                         p.Reg = ppc64.REG_VS32
1131
1132                         // Set up CTR loop counter
1133                         p = s.Prog(ppc64.AMOVD)
1134                         p.From.Type = obj.TYPE_CONST
1135                         p.From.Offset = ctr
1136                         p.To.Type = obj.TYPE_REG
1137                         p.To.Reg = ppc64.REGTMP
1138
1139                         p = s.Prog(ppc64.AMOVD)
1140                         p.From.Type = obj.TYPE_REG
1141                         p.From.Reg = ppc64.REGTMP
1142                         p.To.Type = obj.TYPE_REG
1143                         p.To.Reg = ppc64.REG_CTR
1144
1145                         // Set up R31 to hold index value 16
1146                         p = s.Prog(ppc64.AMOVD)
1147                         p.From.Type = obj.TYPE_CONST
1148                         p.From.Offset = 16
1149                         p.To.Type = obj.TYPE_REG
1150                         p.To.Reg = ppc64.REGTMP
1151
1152                         // Don't add padding for alignment
1153                         // with few loop iterations.
1154                         if ctr > 3 {
1155                                 p = s.Prog(obj.APCALIGN)
1156                                 p.From.Type = obj.TYPE_CONST
1157                                 p.From.Offset = 16
1158                         }
1159
1160                         // generate 2 STXVD2Xs, each storing 16 bytes (32 bytes per iteration)
1161                         // when this is a loop then the top must be saved
1162                         var top *obj.Prog
1163                         // This is the top of loop
1164
1165                         p = s.Prog(ppc64.ASTXVD2X)
1166                         p.From.Type = obj.TYPE_REG
1167                         p.From.Reg = ppc64.REG_VS32
1168                         p.To.Type = obj.TYPE_MEM
1169                         p.To.Reg = v.Args[0].Reg()
1170                         p.To.Index = ppc64.REGZERO
1171                         // Save the top of loop
1172                         if top == nil {
1173                                 top = p
1174                         }
1175                         p = s.Prog(ppc64.ASTXVD2X)
1176                         p.From.Type = obj.TYPE_REG
1177                         p.From.Reg = ppc64.REG_VS32
1178                         p.To.Type = obj.TYPE_MEM
1179                         p.To.Reg = v.Args[0].Reg()
1180                         p.To.Index = ppc64.REGTMP
1181
1182                         // Increment address for the
1183                         // 4 doublewords just zeroed.
1184                         p = s.Prog(ppc64.AADD)
1185                         p.Reg = v.Args[0].Reg()
1186                         p.From.Type = obj.TYPE_CONST
1187                         p.From.Offset = 32
1188                         p.To.Type = obj.TYPE_REG
1189                         p.To.Reg = v.Args[0].Reg()
1190
1191                         // Branch back to top of loop
1192                         // based on CTR
1193                         // BC with BO_BCTR generates bdnz
1194                         p = s.Prog(ppc64.ABC)
1195                         p.From.Type = obj.TYPE_CONST
1196                         p.From.Offset = ppc64.BO_BCTR
1197                         p.Reg = ppc64.REG_R0
1198                         p.To.Type = obj.TYPE_BRANCH
1199                         gc.Patch(p, top)
1200                 }
1201
1202                 // when ctr == 1 the loop was not generated but
1203                 // there are at least 32 bytes to clear, so add
1204                 // that to the remainder to generate the code
1205                 // to clear those doublewords
1206                 if ctr == 1 {
1207                         rem += 32
1208                 }
1209
1210                 // clear the remainder starting at offset zero
1211                 offset := int64(0)
1212
1213                 // first clear as many doublewords as possible
1214                 // then clear remaining sizes as available
1215                 for rem > 0 {
1216                         op, size := ppc64.AMOVB, int64(1)
1217                         switch {
1218                         case rem >= 8:
1219                                 op, size = ppc64.AMOVD, 8
1220                         case rem >= 4:
1221                                 op, size = ppc64.AMOVW, 4
1222                         case rem >= 2:
1223                                 op, size = ppc64.AMOVH, 2
1224                         }
1225                         p := s.Prog(op)
1226                         p.From.Type = obj.TYPE_REG
1227                         p.From.Reg = ppc64.REG_R0
1228                         p.To.Type = obj.TYPE_MEM
1229                         p.To.Reg = v.Args[0].Reg()
1230                         p.To.Offset = offset
1231                         rem -= size
1232                         offset += size
1233                 }
1234
1235         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1236
1237                 bytesPerLoop := int64(32)
1238                 // This will be used when moving more
1239                 // than 8 bytes.  Moves start with
1240                 // as many 8 byte moves as possible, then
1241                 // 4, 2, or 1 byte(s) as remaining.  This will
1242                 // work and be efficient for power8 or later.
1243                 // If there are 64 or more bytes, then a
1244                 // loop is generated to move 32 bytes and
1245                 // update the src and dst addresses on each
1246                 // iteration. When < 64 bytes, the appropriate
1247                 // number of moves are generated based on the
1248                 // size.
1249                 // When moving >= 64 bytes a loop is used
1250                 //      MOVD len/32,REG_TMP
1251                 //      MOVD REG_TMP,CTR
1252                 //      MOVD $16,REG_TMP
1253                 // top:
1254                 //      LXVD2X (R0)(R21),VS32
1255                 //      LXVD2X (R31)(R21),VS33
1256                 //      ADD $32,R21
1257                 //      STXVD2X VS32,(R0)(R20)
1258                 //      STXVD2X VS33,(R31)(R20)
1259                 //      ADD $32,R20
1260                 //      BC 16,0,top
1261                 // Bytes not moved by this loop are moved
1262                 // with a combination of the following instructions,
1263                 // starting with the largest sizes and generating as
1264                 // many as needed, using the appropriate offset value.
1265                 //      MOVD  n(R21),R31
1266                 //      MOVD  R31,n(R20)
1267                 //      MOVW  n1(R21),R31
1268                 //      MOVW  R31,n1(R20)
1269                 //      MOVH  n2(R21),R31
1270                 //      MOVH  R31,n2(R20)
1271                 //      MOVB  n3(R21),R31
1272                 //      MOVB  R31,n3(R20)
1273
1274                 // Each loop iteration moves 32 bytes
1275                 ctr := v.AuxInt / bytesPerLoop
1276
1277                 // Remainder after the loop
1278                 rem := v.AuxInt % bytesPerLoop
1279
1280                 dstReg := v.Args[0].Reg()
1281                 srcReg := v.Args[1].Reg()
1282
1283                 // The set of registers used here, must match the clobbered reg list
1284                 // in PPC64Ops.go.
1285                 offset := int64(0)
1286
1287                 // top of the loop
1288                 var top *obj.Prog
1289                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1290                 if ctr > 1 {
1291                         // Set up the CTR
1292                         p := s.Prog(ppc64.AMOVD)
1293                         p.From.Type = obj.TYPE_CONST
1294                         p.From.Offset = ctr
1295                         p.To.Type = obj.TYPE_REG
1296                         p.To.Reg = ppc64.REGTMP
1297
1298                         p = s.Prog(ppc64.AMOVD)
1299                         p.From.Type = obj.TYPE_REG
1300                         p.From.Reg = ppc64.REGTMP
1301                         p.To.Type = obj.TYPE_REG
1302                         p.To.Reg = ppc64.REG_CTR
1303
1304                         // Use REGTMP as index reg
1305                         p = s.Prog(ppc64.AMOVD)
1306                         p.From.Type = obj.TYPE_CONST
1307                         p.From.Offset = 16
1308                         p.To.Type = obj.TYPE_REG
1309                         p.To.Reg = ppc64.REGTMP
1310
1311                         // Don't add padding for
1312                         // alignment with small iteration
1313                         // counts.
1314                         if ctr > 3 {
1315                                 p = s.Prog(obj.APCALIGN)
1316                                 p.From.Type = obj.TYPE_CONST
1317                                 p.From.Offset = 16
1318                         }
1319
1320                         // Generate 16 byte loads and stores.
1321                         // Use temp register for index (16)
1322                         // on the second one.
1323
1324                         p = s.Prog(ppc64.ALXVD2X)
1325                         p.From.Type = obj.TYPE_MEM
1326                         p.From.Reg = srcReg
1327                         p.From.Index = ppc64.REGZERO
1328                         p.To.Type = obj.TYPE_REG
1329                         p.To.Reg = ppc64.REG_VS32
1330                         if top == nil {
1331                                 top = p
1332                         }
1333                         p = s.Prog(ppc64.ALXVD2X)
1334                         p.From.Type = obj.TYPE_MEM
1335                         p.From.Reg = srcReg
1336                         p.From.Index = ppc64.REGTMP
1337                         p.To.Type = obj.TYPE_REG
1338                         p.To.Reg = ppc64.REG_VS33
1339
1340                         // increment the src reg for next iteration
1341                         p = s.Prog(ppc64.AADD)
1342                         p.Reg = srcReg
1343                         p.From.Type = obj.TYPE_CONST
1344                         p.From.Offset = bytesPerLoop
1345                         p.To.Type = obj.TYPE_REG
1346                         p.To.Reg = srcReg
1347
1348                         // generate 16 byte stores
1349                         p = s.Prog(ppc64.ASTXVD2X)
1350                         p.From.Type = obj.TYPE_REG
1351                         p.From.Reg = ppc64.REG_VS32
1352                         p.To.Type = obj.TYPE_MEM
1353                         p.To.Reg = dstReg
1354                         p.To.Index = ppc64.REGZERO
1355
1356                         p = s.Prog(ppc64.ASTXVD2X)
1357                         p.From.Type = obj.TYPE_REG
1358                         p.From.Reg = ppc64.REG_VS33
1359                         p.To.Type = obj.TYPE_MEM
1360                         p.To.Reg = dstReg
1361                         p.To.Index = ppc64.REGTMP
1362
1363                         // increment the dst reg for next iteration
1364                         p = s.Prog(ppc64.AADD)
1365                         p.Reg = dstReg
1366                         p.From.Type = obj.TYPE_CONST
1367                         p.From.Offset = bytesPerLoop
1368                         p.To.Type = obj.TYPE_REG
1369                         p.To.Reg = dstReg
1370
1371                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1372                         // to loop top.
1373                         p = s.Prog(ppc64.ABC)
1374                         p.From.Type = obj.TYPE_CONST
1375                         p.From.Offset = ppc64.BO_BCTR
1376                         p.Reg = ppc64.REG_R0
1377                         p.To.Type = obj.TYPE_BRANCH
1378                         gc.Patch(p, top)
1379
1380                         // srcReg and dstReg were incremented in the loop, so
1381                         // later instructions start with offset 0.
1382                         offset = int64(0)
1383                 }
1384
1385                 // No loop was generated for one iteration, so
1386                 // add 32 bytes to the remainder to move those bytes.
1387                 if ctr == 1 {
1388                         rem += bytesPerLoop
1389                 }
1390
1391                 if rem >= 16 {
1392                         // Generate 16 byte loads and stores.
1393                         // Use temp register for index (value 16)
1394                         // on the second one.
1395                         p := s.Prog(ppc64.ALXVD2X)
1396                         p.From.Type = obj.TYPE_MEM
1397                         p.From.Reg = srcReg
1398                         p.From.Index = ppc64.REGZERO
1399                         p.To.Type = obj.TYPE_REG
1400                         p.To.Reg = ppc64.REG_VS32
1401
1402                         p = s.Prog(ppc64.ASTXVD2X)
1403                         p.From.Type = obj.TYPE_REG
1404                         p.From.Reg = ppc64.REG_VS32
1405                         p.To.Type = obj.TYPE_MEM
1406                         p.To.Reg = dstReg
1407                         p.To.Index = ppc64.REGZERO
1408
1409                         offset = 16
1410                         rem -= 16
1411
1412                         if rem >= 16 {
1413                                 // Use REGTMP as index reg
1414                                 p := s.Prog(ppc64.AMOVD)
1415                                 p.From.Type = obj.TYPE_CONST
1416                                 p.From.Offset = 16
1417                                 p.To.Type = obj.TYPE_REG
1418                                 p.To.Reg = ppc64.REGTMP
1419
1420                                 p = s.Prog(ppc64.ALXVD2X)
1421                                 p.From.Type = obj.TYPE_MEM
1422                                 p.From.Reg = srcReg
1423                                 p.From.Index = ppc64.REGTMP
1424                                 p.To.Type = obj.TYPE_REG
1425                                 p.To.Reg = ppc64.REG_VS32
1426
1427                                 p = s.Prog(ppc64.ASTXVD2X)
1428                                 p.From.Type = obj.TYPE_REG
1429                                 p.From.Reg = ppc64.REG_VS32
1430                                 p.To.Type = obj.TYPE_MEM
1431                                 p.To.Reg = dstReg
1432                                 p.To.Index = ppc64.REGTMP
1433
1434                                 offset = 32
1435                                 rem -= 16
1436                         }
1437                 }
1438
1439                 // Generate all the remaining load and store pairs, starting with
1440                 // as many 8 byte moves as possible, then 4, 2, 1.
1441                 for rem > 0 {
1442                         op, size := ppc64.AMOVB, int64(1)
1443                         switch {
1444                         case rem >= 8:
1445                                 op, size = ppc64.AMOVD, 8
1446                         case rem >= 4:
1447                                 op, size = ppc64.AMOVW, 4
1448                         case rem >= 2:
1449                                 op, size = ppc64.AMOVH, 2
1450                         }
1451                         // Load
1452                         p := s.Prog(op)
1453                         p.To.Type = obj.TYPE_REG
1454                         p.To.Reg = ppc64.REGTMP
1455                         p.From.Type = obj.TYPE_MEM
1456                         p.From.Reg = srcReg
1457                         p.From.Offset = offset
1458
1459                         // Store
1460                         p = s.Prog(op)
1461                         p.From.Type = obj.TYPE_REG
1462                         p.From.Reg = ppc64.REGTMP
1463                         p.To.Type = obj.TYPE_MEM
1464                         p.To.Reg = dstReg
1465                         p.To.Offset = offset
1466                         rem -= size
1467                         offset += size
1468                 }
1469
1470         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1471                 bytesPerLoop := int64(64)
1472                 // This is used when moving more
1473                 // than 8 bytes on power9.  Moves start with
1474                 // as many 8 byte moves as possible, then
1475                 // 4, 2, or 1 byte(s) as remaining.  This will
1476                 // work and be efficient for power8 or later.
1477                 // If there are 64 or more bytes, then a
1478                 // loop is generated to move 64 bytes and
1479                 // update the src and dst addresses on each
1480                 // iteration. When < 64 bytes, the appropriate
1481                 // number of moves are generated based on the
1482                 // size.
1483                 // When moving >= 64 bytes a loop is used
1484                 //      MOVD len/32,REG_TMP
1485                 //      MOVD REG_TMP,CTR
1486                 // top:
1487                 //      LXV 0(R21),VS32
1488                 //      LXV 16(R21),VS33
1489                 //      ADD $32,R21
1490                 //      STXV VS32,0(R20)
1491                 //      STXV VS33,16(R20)
1492                 //      ADD $32,R20
1493                 //      BC 16,0,top
1494                 // Bytes not moved by this loop are moved
1495                 // with a combination of the following instructions,
1496                 // starting with the largest sizes and generating as
1497                 // many as needed, using the appropriate offset value.
1498                 //      MOVD  n(R21),R31
1499                 //      MOVD  R31,n(R20)
1500                 //      MOVW  n1(R21),R31
1501                 //      MOVW  R31,n1(R20)
1502                 //      MOVH  n2(R21),R31
1503                 //      MOVH  R31,n2(R20)
1504                 //      MOVB  n3(R21),R31
1505                 //      MOVB  R31,n3(R20)
1506
1507                 // Each loop iteration moves 64 bytes
1508                 ctr := v.AuxInt / bytesPerLoop
1509
1510                 // Remainder after the loop
1511                 rem := v.AuxInt % bytesPerLoop
1512
1513                 dstReg := v.Args[0].Reg()
1514                 srcReg := v.Args[1].Reg()
1515
1516                 offset := int64(0)
1517
1518                 // top of the loop
1519                 var top *obj.Prog
1520
1521                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1522                 if ctr > 1 {
1523                         // Set up the CTR
1524                         p := s.Prog(ppc64.AMOVD)
1525                         p.From.Type = obj.TYPE_CONST
1526                         p.From.Offset = ctr
1527                         p.To.Type = obj.TYPE_REG
1528                         p.To.Reg = ppc64.REGTMP
1529
1530                         p = s.Prog(ppc64.AMOVD)
1531                         p.From.Type = obj.TYPE_REG
1532                         p.From.Reg = ppc64.REGTMP
1533                         p.To.Type = obj.TYPE_REG
1534                         p.To.Reg = ppc64.REG_CTR
1535
1536                         p = s.Prog(obj.APCALIGN)
1537                         p.From.Type = obj.TYPE_CONST
1538                         p.From.Offset = 16
1539
1540                         // Generate 16 byte loads and stores.
1541                         p = s.Prog(ppc64.ALXV)
1542                         p.From.Type = obj.TYPE_MEM
1543                         p.From.Reg = srcReg
1544                         p.From.Offset = offset
1545                         p.To.Type = obj.TYPE_REG
1546                         p.To.Reg = ppc64.REG_VS32
1547                         if top == nil {
1548                                 top = p
1549                         }
1550                         p = s.Prog(ppc64.ALXV)
1551                         p.From.Type = obj.TYPE_MEM
1552                         p.From.Reg = srcReg
1553                         p.From.Offset = offset + 16
1554                         p.To.Type = obj.TYPE_REG
1555                         p.To.Reg = ppc64.REG_VS33
1556
1557                         // generate 16 byte stores
1558                         p = s.Prog(ppc64.ASTXV)
1559                         p.From.Type = obj.TYPE_REG
1560                         p.From.Reg = ppc64.REG_VS32
1561                         p.To.Type = obj.TYPE_MEM
1562                         p.To.Reg = dstReg
1563                         p.To.Offset = offset
1564
1565                         p = s.Prog(ppc64.ASTXV)
1566                         p.From.Type = obj.TYPE_REG
1567                         p.From.Reg = ppc64.REG_VS33
1568                         p.To.Type = obj.TYPE_MEM
1569                         p.To.Reg = dstReg
1570                         p.To.Offset = offset + 16
1571
1572                         // Generate 16 byte loads and stores.
1573                         p = s.Prog(ppc64.ALXV)
1574                         p.From.Type = obj.TYPE_MEM
1575                         p.From.Reg = srcReg
1576                         p.From.Offset = offset + 32
1577                         p.To.Type = obj.TYPE_REG
1578                         p.To.Reg = ppc64.REG_VS32
1579
1580                         p = s.Prog(ppc64.ALXV)
1581                         p.From.Type = obj.TYPE_MEM
1582                         p.From.Reg = srcReg
1583                         p.From.Offset = offset + 48
1584                         p.To.Type = obj.TYPE_REG
1585                         p.To.Reg = ppc64.REG_VS33
1586
1587                         // generate 16 byte stores
1588                         p = s.Prog(ppc64.ASTXV)
1589                         p.From.Type = obj.TYPE_REG
1590                         p.From.Reg = ppc64.REG_VS32
1591                         p.To.Type = obj.TYPE_MEM
1592                         p.To.Reg = dstReg
1593                         p.To.Offset = offset + 32
1594
1595                         p = s.Prog(ppc64.ASTXV)
1596                         p.From.Type = obj.TYPE_REG
1597                         p.From.Reg = ppc64.REG_VS33
1598                         p.To.Type = obj.TYPE_MEM
1599                         p.To.Reg = dstReg
1600                         p.To.Offset = offset + 48
1601
1602                         // increment the src reg for next iteration
1603                         p = s.Prog(ppc64.AADD)
1604                         p.Reg = srcReg
1605                         p.From.Type = obj.TYPE_CONST
1606                         p.From.Offset = bytesPerLoop
1607                         p.To.Type = obj.TYPE_REG
1608                         p.To.Reg = srcReg
1609
1610                         // increment the dst reg for next iteration
1611                         p = s.Prog(ppc64.AADD)
1612                         p.Reg = dstReg
1613                         p.From.Type = obj.TYPE_CONST
1614                         p.From.Offset = bytesPerLoop
1615                         p.To.Type = obj.TYPE_REG
1616                         p.To.Reg = dstReg
1617
1618                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1619                         // to loop top.
1620                         p = s.Prog(ppc64.ABC)
1621                         p.From.Type = obj.TYPE_CONST
1622                         p.From.Offset = ppc64.BO_BCTR
1623                         p.Reg = ppc64.REG_R0
1624                         p.To.Type = obj.TYPE_BRANCH
1625                         gc.Patch(p, top)
1626
1627                         // srcReg and dstReg were incremented in the loop, so
1628                         // later instructions start with offset 0.
1629                         offset = int64(0)
1630                 }
1631
1632                 // No loop was generated for one iteration, so
1633                 // add 64 bytes to the remainder to move those bytes.
1634                 if ctr == 1 {
1635                         rem += bytesPerLoop
1636                 }
1637                 if rem >= 32 {
1638                         p := s.Prog(ppc64.ALXV)
1639                         p.From.Type = obj.TYPE_MEM
1640                         p.From.Reg = srcReg
1641                         p.To.Type = obj.TYPE_REG
1642                         p.To.Reg = ppc64.REG_VS32
1643
1644                         p = s.Prog(ppc64.ALXV)
1645                         p.From.Type = obj.TYPE_MEM
1646                         p.From.Reg = srcReg
1647                         p.From.Offset = 16
1648                         p.To.Type = obj.TYPE_REG
1649                         p.To.Reg = ppc64.REG_VS33
1650
1651                         p = s.Prog(ppc64.ASTXV)
1652                         p.From.Type = obj.TYPE_REG
1653                         p.From.Reg = ppc64.REG_VS32
1654                         p.To.Type = obj.TYPE_MEM
1655                         p.To.Reg = dstReg
1656
1657                         p = s.Prog(ppc64.ASTXV)
1658                         p.From.Type = obj.TYPE_REG
1659                         p.From.Reg = ppc64.REG_VS33
1660                         p.To.Type = obj.TYPE_MEM
1661                         p.To.Reg = dstReg
1662                         p.To.Offset = 16
1663
1664                         offset = 32
1665                         rem -= 32
1666                 }
1667
1668                 if rem >= 16 {
1669                         // Generate 16 byte loads and stores.
1670                         p := s.Prog(ppc64.ALXV)
1671                         p.From.Type = obj.TYPE_MEM
1672                         p.From.Reg = srcReg
1673                         p.From.Offset = offset
1674                         p.To.Type = obj.TYPE_REG
1675                         p.To.Reg = ppc64.REG_VS32
1676
1677                         p = s.Prog(ppc64.ASTXV)
1678                         p.From.Type = obj.TYPE_REG
1679                         p.From.Reg = ppc64.REG_VS32
1680                         p.To.Type = obj.TYPE_MEM
1681                         p.To.Reg = dstReg
1682                         p.To.Offset = offset
1683
1684                         offset += 16
1685                         rem -= 16
1686
1687                         if rem >= 16 {
1688                                 p := s.Prog(ppc64.ALXV)
1689                                 p.From.Type = obj.TYPE_MEM
1690                                 p.From.Reg = srcReg
1691                                 p.From.Offset = offset
1692                                 p.To.Type = obj.TYPE_REG
1693                                 p.To.Reg = ppc64.REG_VS32
1694
1695                                 p = s.Prog(ppc64.ASTXV)
1696                                 p.From.Type = obj.TYPE_REG
1697                                 p.From.Reg = ppc64.REG_VS32
1698                                 p.To.Type = obj.TYPE_MEM
1699                                 p.To.Reg = dstReg
1700                                 p.To.Offset = offset
1701
1702                                 offset += 16
1703                                 rem -= 16
1704                         }
1705                 }
1706                 // Generate all the remaining load and store pairs, starting with
1707                 // as many 8 byte moves as possible, then 4, 2, 1.
1708                 for rem > 0 {
1709                         op, size := ppc64.AMOVB, int64(1)
1710                         switch {
1711                         case rem >= 8:
1712                                 op, size = ppc64.AMOVD, 8
1713                         case rem >= 4:
1714                                 op, size = ppc64.AMOVW, 4
1715                         case rem >= 2:
1716                                 op, size = ppc64.AMOVH, 2
1717                         }
1718                         // Load
1719                         p := s.Prog(op)
1720                         p.To.Type = obj.TYPE_REG
1721                         p.To.Reg = ppc64.REGTMP
1722                         p.From.Type = obj.TYPE_MEM
1723                         p.From.Reg = srcReg
1724                         p.From.Offset = offset
1725
1726                         // Store
1727                         p = s.Prog(op)
1728                         p.From.Type = obj.TYPE_REG
1729                         p.From.Reg = ppc64.REGTMP
1730                         p.To.Type = obj.TYPE_MEM
1731                         p.To.Reg = dstReg
1732                         p.To.Offset = offset
1733                         rem -= size
1734                         offset += size
1735                 }
1736
1737         case ssa.OpPPC64CALLstatic:
1738                 s.Call(v)
1739
1740         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1741                 p := s.Prog(ppc64.AMOVD)
1742                 p.From.Type = obj.TYPE_REG
1743                 p.From.Reg = v.Args[0].Reg()
1744                 p.To.Type = obj.TYPE_REG
1745                 p.To.Reg = ppc64.REG_LR
1746
1747                 if v.Args[0].Reg() != ppc64.REG_R12 {
1748                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1749                 }
1750
1751                 pp := s.Call(v)
1752                 pp.To.Reg = ppc64.REG_LR
1753
1754                 if gc.Ctxt.Flag_shared {
1755                         // When compiling Go into PIC, the function we just
1756                         // called via pointer might have been implemented in
1757                         // a separate module and so overwritten the TOC
1758                         // pointer in R2; reload it.
1759                         q := s.Prog(ppc64.AMOVD)
1760                         q.From.Type = obj.TYPE_MEM
1761                         q.From.Offset = 24
1762                         q.From.Reg = ppc64.REGSP
1763                         q.To.Type = obj.TYPE_REG
1764                         q.To.Reg = ppc64.REG_R2
1765                 }
1766
1767         case ssa.OpPPC64LoweredWB:
1768                 p := s.Prog(obj.ACALL)
1769                 p.To.Type = obj.TYPE_MEM
1770                 p.To.Name = obj.NAME_EXTERN
1771                 p.To.Sym = v.Aux.(*obj.LSym)
1772
1773         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1774                 p := s.Prog(obj.ACALL)
1775                 p.To.Type = obj.TYPE_MEM
1776                 p.To.Name = obj.NAME_EXTERN
1777                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1778                 s.UseArgs(16) // space used in callee args area by assembly stubs
1779
1780         case ssa.OpPPC64LoweredNilCheck:
1781                 if objabi.GOOS == "aix" {
1782                         // CMP Rarg0, R0
1783                         // BNE 2(PC)
1784                         // STW R0, 0(R0)
1785                         // NOP (so the BNE has somewhere to land)
1786
1787                         // CMP Rarg0, R0
1788                         p := s.Prog(ppc64.ACMP)
1789                         p.From.Type = obj.TYPE_REG
1790                         p.From.Reg = v.Args[0].Reg()
1791                         p.To.Type = obj.TYPE_REG
1792                         p.To.Reg = ppc64.REG_R0
1793
1794                         // BNE 2(PC)
1795                         p2 := s.Prog(ppc64.ABNE)
1796                         p2.To.Type = obj.TYPE_BRANCH
1797
1798                         // STW R0, 0(R0)
1799                         // Write at 0 is forbidden and will trigger a SIGSEGV
1800                         p = s.Prog(ppc64.AMOVW)
1801                         p.From.Type = obj.TYPE_REG
1802                         p.From.Reg = ppc64.REG_R0
1803                         p.To.Type = obj.TYPE_MEM
1804                         p.To.Reg = ppc64.REG_R0
1805
1806                         // NOP (so the BNE has somewhere to land)
1807                         nop := s.Prog(obj.ANOP)
1808                         gc.Patch(p2, nop)
1809
1810                 } else {
1811                         // Issue a load which will fault if arg is nil.
1812                         p := s.Prog(ppc64.AMOVBZ)
1813                         p.From.Type = obj.TYPE_MEM
1814                         p.From.Reg = v.Args[0].Reg()
1815                         gc.AddAux(&p.From, v)
1816                         p.To.Type = obj.TYPE_REG
1817                         p.To.Reg = ppc64.REGTMP
1818                 }
1819                 if logopt.Enabled() {
1820                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1821                 }
1822                 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1823                         gc.Warnl(v.Pos, "generated nil check")
1824                 }
1825
1826         // These should be resolved by rules and not make it here.
1827         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1828                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1829                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1830                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1831         case ssa.OpPPC64InvertFlags:
1832                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1833         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1834                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1835         case ssa.OpClobber:
1836                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1837         default:
1838                 v.Fatalf("genValue not implemented: %s", v.LongString())
1839         }
1840 }
1841
1842 var blockJump = [...]struct {
1843         asm, invasm     obj.As
1844         asmeq, invasmun bool
1845 }{
1846         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1847         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1848
1849         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1850         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1851         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1852         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1853
1854         // TODO: need to work FP comparisons into block jumps
1855         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1856         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1857         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1858         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1859 }
1860
1861 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1862         switch b.Kind {
1863         case ssa.BlockDefer:
1864                 // defer returns in R3:
1865                 // 0 if we should continue executing
1866                 // 1 if we should jump to deferreturn call
1867                 p := s.Prog(ppc64.ACMP)
1868                 p.From.Type = obj.TYPE_REG
1869                 p.From.Reg = ppc64.REG_R3
1870                 p.To.Type = obj.TYPE_REG
1871                 p.To.Reg = ppc64.REG_R0
1872
1873                 p = s.Prog(ppc64.ABNE)
1874                 p.To.Type = obj.TYPE_BRANCH
1875                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1876                 if b.Succs[0].Block() != next {
1877                         p := s.Prog(obj.AJMP)
1878                         p.To.Type = obj.TYPE_BRANCH
1879                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1880                 }
1881
1882         case ssa.BlockPlain:
1883                 if b.Succs[0].Block() != next {
1884                         p := s.Prog(obj.AJMP)
1885                         p.To.Type = obj.TYPE_BRANCH
1886                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1887                 }
1888         case ssa.BlockExit:
1889         case ssa.BlockRet:
1890                 s.Prog(obj.ARET)
1891         case ssa.BlockRetJmp:
1892                 p := s.Prog(obj.AJMP)
1893                 p.To.Type = obj.TYPE_MEM
1894                 p.To.Name = obj.NAME_EXTERN
1895                 p.To.Sym = b.Aux.(*obj.LSym)
1896
1897         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1898                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1899                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1900                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1901                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1902                 jmp := blockJump[b.Kind]
1903                 switch next {
1904                 case b.Succs[0].Block():
1905                         s.Br(jmp.invasm, b.Succs[1].Block())
1906                         if jmp.invasmun {
1907                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1908                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1909                         }
1910                 case b.Succs[1].Block():
1911                         s.Br(jmp.asm, b.Succs[0].Block())
1912                         if jmp.asmeq {
1913                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1914                         }
1915                 default:
1916                         if b.Likely != ssa.BranchUnlikely {
1917                                 s.Br(jmp.asm, b.Succs[0].Block())
1918                                 if jmp.asmeq {
1919                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1920                                 }
1921                                 s.Br(obj.AJMP, b.Succs[1].Block())
1922                         } else {
1923                                 s.Br(jmp.invasm, b.Succs[1].Block())
1924                                 if jmp.invasmun {
1925                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1926                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1927                                 }
1928                                 s.Br(obj.AJMP, b.Succs[0].Block())
1929                         }
1930                 }
1931         default:
1932                 b.Fatalf("branch not implemented: %s", b.LongString())
1933         }
1934 }