]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
[dev.link] all: merge branch 'master' into dev.link
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/gc"
9         "cmd/compile/internal/logopt"
10         "cmd/compile/internal/ssa"
11         "cmd/compile/internal/types"
12         "cmd/internal/obj"
13         "cmd/internal/obj/ppc64"
14         "cmd/internal/objabi"
15         "math"
16         "strings"
17 )
18
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21         //      flive := b.FlagsLiveAtEnd
22         //      if b.Control != nil && b.Control.Type.IsFlags() {
23         //              flive = true
24         //      }
25         //      for i := len(b.Values) - 1; i >= 0; i-- {
26         //              v := b.Values[i]
27         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
28         //                      // The "mark" is any non-nil Aux value.
29         //                      v.Aux = v
30         //              }
31         //              if v.Type.IsFlags() {
32         //                      flive = false
33         //              }
34         //              for _, a := range v.Args {
35         //                      if a.Type.IsFlags() {
36         //                              flive = true
37         //                      }
38         //              }
39         //      }
40 }
41
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
44         if t.IsFloat() {
45                 switch t.Size() {
46                 case 4:
47                         return ppc64.AFMOVS
48                 case 8:
49                         return ppc64.AFMOVD
50                 }
51         } else {
52                 switch t.Size() {
53                 case 1:
54                         if t.IsSigned() {
55                                 return ppc64.AMOVB
56                         } else {
57                                 return ppc64.AMOVBZ
58                         }
59                 case 2:
60                         if t.IsSigned() {
61                                 return ppc64.AMOVH
62                         } else {
63                                 return ppc64.AMOVHZ
64                         }
65                 case 4:
66                         if t.IsSigned() {
67                                 return ppc64.AMOVW
68                         } else {
69                                 return ppc64.AMOVWZ
70                         }
71                 case 8:
72                         return ppc64.AMOVD
73                 }
74         }
75         panic("bad load type")
76 }
77
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
80         if t.IsFloat() {
81                 switch t.Size() {
82                 case 4:
83                         return ppc64.AFMOVS
84                 case 8:
85                         return ppc64.AFMOVD
86                 }
87         } else {
88                 switch t.Size() {
89                 case 1:
90                         return ppc64.AMOVB
91                 case 2:
92                         return ppc64.AMOVH
93                 case 4:
94                         return ppc64.AMOVW
95                 case 8:
96                         return ppc64.AMOVD
97                 }
98         }
99         panic("bad store type")
100 }
101
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
103         switch v.Op {
104         case ssa.OpCopy:
105                 t := v.Type
106                 if t.IsMemory() {
107                         return
108                 }
109                 x := v.Args[0].Reg()
110                 y := v.Reg()
111                 if x != y {
112                         rt := obj.TYPE_REG
113                         op := ppc64.AMOVD
114
115                         if t.IsFloat() {
116                                 op = ppc64.AFMOVD
117                         }
118                         p := s.Prog(op)
119                         p.From.Type = rt
120                         p.From.Reg = x
121                         p.To.Type = rt
122                         p.To.Reg = y
123                 }
124
125         case ssa.OpPPC64LoweredMuluhilo:
126                 // MULHDU       Rarg1, Rarg0, Reg0
127                 // MULLD        Rarg1, Rarg0, Reg1
128                 r0 := v.Args[0].Reg()
129                 r1 := v.Args[1].Reg()
130                 p := s.Prog(ppc64.AMULHDU)
131                 p.From.Type = obj.TYPE_REG
132                 p.From.Reg = r1
133                 p.Reg = r0
134                 p.To.Type = obj.TYPE_REG
135                 p.To.Reg = v.Reg0()
136                 p1 := s.Prog(ppc64.AMULLD)
137                 p1.From.Type = obj.TYPE_REG
138                 p1.From.Reg = r1
139                 p1.Reg = r0
140                 p1.To.Type = obj.TYPE_REG
141                 p1.To.Reg = v.Reg1()
142
143         case ssa.OpPPC64LoweredAdd64Carry:
144                 // ADDC         Rarg2, -1, Rtmp
145                 // ADDE         Rarg1, Rarg0, Reg0
146                 // ADDZE        Rzero, Reg1
147                 r0 := v.Args[0].Reg()
148                 r1 := v.Args[1].Reg()
149                 r2 := v.Args[2].Reg()
150                 p := s.Prog(ppc64.AADDC)
151                 p.From.Type = obj.TYPE_CONST
152                 p.From.Offset = -1
153                 p.Reg = r2
154                 p.To.Type = obj.TYPE_REG
155                 p.To.Reg = ppc64.REGTMP
156                 p1 := s.Prog(ppc64.AADDE)
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.Reg = r0
160                 p1.To.Type = obj.TYPE_REG
161                 p1.To.Reg = v.Reg0()
162                 p2 := s.Prog(ppc64.AADDZE)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGZERO
165                 p2.To.Type = obj.TYPE_REG
166                 p2.To.Reg = v.Reg1()
167
168         case ssa.OpPPC64LoweredAtomicAnd8,
169                 ssa.OpPPC64LoweredAtomicOr8:
170                 // LWSYNC
171                 // LBAR         (Rarg0), Rtmp
172                 // AND/OR       Rarg1, Rtmp
173                 // STBCCC       Rtmp, (Rarg0)
174                 // BNE          -3(PC)
175                 r0 := v.Args[0].Reg()
176                 r1 := v.Args[1].Reg()
177                 // LWSYNC - Assuming shared data not write-through-required nor
178                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179                 plwsync := s.Prog(ppc64.ALWSYNC)
180                 plwsync.To.Type = obj.TYPE_NONE
181                 p := s.Prog(ppc64.ALBAR)
182                 p.From.Type = obj.TYPE_MEM
183                 p.From.Reg = r0
184                 p.To.Type = obj.TYPE_REG
185                 p.To.Reg = ppc64.REGTMP
186                 p1 := s.Prog(v.Op.Asm())
187                 p1.From.Type = obj.TYPE_REG
188                 p1.From.Reg = r1
189                 p1.To.Type = obj.TYPE_REG
190                 p1.To.Reg = ppc64.REGTMP
191                 p2 := s.Prog(ppc64.ASTBCCC)
192                 p2.From.Type = obj.TYPE_REG
193                 p2.From.Reg = ppc64.REGTMP
194                 p2.To.Type = obj.TYPE_MEM
195                 p2.To.Reg = r0
196                 p2.RegTo2 = ppc64.REGTMP
197                 p3 := s.Prog(ppc64.ABNE)
198                 p3.To.Type = obj.TYPE_BRANCH
199                 gc.Patch(p3, p)
200
201         case ssa.OpPPC64LoweredAtomicAdd32,
202                 ssa.OpPPC64LoweredAtomicAdd64:
203                 // LWSYNC
204                 // LDAR/LWAR    (Rarg0), Rout
205                 // ADD          Rarg1, Rout
206                 // STDCCC/STWCCC Rout, (Rarg0)
207                 // BNE         -3(PC)
208                 // MOVW         Rout,Rout (if Add32)
209                 ld := ppc64.ALDAR
210                 st := ppc64.ASTDCCC
211                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
212                         ld = ppc64.ALWAR
213                         st = ppc64.ASTWCCC
214                 }
215                 r0 := v.Args[0].Reg()
216                 r1 := v.Args[1].Reg()
217                 out := v.Reg0()
218                 // LWSYNC - Assuming shared data not write-through-required nor
219                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220                 plwsync := s.Prog(ppc64.ALWSYNC)
221                 plwsync.To.Type = obj.TYPE_NONE
222                 // LDAR or LWAR
223                 p := s.Prog(ld)
224                 p.From.Type = obj.TYPE_MEM
225                 p.From.Reg = r0
226                 p.To.Type = obj.TYPE_REG
227                 p.To.Reg = out
228                 // ADD reg1,out
229                 p1 := s.Prog(ppc64.AADD)
230                 p1.From.Type = obj.TYPE_REG
231                 p1.From.Reg = r1
232                 p1.To.Reg = out
233                 p1.To.Type = obj.TYPE_REG
234                 // STDCCC or STWCCC
235                 p3 := s.Prog(st)
236                 p3.From.Type = obj.TYPE_REG
237                 p3.From.Reg = out
238                 p3.To.Type = obj.TYPE_MEM
239                 p3.To.Reg = r0
240                 // BNE retry
241                 p4 := s.Prog(ppc64.ABNE)
242                 p4.To.Type = obj.TYPE_BRANCH
243                 gc.Patch(p4, p)
244
245                 // Ensure a 32 bit result
246                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247                         p5 := s.Prog(ppc64.AMOVWZ)
248                         p5.To.Type = obj.TYPE_REG
249                         p5.To.Reg = out
250                         p5.From.Type = obj.TYPE_REG
251                         p5.From.Reg = out
252                 }
253
254         case ssa.OpPPC64LoweredAtomicExchange32,
255                 ssa.OpPPC64LoweredAtomicExchange64:
256                 // LWSYNC
257                 // LDAR/LWAR    (Rarg0), Rout
258                 // STDCCC/STWCCC Rout, (Rarg0)
259                 // BNE         -2(PC)
260                 // ISYNC
261                 ld := ppc64.ALDAR
262                 st := ppc64.ASTDCCC
263                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
264                         ld = ppc64.ALWAR
265                         st = ppc64.ASTWCCC
266                 }
267                 r0 := v.Args[0].Reg()
268                 r1 := v.Args[1].Reg()
269                 out := v.Reg0()
270                 // LWSYNC - Assuming shared data not write-through-required nor
271                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272                 plwsync := s.Prog(ppc64.ALWSYNC)
273                 plwsync.To.Type = obj.TYPE_NONE
274                 // LDAR or LWAR
275                 p := s.Prog(ld)
276                 p.From.Type = obj.TYPE_MEM
277                 p.From.Reg = r0
278                 p.To.Type = obj.TYPE_REG
279                 p.To.Reg = out
280                 // STDCCC or STWCCC
281                 p1 := s.Prog(st)
282                 p1.From.Type = obj.TYPE_REG
283                 p1.From.Reg = r1
284                 p1.To.Type = obj.TYPE_MEM
285                 p1.To.Reg = r0
286                 // BNE retry
287                 p2 := s.Prog(ppc64.ABNE)
288                 p2.To.Type = obj.TYPE_BRANCH
289                 gc.Patch(p2, p)
290                 // ISYNC
291                 pisync := s.Prog(ppc64.AISYNC)
292                 pisync.To.Type = obj.TYPE_NONE
293
294         case ssa.OpPPC64LoweredAtomicLoad8,
295                 ssa.OpPPC64LoweredAtomicLoad32,
296                 ssa.OpPPC64LoweredAtomicLoad64,
297                 ssa.OpPPC64LoweredAtomicLoadPtr:
298                 // SYNC
299                 // MOVB/MOVD/MOVW (Rarg0), Rout
300                 // CMP Rout,Rout
301                 // BNE 1(PC)
302                 // ISYNC
303                 ld := ppc64.AMOVD
304                 cmp := ppc64.ACMP
305                 switch v.Op {
306                 case ssa.OpPPC64LoweredAtomicLoad8:
307                         ld = ppc64.AMOVBZ
308                 case ssa.OpPPC64LoweredAtomicLoad32:
309                         ld = ppc64.AMOVWZ
310                         cmp = ppc64.ACMPW
311                 }
312                 arg0 := v.Args[0].Reg()
313                 out := v.Reg0()
314                 // SYNC when AuxInt == 1; otherwise, load-acquire
315                 if v.AuxInt == 1 {
316                         psync := s.Prog(ppc64.ASYNC)
317                         psync.To.Type = obj.TYPE_NONE
318                 }
319                 // Load
320                 p := s.Prog(ld)
321                 p.From.Type = obj.TYPE_MEM
322                 p.From.Reg = arg0
323                 p.To.Type = obj.TYPE_REG
324                 p.To.Reg = out
325                 // CMP
326                 p1 := s.Prog(cmp)
327                 p1.From.Type = obj.TYPE_REG
328                 p1.From.Reg = out
329                 p1.To.Type = obj.TYPE_REG
330                 p1.To.Reg = out
331                 // BNE
332                 p2 := s.Prog(ppc64.ABNE)
333                 p2.To.Type = obj.TYPE_BRANCH
334                 // ISYNC
335                 pisync := s.Prog(ppc64.AISYNC)
336                 pisync.To.Type = obj.TYPE_NONE
337                 gc.Patch(p2, pisync)
338
339         case ssa.OpPPC64LoweredAtomicStore8,
340                 ssa.OpPPC64LoweredAtomicStore32,
341                 ssa.OpPPC64LoweredAtomicStore64:
342                 // SYNC or LWSYNC
343                 // MOVB/MOVW/MOVD arg1,(arg0)
344                 st := ppc64.AMOVD
345                 switch v.Op {
346                 case ssa.OpPPC64LoweredAtomicStore8:
347                         st = ppc64.AMOVB
348                 case ssa.OpPPC64LoweredAtomicStore32:
349                         st = ppc64.AMOVW
350                 }
351                 arg0 := v.Args[0].Reg()
352                 arg1 := v.Args[1].Reg()
353                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
354                 // SYNC
355                 syncOp := ppc64.ASYNC
356                 if v.AuxInt == 0 {
357                         syncOp = ppc64.ALWSYNC
358                 }
359                 psync := s.Prog(syncOp)
360                 psync.To.Type = obj.TYPE_NONE
361                 // Store
362                 p := s.Prog(st)
363                 p.To.Type = obj.TYPE_MEM
364                 p.To.Reg = arg0
365                 p.From.Type = obj.TYPE_REG
366                 p.From.Reg = arg1
367
368         case ssa.OpPPC64LoweredAtomicCas64,
369                 ssa.OpPPC64LoweredAtomicCas32:
370                 // LWSYNC
371                 // loop:
372                 // LDAR        (Rarg0), MutexHint, Rtmp
373                 // CMP         Rarg1, Rtmp
374                 // BNE         fail
375                 // STDCCC      Rarg2, (Rarg0)
376                 // BNE         loop
377                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
378                 // MOVD        $1, Rout
379                 // BR          end
380                 // fail:
381                 // MOVD        $0, Rout
382                 // end:
383                 ld := ppc64.ALDAR
384                 st := ppc64.ASTDCCC
385                 cmp := ppc64.ACMP
386                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
387                         ld = ppc64.ALWAR
388                         st = ppc64.ASTWCCC
389                         cmp = ppc64.ACMPW
390                 }
391                 r0 := v.Args[0].Reg()
392                 r1 := v.Args[1].Reg()
393                 r2 := v.Args[2].Reg()
394                 out := v.Reg0()
395                 // LWSYNC - Assuming shared data not write-through-required nor
396                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397                 plwsync1 := s.Prog(ppc64.ALWSYNC)
398                 plwsync1.To.Type = obj.TYPE_NONE
399                 // LDAR or LWAR
400                 p := s.Prog(ld)
401                 p.From.Type = obj.TYPE_MEM
402                 p.From.Reg = r0
403                 p.To.Type = obj.TYPE_REG
404                 p.To.Reg = ppc64.REGTMP
405                 // If it is a Compare-and-Swap-Release operation, set the EH field with
406                 // the release hint.
407                 if v.AuxInt == 0 {
408                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
409                 }
410                 // CMP reg1,reg2
411                 p1 := s.Prog(cmp)
412                 p1.From.Type = obj.TYPE_REG
413                 p1.From.Reg = r1
414                 p1.To.Reg = ppc64.REGTMP
415                 p1.To.Type = obj.TYPE_REG
416                 // BNE cas_fail
417                 p2 := s.Prog(ppc64.ABNE)
418                 p2.To.Type = obj.TYPE_BRANCH
419                 // STDCCC or STWCCC
420                 p3 := s.Prog(st)
421                 p3.From.Type = obj.TYPE_REG
422                 p3.From.Reg = r2
423                 p3.To.Type = obj.TYPE_MEM
424                 p3.To.Reg = r0
425                 // BNE retry
426                 p4 := s.Prog(ppc64.ABNE)
427                 p4.To.Type = obj.TYPE_BRANCH
428                 gc.Patch(p4, p)
429                 // LWSYNC - Assuming shared data not write-through-required nor
430                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431                 // If the operation is a CAS-Release, then synchronization is not necessary.
432                 if v.AuxInt != 0 {
433                         plwsync2 := s.Prog(ppc64.ALWSYNC)
434                         plwsync2.To.Type = obj.TYPE_NONE
435                 }
436                 // return true
437                 p5 := s.Prog(ppc64.AMOVD)
438                 p5.From.Type = obj.TYPE_CONST
439                 p5.From.Offset = 1
440                 p5.To.Type = obj.TYPE_REG
441                 p5.To.Reg = out
442                 // BR done
443                 p6 := s.Prog(obj.AJMP)
444                 p6.To.Type = obj.TYPE_BRANCH
445                 // return false
446                 p7 := s.Prog(ppc64.AMOVD)
447                 p7.From.Type = obj.TYPE_CONST
448                 p7.From.Offset = 0
449                 p7.To.Type = obj.TYPE_REG
450                 p7.To.Reg = out
451                 gc.Patch(p2, p7)
452                 // done (label)
453                 p8 := s.Prog(obj.ANOP)
454                 gc.Patch(p6, p8)
455
456         case ssa.OpPPC64LoweredGetClosurePtr:
457                 // Closure pointer is R11 (already)
458                 gc.CheckLoweredGetClosurePtr(v)
459
460         case ssa.OpPPC64LoweredGetCallerSP:
461                 // caller's SP is FixedFrameSize below the address of the first arg
462                 p := s.Prog(ppc64.AMOVD)
463                 p.From.Type = obj.TYPE_ADDR
464                 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465                 p.From.Name = obj.NAME_PARAM
466                 p.To.Type = obj.TYPE_REG
467                 p.To.Reg = v.Reg()
468
469         case ssa.OpPPC64LoweredGetCallerPC:
470                 p := s.Prog(obj.AGETCALLERPC)
471                 p.To.Type = obj.TYPE_REG
472                 p.To.Reg = v.Reg()
473
474         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475                 // input is already rounded
476
477         case ssa.OpLoadReg:
478                 loadOp := loadByType(v.Type)
479                 p := s.Prog(loadOp)
480                 gc.AddrAuto(&p.From, v.Args[0])
481                 p.To.Type = obj.TYPE_REG
482                 p.To.Reg = v.Reg()
483
484         case ssa.OpStoreReg:
485                 storeOp := storeByType(v.Type)
486                 p := s.Prog(storeOp)
487                 p.From.Type = obj.TYPE_REG
488                 p.From.Reg = v.Args[0].Reg()
489                 gc.AddrAuto(&p.To, v)
490
491         case ssa.OpPPC64DIVD:
492                 // For now,
493                 //
494                 // cmp arg1, -1
495                 // be  ahead
496                 // v = arg0 / arg1
497                 // b over
498                 // ahead: v = - arg0
499                 // over: nop
500                 r := v.Reg()
501                 r0 := v.Args[0].Reg()
502                 r1 := v.Args[1].Reg()
503
504                 p := s.Prog(ppc64.ACMP)
505                 p.From.Type = obj.TYPE_REG
506                 p.From.Reg = r1
507                 p.To.Type = obj.TYPE_CONST
508                 p.To.Offset = -1
509
510                 pbahead := s.Prog(ppc64.ABEQ)
511                 pbahead.To.Type = obj.TYPE_BRANCH
512
513                 p = s.Prog(v.Op.Asm())
514                 p.From.Type = obj.TYPE_REG
515                 p.From.Reg = r1
516                 p.Reg = r0
517                 p.To.Type = obj.TYPE_REG
518                 p.To.Reg = r
519
520                 pbover := s.Prog(obj.AJMP)
521                 pbover.To.Type = obj.TYPE_BRANCH
522
523                 p = s.Prog(ppc64.ANEG)
524                 p.To.Type = obj.TYPE_REG
525                 p.To.Reg = r
526                 p.From.Type = obj.TYPE_REG
527                 p.From.Reg = r0
528                 gc.Patch(pbahead, p)
529
530                 p = s.Prog(obj.ANOP)
531                 gc.Patch(pbover, p)
532
533         case ssa.OpPPC64DIVW:
534                 // word-width version of above
535                 r := v.Reg()
536                 r0 := v.Args[0].Reg()
537                 r1 := v.Args[1].Reg()
538
539                 p := s.Prog(ppc64.ACMPW)
540                 p.From.Type = obj.TYPE_REG
541                 p.From.Reg = r1
542                 p.To.Type = obj.TYPE_CONST
543                 p.To.Offset = -1
544
545                 pbahead := s.Prog(ppc64.ABEQ)
546                 pbahead.To.Type = obj.TYPE_BRANCH
547
548                 p = s.Prog(v.Op.Asm())
549                 p.From.Type = obj.TYPE_REG
550                 p.From.Reg = r1
551                 p.Reg = r0
552                 p.To.Type = obj.TYPE_REG
553                 p.To.Reg = r
554
555                 pbover := s.Prog(obj.AJMP)
556                 pbover.To.Type = obj.TYPE_BRANCH
557
558                 p = s.Prog(ppc64.ANEG)
559                 p.To.Type = obj.TYPE_REG
560                 p.To.Reg = r
561                 p.From.Type = obj.TYPE_REG
562                 p.From.Reg = r0
563                 gc.Patch(pbahead, p)
564
565                 p = s.Prog(obj.ANOP)
566                 gc.Patch(pbover, p)
567
568         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
575                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
576                 r := v.Reg()
577                 r1 := v.Args[0].Reg()
578                 r2 := v.Args[1].Reg()
579                 p := s.Prog(v.Op.Asm())
580                 p.From.Type = obj.TYPE_REG
581                 p.From.Reg = r2
582                 p.Reg = r1
583                 p.To.Type = obj.TYPE_REG
584                 p.To.Reg = r
585
586         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
587                 r1 := v.Args[0].Reg()
588                 r2 := v.Args[1].Reg()
589                 p := s.Prog(v.Op.Asm())
590                 p.From.Type = obj.TYPE_REG
591                 p.From.Reg = r2
592                 p.Reg = r1
593                 p.To.Type = obj.TYPE_REG
594                 p.To.Reg = ppc64.REGTMP // result is not needed
595
596         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
597                 p := s.Prog(v.Op.Asm())
598                 p.From.Type = obj.TYPE_CONST
599                 p.From.Offset = v.AuxInt
600                 p.Reg = v.Args[0].Reg()
601                 p.To.Type = obj.TYPE_REG
602                 p.To.Reg = v.Reg()
603
604         case ssa.OpPPC64MADDLD:
605                 r := v.Reg()
606                 r1 := v.Args[0].Reg()
607                 r2 := v.Args[1].Reg()
608                 r3 := v.Args[2].Reg()
609                 // r = r1*r2 ± r3
610                 p := s.Prog(v.Op.Asm())
611                 p.From.Type = obj.TYPE_REG
612                 p.From.Reg = r1
613                 p.Reg = r2
614                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
615                 p.To.Type = obj.TYPE_REG
616                 p.To.Reg = r
617
618         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
619                 r := v.Reg()
620                 r1 := v.Args[0].Reg()
621                 r2 := v.Args[1].Reg()
622                 r3 := v.Args[2].Reg()
623                 // r = r1*r2 ± r3
624                 p := s.Prog(v.Op.Asm())
625                 p.From.Type = obj.TYPE_REG
626                 p.From.Reg = r1
627                 p.Reg = r3
628                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
629                 p.To.Type = obj.TYPE_REG
630                 p.To.Reg = r
631
632         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
633                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
634                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
635                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
636                 r := v.Reg()
637                 p := s.Prog(v.Op.Asm())
638                 p.To.Type = obj.TYPE_REG
639                 p.To.Reg = r
640                 p.From.Type = obj.TYPE_REG
641                 p.From.Reg = v.Args[0].Reg()
642
643         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
644                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
645                 p := s.Prog(v.Op.Asm())
646                 p.Reg = v.Args[0].Reg()
647                 p.From.Type = obj.TYPE_CONST
648                 p.From.Offset = v.AuxInt
649                 p.To.Type = obj.TYPE_REG
650                 p.To.Reg = v.Reg()
651
652         case ssa.OpPPC64SUBFCconst:
653                 p := s.Prog(v.Op.Asm())
654                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
655                 p.From.Type = obj.TYPE_REG
656                 p.From.Reg = v.Args[0].Reg()
657                 p.To.Type = obj.TYPE_REG
658                 p.To.Reg = v.Reg()
659
660         case ssa.OpPPC64ANDCCconst:
661                 p := s.Prog(v.Op.Asm())
662                 p.Reg = v.Args[0].Reg()
663                 p.From.Type = obj.TYPE_CONST
664                 p.From.Offset = v.AuxInt
665                 p.To.Type = obj.TYPE_REG
666                 p.To.Reg = ppc64.REGTMP // discard result
667
668         case ssa.OpPPC64MOVDaddr:
669                 switch v.Aux.(type) {
670                 default:
671                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
672                 case nil:
673                         // If aux offset and aux int are both 0, and the same
674                         // input and output regs are used, no instruction
675                         // needs to be generated, since it would just be
676                         // addi rx, rx, 0.
677                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
678                                 p := s.Prog(ppc64.AMOVD)
679                                 p.From.Type = obj.TYPE_ADDR
680                                 p.From.Reg = v.Args[0].Reg()
681                                 p.From.Offset = v.AuxInt
682                                 p.To.Type = obj.TYPE_REG
683                                 p.To.Reg = v.Reg()
684                         }
685
686                 case *obj.LSym, *gc.Node:
687                         p := s.Prog(ppc64.AMOVD)
688                         p.From.Type = obj.TYPE_ADDR
689                         p.From.Reg = v.Args[0].Reg()
690                         p.To.Type = obj.TYPE_REG
691                         p.To.Reg = v.Reg()
692                         gc.AddAux(&p.From, v)
693
694                 }
695
696         case ssa.OpPPC64MOVDconst:
697                 p := s.Prog(v.Op.Asm())
698                 p.From.Type = obj.TYPE_CONST
699                 p.From.Offset = v.AuxInt
700                 p.To.Type = obj.TYPE_REG
701                 p.To.Reg = v.Reg()
702
703         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
704                 p := s.Prog(v.Op.Asm())
705                 p.From.Type = obj.TYPE_FCONST
706                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
707                 p.To.Type = obj.TYPE_REG
708                 p.To.Reg = v.Reg()
709
710         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
711                 p := s.Prog(v.Op.Asm())
712                 p.From.Type = obj.TYPE_REG
713                 p.From.Reg = v.Args[0].Reg()
714                 p.To.Type = obj.TYPE_REG
715                 p.To.Reg = v.Args[1].Reg()
716
717         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
718                 p := s.Prog(v.Op.Asm())
719                 p.From.Type = obj.TYPE_REG
720                 p.From.Reg = v.Args[0].Reg()
721                 p.To.Type = obj.TYPE_CONST
722                 p.To.Offset = v.AuxInt
723
724         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
725                 // Shift in register to required size
726                 p := s.Prog(v.Op.Asm())
727                 p.From.Type = obj.TYPE_REG
728                 p.From.Reg = v.Args[0].Reg()
729                 p.To.Reg = v.Reg()
730                 p.To.Type = obj.TYPE_REG
731
732         case ssa.OpPPC64MOVDload:
733
734                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
735                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
736                 // the offset is not known until link time. If the load of a go.string uses relocation for the
737                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
738                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
739                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
740                 // go.string types because other types will have proper alignment.
741
742                 gostring := false
743                 switch n := v.Aux.(type) {
744                 case *obj.LSym:
745                         gostring = strings.HasPrefix(n.Name, "go.string.")
746                 }
747                 if gostring {
748                         // Generate full addr of the go.string const
749                         // including AuxInt
750                         p := s.Prog(ppc64.AMOVD)
751                         p.From.Type = obj.TYPE_ADDR
752                         p.From.Reg = v.Args[0].Reg()
753                         gc.AddAux(&p.From, v)
754                         p.To.Type = obj.TYPE_REG
755                         p.To.Reg = v.Reg()
756                         // Load go.string using 0 offset
757                         p = s.Prog(v.Op.Asm())
758                         p.From.Type = obj.TYPE_MEM
759                         p.From.Reg = v.Reg()
760                         p.To.Type = obj.TYPE_REG
761                         p.To.Reg = v.Reg()
762                         break
763                 }
764                 // Not a go.string, generate a normal load
765                 fallthrough
766
767         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
768                 p := s.Prog(v.Op.Asm())
769                 p.From.Type = obj.TYPE_MEM
770                 p.From.Reg = v.Args[0].Reg()
771                 gc.AddAux(&p.From, v)
772                 p.To.Type = obj.TYPE_REG
773                 p.To.Reg = v.Reg()
774
775         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
776                 p := s.Prog(v.Op.Asm())
777                 p.From.Type = obj.TYPE_MEM
778                 p.From.Reg = v.Args[0].Reg()
779                 p.To.Type = obj.TYPE_REG
780                 p.To.Reg = v.Reg()
781
782         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
783                 p := s.Prog(v.Op.Asm())
784                 p.To.Type = obj.TYPE_MEM
785                 p.To.Reg = v.Args[0].Reg()
786                 p.From.Type = obj.TYPE_REG
787                 p.From.Reg = v.Args[1].Reg()
788
789         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
790                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
791                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
792                 p := s.Prog(v.Op.Asm())
793                 p.From.Type = obj.TYPE_MEM
794                 p.From.Reg = v.Args[0].Reg()
795                 p.From.Index = v.Args[1].Reg()
796                 p.To.Type = obj.TYPE_REG
797                 p.To.Reg = v.Reg()
798
799         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
800                 p := s.Prog(v.Op.Asm())
801                 p.From.Type = obj.TYPE_REG
802                 p.From.Reg = ppc64.REGZERO
803                 p.To.Type = obj.TYPE_MEM
804                 p.To.Reg = v.Args[0].Reg()
805                 gc.AddAux(&p.To, v)
806
807         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
808                 p := s.Prog(v.Op.Asm())
809                 p.From.Type = obj.TYPE_REG
810                 p.From.Reg = v.Args[1].Reg()
811                 p.To.Type = obj.TYPE_MEM
812                 p.To.Reg = v.Args[0].Reg()
813                 gc.AddAux(&p.To, v)
814
815         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
816                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
817                 ssa.OpPPC64MOVHBRstoreidx:
818                 p := s.Prog(v.Op.Asm())
819                 p.From.Type = obj.TYPE_REG
820                 p.From.Reg = v.Args[2].Reg()
821                 p.To.Index = v.Args[1].Reg()
822                 p.To.Type = obj.TYPE_MEM
823                 p.To.Reg = v.Args[0].Reg()
824
825         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
826                 // ISEL, ISELB
827                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
828                 // ISEL only accepts 0, 1, 2 condition values but the others can be
829                 // achieved by swapping operand order.
830                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
831                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
832                 // ISELB is used when a boolean result is needed, returning 0 or 1
833                 p := s.Prog(ppc64.AISEL)
834                 p.To.Type = obj.TYPE_REG
835                 p.To.Reg = v.Reg()
836                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
837                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
838                 if v.Op == ssa.OpPPC64ISEL {
839                         r.Reg = v.Args[1].Reg()
840                 }
841                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
842                 if v.AuxInt > 3 {
843                         p.Reg = r.Reg
844                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
845                 } else {
846                         p.Reg = v.Args[0].Reg()
847                         p.SetFrom3(r)
848                 }
849                 p.From.Type = obj.TYPE_CONST
850                 p.From.Offset = v.AuxInt & 3
851
852         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
853                 // The LoweredQuad code generation
854                 // generates STXV instructions on
855                 // power9. The Short variation is used
856                 // if no loop is generated.
857
858                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
859
860                 // Set up loop counter in CTR, used by BC
861                 // XXLXOR clears VS32
862                 //       XXLXOR VS32,VS32,VS32
863                 //       MOVD len/64,REG_TMP
864                 //       MOVD REG_TMP,CTR
865                 //       loop:
866                 //       STXV VS32,0(R20)
867                 //       STXV VS32,16(R20)
868                 //       STXV VS32,32(R20)
869                 //       STXV VS32,48(R20)
870                 //       ADD  $64,R20
871                 //       BC   16, 0, loop
872
873                 // Number of loop iterations; each iteration zeroes 64 bytes
874                 ctr := v.AuxInt / 64
875
876                 // Remainder bytes
877                 rem := v.AuxInt % 64
878
879                 // Only generate a loop if there is more
880                 // than 1 iteration.
881                 if ctr > 1 {
882                         // Set up VS32 (V0) to hold 0s
883                         p := s.Prog(ppc64.AXXLXOR)
884                         p.From.Type = obj.TYPE_REG
885                         p.From.Reg = ppc64.REG_VS32
886                         p.To.Type = obj.TYPE_REG
887                         p.To.Reg = ppc64.REG_VS32
888                         p.Reg = ppc64.REG_VS32
889
890                         // Set up CTR loop counter
891                         p = s.Prog(ppc64.AMOVD)
892                         p.From.Type = obj.TYPE_CONST
893                         p.From.Offset = ctr
894                         p.To.Type = obj.TYPE_REG
895                         p.To.Reg = ppc64.REGTMP
896
897                         p = s.Prog(ppc64.AMOVD)
898                         p.From.Type = obj.TYPE_REG
899                         p.From.Reg = ppc64.REGTMP
900                         p.To.Type = obj.TYPE_REG
901                         p.To.Reg = ppc64.REG_CTR
902
903                         // Don't generate padding for
904                         // loops with few iterations.
905                         if ctr > 3 {
906                                 p = s.Prog(obj.APCALIGN)
907                                 p.From.Type = obj.TYPE_CONST
908                                 p.From.Offset = 16
909                         }
910
911                         // generate 4 STXVs to zero 64 bytes
912                         var top *obj.Prog
913
914                         p = s.Prog(ppc64.ASTXV)
915                         p.From.Type = obj.TYPE_REG
916                         p.From.Reg = ppc64.REG_VS32
917                         p.To.Type = obj.TYPE_MEM
918                         p.To.Reg = v.Args[0].Reg()
919
920                         //  Save the top of loop
921                         if top == nil {
922                                 top = p
923                         }
924                         p = s.Prog(ppc64.ASTXV)
925                         p.From.Type = obj.TYPE_REG
926                         p.From.Reg = ppc64.REG_VS32
927                         p.To.Type = obj.TYPE_MEM
928                         p.To.Reg = v.Args[0].Reg()
929                         p.To.Offset = 16
930
931                         p = s.Prog(ppc64.ASTXV)
932                         p.From.Type = obj.TYPE_REG
933                         p.From.Reg = ppc64.REG_VS32
934                         p.To.Type = obj.TYPE_MEM
935                         p.To.Reg = v.Args[0].Reg()
936                         p.To.Offset = 32
937
938                         p = s.Prog(ppc64.ASTXV)
939                         p.From.Type = obj.TYPE_REG
940                         p.From.Reg = ppc64.REG_VS32
941                         p.To.Type = obj.TYPE_MEM
942                         p.To.Reg = v.Args[0].Reg()
943                         p.To.Offset = 48
944
945                         // Increment address for the
946                         // 64 bytes just zeroed.
947                         p = s.Prog(ppc64.AADD)
948                         p.Reg = v.Args[0].Reg()
949                         p.From.Type = obj.TYPE_CONST
950                         p.From.Offset = 64
951                         p.To.Type = obj.TYPE_REG
952                         p.To.Reg = v.Args[0].Reg()
953
954                         // Branch back to top of loop
955                         // based on CTR
956                         // BC with BO_BCTR generates bdnz
957                         p = s.Prog(ppc64.ABC)
958                         p.From.Type = obj.TYPE_CONST
959                         p.From.Offset = ppc64.BO_BCTR
960                         p.Reg = ppc64.REG_R0
961                         p.To.Type = obj.TYPE_BRANCH
962                         gc.Patch(p, top)
963                 }
964                 // When ctr == 1 the loop was not generated but
965                 // there are at least 64 bytes to clear, so add
966                 // that to the remainder to generate the code
967                 // to clear those doublewords
968                 if ctr == 1 {
969                         rem += 64
970                 }
971
972                 // Clear the remainder starting at offset zero
973                 offset := int64(0)
974
975                 if rem >= 16 && ctr <= 1 {
976                         // If the XXLXOR hasn't already been
977                         // generated, do it here to initialize
978                         // VS32 (V0) to 0.
979                         p := s.Prog(ppc64.AXXLXOR)
980                         p.From.Type = obj.TYPE_REG
981                         p.From.Reg = ppc64.REG_VS32
982                         p.To.Type = obj.TYPE_REG
983                         p.To.Reg = ppc64.REG_VS32
984                         p.Reg = ppc64.REG_VS32
985                 }
986                 // Generate pairs of STXV, each pair clearing
987                 // 32 bytes, while at least 32 bytes remain.
988                 for rem >= 32 {
989                         p := s.Prog(ppc64.ASTXV)
990                         p.From.Type = obj.TYPE_REG
991                         p.From.Reg = ppc64.REG_VS32
992                         p.To.Type = obj.TYPE_MEM
993                         p.To.Reg = v.Args[0].Reg()
994                         p.To.Offset = offset
995
996                         p = s.Prog(ppc64.ASTXV)
997                         p.From.Type = obj.TYPE_REG
998                         p.From.Reg = ppc64.REG_VS32
999                         p.To.Type = obj.TYPE_MEM
1000                         p.To.Reg = v.Args[0].Reg()
1001                         p.To.Offset = offset + 16
1002                         offset += 32
1003                         rem -= 32
1004                 }
1005                 // Generate 16 bytes
1006                 if rem >= 16 {
1007                         p := s.Prog(ppc64.ASTXV)
1008                         p.From.Type = obj.TYPE_REG
1009                         p.From.Reg = ppc64.REG_VS32
1010                         p.To.Type = obj.TYPE_MEM
1011                         p.To.Reg = v.Args[0].Reg()
1012                         p.To.Offset = offset
1013                         offset += 16
1014                         rem -= 16
1015                 }
1016
1017                 // first clear as many doublewords as possible
1018                 // then clear remaining sizes as available
1019                 for rem > 0 {
1020                         op, size := ppc64.AMOVB, int64(1)
1021                         switch {
1022                         case rem >= 8:
1023                                 op, size = ppc64.AMOVD, 8
1024                         case rem >= 4:
1025                                 op, size = ppc64.AMOVW, 4
1026                         case rem >= 2:
1027                                 op, size = ppc64.AMOVH, 2
1028                         }
1029                         p := s.Prog(op)
1030                         p.From.Type = obj.TYPE_REG
1031                         p.From.Reg = ppc64.REG_R0
1032                         p.To.Type = obj.TYPE_MEM
1033                         p.To.Reg = v.Args[0].Reg()
1034                         p.To.Offset = offset
1035                         rem -= size
1036                         offset += size
1037                 }
1038
1039         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1040
1041                 // Unaligned data doesn't hurt performance
1042                 // for these instructions on power8.
1043
1044                 // For sizes >= 64 generate a loop as follows:
1045
1046                 // Set up loop counter in CTR, used by BC
1047                 //       XXLXOR VS32,VS32,VS32
1048                 //       MOVD len/32,REG_TMP
1049                 //       MOVD REG_TMP,CTR
1050                 //       MOVD $16,REG_TMP
1051                 //       loop:
1052                 //       STXVD2X VS32,(R0)(R20)
1053                 //       STXVD2X VS32,(R31)(R20)
1054                 //       ADD  $32,R20
1055                 //       BC   16, 0, loop
1056                 //
1057                 // any remainder is done as described below
1058
1059                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1060                 // then handle the remainder
1061                 //      MOVD R0,(R20)
1062                 //      MOVD R0,8(R20)
1063                 // .... etc.
1064                 //
1065                 // the remainder bytes are cleared using one or more
1066                 // of the following instructions with the appropriate
1067                 // offsets depending which instructions are needed
1068                 //
1069                 //      MOVW R0,n1(R20) 4 bytes
1070                 //      MOVH R0,n2(R20) 2 bytes
1071                 //      MOVB R0,n3(R20) 1 byte
1072                 //
1073                 // 7 bytes: MOVW, MOVH, MOVB
1074                 // 6 bytes: MOVW, MOVH
1075                 // 5 bytes: MOVW, MOVB
1076                 // 3 bytes: MOVH, MOVB
1077
1078                 // each loop iteration does 32 bytes
1079                 ctr := v.AuxInt / 32
1080
1081                 // remainder bytes
1082                 rem := v.AuxInt % 32
1083
1084                 // only generate a loop if there is more
1085                 // than 1 iteration.
1086                 if ctr > 1 {
1087                         // Set up VS32 (V0) to hold 0s
1088                         p := s.Prog(ppc64.AXXLXOR)
1089                         p.From.Type = obj.TYPE_REG
1090                         p.From.Reg = ppc64.REG_VS32
1091                         p.To.Type = obj.TYPE_REG
1092                         p.To.Reg = ppc64.REG_VS32
1093                         p.Reg = ppc64.REG_VS32
1094
1095                         // Set up CTR loop counter
1096                         p = s.Prog(ppc64.AMOVD)
1097                         p.From.Type = obj.TYPE_CONST
1098                         p.From.Offset = ctr
1099                         p.To.Type = obj.TYPE_REG
1100                         p.To.Reg = ppc64.REGTMP
1101
1102                         p = s.Prog(ppc64.AMOVD)
1103                         p.From.Type = obj.TYPE_REG
1104                         p.From.Reg = ppc64.REGTMP
1105                         p.To.Type = obj.TYPE_REG
1106                         p.To.Reg = ppc64.REG_CTR
1107
1108                         // Set up R31 to hold index value 16
1109                         p = s.Prog(ppc64.AMOVD)
1110                         p.From.Type = obj.TYPE_CONST
1111                         p.From.Offset = 16
1112                         p.To.Type = obj.TYPE_REG
1113                         p.To.Reg = ppc64.REGTMP
1114
1115                         // Don't add padding for alignment
1116                         // with few loop iterations.
1117                         if ctr > 3 {
1118                                 p = s.Prog(obj.APCALIGN)
1119                                 p.From.Type = obj.TYPE_CONST
1120                                 p.From.Offset = 16
1121                         }
1122
1123                         // generate 2 STXVD2Xs to store 32 bytes (16 bytes each)
1124                         // when this is a loop then the top must be saved
1125                         var top *obj.Prog
1126                         // This is the top of loop
1127
1128                         p = s.Prog(ppc64.ASTXVD2X)
1129                         p.From.Type = obj.TYPE_REG
1130                         p.From.Reg = ppc64.REG_VS32
1131                         p.To.Type = obj.TYPE_MEM
1132                         p.To.Reg = v.Args[0].Reg()
1133                         p.To.Index = ppc64.REGZERO
1134                         // Save the top of loop
1135                         if top == nil {
1136                                 top = p
1137                         }
1138                         p = s.Prog(ppc64.ASTXVD2X)
1139                         p.From.Type = obj.TYPE_REG
1140                         p.From.Reg = ppc64.REG_VS32
1141                         p.To.Type = obj.TYPE_MEM
1142                         p.To.Reg = v.Args[0].Reg()
1143                         p.To.Index = ppc64.REGTMP
1144
1145                         // Increment address for the
1146                         // 4 doublewords just zeroed.
1147                         p = s.Prog(ppc64.AADD)
1148                         p.Reg = v.Args[0].Reg()
1149                         p.From.Type = obj.TYPE_CONST
1150                         p.From.Offset = 32
1151                         p.To.Type = obj.TYPE_REG
1152                         p.To.Reg = v.Args[0].Reg()
1153
1154                         // Branch back to top of loop
1155                         // based on CTR
1156                         // BC with BO_BCTR generates bdnz
1157                         p = s.Prog(ppc64.ABC)
1158                         p.From.Type = obj.TYPE_CONST
1159                         p.From.Offset = ppc64.BO_BCTR
1160                         p.Reg = ppc64.REG_R0
1161                         p.To.Type = obj.TYPE_BRANCH
1162                         gc.Patch(p, top)
1163                 }
1164
1165                 // when ctr == 1 the loop was not generated but
1166                 // there are at least 32 bytes to clear, so add
1167                 // that to the remainder to generate the code
1168                 // to clear those doublewords
1169                 if ctr == 1 {
1170                         rem += 32
1171                 }
1172
1173                 // clear the remainder starting at offset zero
1174                 offset := int64(0)
1175
1176                 // first clear as many doublewords as possible
1177                 // then clear remaining sizes as available
1178                 for rem > 0 {
1179                         op, size := ppc64.AMOVB, int64(1)
1180                         switch {
1181                         case rem >= 8:
1182                                 op, size = ppc64.AMOVD, 8
1183                         case rem >= 4:
1184                                 op, size = ppc64.AMOVW, 4
1185                         case rem >= 2:
1186                                 op, size = ppc64.AMOVH, 2
1187                         }
1188                         p := s.Prog(op)
1189                         p.From.Type = obj.TYPE_REG
1190                         p.From.Reg = ppc64.REG_R0
1191                         p.To.Type = obj.TYPE_MEM
1192                         p.To.Reg = v.Args[0].Reg()
1193                         p.To.Offset = offset
1194                         rem -= size
1195                         offset += size
1196                 }
1197
1198         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1199
1200                 bytesPerLoop := int64(32)
1201                 // This will be used when moving more
1202                 // than 8 bytes.  Moves start with
1203                 // as many 8 byte moves as possible, then
1204                 // 4, 2, or 1 byte(s) as remaining.  This will
1205                 // work and be efficient for power8 or later.
1206                 // If there are 64 or more bytes, then a
1207                 // loop is generated to move 32 bytes and
1208                 // update the src and dst addresses on each
1209                 // iteration. When < 64 bytes, the appropriate
1210                 // number of moves are generated based on the
1211                 // size.
1212                 // When moving >= 64 bytes a loop is used
1213                 //      MOVD len/32,REG_TMP
1214                 //      MOVD REG_TMP,CTR
1215                 //      MOVD $16,REG_TMP
1216                 // top:
1217                 //      LXVD2X (R0)(R21),VS32
1218                 //      LXVD2X (R31)(R21),VS33
1219                 //      ADD $32,R21
1220                 //      STXVD2X VS32,(R0)(R20)
1221                 //      STXVD2X VS33,(R31)(R20)
1222                 //      ADD $32,R20
1223                 //      BC 16,0,top
1224                 // Bytes not moved by this loop are moved
1225                 // with a combination of the following instructions,
1226                 // starting with the largest sizes and generating as
1227                 // many as needed, using the appropriate offset value.
1228                 //      MOVD  n(R21),R31
1229                 //      MOVD  R31,n(R20)
1230                 //      MOVW  n1(R21),R31
1231                 //      MOVW  R31,n1(R20)
1232                 //      MOVH  n2(R21),R31
1233                 //      MOVH  R31,n2(R20)
1234                 //      MOVB  n3(R21),R31
1235                 //      MOVB  R31,n3(R20)
1236
1237                 // Each loop iteration moves 32 bytes
1238                 ctr := v.AuxInt / bytesPerLoop
1239
1240                 // Remainder after the loop
1241                 rem := v.AuxInt % bytesPerLoop
1242
1243                 dstReg := v.Args[0].Reg()
1244                 srcReg := v.Args[1].Reg()
1245
1246                 // The set of registers used here must match the clobbered reg list
1247                 // in PPC64Ops.go.
1248                 offset := int64(0)
1249
1250                 // top of the loop
1251                 var top *obj.Prog
1252                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1253                 if ctr > 1 {
1254                         // Set up the CTR
1255                         p := s.Prog(ppc64.AMOVD)
1256                         p.From.Type = obj.TYPE_CONST
1257                         p.From.Offset = ctr
1258                         p.To.Type = obj.TYPE_REG
1259                         p.To.Reg = ppc64.REGTMP
1260
1261                         p = s.Prog(ppc64.AMOVD)
1262                         p.From.Type = obj.TYPE_REG
1263                         p.From.Reg = ppc64.REGTMP
1264                         p.To.Type = obj.TYPE_REG
1265                         p.To.Reg = ppc64.REG_CTR
1266
1267                         // Use REGTMP as index reg
1268                         p = s.Prog(ppc64.AMOVD)
1269                         p.From.Type = obj.TYPE_CONST
1270                         p.From.Offset = 16
1271                         p.To.Type = obj.TYPE_REG
1272                         p.To.Reg = ppc64.REGTMP
1273
1274                         // Don't add padding for
1275                         // alignment with small iteration
1276                         // counts.
1277                         if ctr > 3 {
1278                                 p = s.Prog(obj.APCALIGN)
1279                                 p.From.Type = obj.TYPE_CONST
1280                                 p.From.Offset = 16
1281                         }
1282
1283                         // Generate 16 byte loads and stores.
1284                         // Use temp register for index (16)
1285                         // on the second one.
1286
1287                         p = s.Prog(ppc64.ALXVD2X)
1288                         p.From.Type = obj.TYPE_MEM
1289                         p.From.Reg = srcReg
1290                         p.From.Index = ppc64.REGZERO
1291                         p.To.Type = obj.TYPE_REG
1292                         p.To.Reg = ppc64.REG_VS32
1293                         if top == nil {
1294                                 top = p
1295                         }
1296                         p = s.Prog(ppc64.ALXVD2X)
1297                         p.From.Type = obj.TYPE_MEM
1298                         p.From.Reg = srcReg
1299                         p.From.Index = ppc64.REGTMP
1300                         p.To.Type = obj.TYPE_REG
1301                         p.To.Reg = ppc64.REG_VS33
1302
1303                         // increment the src reg for next iteration
1304                         p = s.Prog(ppc64.AADD)
1305                         p.Reg = srcReg
1306                         p.From.Type = obj.TYPE_CONST
1307                         p.From.Offset = bytesPerLoop
1308                         p.To.Type = obj.TYPE_REG
1309                         p.To.Reg = srcReg
1310
1311                         // generate 16 byte stores
1312                         p = s.Prog(ppc64.ASTXVD2X)
1313                         p.From.Type = obj.TYPE_REG
1314                         p.From.Reg = ppc64.REG_VS32
1315                         p.To.Type = obj.TYPE_MEM
1316                         p.To.Reg = dstReg
1317                         p.To.Index = ppc64.REGZERO
1318
1319                         p = s.Prog(ppc64.ASTXVD2X)
1320                         p.From.Type = obj.TYPE_REG
1321                         p.From.Reg = ppc64.REG_VS33
1322                         p.To.Type = obj.TYPE_MEM
1323                         p.To.Reg = dstReg
1324                         p.To.Index = ppc64.REGTMP
1325
1326                         // increment the dst reg for next iteration
1327                         p = s.Prog(ppc64.AADD)
1328                         p.Reg = dstReg
1329                         p.From.Type = obj.TYPE_CONST
1330                         p.From.Offset = bytesPerLoop
1331                         p.To.Type = obj.TYPE_REG
1332                         p.To.Reg = dstReg
1333
1334                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1335                         // to loop top.
1336                         p = s.Prog(ppc64.ABC)
1337                         p.From.Type = obj.TYPE_CONST
1338                         p.From.Offset = ppc64.BO_BCTR
1339                         p.Reg = ppc64.REG_R0
1340                         p.To.Type = obj.TYPE_BRANCH
1341                         gc.Patch(p, top)
1342
1343                         // srcReg and dstReg were incremented in the loop, so
1344                         // later instructions start with offset 0.
1345                         offset = int64(0)
1346                 }
1347
1348                 // No loop was generated for one iteration, so
1349                 // add 32 bytes to the remainder to move those bytes.
1350                 if ctr == 1 {
1351                         rem += bytesPerLoop
1352                 }
1353
1354                 if rem >= 16 {
1355                         // Generate 16 byte loads and stores.
1356                         // Use temp register for index (value 16)
1357                         // on the second one.
1358                         p := s.Prog(ppc64.ALXVD2X)
1359                         p.From.Type = obj.TYPE_MEM
1360                         p.From.Reg = srcReg
1361                         p.From.Index = ppc64.REGZERO
1362                         p.To.Type = obj.TYPE_REG
1363                         p.To.Reg = ppc64.REG_VS32
1364
1365                         p = s.Prog(ppc64.ASTXVD2X)
1366                         p.From.Type = obj.TYPE_REG
1367                         p.From.Reg = ppc64.REG_VS32
1368                         p.To.Type = obj.TYPE_MEM
1369                         p.To.Reg = dstReg
1370                         p.To.Index = ppc64.REGZERO
1371
1372                         offset = 16
1373                         rem -= 16
1374
1375                         if rem >= 16 {
1376                                 // Use REGTMP as index reg
1377                                 p := s.Prog(ppc64.AMOVD)
1378                                 p.From.Type = obj.TYPE_CONST
1379                                 p.From.Offset = 16
1380                                 p.To.Type = obj.TYPE_REG
1381                                 p.To.Reg = ppc64.REGTMP
1382
1383                                 p = s.Prog(ppc64.ALXVD2X)
1384                                 p.From.Type = obj.TYPE_MEM
1385                                 p.From.Reg = srcReg
1386                                 p.From.Index = ppc64.REGTMP
1387                                 p.To.Type = obj.TYPE_REG
1388                                 p.To.Reg = ppc64.REG_VS32
1389
1390                                 p = s.Prog(ppc64.ASTXVD2X)
1391                                 p.From.Type = obj.TYPE_REG
1392                                 p.From.Reg = ppc64.REG_VS32
1393                                 p.To.Type = obj.TYPE_MEM
1394                                 p.To.Reg = dstReg
1395                                 p.To.Index = ppc64.REGTMP
1396
1397                                 offset = 32
1398                                 rem -= 16
1399                         }
1400                 }
1401
1402                 // Generate all the remaining load and store pairs, starting with
1403                 // as many 8 byte moves as possible, then 4, 2, 1.
1404                 for rem > 0 {
1405                         op, size := ppc64.AMOVB, int64(1)
1406                         switch {
1407                         case rem >= 8:
1408                                 op, size = ppc64.AMOVD, 8
1409                         case rem >= 4:
1410                                 op, size = ppc64.AMOVW, 4
1411                         case rem >= 2:
1412                                 op, size = ppc64.AMOVH, 2
1413                         }
1414                         // Load
1415                         p := s.Prog(op)
1416                         p.To.Type = obj.TYPE_REG
1417                         p.To.Reg = ppc64.REGTMP
1418                         p.From.Type = obj.TYPE_MEM
1419                         p.From.Reg = srcReg
1420                         p.From.Offset = offset
1421
1422                         // Store
1423                         p = s.Prog(op)
1424                         p.From.Type = obj.TYPE_REG
1425                         p.From.Reg = ppc64.REGTMP
1426                         p.To.Type = obj.TYPE_MEM
1427                         p.To.Reg = dstReg
1428                         p.To.Offset = offset
1429                         rem -= size
1430                         offset += size
1431                 }
1432
1433         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1434                 bytesPerLoop := int64(64)
1435                 // This is used when moving more
1436                 // than 8 bytes on power9.  Moves start with
1437                 // as many 8 byte moves as possible, then
1438                 // 4, 2, or 1 byte(s) as remaining.  This will
1439                 // work and be efficient for power8 or later.
1440                 // If there are 128 or more bytes, then a
1441                 // loop is generated to move 64 bytes and
1442                 // update the src and dst addresses on each
1443                 // iteration. When < 128 bytes, the appropriate
1444                 // number of moves are generated based on the
1445                 // size.
1446                 // When moving >= 128 bytes a loop is used
1447                 //      MOVD len/64,REG_TMP
1448                 //      MOVD REG_TMP,CTR
1449                 // top:
1450                 //      LXV 0(R21),VS32;   LXV 16(R21),VS33
1451                 //      STXV VS32,0(R20);  STXV VS33,16(R20)
1452                 //      LXV 32(R21),VS32;  LXV 48(R21),VS33
1453                 //      STXV VS32,32(R20); STXV VS33,48(R20)
1454                 //      ADD $64,R21
1455                 //      ADD $64,R20
1456                 //      BC 16,0,top
1457                 // Bytes not moved by this loop are moved
1458                 // with a combination of the following instructions,
1459                 // starting with the largest sizes and generating as
1460                 // many as needed, using the appropriate offset value.
1461                 //      MOVD  n(R21),R31
1462                 //      MOVD  R31,n(R20)
1463                 //      MOVW  n1(R21),R31
1464                 //      MOVW  R31,n1(R20)
1465                 //      MOVH  n2(R21),R31
1466                 //      MOVH  R31,n2(R20)
1467                 //      MOVB  n3(R21),R31
1468                 //      MOVB  R31,n3(R20)
1469
1470                 // Each loop iteration moves bytesPerLoop (64) bytes
1471                 ctr := v.AuxInt / bytesPerLoop
1472
1473                 // Remainder after the loop
1474                 rem := v.AuxInt % bytesPerLoop
1475
1476                 dstReg := v.Args[0].Reg()
1477                 srcReg := v.Args[1].Reg()
1478
1479                 offset := int64(0)
1480
1481                 // top of the loop
1482                 var top *obj.Prog
1483
1484                 // Only generate looping code when loop counter is > 1, i.e. for >= 128 bytes
1485                 if ctr > 1 {
1486                         // Set up the CTR
1487                         p := s.Prog(ppc64.AMOVD)
1488                         p.From.Type = obj.TYPE_CONST
1489                         p.From.Offset = ctr
1490                         p.To.Type = obj.TYPE_REG
1491                         p.To.Reg = ppc64.REGTMP
1492
1493                         p = s.Prog(ppc64.AMOVD)
1494                         p.From.Type = obj.TYPE_REG
1495                         p.From.Reg = ppc64.REGTMP
1496                         p.To.Type = obj.TYPE_REG
1497                         p.To.Reg = ppc64.REG_CTR
1498
1499                         p = s.Prog(obj.APCALIGN)
1500                         p.From.Type = obj.TYPE_CONST
1501                         p.From.Offset = 16
1502
1503                         // Generate 16 byte loads and stores.
1504                         p = s.Prog(ppc64.ALXV)
1505                         p.From.Type = obj.TYPE_MEM
1506                         p.From.Reg = srcReg
1507                         p.From.Offset = offset
1508                         p.To.Type = obj.TYPE_REG
1509                         p.To.Reg = ppc64.REG_VS32
1510                         if top == nil {
1511                                 top = p
1512                         }
1513                         p = s.Prog(ppc64.ALXV)
1514                         p.From.Type = obj.TYPE_MEM
1515                         p.From.Reg = srcReg
1516                         p.From.Offset = offset + 16
1517                         p.To.Type = obj.TYPE_REG
1518                         p.To.Reg = ppc64.REG_VS33
1519
1520                         // generate 16 byte stores
1521                         p = s.Prog(ppc64.ASTXV)
1522                         p.From.Type = obj.TYPE_REG
1523                         p.From.Reg = ppc64.REG_VS32
1524                         p.To.Type = obj.TYPE_MEM
1525                         p.To.Reg = dstReg
1526                         p.To.Offset = offset
1527
1528                         p = s.Prog(ppc64.ASTXV)
1529                         p.From.Type = obj.TYPE_REG
1530                         p.From.Reg = ppc64.REG_VS33
1531                         p.To.Type = obj.TYPE_MEM
1532                         p.To.Reg = dstReg
1533                         p.To.Offset = offset + 16
1534
1535                         // Generate 16 byte loads and stores.
1536                         p = s.Prog(ppc64.ALXV)
1537                         p.From.Type = obj.TYPE_MEM
1538                         p.From.Reg = srcReg
1539                         p.From.Offset = offset + 32
1540                         p.To.Type = obj.TYPE_REG
1541                         p.To.Reg = ppc64.REG_VS32
1542
1543                         p = s.Prog(ppc64.ALXV)
1544                         p.From.Type = obj.TYPE_MEM
1545                         p.From.Reg = srcReg
1546                         p.From.Offset = offset + 48
1547                         p.To.Type = obj.TYPE_REG
1548                         p.To.Reg = ppc64.REG_VS33
1549
1550                         // generate 16 byte stores
1551                         p = s.Prog(ppc64.ASTXV)
1552                         p.From.Type = obj.TYPE_REG
1553                         p.From.Reg = ppc64.REG_VS32
1554                         p.To.Type = obj.TYPE_MEM
1555                         p.To.Reg = dstReg
1556                         p.To.Offset = offset + 32
1557
1558                         p = s.Prog(ppc64.ASTXV)
1559                         p.From.Type = obj.TYPE_REG
1560                         p.From.Reg = ppc64.REG_VS33
1561                         p.To.Type = obj.TYPE_MEM
1562                         p.To.Reg = dstReg
1563                         p.To.Offset = offset + 48
1564
1565                         // increment the src reg for next iteration
1566                         p = s.Prog(ppc64.AADD)
1567                         p.Reg = srcReg
1568                         p.From.Type = obj.TYPE_CONST
1569                         p.From.Offset = bytesPerLoop
1570                         p.To.Type = obj.TYPE_REG
1571                         p.To.Reg = srcReg
1572
1573                         // increment the dst reg for next iteration
1574                         p = s.Prog(ppc64.AADD)
1575                         p.Reg = dstReg
1576                         p.From.Type = obj.TYPE_CONST
1577                         p.From.Offset = bytesPerLoop
1578                         p.To.Type = obj.TYPE_REG
1579                         p.To.Reg = dstReg
1580
1581                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1582                         // to loop top.
1583                         p = s.Prog(ppc64.ABC)
1584                         p.From.Type = obj.TYPE_CONST
1585                         p.From.Offset = ppc64.BO_BCTR
1586                         p.Reg = ppc64.REG_R0
1587                         p.To.Type = obj.TYPE_BRANCH
1588                         gc.Patch(p, top)
1589
1590                         // srcReg and dstReg were incremented in the loop, so
1591                         // later instructions start with offset 0.
1592                         offset = int64(0)
1593                 }
1594
1595                 // No loop was generated for one iteration, so
1596                 // add 64 bytes (bytesPerLoop) to the remainder to move those bytes.
1597                 if ctr == 1 {
1598                         rem += bytesPerLoop
1599                 }
1600                 if rem >= 32 {
1601                         p := s.Prog(ppc64.ALXV)
1602                         p.From.Type = obj.TYPE_MEM
1603                         p.From.Reg = srcReg
1604                         p.To.Type = obj.TYPE_REG
1605                         p.To.Reg = ppc64.REG_VS32
1606
1607                         p = s.Prog(ppc64.ALXV)
1608                         p.From.Type = obj.TYPE_MEM
1609                         p.From.Reg = srcReg
1610                         p.From.Offset = 16
1611                         p.To.Type = obj.TYPE_REG
1612                         p.To.Reg = ppc64.REG_VS33
1613
1614                         p = s.Prog(ppc64.ASTXV)
1615                         p.From.Type = obj.TYPE_REG
1616                         p.From.Reg = ppc64.REG_VS32
1617                         p.To.Type = obj.TYPE_MEM
1618                         p.To.Reg = dstReg
1619
1620                         p = s.Prog(ppc64.ASTXV)
1621                         p.From.Type = obj.TYPE_REG
1622                         p.From.Reg = ppc64.REG_VS33
1623                         p.To.Type = obj.TYPE_MEM
1624                         p.To.Reg = dstReg
1625                         p.To.Offset = 16
1626
1627                         offset = 32
1628                         rem -= 32
1629                 }
1630
1631                 if rem >= 16 {
1632                         // Generate 16 byte loads and stores.
1633                         p := s.Prog(ppc64.ALXV)
1634                         p.From.Type = obj.TYPE_MEM
1635                         p.From.Reg = srcReg
1636                         p.From.Offset = offset
1637                         p.To.Type = obj.TYPE_REG
1638                         p.To.Reg = ppc64.REG_VS32
1639
1640                         p = s.Prog(ppc64.ASTXV)
1641                         p.From.Type = obj.TYPE_REG
1642                         p.From.Reg = ppc64.REG_VS32
1643                         p.To.Type = obj.TYPE_MEM
1644                         p.To.Reg = dstReg
1645                         p.To.Offset = offset
1646
1647                         offset += 16
1648                         rem -= 16
1649
1650                         if rem >= 16 {
1651                                 p := s.Prog(ppc64.ALXV)
1652                                 p.From.Type = obj.TYPE_MEM
1653                                 p.From.Reg = srcReg
1654                                 p.From.Offset = offset
1655                                 p.To.Type = obj.TYPE_REG
1656                                 p.To.Reg = ppc64.REG_VS32
1657
1658                                 p = s.Prog(ppc64.ASTXV)
1659                                 p.From.Type = obj.TYPE_REG
1660                                 p.From.Reg = ppc64.REG_VS32
1661                                 p.To.Type = obj.TYPE_MEM
1662                                 p.To.Reg = dstReg
1663                                 p.To.Offset = offset
1664
1665                                 offset += 16
1666                                 rem -= 16
1667                         }
1668                 }
1669                 // Generate all the remaining load and store pairs, starting with
1670                 // as many 8 byte moves as possible, then 4, 2, 1.
1671                 for rem > 0 {
1672                         op, size := ppc64.AMOVB, int64(1)
1673                         switch {
1674                         case rem >= 8:
1675                                 op, size = ppc64.AMOVD, 8
1676                         case rem >= 4:
1677                                 op, size = ppc64.AMOVW, 4
1678                         case rem >= 2:
1679                                 op, size = ppc64.AMOVH, 2
1680                         }
1681                         // Load
1682                         p := s.Prog(op)
1683                         p.To.Type = obj.TYPE_REG
1684                         p.To.Reg = ppc64.REGTMP
1685                         p.From.Type = obj.TYPE_MEM
1686                         p.From.Reg = srcReg
1687                         p.From.Offset = offset
1688
1689                         // Store
1690                         p = s.Prog(op)
1691                         p.From.Type = obj.TYPE_REG
1692                         p.From.Reg = ppc64.REGTMP
1693                         p.To.Type = obj.TYPE_MEM
1694                         p.To.Reg = dstReg
1695                         p.To.Offset = offset
1696                         rem -= size
1697                         offset += size
1698                 }
1699
1700         case ssa.OpPPC64CALLstatic:
1701                 s.Call(v)
1702
1703         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1704                 p := s.Prog(ppc64.AMOVD)
1705                 p.From.Type = obj.TYPE_REG
1706                 p.From.Reg = v.Args[0].Reg()
1707                 p.To.Type = obj.TYPE_REG
1708                 p.To.Reg = ppc64.REG_LR
1709
1710                 if v.Args[0].Reg() != ppc64.REG_R12 {
1711                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1712                 }
1713
1714                 pp := s.Call(v)
1715                 pp.To.Reg = ppc64.REG_LR
1716
1717                 if gc.Ctxt.Flag_shared {
1718                         // When compiling Go into PIC, the function we just
1719                         // called via pointer might have been implemented in
1720                         // a separate module and so overwritten the TOC
1721                         // pointer in R2; reload it.
1722                         q := s.Prog(ppc64.AMOVD)
1723                         q.From.Type = obj.TYPE_MEM
1724                         q.From.Offset = 24
1725                         q.From.Reg = ppc64.REGSP
1726                         q.To.Type = obj.TYPE_REG
1727                         q.To.Reg = ppc64.REG_R2
1728                 }
1729
1730         case ssa.OpPPC64LoweredWB:
1731                 p := s.Prog(obj.ACALL)
1732                 p.To.Type = obj.TYPE_MEM
1733                 p.To.Name = obj.NAME_EXTERN
1734                 p.To.Sym = v.Aux.(*obj.LSym)
1735
1736         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1737                 p := s.Prog(obj.ACALL)
1738                 p.To.Type = obj.TYPE_MEM
1739                 p.To.Name = obj.NAME_EXTERN
1740                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1741                 s.UseArgs(16) // space used in callee args area by assembly stubs
1742
1743         case ssa.OpPPC64LoweredNilCheck:
1744                 if objabi.GOOS == "aix" {
1745                         // CMP Rarg0, R0
1746                         // BNE 2(PC)
1747                         // STW R0, 0(R0)
1748                         // NOP (so the BNE has somewhere to land)
1749
1750                         // CMP Rarg0, R0
1751                         p := s.Prog(ppc64.ACMP)
1752                         p.From.Type = obj.TYPE_REG
1753                         p.From.Reg = v.Args[0].Reg()
1754                         p.To.Type = obj.TYPE_REG
1755                         p.To.Reg = ppc64.REG_R0
1756
1757                         // BNE 2(PC)
1758                         p2 := s.Prog(ppc64.ABNE)
1759                         p2.To.Type = obj.TYPE_BRANCH
1760
1761                         // STW R0, 0(R0)
1762                         // Write at 0 is forbidden and will trigger a SIGSEGV
1763                         p = s.Prog(ppc64.AMOVW)
1764                         p.From.Type = obj.TYPE_REG
1765                         p.From.Reg = ppc64.REG_R0
1766                         p.To.Type = obj.TYPE_MEM
1767                         p.To.Reg = ppc64.REG_R0
1768
1769                         // NOP (so the BNE has somewhere to land)
1770                         nop := s.Prog(obj.ANOP)
1771                         gc.Patch(p2, nop)
1772
1773                 } else {
1774                         // Issue a load which will fault if arg is nil.
1775                         p := s.Prog(ppc64.AMOVBZ)
1776                         p.From.Type = obj.TYPE_MEM
1777                         p.From.Reg = v.Args[0].Reg()
1778                         gc.AddAux(&p.From, v)
1779                         p.To.Type = obj.TYPE_REG
1780                         p.To.Reg = ppc64.REGTMP
1781                 }
1782                 if logopt.Enabled() {
1783                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1784                 }
1785                 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1786                         gc.Warnl(v.Pos, "generated nil check")
1787                 }
1788
1789         // These should be resolved by rules and not make it here.
1790         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1791                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1792                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1793                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1794         case ssa.OpPPC64InvertFlags:
1795                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1796         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1797                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1798         case ssa.OpClobber:
1799                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1800         default:
1801                 v.Fatalf("genValue not implemented: %s", v.LongString())
1802         }
1803 }
1804
1805 var blockJump = [...]struct {
1806         asm, invasm     obj.As
1807         asmeq, invasmun bool
1808 }{
1809         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1810         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1811
1812         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1813         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1814         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1815         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1816
1817         // TODO: need to work FP comparisons into block jumps
1818         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1819         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1820         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1821         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1822 }
1823
1824 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1825         switch b.Kind {
1826         case ssa.BlockDefer:
1827                 // defer returns in R3:
1828                 // 0 if we should continue executing
1829                 // 1 if we should jump to deferreturn call
1830                 p := s.Prog(ppc64.ACMP)
1831                 p.From.Type = obj.TYPE_REG
1832                 p.From.Reg = ppc64.REG_R3
1833                 p.To.Type = obj.TYPE_REG
1834                 p.To.Reg = ppc64.REG_R0
1835
1836                 p = s.Prog(ppc64.ABNE)
1837                 p.To.Type = obj.TYPE_BRANCH
1838                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1839                 if b.Succs[0].Block() != next {
1840                         p := s.Prog(obj.AJMP)
1841                         p.To.Type = obj.TYPE_BRANCH
1842                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1843                 }
1844
1845         case ssa.BlockPlain:
1846                 if b.Succs[0].Block() != next {
1847                         p := s.Prog(obj.AJMP)
1848                         p.To.Type = obj.TYPE_BRANCH
1849                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1850                 }
1851         case ssa.BlockExit:
1852         case ssa.BlockRet:
1853                 s.Prog(obj.ARET)
1854         case ssa.BlockRetJmp:
1855                 p := s.Prog(obj.AJMP)
1856                 p.To.Type = obj.TYPE_MEM
1857                 p.To.Name = obj.NAME_EXTERN
1858                 p.To.Sym = b.Aux.(*obj.LSym)
1859
1860         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1861                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1862                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1863                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1864                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1865                 jmp := blockJump[b.Kind]
1866                 switch next {
1867                 case b.Succs[0].Block():
1868                         s.Br(jmp.invasm, b.Succs[1].Block())
1869                         if jmp.invasmun {
1870                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1871                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1872                         }
1873                 case b.Succs[1].Block():
1874                         s.Br(jmp.asm, b.Succs[0].Block())
1875                         if jmp.asmeq {
1876                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1877                         }
1878                 default:
1879                         if b.Likely != ssa.BranchUnlikely {
1880                                 s.Br(jmp.asm, b.Succs[0].Block())
1881                                 if jmp.asmeq {
1882                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1883                                 }
1884                                 s.Br(obj.AJMP, b.Succs[1].Block())
1885                         } else {
1886                                 s.Br(jmp.invasm, b.Succs[1].Block())
1887                                 if jmp.invasmun {
1888                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1889                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1890                                 }
1891                                 s.Br(obj.AJMP, b.Succs[0].Block())
1892                         }
1893                 }
1894         default:
1895                 b.Fatalf("branch not implemented: %s", b.LongString())
1896         }
1897 }