]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
cmd/compile, cmd/internal/obj/ppc64: use LR for indirect calls
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/gc"
9         "cmd/compile/internal/ssa"
10         "cmd/compile/internal/types"
11         "cmd/internal/obj"
12         "cmd/internal/obj/ppc64"
13         "cmd/internal/objabi"
14         "math"
15         "strings"
16 )
17
18 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
19 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
20         //      flive := b.FlagsLiveAtEnd
21         //      if b.Control != nil && b.Control.Type.IsFlags() {
22         //              flive = true
23         //      }
24         //      for i := len(b.Values) - 1; i >= 0; i-- {
25         //              v := b.Values[i]
26         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
27         //                      // The "mark" is any non-nil Aux value.
28         //                      v.Aux = v
29         //              }
30         //              if v.Type.IsFlags() {
31         //                      flive = false
32         //              }
33         //              for _, a := range v.Args {
34         //                      if a.Type.IsFlags() {
35         //                              flive = true
36         //                      }
37         //              }
38         //      }
39 }
40
41 // loadByType returns the load instruction of the given type.
42 func loadByType(t *types.Type) obj.As {
43         if t.IsFloat() {
44                 switch t.Size() {
45                 case 4:
46                         return ppc64.AFMOVS
47                 case 8:
48                         return ppc64.AFMOVD
49                 }
50         } else {
51                 switch t.Size() {
52                 case 1:
53                         if t.IsSigned() {
54                                 return ppc64.AMOVB
55                         } else {
56                                 return ppc64.AMOVBZ
57                         }
58                 case 2:
59                         if t.IsSigned() {
60                                 return ppc64.AMOVH
61                         } else {
62                                 return ppc64.AMOVHZ
63                         }
64                 case 4:
65                         if t.IsSigned() {
66                                 return ppc64.AMOVW
67                         } else {
68                                 return ppc64.AMOVWZ
69                         }
70                 case 8:
71                         return ppc64.AMOVD
72                 }
73         }
74         panic("bad load type")
75 }
76
77 // storeByType returns the store instruction of the given type.
78 func storeByType(t *types.Type) obj.As {
79         if t.IsFloat() {
80                 switch t.Size() {
81                 case 4:
82                         return ppc64.AFMOVS
83                 case 8:
84                         return ppc64.AFMOVD
85                 }
86         } else {
87                 switch t.Size() {
88                 case 1:
89                         return ppc64.AMOVB
90                 case 2:
91                         return ppc64.AMOVH
92                 case 4:
93                         return ppc64.AMOVW
94                 case 8:
95                         return ppc64.AMOVD
96                 }
97         }
98         panic("bad store type")
99 }
100
101 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
102         switch v.Op {
103         case ssa.OpCopy:
104                 t := v.Type
105                 if t.IsMemory() {
106                         return
107                 }
108                 x := v.Args[0].Reg()
109                 y := v.Reg()
110                 if x != y {
111                         rt := obj.TYPE_REG
112                         op := ppc64.AMOVD
113
114                         if t.IsFloat() {
115                                 op = ppc64.AFMOVD
116                         }
117                         p := s.Prog(op)
118                         p.From.Type = rt
119                         p.From.Reg = x
120                         p.To.Type = rt
121                         p.To.Reg = y
122                 }
123
124         case ssa.OpPPC64LoweredMuluhilo:
125                 // MULHDU       Rarg1, Rarg0, Reg0
126                 // MULLD        Rarg1, Rarg0, Reg1
127                 r0 := v.Args[0].Reg()
128                 r1 := v.Args[1].Reg()
129                 p := s.Prog(ppc64.AMULHDU)
130                 p.From.Type = obj.TYPE_REG
131                 p.From.Reg = r1
132                 p.Reg = r0
133                 p.To.Type = obj.TYPE_REG
134                 p.To.Reg = v.Reg0()
135                 p1 := s.Prog(ppc64.AMULLD)
136                 p1.From.Type = obj.TYPE_REG
137                 p1.From.Reg = r1
138                 p1.Reg = r0
139                 p1.To.Type = obj.TYPE_REG
140                 p1.To.Reg = v.Reg1()
141
142         case ssa.OpPPC64LoweredAdd64Carry:
143                 // ADDC         Rarg2, -1, Rtmp
144                 // ADDE         Rarg1, Rarg0, Reg0
145                 // ADDZE        Rzero, Reg1
146                 r0 := v.Args[0].Reg()
147                 r1 := v.Args[1].Reg()
148                 r2 := v.Args[2].Reg()
149                 p := s.Prog(ppc64.AADDC)
150                 p.From.Type = obj.TYPE_CONST
151                 p.From.Offset = -1
152                 p.Reg = r2
153                 p.To.Type = obj.TYPE_REG
154                 p.To.Reg = ppc64.REGTMP
155                 p1 := s.Prog(ppc64.AADDE)
156                 p1.From.Type = obj.TYPE_REG
157                 p1.From.Reg = r1
158                 p1.Reg = r0
159                 p1.To.Type = obj.TYPE_REG
160                 p1.To.Reg = v.Reg0()
161                 p2 := s.Prog(ppc64.AADDZE)
162                 p2.From.Type = obj.TYPE_REG
163                 p2.From.Reg = ppc64.REGZERO
164                 p2.To.Type = obj.TYPE_REG
165                 p2.To.Reg = v.Reg1()
166
167         case ssa.OpPPC64LoweredAtomicAnd8,
168                 ssa.OpPPC64LoweredAtomicOr8:
169                 // LWSYNC
170                 // LBAR         (Rarg0), Rtmp
171                 // AND/OR       Rarg1, Rtmp
172                 // STBCCC       Rtmp, (Rarg0)
173                 // BNE          -3(PC)
174                 r0 := v.Args[0].Reg()
175                 r1 := v.Args[1].Reg()
176                 // LWSYNC - Assuming shared data not write-through-required nor
177                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
178                 plwsync := s.Prog(ppc64.ALWSYNC)
179                 plwsync.To.Type = obj.TYPE_NONE
180                 p := s.Prog(ppc64.ALBAR)
181                 p.From.Type = obj.TYPE_MEM
182                 p.From.Reg = r0
183                 p.To.Type = obj.TYPE_REG
184                 p.To.Reg = ppc64.REGTMP
185                 p1 := s.Prog(v.Op.Asm())
186                 p1.From.Type = obj.TYPE_REG
187                 p1.From.Reg = r1
188                 p1.To.Type = obj.TYPE_REG
189                 p1.To.Reg = ppc64.REGTMP
190                 p2 := s.Prog(ppc64.ASTBCCC)
191                 p2.From.Type = obj.TYPE_REG
192                 p2.From.Reg = ppc64.REGTMP
193                 p2.To.Type = obj.TYPE_MEM
194                 p2.To.Reg = r0
195                 p2.RegTo2 = ppc64.REGTMP
196                 p3 := s.Prog(ppc64.ABNE)
197                 p3.To.Type = obj.TYPE_BRANCH
198                 gc.Patch(p3, p)
199
200         case ssa.OpPPC64LoweredAtomicAdd32,
201                 ssa.OpPPC64LoweredAtomicAdd64:
202                 // LWSYNC
203                 // LDAR/LWAR    (Rarg0), Rout
204                 // ADD          Rarg1, Rout
205                 // STDCCC/STWCCC Rout, (Rarg0)
206                 // BNE         -3(PC)
207                 // MOVW         Rout,Rout (if Add32)
208                 ld := ppc64.ALDAR
209                 st := ppc64.ASTDCCC
210                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
211                         ld = ppc64.ALWAR
212                         st = ppc64.ASTWCCC
213                 }
214                 r0 := v.Args[0].Reg()
215                 r1 := v.Args[1].Reg()
216                 out := v.Reg0()
217                 // LWSYNC - Assuming shared data not write-through-required nor
218                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
219                 plwsync := s.Prog(ppc64.ALWSYNC)
220                 plwsync.To.Type = obj.TYPE_NONE
221                 // LDAR or LWAR
222                 p := s.Prog(ld)
223                 p.From.Type = obj.TYPE_MEM
224                 p.From.Reg = r0
225                 p.To.Type = obj.TYPE_REG
226                 p.To.Reg = out
227                 // ADD reg1,out
228                 p1 := s.Prog(ppc64.AADD)
229                 p1.From.Type = obj.TYPE_REG
230                 p1.From.Reg = r1
231                 p1.To.Reg = out
232                 p1.To.Type = obj.TYPE_REG
233                 // STDCCC or STWCCC
234                 p3 := s.Prog(st)
235                 p3.From.Type = obj.TYPE_REG
236                 p3.From.Reg = out
237                 p3.To.Type = obj.TYPE_MEM
238                 p3.To.Reg = r0
239                 // BNE retry
240                 p4 := s.Prog(ppc64.ABNE)
241                 p4.To.Type = obj.TYPE_BRANCH
242                 gc.Patch(p4, p)
243
244                 // Ensure a 32 bit result
245                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
246                         p5 := s.Prog(ppc64.AMOVWZ)
247                         p5.To.Type = obj.TYPE_REG
248                         p5.To.Reg = out
249                         p5.From.Type = obj.TYPE_REG
250                         p5.From.Reg = out
251                 }
252
253         case ssa.OpPPC64LoweredAtomicExchange32,
254                 ssa.OpPPC64LoweredAtomicExchange64:
255                 // LWSYNC
256                 // LDAR/LWAR    (Rarg0), Rout
257                 // STDCCC/STWCCC Rout, (Rarg0)
258                 // BNE         -2(PC)
259                 // ISYNC
260                 ld := ppc64.ALDAR
261                 st := ppc64.ASTDCCC
262                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
263                         ld = ppc64.ALWAR
264                         st = ppc64.ASTWCCC
265                 }
266                 r0 := v.Args[0].Reg()
267                 r1 := v.Args[1].Reg()
268                 out := v.Reg0()
269                 // LWSYNC - Assuming shared data not write-through-required nor
270                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
271                 plwsync := s.Prog(ppc64.ALWSYNC)
272                 plwsync.To.Type = obj.TYPE_NONE
273                 // LDAR or LWAR
274                 p := s.Prog(ld)
275                 p.From.Type = obj.TYPE_MEM
276                 p.From.Reg = r0
277                 p.To.Type = obj.TYPE_REG
278                 p.To.Reg = out
279                 // STDCCC or STWCCC
280                 p1 := s.Prog(st)
281                 p1.From.Type = obj.TYPE_REG
282                 p1.From.Reg = r1
283                 p1.To.Type = obj.TYPE_MEM
284                 p1.To.Reg = r0
285                 // BNE retry
286                 p2 := s.Prog(ppc64.ABNE)
287                 p2.To.Type = obj.TYPE_BRANCH
288                 gc.Patch(p2, p)
289                 // ISYNC
290                 pisync := s.Prog(ppc64.AISYNC)
291                 pisync.To.Type = obj.TYPE_NONE
292
293         case ssa.OpPPC64LoweredAtomicLoad8,
294                 ssa.OpPPC64LoweredAtomicLoad32,
295                 ssa.OpPPC64LoweredAtomicLoad64,
296                 ssa.OpPPC64LoweredAtomicLoadPtr:
297                 // SYNC
298                 // MOVB/MOVD/MOVW (Rarg0), Rout
299                 // CMP Rout,Rout
300                 // BNE 1(PC)
301                 // ISYNC
302                 ld := ppc64.AMOVD
303                 cmp := ppc64.ACMP
304                 switch v.Op {
305                 case ssa.OpPPC64LoweredAtomicLoad8:
306                         ld = ppc64.AMOVBZ
307                 case ssa.OpPPC64LoweredAtomicLoad32:
308                         ld = ppc64.AMOVWZ
309                         cmp = ppc64.ACMPW
310                 }
311                 arg0 := v.Args[0].Reg()
312                 out := v.Reg0()
313                 // SYNC when AuxInt == 1; otherwise, load-acquire
314                 if v.AuxInt == 1 {
315                         psync := s.Prog(ppc64.ASYNC)
316                         psync.To.Type = obj.TYPE_NONE
317                 }
318                 // Load
319                 p := s.Prog(ld)
320                 p.From.Type = obj.TYPE_MEM
321                 p.From.Reg = arg0
322                 p.To.Type = obj.TYPE_REG
323                 p.To.Reg = out
324                 // CMP
325                 p1 := s.Prog(cmp)
326                 p1.From.Type = obj.TYPE_REG
327                 p1.From.Reg = out
328                 p1.To.Type = obj.TYPE_REG
329                 p1.To.Reg = out
330                 // BNE
331                 p2 := s.Prog(ppc64.ABNE)
332                 p2.To.Type = obj.TYPE_BRANCH
333                 // ISYNC
334                 pisync := s.Prog(ppc64.AISYNC)
335                 pisync.To.Type = obj.TYPE_NONE
336                 gc.Patch(p2, pisync)
337
338         case ssa.OpPPC64LoweredAtomicStore8,
339                 ssa.OpPPC64LoweredAtomicStore32,
340                 ssa.OpPPC64LoweredAtomicStore64:
341                 // SYNC or LWSYNC
342                 // MOVB/MOVW/MOVD arg1,(arg0)
343                 st := ppc64.AMOVD
344                 switch v.Op {
345                 case ssa.OpPPC64LoweredAtomicStore8:
346                         st = ppc64.AMOVB
347                 case ssa.OpPPC64LoweredAtomicStore32:
348                         st = ppc64.AMOVW
349                 }
350                 arg0 := v.Args[0].Reg()
351                 arg1 := v.Args[1].Reg()
352                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
353                 // SYNC
354                 syncOp := ppc64.ASYNC
355                 if v.AuxInt == 0 {
356                         syncOp = ppc64.ALWSYNC
357                 }
358                 psync := s.Prog(syncOp)
359                 psync.To.Type = obj.TYPE_NONE
360                 // Store
361                 p := s.Prog(st)
362                 p.To.Type = obj.TYPE_MEM
363                 p.To.Reg = arg0
364                 p.From.Type = obj.TYPE_REG
365                 p.From.Reg = arg1
366
367         case ssa.OpPPC64LoweredAtomicCas64,
368                 ssa.OpPPC64LoweredAtomicCas32:
369                 // LWSYNC
370                 // loop:
371                 // LDAR        (Rarg0), MutexHint, Rtmp
372                 // CMP         Rarg1, Rtmp
373                 // BNE         fail
374                 // STDCCC      Rarg2, (Rarg0)
375                 // BNE         loop
376                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
377                 // MOVD        $1, Rout
378                 // BR          end
379                 // fail:
380                 // MOVD        $0, Rout
381                 // end:
382                 ld := ppc64.ALDAR
383                 st := ppc64.ASTDCCC
384                 cmp := ppc64.ACMP
385                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
386                         ld = ppc64.ALWAR
387                         st = ppc64.ASTWCCC
388                         cmp = ppc64.ACMPW
389                 }
390                 r0 := v.Args[0].Reg()
391                 r1 := v.Args[1].Reg()
392                 r2 := v.Args[2].Reg()
393                 out := v.Reg0()
394                 // LWSYNC - Assuming shared data not write-through-required nor
395                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
396                 plwsync1 := s.Prog(ppc64.ALWSYNC)
397                 plwsync1.To.Type = obj.TYPE_NONE
398                 // LDAR or LWAR
399                 p := s.Prog(ld)
400                 p.From.Type = obj.TYPE_MEM
401                 p.From.Reg = r0
402                 p.To.Type = obj.TYPE_REG
403                 p.To.Reg = ppc64.REGTMP
404                 // If it is a Compare-and-Swap-Release operation, set the EH field with
405                 // the release hint.
406                 if v.AuxInt == 0 {
407                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
408                 }
409                 // CMP reg1,reg2
410                 p1 := s.Prog(cmp)
411                 p1.From.Type = obj.TYPE_REG
412                 p1.From.Reg = r1
413                 p1.To.Reg = ppc64.REGTMP
414                 p1.To.Type = obj.TYPE_REG
415                 // BNE cas_fail
416                 p2 := s.Prog(ppc64.ABNE)
417                 p2.To.Type = obj.TYPE_BRANCH
418                 // STDCCC or STWCCC
419                 p3 := s.Prog(st)
420                 p3.From.Type = obj.TYPE_REG
421                 p3.From.Reg = r2
422                 p3.To.Type = obj.TYPE_MEM
423                 p3.To.Reg = r0
424                 // BNE retry
425                 p4 := s.Prog(ppc64.ABNE)
426                 p4.To.Type = obj.TYPE_BRANCH
427                 gc.Patch(p4, p)
428                 // LWSYNC - Assuming shared data not write-through-required nor
429                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
430                 // If the operation is a CAS-Release, then synchronization is not necessary.
431                 if v.AuxInt != 0 {
432                         plwsync2 := s.Prog(ppc64.ALWSYNC)
433                         plwsync2.To.Type = obj.TYPE_NONE
434                 }
435                 // return true
436                 p5 := s.Prog(ppc64.AMOVD)
437                 p5.From.Type = obj.TYPE_CONST
438                 p5.From.Offset = 1
439                 p5.To.Type = obj.TYPE_REG
440                 p5.To.Reg = out
441                 // BR done
442                 p6 := s.Prog(obj.AJMP)
443                 p6.To.Type = obj.TYPE_BRANCH
444                 // return false
445                 p7 := s.Prog(ppc64.AMOVD)
446                 p7.From.Type = obj.TYPE_CONST
447                 p7.From.Offset = 0
448                 p7.To.Type = obj.TYPE_REG
449                 p7.To.Reg = out
450                 gc.Patch(p2, p7)
451                 // done (label)
452                 p8 := s.Prog(obj.ANOP)
453                 gc.Patch(p6, p8)
454
455         case ssa.OpPPC64LoweredGetClosurePtr:
456                 // Closure pointer is R11 (already)
457                 gc.CheckLoweredGetClosurePtr(v)
458
459         case ssa.OpPPC64LoweredGetCallerSP:
460                 // caller's SP is FixedFrameSize below the address of the first arg
461                 p := s.Prog(ppc64.AMOVD)
462                 p.From.Type = obj.TYPE_ADDR
463                 p.From.Offset = -gc.Ctxt.FixedFrameSize()
464                 p.From.Name = obj.NAME_PARAM
465                 p.To.Type = obj.TYPE_REG
466                 p.To.Reg = v.Reg()
467
468         case ssa.OpPPC64LoweredGetCallerPC:
469                 p := s.Prog(obj.AGETCALLERPC)
470                 p.To.Type = obj.TYPE_REG
471                 p.To.Reg = v.Reg()
472
473         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
474                 // input is already rounded
475
476         case ssa.OpLoadReg:
477                 loadOp := loadByType(v.Type)
478                 p := s.Prog(loadOp)
479                 gc.AddrAuto(&p.From, v.Args[0])
480                 p.To.Type = obj.TYPE_REG
481                 p.To.Reg = v.Reg()
482
483         case ssa.OpStoreReg:
484                 storeOp := storeByType(v.Type)
485                 p := s.Prog(storeOp)
486                 p.From.Type = obj.TYPE_REG
487                 p.From.Reg = v.Args[0].Reg()
488                 gc.AddrAuto(&p.To, v)
489
490         case ssa.OpPPC64DIVD:
491                 // For now,
492                 //
493                 // cmp arg1, -1
494                 // be  ahead
495                 // v = arg0 / arg1
496                 // b over
497                 // ahead: v = - arg0
498                 // over: nop
499                 r := v.Reg()
500                 r0 := v.Args[0].Reg()
501                 r1 := v.Args[1].Reg()
502
503                 p := s.Prog(ppc64.ACMP)
504                 p.From.Type = obj.TYPE_REG
505                 p.From.Reg = r1
506                 p.To.Type = obj.TYPE_CONST
507                 p.To.Offset = -1
508
509                 pbahead := s.Prog(ppc64.ABEQ)
510                 pbahead.To.Type = obj.TYPE_BRANCH
511
512                 p = s.Prog(v.Op.Asm())
513                 p.From.Type = obj.TYPE_REG
514                 p.From.Reg = r1
515                 p.Reg = r0
516                 p.To.Type = obj.TYPE_REG
517                 p.To.Reg = r
518
519                 pbover := s.Prog(obj.AJMP)
520                 pbover.To.Type = obj.TYPE_BRANCH
521
522                 p = s.Prog(ppc64.ANEG)
523                 p.To.Type = obj.TYPE_REG
524                 p.To.Reg = r
525                 p.From.Type = obj.TYPE_REG
526                 p.From.Reg = r0
527                 gc.Patch(pbahead, p)
528
529                 p = s.Prog(obj.ANOP)
530                 gc.Patch(pbover, p)
531
532         case ssa.OpPPC64DIVW:
533                 // word-width version of above
534                 r := v.Reg()
535                 r0 := v.Args[0].Reg()
536                 r1 := v.Args[1].Reg()
537
538                 p := s.Prog(ppc64.ACMPW)
539                 p.From.Type = obj.TYPE_REG
540                 p.From.Reg = r1
541                 p.To.Type = obj.TYPE_CONST
542                 p.To.Offset = -1
543
544                 pbahead := s.Prog(ppc64.ABEQ)
545                 pbahead.To.Type = obj.TYPE_BRANCH
546
547                 p = s.Prog(v.Op.Asm())
548                 p.From.Type = obj.TYPE_REG
549                 p.From.Reg = r1
550                 p.Reg = r0
551                 p.To.Type = obj.TYPE_REG
552                 p.To.Reg = r
553
554                 pbover := s.Prog(obj.AJMP)
555                 pbover.To.Type = obj.TYPE_BRANCH
556
557                 p = s.Prog(ppc64.ANEG)
558                 p.To.Type = obj.TYPE_REG
559                 p.To.Reg = r
560                 p.From.Type = obj.TYPE_REG
561                 p.From.Reg = r0
562                 gc.Patch(pbahead, p)
563
564                 p = s.Prog(obj.ANOP)
565                 gc.Patch(pbover, p)
566
567         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
568                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
569                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
570                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
571                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
572                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
573                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
574                 r := v.Reg()
575                 r1 := v.Args[0].Reg()
576                 r2 := v.Args[1].Reg()
577                 p := s.Prog(v.Op.Asm())
578                 p.From.Type = obj.TYPE_REG
579                 p.From.Reg = r2
580                 p.Reg = r1
581                 p.To.Type = obj.TYPE_REG
582                 p.To.Reg = r
583
584         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
585                 r1 := v.Args[0].Reg()
586                 r2 := v.Args[1].Reg()
587                 p := s.Prog(v.Op.Asm())
588                 p.From.Type = obj.TYPE_REG
589                 p.From.Reg = r2
590                 p.Reg = r1
591                 p.To.Type = obj.TYPE_REG
592                 p.To.Reg = ppc64.REGTMP // result is not needed
593
594         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
595                 p := s.Prog(v.Op.Asm())
596                 p.From.Type = obj.TYPE_CONST
597                 p.From.Offset = v.AuxInt
598                 p.Reg = v.Args[0].Reg()
599                 p.To.Type = obj.TYPE_REG
600                 p.To.Reg = v.Reg()
601
602         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
603                 r := v.Reg()
604                 r1 := v.Args[0].Reg()
605                 r2 := v.Args[1].Reg()
606                 r3 := v.Args[2].Reg()
607                 // r = r1*r2 ± r3
608                 p := s.Prog(v.Op.Asm())
609                 p.From.Type = obj.TYPE_REG
610                 p.From.Reg = r1
611                 p.Reg = r3
612                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
613                 p.To.Type = obj.TYPE_REG
614                 p.To.Reg = r
615
616         case ssa.OpPPC64MaskIfNotCarry:
617                 r := v.Reg()
618                 p := s.Prog(v.Op.Asm())
619                 p.From.Type = obj.TYPE_REG
620                 p.From.Reg = ppc64.REGZERO
621                 p.To.Type = obj.TYPE_REG
622                 p.To.Reg = r
623
624         case ssa.OpPPC64ADDconstForCarry:
625                 r1 := v.Args[0].Reg()
626                 p := s.Prog(v.Op.Asm())
627                 p.Reg = r1
628                 p.From.Type = obj.TYPE_CONST
629                 p.From.Offset = v.AuxInt
630                 p.To.Type = obj.TYPE_REG
631                 p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
632
633         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
634                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
635                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
636                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
637                 r := v.Reg()
638                 p := s.Prog(v.Op.Asm())
639                 p.To.Type = obj.TYPE_REG
640                 p.To.Reg = r
641                 p.From.Type = obj.TYPE_REG
642                 p.From.Reg = v.Args[0].Reg()
643
644         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
645                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
646                 p := s.Prog(v.Op.Asm())
647                 p.Reg = v.Args[0].Reg()
648                 p.From.Type = obj.TYPE_CONST
649                 p.From.Offset = v.AuxInt
650                 p.To.Type = obj.TYPE_REG
651                 p.To.Reg = v.Reg()
652
653         case ssa.OpPPC64ANDCCconst:
654                 p := s.Prog(v.Op.Asm())
655                 p.Reg = v.Args[0].Reg()
656
657                 if v.Aux != nil {
658                         p.From.Type = obj.TYPE_CONST
659                         p.From.Offset = gc.AuxOffset(v)
660                 } else {
661                         p.From.Type = obj.TYPE_CONST
662                         p.From.Offset = v.AuxInt
663                 }
664
665                 p.To.Type = obj.TYPE_REG
666                 p.To.Reg = ppc64.REGTMP // discard result
667
668         case ssa.OpPPC64MOVDaddr:
669                 switch v.Aux.(type) {
670                 default:
671                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
672                 case nil:
673                         // If aux offset and aux int are both 0, and the same
674                         // input and output regs are used, no instruction
675                         // needs to be generated, since it would just be
676                         // addi rx, rx, 0.
677                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
678                                 p := s.Prog(ppc64.AMOVD)
679                                 p.From.Type = obj.TYPE_ADDR
680                                 p.From.Reg = v.Args[0].Reg()
681                                 p.From.Offset = v.AuxInt
682                                 p.To.Type = obj.TYPE_REG
683                                 p.To.Reg = v.Reg()
684                         }
685
686                 case *obj.LSym, *gc.Node:
687                         p := s.Prog(ppc64.AMOVD)
688                         p.From.Type = obj.TYPE_ADDR
689                         p.From.Reg = v.Args[0].Reg()
690                         p.To.Type = obj.TYPE_REG
691                         p.To.Reg = v.Reg()
692                         gc.AddAux(&p.From, v)
693
694                 }
695
696         case ssa.OpPPC64MOVDconst:
697                 p := s.Prog(v.Op.Asm())
698                 p.From.Type = obj.TYPE_CONST
699                 p.From.Offset = v.AuxInt
700                 p.To.Type = obj.TYPE_REG
701                 p.To.Reg = v.Reg()
702
703         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
704                 p := s.Prog(v.Op.Asm())
705                 p.From.Type = obj.TYPE_FCONST
706                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
707                 p.To.Type = obj.TYPE_REG
708                 p.To.Reg = v.Reg()
709
710         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
711                 p := s.Prog(v.Op.Asm())
712                 p.From.Type = obj.TYPE_REG
713                 p.From.Reg = v.Args[0].Reg()
714                 p.To.Type = obj.TYPE_REG
715                 p.To.Reg = v.Args[1].Reg()
716
717         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
718                 p := s.Prog(v.Op.Asm())
719                 p.From.Type = obj.TYPE_REG
720                 p.From.Reg = v.Args[0].Reg()
721                 p.To.Type = obj.TYPE_CONST
722                 p.To.Offset = v.AuxInt
723
724         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
725                 // Shift in register to required size
726                 p := s.Prog(v.Op.Asm())
727                 p.From.Type = obj.TYPE_REG
728                 p.From.Reg = v.Args[0].Reg()
729                 p.To.Reg = v.Reg()
730                 p.To.Type = obj.TYPE_REG
731
732         case ssa.OpPPC64MOVDload:
733
734                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
735                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
736                 // the offset is not known until link time. If the load of a go.string uses relocation for the
737                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
738                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
739                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
740                 // go.string types because other types will have proper alignment.
741
742                 gostring := false
743                 switch n := v.Aux.(type) {
744                 case *obj.LSym:
745                         gostring = strings.HasPrefix(n.Name, "go.string.")
746                 }
747                 if gostring {
748                         // Generate full addr of the go.string const
749                         // including AuxInt
750                         p := s.Prog(ppc64.AMOVD)
751                         p.From.Type = obj.TYPE_ADDR
752                         p.From.Reg = v.Args[0].Reg()
753                         gc.AddAux(&p.From, v)
754                         p.To.Type = obj.TYPE_REG
755                         p.To.Reg = v.Reg()
756                         // Load go.string using 0 offset
757                         p = s.Prog(v.Op.Asm())
758                         p.From.Type = obj.TYPE_MEM
759                         p.From.Reg = v.Reg()
760                         p.To.Type = obj.TYPE_REG
761                         p.To.Reg = v.Reg()
762                         break
763                 }
764                 // Not a go.string, generate a normal load
765                 fallthrough
766
767         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
768                 p := s.Prog(v.Op.Asm())
769                 p.From.Type = obj.TYPE_MEM
770                 p.From.Reg = v.Args[0].Reg()
771                 gc.AddAux(&p.From, v)
772                 p.To.Type = obj.TYPE_REG
773                 p.To.Reg = v.Reg()
774
775         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
776                 p := s.Prog(v.Op.Asm())
777                 p.From.Type = obj.TYPE_MEM
778                 p.From.Reg = v.Args[0].Reg()
779                 p.To.Type = obj.TYPE_REG
780                 p.To.Reg = v.Reg()
781
782         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
783                 p := s.Prog(v.Op.Asm())
784                 p.To.Type = obj.TYPE_MEM
785                 p.To.Reg = v.Args[0].Reg()
786                 p.From.Type = obj.TYPE_REG
787                 p.From.Reg = v.Args[1].Reg()
788
789         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
790                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
791                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
792                 p := s.Prog(v.Op.Asm())
793                 p.From.Type = obj.TYPE_MEM
794                 p.From.Reg = v.Args[0].Reg()
795                 p.From.Index = v.Args[1].Reg()
796                 gc.AddAux(&p.From, v)
797                 p.To.Type = obj.TYPE_REG
798                 p.To.Reg = v.Reg()
799
800         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
801                 p := s.Prog(v.Op.Asm())
802                 p.From.Type = obj.TYPE_REG
803                 p.From.Reg = ppc64.REGZERO
804                 p.To.Type = obj.TYPE_MEM
805                 p.To.Reg = v.Args[0].Reg()
806                 gc.AddAux(&p.To, v)
807
808         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
809                 p := s.Prog(v.Op.Asm())
810                 p.From.Type = obj.TYPE_REG
811                 p.From.Reg = v.Args[1].Reg()
812                 p.To.Type = obj.TYPE_MEM
813                 p.To.Reg = v.Args[0].Reg()
814                 gc.AddAux(&p.To, v)
815
816         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
817                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
818                 ssa.OpPPC64MOVHBRstoreidx:
819                 p := s.Prog(v.Op.Asm())
820                 p.From.Type = obj.TYPE_REG
821                 p.From.Reg = v.Args[2].Reg()
822                 p.To.Index = v.Args[1].Reg()
823                 p.To.Type = obj.TYPE_MEM
824                 p.To.Reg = v.Args[0].Reg()
825                 gc.AddAux(&p.To, v)
826
827         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
828                 // ISEL, ISELB
829                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
830                 // ISEL only accepts 0, 1, 2 condition values but the others can be
831                 // achieved by swapping operand order.
832                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
833                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
834                 // ISELB is used when a boolean result is needed, returning 0 or 1
835                 p := s.Prog(ppc64.AISEL)
836                 p.To.Type = obj.TYPE_REG
837                 p.To.Reg = v.Reg()
838                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
839                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
840                 if v.Op == ssa.OpPPC64ISEL {
841                         r.Reg = v.Args[1].Reg()
842                 }
843                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
844                 if v.AuxInt > 3 {
845                         p.Reg = r.Reg
846                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
847                 } else {
848                         p.Reg = v.Args[0].Reg()
849                         p.SetFrom3(r)
850                 }
851                 p.From.Type = obj.TYPE_CONST
852                 p.From.Offset = v.AuxInt & 3
853
854         case ssa.OpPPC64LoweredZero:
855
856                 // unaligned data doesn't hurt performance
857                 // for these instructions on power8 or later
858
859                 // for sizes >= 64 generate a loop as follows:
860
861                 // set up loop counter in CTR, used by BC
862                 //       XXLXOR VS32,VS32,VS32
863                 //       MOVD len/32,REG_TMP
864                 //       MOVD REG_TMP,CTR
865                 //       MOVD $16,REG_TMP
866                 //       loop:
867                 //       STXVD2X VS32,(R0)(R3)
868                 //       STXVD2X VS32,(R31)(R3)
869                 //       ADD  $32,R3
870                 //       BC   16, 0, loop
871                 //
872                 // any remainder is done as described below
873
874                 // for sizes < 64 bytes, first clear as many doublewords as possible,
875                 // then handle the remainder
876                 //      MOVD R0,(R3)
877                 //      MOVD R0,8(R3)
878                 // .... etc.
879                 //
880                 // the remainder bytes are cleared using one or more
881                 // of the following instructions with the appropriate
882                 // offsets depending which instructions are needed
883                 //
884                 //      MOVW R0,n1(R3)  4 bytes
885                 //      MOVH R0,n2(R3)  2 bytes
886                 //      MOVB R0,n3(R3)  1 byte
887                 //
888                 // 7 bytes: MOVW, MOVH, MOVB
889                 // 6 bytes: MOVW, MOVH
890                 // 5 bytes: MOVW, MOVB
891                 // 3 bytes: MOVH, MOVB
892
893                 // each loop iteration does 32 bytes
894                 ctr := v.AuxInt / 32
895
896                 // remainder bytes
897                 rem := v.AuxInt % 32
898
899                 // only generate a loop if there is more
900                 // than 1 iteration.
901                 if ctr > 1 {
902                         // Set up VS32 (V0) to hold 0s
903                         p := s.Prog(ppc64.AXXLXOR)
904                         p.From.Type = obj.TYPE_REG
905                         p.From.Reg = ppc64.REG_VS32
906                         p.To.Type = obj.TYPE_REG
907                         p.To.Reg = ppc64.REG_VS32
908                         p.Reg = ppc64.REG_VS32
909
910                         // Set up CTR loop counter
911                         p = s.Prog(ppc64.AMOVD)
912                         p.From.Type = obj.TYPE_CONST
913                         p.From.Offset = ctr
914                         p.To.Type = obj.TYPE_REG
915                         p.To.Reg = ppc64.REGTMP
916
917                         p = s.Prog(ppc64.AMOVD)
918                         p.From.Type = obj.TYPE_REG
919                         p.From.Reg = ppc64.REGTMP
920                         p.To.Type = obj.TYPE_REG
921                         p.To.Reg = ppc64.REG_CTR
922
923                         // Set up R31 to hold index value 16
924                         p = s.Prog(ppc64.AMOVD)
925                         p.From.Type = obj.TYPE_CONST
926                         p.From.Offset = 16
927                         p.To.Type = obj.TYPE_REG
928                         p.To.Reg = ppc64.REGTMP
929
930                         // generate 2 STXVD2Xs, each storing 16 bytes (32 bytes per iteration)
931                         // when this is a loop then the top must be saved
932                         var top *obj.Prog
933                         // This is the top of loop
934                         p = s.Prog(ppc64.ASTXVD2X)
935                         p.From.Type = obj.TYPE_REG
936                         p.From.Reg = ppc64.REG_VS32
937                         p.To.Type = obj.TYPE_MEM
938                         p.To.Reg = v.Args[0].Reg()
939                         p.To.Index = ppc64.REGZERO
940                         // Save the top of loop
941                         if top == nil {
942                                 top = p
943                         }
944
945                         p = s.Prog(ppc64.ASTXVD2X)
946                         p.From.Type = obj.TYPE_REG
947                         p.From.Reg = ppc64.REG_VS32
948                         p.To.Type = obj.TYPE_MEM
949                         p.To.Reg = v.Args[0].Reg()
950                         p.To.Index = ppc64.REGTMP
951
952                         // Increment address for the
953                         // 4 doublewords just zeroed.
954                         p = s.Prog(ppc64.AADD)
955                         p.Reg = v.Args[0].Reg()
956                         p.From.Type = obj.TYPE_CONST
957                         p.From.Offset = 32
958                         p.To.Type = obj.TYPE_REG
959                         p.To.Reg = v.Args[0].Reg()
960
961                         // Branch back to top of loop
962                         // based on CTR
963                         // BC with BO_BCTR generates bdnz
964                         p = s.Prog(ppc64.ABC)
965                         p.From.Type = obj.TYPE_CONST
966                         p.From.Offset = ppc64.BO_BCTR
967                         p.Reg = ppc64.REG_R0
968                         p.To.Type = obj.TYPE_BRANCH
969                         gc.Patch(p, top)
970                 }
971
972                 // when ctr == 1 the loop was not generated but
973                 // there are at least 32 bytes to clear, so add
974                 // that to the remainder to generate the code
975                 // to clear those doublewords
976                 if ctr == 1 {
977                         rem += 32
978                 }
979
980                 // clear the remainder starting at offset zero
981                 offset := int64(0)
982
983                 // first clear as many doublewords as possible
984                 // then clear remaining sizes as available
985                 for rem > 0 {
986                         op, size := ppc64.AMOVB, int64(1)
987                         switch {
988                         case rem >= 8:
989                                 op, size = ppc64.AMOVD, 8
990                         case rem >= 4:
991                                 op, size = ppc64.AMOVW, 4
992                         case rem >= 2:
993                                 op, size = ppc64.AMOVH, 2
994                         }
995                         p := s.Prog(op)
996                         p.From.Type = obj.TYPE_REG
997                         p.From.Reg = ppc64.REG_R0
998                         p.To.Type = obj.TYPE_MEM
999                         p.To.Reg = v.Args[0].Reg()
1000                         p.To.Offset = offset
1001                         rem -= size
1002                         offset += size
1003                 }
1004
1005         case ssa.OpPPC64LoweredMove:
1006
1007                 // This will be used when moving more
1008                 // than 8 bytes.  Moves start with
1009                 // as many 8 byte moves as possible, then
1010                 // 4, 2, or 1 byte(s) as remaining.  This will
1011                 // work and be efficient for power8 or later.
1012                 // If there are 64 or more bytes, then a
1013                 // loop is generated to move 32 bytes and
1014                 // update the src and dst addresses on each
1015                 // iteration. When < 64 bytes, the appropriate
1016                 // number of moves are generated based on the
1017                 // size.
1018                 // When moving >= 64 bytes a loop is used
1019                 //      MOVD len/32,REG_TMP
1020                 //      MOVD REG_TMP,CTR
1021                 //      MOVD $16,REG_TMP
1022                 // top:
1023                 //      LXVD2X (R0)(R4),VS32
1024                 //      LXVD2X (R31)(R4),VS33
1025                 //      ADD $32,R4
1026                 //      STXVD2X VS32,(R0)(R3)
1027                 //      STXVD2X VS33,(R31)(R3)
1028                 //      ADD $32,R3
1029                 //      BC 16,0,top
1030                 // Bytes not moved by this loop are moved
1031                 // with a combination of the following instructions,
1032                 // starting with the largest sizes and generating as
1033                 // many as needed, using the appropriate offset value.
1034                 //      MOVD  n(R4),R14
1035                 //      MOVD  R14,n(R3)
1036                 //      MOVW  n1(R4),R14
1037                 //      MOVW  R14,n1(R3)
1038                 //      MOVH  n2(R4),R14
1039                 //      MOVH  R14,n2(R3)
1040                 //      MOVB  n3(R4),R14
1041                 //      MOVB  R14,n3(R3)
1042
1043                 // Each loop iteration moves 32 bytes
1044                 ctr := v.AuxInt / 32
1045
1046                 // Remainder after the loop
1047                 rem := v.AuxInt % 32
1048
1049                 dst_reg := v.Args[0].Reg()
1050                 src_reg := v.Args[1].Reg()
1051
1052                 // The set of registers used here, must match the clobbered reg list
1053                 // in PPC64Ops.go.
1054                 offset := int64(0)
1055
1056                 // top of the loop
1057                 var top *obj.Prog
1058                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1059                 if ctr > 1 {
1060                         // Set up the CTR
1061                         p := s.Prog(ppc64.AMOVD)
1062                         p.From.Type = obj.TYPE_CONST
1063                         p.From.Offset = ctr
1064                         p.To.Type = obj.TYPE_REG
1065                         p.To.Reg = ppc64.REGTMP
1066
1067                         p = s.Prog(ppc64.AMOVD)
1068                         p.From.Type = obj.TYPE_REG
1069                         p.From.Reg = ppc64.REGTMP
1070                         p.To.Type = obj.TYPE_REG
1071                         p.To.Reg = ppc64.REG_CTR
1072
1073                         // Use REGTMP as index reg
1074                         p = s.Prog(ppc64.AMOVD)
1075                         p.From.Type = obj.TYPE_CONST
1076                         p.From.Offset = 16
1077                         p.To.Type = obj.TYPE_REG
1078                         p.To.Reg = ppc64.REGTMP
1079
1080                         // Generate 16 byte loads and stores.
1081                         // Use temp register for index (16)
1082                         // on the second one.
1083                         p = s.Prog(ppc64.ALXVD2X)
1084                         p.From.Type = obj.TYPE_MEM
1085                         p.From.Reg = src_reg
1086                         p.From.Index = ppc64.REGZERO
1087                         p.To.Type = obj.TYPE_REG
1088                         p.To.Reg = ppc64.REG_VS32
1089
1090                         if top == nil {
1091                                 top = p
1092                         }
1093
1094                         p = s.Prog(ppc64.ALXVD2X)
1095                         p.From.Type = obj.TYPE_MEM
1096                         p.From.Reg = src_reg
1097                         p.From.Index = ppc64.REGTMP
1098                         p.To.Type = obj.TYPE_REG
1099                         p.To.Reg = ppc64.REG_VS33
1100
1101                         // increment the src reg for next iteration
1102                         p = s.Prog(ppc64.AADD)
1103                         p.Reg = src_reg
1104                         p.From.Type = obj.TYPE_CONST
1105                         p.From.Offset = 32
1106                         p.To.Type = obj.TYPE_REG
1107                         p.To.Reg = src_reg
1108
1109                         // generate 16 byte stores
1110                         p = s.Prog(ppc64.ASTXVD2X)
1111                         p.From.Type = obj.TYPE_REG
1112                         p.From.Reg = ppc64.REG_VS32
1113                         p.To.Type = obj.TYPE_MEM
1114                         p.To.Reg = dst_reg
1115                         p.To.Index = ppc64.REGZERO
1116
1117                         p = s.Prog(ppc64.ASTXVD2X)
1118                         p.From.Type = obj.TYPE_REG
1119                         p.From.Reg = ppc64.REG_VS33
1120                         p.To.Type = obj.TYPE_MEM
1121                         p.To.Reg = dst_reg
1122                         p.To.Index = ppc64.REGTMP
1123
1124                         // increment the dst reg for next iteration
1125                         p = s.Prog(ppc64.AADD)
1126                         p.Reg = dst_reg
1127                         p.From.Type = obj.TYPE_CONST
1128                         p.From.Offset = 32
1129                         p.To.Type = obj.TYPE_REG
1130                         p.To.Reg = dst_reg
1131
1132                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1133                         // to loop top.
1134                         p = s.Prog(ppc64.ABC)
1135                         p.From.Type = obj.TYPE_CONST
1136                         p.From.Offset = ppc64.BO_BCTR
1137                         p.Reg = ppc64.REG_R0
1138                         p.To.Type = obj.TYPE_BRANCH
1139                         gc.Patch(p, top)
1140
1141                         // src_reg and dst_reg were incremented in the loop, so
1142                         // later instructions start with offset 0.
1143                         offset = int64(0)
1144                 }
1145
1146                 // No loop was generated for one iteration, so
1147                 // add 32 bytes to the remainder to move those bytes.
1148                 if ctr == 1 {
1149                         rem += 32
1150                 }
1151
1152                 if rem >= 16 {
1153                         // Generate 16 byte loads and stores.
1154                         // Use temp register for index (value 16)
1155                         // on the second one.
1156                         p := s.Prog(ppc64.ALXVD2X)
1157                         p.From.Type = obj.TYPE_MEM
1158                         p.From.Reg = src_reg
1159                         p.From.Index = ppc64.REGZERO
1160                         p.To.Type = obj.TYPE_REG
1161                         p.To.Reg = ppc64.REG_VS32
1162
1163                         p = s.Prog(ppc64.ASTXVD2X)
1164                         p.From.Type = obj.TYPE_REG
1165                         p.From.Reg = ppc64.REG_VS32
1166                         p.To.Type = obj.TYPE_MEM
1167                         p.To.Reg = dst_reg
1168                         p.To.Index = ppc64.REGZERO
1169
1170                         offset = 16
1171                         rem -= 16
1172
1173                         if rem >= 16 {
1174                                 // Use REGTMP as index reg
1175                                 p = s.Prog(ppc64.AMOVD)
1176                                 p.From.Type = obj.TYPE_CONST
1177                                 p.From.Offset = 16
1178                                 p.To.Type = obj.TYPE_REG
1179                                 p.To.Reg = ppc64.REGTMP
1180
1181                                 // Generate 16 byte loads and stores.
1182                                 // Use temp register for index (16)
1183                                 // on the second one.
1184                                 p = s.Prog(ppc64.ALXVD2X)
1185                                 p.From.Type = obj.TYPE_MEM
1186                                 p.From.Reg = src_reg
1187                                 p.From.Index = ppc64.REGTMP
1188                                 p.To.Type = obj.TYPE_REG
1189                                 p.To.Reg = ppc64.REG_VS32
1190
1191                                 p = s.Prog(ppc64.ASTXVD2X)
1192                                 p.From.Type = obj.TYPE_REG
1193                                 p.From.Reg = ppc64.REG_VS32
1194                                 p.To.Type = obj.TYPE_MEM
1195                                 p.To.Reg = dst_reg
1196                                 p.To.Index = ppc64.REGTMP
1197
1198                                 offset = 32
1199                                 rem -= 16
1200                         }
1201                 }
1202
1203                 // Generate all the remaining load and store pairs, starting with
1204                 // as many 8 byte moves as possible, then 4, 2, 1.
1205                 for rem > 0 {
1206                         op, size := ppc64.AMOVB, int64(1)
1207                         switch {
1208                         case rem >= 8:
1209                                 op, size = ppc64.AMOVD, 8
1210                         case rem >= 4:
1211                                 op, size = ppc64.AMOVW, 4
1212                         case rem >= 2:
1213                                 op, size = ppc64.AMOVH, 2
1214                         }
1215                         // Load
1216                         p := s.Prog(op)
1217                         p.To.Type = obj.TYPE_REG
1218                         p.To.Reg = ppc64.REG_R14
1219                         p.From.Type = obj.TYPE_MEM
1220                         p.From.Reg = src_reg
1221                         p.From.Offset = offset
1222
1223                         // Store
1224                         p = s.Prog(op)
1225                         p.From.Type = obj.TYPE_REG
1226                         p.From.Reg = ppc64.REG_R14
1227                         p.To.Type = obj.TYPE_MEM
1228                         p.To.Reg = dst_reg
1229                         p.To.Offset = offset
1230                         rem -= size
1231                         offset += size
1232                 }
1233
1234         case ssa.OpPPC64CALLstatic:
1235                 s.Call(v)
1236
1237         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1238                 p := s.Prog(ppc64.AMOVD)
1239                 p.From.Type = obj.TYPE_REG
1240                 p.From.Reg = v.Args[0].Reg()
1241                 p.To.Type = obj.TYPE_REG
1242                 p.To.Reg = ppc64.REG_LR
1243
1244                 if v.Args[0].Reg() != ppc64.REG_R12 {
1245                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1246                 }
1247
1248                 pp := s.Call(v)
1249                 pp.To.Reg = ppc64.REG_LR
1250
1251                 if gc.Ctxt.Flag_shared {
1252                         // When compiling Go into PIC, the function we just
1253                         // called via pointer might have been implemented in
1254                         // a separate module and so overwritten the TOC
1255                         // pointer in R2; reload it.
1256                         q := s.Prog(ppc64.AMOVD)
1257                         q.From.Type = obj.TYPE_MEM
1258                         q.From.Offset = 24
1259                         q.From.Reg = ppc64.REGSP
1260                         q.To.Type = obj.TYPE_REG
1261                         q.To.Reg = ppc64.REG_R2
1262                 }
1263
1264         case ssa.OpPPC64LoweredWB:
1265                 p := s.Prog(obj.ACALL)
1266                 p.To.Type = obj.TYPE_MEM
1267                 p.To.Name = obj.NAME_EXTERN
1268                 p.To.Sym = v.Aux.(*obj.LSym)
1269
1270         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1271                 p := s.Prog(obj.ACALL)
1272                 p.To.Type = obj.TYPE_MEM
1273                 p.To.Name = obj.NAME_EXTERN
1274                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1275                 s.UseArgs(16) // space used in callee args area by assembly stubs
1276
1277         case ssa.OpPPC64LoweredNilCheck:
1278                 if objabi.GOOS == "aix" {
1279                         // CMP Rarg0, R0
1280                         // BNE 2(PC)
1281                         // STW R0, 0(R0)
1282                         // NOP (so the BNE has somewhere to land)
1283
1284                         // CMP Rarg0, R0
1285                         p := s.Prog(ppc64.ACMP)
1286                         p.From.Type = obj.TYPE_REG
1287                         p.From.Reg = v.Args[0].Reg()
1288                         p.To.Type = obj.TYPE_REG
1289                         p.To.Reg = ppc64.REG_R0
1290
1291                         // BNE 2(PC)
1292                         p2 := s.Prog(ppc64.ABNE)
1293                         p2.To.Type = obj.TYPE_BRANCH
1294
1295                         // STW R0, 0(R0)
1296                         // Write at 0 is forbidden and will trigger a SIGSEGV
1297                         p = s.Prog(ppc64.AMOVW)
1298                         p.From.Type = obj.TYPE_REG
1299                         p.From.Reg = ppc64.REG_R0
1300                         p.To.Type = obj.TYPE_MEM
1301                         p.To.Reg = ppc64.REG_R0
1302
1303                         // NOP (so the BNE has somewhere to land)
1304                         nop := s.Prog(obj.ANOP)
1305                         gc.Patch(p2, nop)
1306
1307                 } else {
1308                         // Issue a load which will fault if arg is nil.
1309                         p := s.Prog(ppc64.AMOVBZ)
1310                         p.From.Type = obj.TYPE_MEM
1311                         p.From.Reg = v.Args[0].Reg()
1312                         gc.AddAux(&p.From, v)
1313                         p.To.Type = obj.TYPE_REG
1314                         p.To.Reg = ppc64.REGTMP
1315                 }
1316                 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1317                         gc.Warnl(v.Pos, "generated nil check")
1318                 }
1319
1320         // These should be resolved by rules and not make it here.
1321         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1322                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1323                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1324                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1325         case ssa.OpPPC64InvertFlags:
1326                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1327         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1328                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1329         case ssa.OpClobber:
1330                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1331         default:
1332                 v.Fatalf("genValue not implemented: %s", v.LongString())
1333         }
1334 }
1335
1336 var blockJump = [...]struct {
1337         asm, invasm     obj.As
1338         asmeq, invasmun bool
1339 }{
1340         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1341         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1342
1343         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1344         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1345         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1346         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1347
1348         // TODO: need to work FP comparisons into block jumps
1349         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1350         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1351         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1352         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1353 }
1354
1355 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1356         switch b.Kind {
1357         case ssa.BlockDefer:
1358                 // defer returns in R3:
1359                 // 0 if we should continue executing
1360                 // 1 if we should jump to deferreturn call
1361                 p := s.Prog(ppc64.ACMP)
1362                 p.From.Type = obj.TYPE_REG
1363                 p.From.Reg = ppc64.REG_R3
1364                 p.To.Type = obj.TYPE_REG
1365                 p.To.Reg = ppc64.REG_R0
1366
1367                 p = s.Prog(ppc64.ABNE)
1368                 p.To.Type = obj.TYPE_BRANCH
1369                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1370                 if b.Succs[0].Block() != next {
1371                         p := s.Prog(obj.AJMP)
1372                         p.To.Type = obj.TYPE_BRANCH
1373                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1374                 }
1375
1376         case ssa.BlockPlain:
1377                 if b.Succs[0].Block() != next {
1378                         p := s.Prog(obj.AJMP)
1379                         p.To.Type = obj.TYPE_BRANCH
1380                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1381                 }
1382         case ssa.BlockExit:
1383         case ssa.BlockRet:
1384                 s.Prog(obj.ARET)
1385         case ssa.BlockRetJmp:
1386                 p := s.Prog(obj.AJMP)
1387                 p.To.Type = obj.TYPE_MEM
1388                 p.To.Name = obj.NAME_EXTERN
1389                 p.To.Sym = b.Aux.(*obj.LSym)
1390
1391         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1392                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1393                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1394                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1395                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1396                 jmp := blockJump[b.Kind]
1397                 switch next {
1398                 case b.Succs[0].Block():
1399                         s.Br(jmp.invasm, b.Succs[1].Block())
1400                         if jmp.invasmun {
1401                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1402                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1403                         }
1404                 case b.Succs[1].Block():
1405                         s.Br(jmp.asm, b.Succs[0].Block())
1406                         if jmp.asmeq {
1407                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1408                         }
1409                 default:
1410                         if b.Likely != ssa.BranchUnlikely {
1411                                 s.Br(jmp.asm, b.Succs[0].Block())
1412                                 if jmp.asmeq {
1413                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1414                                 }
1415                                 s.Br(obj.AJMP, b.Succs[1].Block())
1416                         } else {
1417                                 s.Br(jmp.invasm, b.Succs[1].Block())
1418                                 if jmp.invasmun {
1419                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1420                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1421                                 }
1422                                 s.Br(obj.AJMP, b.Succs[0].Block())
1423                         }
1424                 }
1425         default:
1426                 b.Fatalf("branch not implemented: %s", b.LongString())
1427         }
1428 }