]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
[dev.boringcrypto] all: merge master into dev.boringcrypto
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/gc"
9         "cmd/compile/internal/logopt"
10         "cmd/compile/internal/ssa"
11         "cmd/compile/internal/types"
12         "cmd/internal/obj"
13         "cmd/internal/obj/ppc64"
14         "cmd/internal/objabi"
15         "math"
16         "strings"
17 )
18
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21         //      flive := b.FlagsLiveAtEnd
22         //      if b.Control != nil && b.Control.Type.IsFlags() {
23         //              flive = true
24         //      }
25         //      for i := len(b.Values) - 1; i >= 0; i-- {
26         //              v := b.Values[i]
27         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
28         //                      // The "mark" is any non-nil Aux value.
29         //                      v.Aux = v
30         //              }
31         //              if v.Type.IsFlags() {
32         //                      flive = false
33         //              }
34         //              for _, a := range v.Args {
35         //                      if a.Type.IsFlags() {
36         //                              flive = true
37         //                      }
38         //              }
39         //      }
40 }
41
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
44         if t.IsFloat() {
45                 switch t.Size() {
46                 case 4:
47                         return ppc64.AFMOVS
48                 case 8:
49                         return ppc64.AFMOVD
50                 }
51         } else {
52                 switch t.Size() {
53                 case 1:
54                         if t.IsSigned() {
55                                 return ppc64.AMOVB
56                         } else {
57                                 return ppc64.AMOVBZ
58                         }
59                 case 2:
60                         if t.IsSigned() {
61                                 return ppc64.AMOVH
62                         } else {
63                                 return ppc64.AMOVHZ
64                         }
65                 case 4:
66                         if t.IsSigned() {
67                                 return ppc64.AMOVW
68                         } else {
69                                 return ppc64.AMOVWZ
70                         }
71                 case 8:
72                         return ppc64.AMOVD
73                 }
74         }
75         panic("bad load type")
76 }
77
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
80         if t.IsFloat() {
81                 switch t.Size() {
82                 case 4:
83                         return ppc64.AFMOVS
84                 case 8:
85                         return ppc64.AFMOVD
86                 }
87         } else {
88                 switch t.Size() {
89                 case 1:
90                         return ppc64.AMOVB
91                 case 2:
92                         return ppc64.AMOVH
93                 case 4:
94                         return ppc64.AMOVW
95                 case 8:
96                         return ppc64.AMOVD
97                 }
98         }
99         panic("bad store type")
100 }
101
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
103         switch v.Op {
104         case ssa.OpCopy:
105                 t := v.Type
106                 if t.IsMemory() {
107                         return
108                 }
109                 x := v.Args[0].Reg()
110                 y := v.Reg()
111                 if x != y {
112                         rt := obj.TYPE_REG
113                         op := ppc64.AMOVD
114
115                         if t.IsFloat() {
116                                 op = ppc64.AFMOVD
117                         }
118                         p := s.Prog(op)
119                         p.From.Type = rt
120                         p.From.Reg = x
121                         p.To.Type = rt
122                         p.To.Reg = y
123                 }
124
125         case ssa.OpPPC64LoweredMuluhilo:
126                 // MULHDU       Rarg1, Rarg0, Reg0
127                 // MULLD        Rarg1, Rarg0, Reg1
128                 r0 := v.Args[0].Reg()
129                 r1 := v.Args[1].Reg()
130                 p := s.Prog(ppc64.AMULHDU)
131                 p.From.Type = obj.TYPE_REG
132                 p.From.Reg = r1
133                 p.Reg = r0
134                 p.To.Type = obj.TYPE_REG
135                 p.To.Reg = v.Reg0()
136                 p1 := s.Prog(ppc64.AMULLD)
137                 p1.From.Type = obj.TYPE_REG
138                 p1.From.Reg = r1
139                 p1.Reg = r0
140                 p1.To.Type = obj.TYPE_REG
141                 p1.To.Reg = v.Reg1()
142
143         case ssa.OpPPC64LoweredAdd64Carry:
144                 // ADDC         Rarg2, -1, Rtmp
145                 // ADDE         Rarg1, Rarg0, Reg0
146                 // ADDZE        Rzero, Reg1
147                 r0 := v.Args[0].Reg()
148                 r1 := v.Args[1].Reg()
149                 r2 := v.Args[2].Reg()
150                 p := s.Prog(ppc64.AADDC)
151                 p.From.Type = obj.TYPE_CONST
152                 p.From.Offset = -1
153                 p.Reg = r2
154                 p.To.Type = obj.TYPE_REG
155                 p.To.Reg = ppc64.REGTMP
156                 p1 := s.Prog(ppc64.AADDE)
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.Reg = r0
160                 p1.To.Type = obj.TYPE_REG
161                 p1.To.Reg = v.Reg0()
162                 p2 := s.Prog(ppc64.AADDZE)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGZERO
165                 p2.To.Type = obj.TYPE_REG
166                 p2.To.Reg = v.Reg1()
167
168         case ssa.OpPPC64LoweredAtomicAnd8,
169                 ssa.OpPPC64LoweredAtomicOr8:
170                 // LWSYNC
171                 // LBAR         (Rarg0), Rtmp
172                 // AND/OR       Rarg1, Rtmp
173                 // STBCCC       Rtmp, (Rarg0)
174                 // BNE          -3(PC)
175                 r0 := v.Args[0].Reg()
176                 r1 := v.Args[1].Reg()
177                 // LWSYNC - Assuming shared data not write-through-required nor
178                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179                 plwsync := s.Prog(ppc64.ALWSYNC)
180                 plwsync.To.Type = obj.TYPE_NONE
181                 p := s.Prog(ppc64.ALBAR)
182                 p.From.Type = obj.TYPE_MEM
183                 p.From.Reg = r0
184                 p.To.Type = obj.TYPE_REG
185                 p.To.Reg = ppc64.REGTMP
186                 p1 := s.Prog(v.Op.Asm())
187                 p1.From.Type = obj.TYPE_REG
188                 p1.From.Reg = r1
189                 p1.To.Type = obj.TYPE_REG
190                 p1.To.Reg = ppc64.REGTMP
191                 p2 := s.Prog(ppc64.ASTBCCC)
192                 p2.From.Type = obj.TYPE_REG
193                 p2.From.Reg = ppc64.REGTMP
194                 p2.To.Type = obj.TYPE_MEM
195                 p2.To.Reg = r0
196                 p2.RegTo2 = ppc64.REGTMP
197                 p3 := s.Prog(ppc64.ABNE)
198                 p3.To.Type = obj.TYPE_BRANCH
199                 gc.Patch(p3, p)
200
201         case ssa.OpPPC64LoweredAtomicAdd32,
202                 ssa.OpPPC64LoweredAtomicAdd64:
203                 // LWSYNC
204                 // LDAR/LWAR    (Rarg0), Rout
205                 // ADD          Rarg1, Rout
206                 // STDCCC/STWCCC Rout, (Rarg0)
207                 // BNE         -3(PC)
208                 // MOVWZ        Rout,Rout (if Add32)
209                 ld := ppc64.ALDAR
210                 st := ppc64.ASTDCCC
211                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
212                         ld = ppc64.ALWAR
213                         st = ppc64.ASTWCCC
214                 }
215                 r0 := v.Args[0].Reg()
216                 r1 := v.Args[1].Reg()
217                 out := v.Reg0()
218                 // LWSYNC - Assuming shared data not write-through-required nor
219                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220                 plwsync := s.Prog(ppc64.ALWSYNC)
221                 plwsync.To.Type = obj.TYPE_NONE
222                 // LDAR or LWAR
223                 p := s.Prog(ld)
224                 p.From.Type = obj.TYPE_MEM
225                 p.From.Reg = r0
226                 p.To.Type = obj.TYPE_REG
227                 p.To.Reg = out
228                 // ADD reg1,out
229                 p1 := s.Prog(ppc64.AADD)
230                 p1.From.Type = obj.TYPE_REG
231                 p1.From.Reg = r1
232                 p1.To.Reg = out
233                 p1.To.Type = obj.TYPE_REG
234                 // STDCCC or STWCCC
235                 p3 := s.Prog(st)
236                 p3.From.Type = obj.TYPE_REG
237                 p3.From.Reg = out
238                 p3.To.Type = obj.TYPE_MEM
239                 p3.To.Reg = r0
240                 // BNE retry
241                 p4 := s.Prog(ppc64.ABNE)
242                 p4.To.Type = obj.TYPE_BRANCH
243                 gc.Patch(p4, p)
244
245                 // Ensure a 32 bit result
246                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247                         p5 := s.Prog(ppc64.AMOVWZ)
248                         p5.To.Type = obj.TYPE_REG
249                         p5.To.Reg = out
250                         p5.From.Type = obj.TYPE_REG
251                         p5.From.Reg = out
252                 }
253
254         case ssa.OpPPC64LoweredAtomicExchange32,
255                 ssa.OpPPC64LoweredAtomicExchange64:
256                 // LWSYNC
257                 // LDAR/LWAR    (Rarg0), Rout
258                 // STDCCC/STWCCC Rarg1, (Rarg0)
259                 // BNE         -2(PC)
260                 // ISYNC
261                 ld := ppc64.ALDAR
262                 st := ppc64.ASTDCCC
263                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
264                         ld = ppc64.ALWAR
265                         st = ppc64.ASTWCCC
266                 }
267                 r0 := v.Args[0].Reg()
268                 r1 := v.Args[1].Reg()
269                 out := v.Reg0()
270                 // LWSYNC - Assuming shared data not write-through-required nor
271                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272                 plwsync := s.Prog(ppc64.ALWSYNC)
273                 plwsync.To.Type = obj.TYPE_NONE
274                 // LDAR or LWAR
275                 p := s.Prog(ld)
276                 p.From.Type = obj.TYPE_MEM
277                 p.From.Reg = r0
278                 p.To.Type = obj.TYPE_REG
279                 p.To.Reg = out
280                 // STDCCC or STWCCC
281                 p1 := s.Prog(st)
282                 p1.From.Type = obj.TYPE_REG
283                 p1.From.Reg = r1
284                 p1.To.Type = obj.TYPE_MEM
285                 p1.To.Reg = r0
286                 // BNE retry
287                 p2 := s.Prog(ppc64.ABNE)
288                 p2.To.Type = obj.TYPE_BRANCH
289                 gc.Patch(p2, p)
290                 // ISYNC
291                 pisync := s.Prog(ppc64.AISYNC)
292                 pisync.To.Type = obj.TYPE_NONE
293
294         case ssa.OpPPC64LoweredAtomicLoad8,
295                 ssa.OpPPC64LoweredAtomicLoad32,
296                 ssa.OpPPC64LoweredAtomicLoad64,
297                 ssa.OpPPC64LoweredAtomicLoadPtr:
298                 // SYNC
299                 // MOVB/MOVD/MOVW (Rarg0), Rout
300                 // CMP Rout,Rout
301                 // BNE 1(PC)
302                 // ISYNC
303                 ld := ppc64.AMOVD
304                 cmp := ppc64.ACMP
305                 switch v.Op {
306                 case ssa.OpPPC64LoweredAtomicLoad8:
307                         ld = ppc64.AMOVBZ
308                 case ssa.OpPPC64LoweredAtomicLoad32:
309                         ld = ppc64.AMOVWZ
310                         cmp = ppc64.ACMPW
311                 }
312                 arg0 := v.Args[0].Reg()
313                 out := v.Reg0()
314                 // SYNC when AuxInt == 1; otherwise, load-acquire
315                 if v.AuxInt == 1 {
316                         psync := s.Prog(ppc64.ASYNC)
317                         psync.To.Type = obj.TYPE_NONE
318                 }
319                 // Load
320                 p := s.Prog(ld)
321                 p.From.Type = obj.TYPE_MEM
322                 p.From.Reg = arg0
323                 p.To.Type = obj.TYPE_REG
324                 p.To.Reg = out
325                 // CMP
326                 p1 := s.Prog(cmp)
327                 p1.From.Type = obj.TYPE_REG
328                 p1.From.Reg = out
329                 p1.To.Type = obj.TYPE_REG
330                 p1.To.Reg = out
331                 // BNE
332                 p2 := s.Prog(ppc64.ABNE)
333                 p2.To.Type = obj.TYPE_BRANCH
334                 // ISYNC
335                 pisync := s.Prog(ppc64.AISYNC)
336                 pisync.To.Type = obj.TYPE_NONE
337                 gc.Patch(p2, pisync)
338
339         case ssa.OpPPC64LoweredAtomicStore8,
340                 ssa.OpPPC64LoweredAtomicStore32,
341                 ssa.OpPPC64LoweredAtomicStore64:
342                 // SYNC or LWSYNC
343                 // MOVB/MOVW/MOVD arg1,(arg0)
344                 st := ppc64.AMOVD
345                 switch v.Op {
346                 case ssa.OpPPC64LoweredAtomicStore8:
347                         st = ppc64.AMOVB
348                 case ssa.OpPPC64LoweredAtomicStore32:
349                         st = ppc64.AMOVW
350                 }
351                 arg0 := v.Args[0].Reg()
352                 arg1 := v.Args[1].Reg()
353                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
354                 // SYNC
355                 syncOp := ppc64.ASYNC
356                 if v.AuxInt == 0 {
357                         syncOp = ppc64.ALWSYNC
358                 }
359                 psync := s.Prog(syncOp)
360                 psync.To.Type = obj.TYPE_NONE
361                 // Store
362                 p := s.Prog(st)
363                 p.To.Type = obj.TYPE_MEM
364                 p.To.Reg = arg0
365                 p.From.Type = obj.TYPE_REG
366                 p.From.Reg = arg1
367
368         case ssa.OpPPC64LoweredAtomicCas64,
369                 ssa.OpPPC64LoweredAtomicCas32:
370                 // LWSYNC
371                 // loop:
372                 // LDAR        (Rarg0), MutexHint, Rtmp
373                 // CMP         Rarg1, Rtmp
374                 // BNE         fail
375                 // STDCCC      Rarg2, (Rarg0)
376                 // BNE         loop
377                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
378                 // MOVD        $1, Rout
379                 // BR          end
380                 // fail:
381                 // MOVD        $0, Rout
382                 // end:
383                 ld := ppc64.ALDAR
384                 st := ppc64.ASTDCCC
385                 cmp := ppc64.ACMP
386                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
387                         ld = ppc64.ALWAR
388                         st = ppc64.ASTWCCC
389                         cmp = ppc64.ACMPW
390                 }
391                 r0 := v.Args[0].Reg()
392                 r1 := v.Args[1].Reg()
393                 r2 := v.Args[2].Reg()
394                 out := v.Reg0()
395                 // LWSYNC - Assuming shared data not write-through-required nor
396                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397                 plwsync1 := s.Prog(ppc64.ALWSYNC)
398                 plwsync1.To.Type = obj.TYPE_NONE
399                 // LDAR or LWAR
400                 p := s.Prog(ld)
401                 p.From.Type = obj.TYPE_MEM
402                 p.From.Reg = r0
403                 p.To.Type = obj.TYPE_REG
404                 p.To.Reg = ppc64.REGTMP
405                 // If it is a Compare-and-Swap-Release operation, set the EH field with
406                 // the release hint.
407                 if v.AuxInt == 0 {
408                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
409                 }
410                 // CMP reg1,reg2
411                 p1 := s.Prog(cmp)
412                 p1.From.Type = obj.TYPE_REG
413                 p1.From.Reg = r1
414                 p1.To.Reg = ppc64.REGTMP
415                 p1.To.Type = obj.TYPE_REG
416                 // BNE cas_fail
417                 p2 := s.Prog(ppc64.ABNE)
418                 p2.To.Type = obj.TYPE_BRANCH
419                 // STDCCC or STWCCC
420                 p3 := s.Prog(st)
421                 p3.From.Type = obj.TYPE_REG
422                 p3.From.Reg = r2
423                 p3.To.Type = obj.TYPE_MEM
424                 p3.To.Reg = r0
425                 // BNE retry
426                 p4 := s.Prog(ppc64.ABNE)
427                 p4.To.Type = obj.TYPE_BRANCH
428                 gc.Patch(p4, p)
429                 // LWSYNC - Assuming shared data not write-through-required nor
430                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431                 // If the operation is a CAS-Release, then synchronization is not necessary.
432                 if v.AuxInt != 0 {
433                         plwsync2 := s.Prog(ppc64.ALWSYNC)
434                         plwsync2.To.Type = obj.TYPE_NONE
435                 }
436                 // return true
437                 p5 := s.Prog(ppc64.AMOVD)
438                 p5.From.Type = obj.TYPE_CONST
439                 p5.From.Offset = 1
440                 p5.To.Type = obj.TYPE_REG
441                 p5.To.Reg = out
442                 // BR done
443                 p6 := s.Prog(obj.AJMP)
444                 p6.To.Type = obj.TYPE_BRANCH
445                 // return false
446                 p7 := s.Prog(ppc64.AMOVD)
447                 p7.From.Type = obj.TYPE_CONST
448                 p7.From.Offset = 0
449                 p7.To.Type = obj.TYPE_REG
450                 p7.To.Reg = out
451                 gc.Patch(p2, p7)
452                 // done (label)
453                 p8 := s.Prog(obj.ANOP)
454                 gc.Patch(p6, p8)
455
456         case ssa.OpPPC64LoweredGetClosurePtr:
457                 // Closure pointer is R11 (already)
458                 gc.CheckLoweredGetClosurePtr(v)
459
460         case ssa.OpPPC64LoweredGetCallerSP:
461                 // caller's SP is FixedFrameSize below the address of the first arg
462                 p := s.Prog(ppc64.AMOVD)
463                 p.From.Type = obj.TYPE_ADDR
464                 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465                 p.From.Name = obj.NAME_PARAM
466                 p.To.Type = obj.TYPE_REG
467                 p.To.Reg = v.Reg()
468
469         case ssa.OpPPC64LoweredGetCallerPC:
470                 p := s.Prog(obj.AGETCALLERPC)
471                 p.To.Type = obj.TYPE_REG
472                 p.To.Reg = v.Reg()
473
474         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475                 // input is already rounded
476
477         case ssa.OpLoadReg:
478                 loadOp := loadByType(v.Type)
479                 p := s.Prog(loadOp)
480                 gc.AddrAuto(&p.From, v.Args[0])
481                 p.To.Type = obj.TYPE_REG
482                 p.To.Reg = v.Reg()
483
484         case ssa.OpStoreReg:
485                 storeOp := storeByType(v.Type)
486                 p := s.Prog(storeOp)
487                 p.From.Type = obj.TYPE_REG
488                 p.From.Reg = v.Args[0].Reg()
489                 gc.AddrAuto(&p.To, v)
490
491         case ssa.OpPPC64DIVD:
492                 // For now,
493                 //
494                 // cmp arg1, -1
495                 // be  ahead
496                 // v = arg0 / arg1
497                 // b over
498                 // ahead: v = - arg0
499                 // over: nop
500                 r := v.Reg()
501                 r0 := v.Args[0].Reg()
502                 r1 := v.Args[1].Reg()
503
504                 p := s.Prog(ppc64.ACMP)
505                 p.From.Type = obj.TYPE_REG
506                 p.From.Reg = r1
507                 p.To.Type = obj.TYPE_CONST
508                 p.To.Offset = -1
509
510                 pbahead := s.Prog(ppc64.ABEQ)
511                 pbahead.To.Type = obj.TYPE_BRANCH
512
513                 p = s.Prog(v.Op.Asm())
514                 p.From.Type = obj.TYPE_REG
515                 p.From.Reg = r1
516                 p.Reg = r0
517                 p.To.Type = obj.TYPE_REG
518                 p.To.Reg = r
519
520                 pbover := s.Prog(obj.AJMP)
521                 pbover.To.Type = obj.TYPE_BRANCH
522
523                 p = s.Prog(ppc64.ANEG)
524                 p.To.Type = obj.TYPE_REG
525                 p.To.Reg = r
526                 p.From.Type = obj.TYPE_REG
527                 p.From.Reg = r0
528                 gc.Patch(pbahead, p)
529
530                 p = s.Prog(obj.ANOP)
531                 gc.Patch(pbover, p)
532
533         case ssa.OpPPC64DIVW:
534                 // word-width version of above
535                 r := v.Reg()
536                 r0 := v.Args[0].Reg()
537                 r1 := v.Args[1].Reg()
538
539                 p := s.Prog(ppc64.ACMPW)
540                 p.From.Type = obj.TYPE_REG
541                 p.From.Reg = r1
542                 p.To.Type = obj.TYPE_CONST
543                 p.To.Offset = -1
544
545                 pbahead := s.Prog(ppc64.ABEQ)
546                 pbahead.To.Type = obj.TYPE_BRANCH
547
548                 p = s.Prog(v.Op.Asm())
549                 p.From.Type = obj.TYPE_REG
550                 p.From.Reg = r1
551                 p.Reg = r0
552                 p.To.Type = obj.TYPE_REG
553                 p.To.Reg = r
554
555                 pbover := s.Prog(obj.AJMP)
556                 pbover.To.Type = obj.TYPE_BRANCH
557
558                 p = s.Prog(ppc64.ANEG)
559                 p.To.Type = obj.TYPE_REG
560                 p.To.Reg = r
561                 p.From.Type = obj.TYPE_REG
562                 p.From.Reg = r0
563                 gc.Patch(pbahead, p)
564
565                 p = s.Prog(obj.ANOP)
566                 gc.Patch(pbover, p)
567
568         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
575                 r := v.Reg()
576                 r1 := v.Args[0].Reg()
577                 r2 := v.Args[1].Reg()
578                 p := s.Prog(v.Op.Asm())
579                 p.From.Type = obj.TYPE_REG
580                 p.From.Reg = r2
581                 p.Reg = r1
582                 p.To.Type = obj.TYPE_REG
583                 p.To.Reg = r
584
585         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
586                 r1 := v.Args[0].Reg()
587                 r2 := v.Args[1].Reg()
588                 p := s.Prog(v.Op.Asm())
589                 p.From.Type = obj.TYPE_REG
590                 p.From.Reg = r2
591                 p.Reg = r1
592                 p.To.Type = obj.TYPE_REG
593                 p.To.Reg = ppc64.REGTMP // result is not needed
594
595         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
596                 p := s.Prog(v.Op.Asm())
597                 p.From.Type = obj.TYPE_CONST
598                 p.From.Offset = v.AuxInt
599                 p.Reg = v.Args[0].Reg()
600                 p.To.Type = obj.TYPE_REG
601                 p.To.Reg = v.Reg()
602
603         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
604                 r := v.Reg()
605                 r1 := v.Args[0].Reg()
606                 r2 := v.Args[1].Reg()
607                 r3 := v.Args[2].Reg()
608                 // r = r1*r2 ± r3
609                 p := s.Prog(v.Op.Asm())
610                 p.From.Type = obj.TYPE_REG
611                 p.From.Reg = r1
612                 p.Reg = r3
613                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
614                 p.To.Type = obj.TYPE_REG
615                 p.To.Reg = r
616
617         case ssa.OpPPC64MaskIfNotCarry:
618                 r := v.Reg()
619                 p := s.Prog(v.Op.Asm())
620                 p.From.Type = obj.TYPE_REG
621                 p.From.Reg = ppc64.REGZERO
622                 p.To.Type = obj.TYPE_REG
623                 p.To.Reg = r
624
625         case ssa.OpPPC64ADDconstForCarry:
626                 r1 := v.Args[0].Reg()
627                 p := s.Prog(v.Op.Asm())
628                 p.Reg = r1
629                 p.From.Type = obj.TYPE_CONST
630                 p.From.Offset = v.AuxInt
631                 p.To.Type = obj.TYPE_REG
632                 p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
633
634         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
635                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
636                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
637                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
638                 r := v.Reg()
639                 p := s.Prog(v.Op.Asm())
640                 p.To.Type = obj.TYPE_REG
641                 p.To.Reg = r
642                 p.From.Type = obj.TYPE_REG
643                 p.From.Reg = v.Args[0].Reg()
644
645         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
646                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
647                 p := s.Prog(v.Op.Asm())
648                 p.Reg = v.Args[0].Reg()
649                 p.From.Type = obj.TYPE_CONST
650                 p.From.Offset = v.AuxInt
651                 p.To.Type = obj.TYPE_REG
652                 p.To.Reg = v.Reg()
653
654         case ssa.OpPPC64ANDCCconst:
655                 p := s.Prog(v.Op.Asm())
656                 p.Reg = v.Args[0].Reg()
657
658                 if v.Aux != nil {
659                         p.From.Type = obj.TYPE_CONST
660                         p.From.Offset = gc.AuxOffset(v)
661                 } else {
662                         p.From.Type = obj.TYPE_CONST
663                         p.From.Offset = v.AuxInt
664                 }
665
666                 p.To.Type = obj.TYPE_REG
667                 p.To.Reg = ppc64.REGTMP // discard result
668
669         case ssa.OpPPC64MOVDaddr:
670                 switch v.Aux.(type) {
671                 default:
672                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
673                 case nil:
674                         // If aux offset and aux int are both 0, and the same
675                         // input and output regs are used, no instruction
676                         // needs to be generated, since it would just be
677                         // addi rx, rx, 0.
678                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
679                                 p := s.Prog(ppc64.AMOVD)
680                                 p.From.Type = obj.TYPE_ADDR
681                                 p.From.Reg = v.Args[0].Reg()
682                                 p.From.Offset = v.AuxInt
683                                 p.To.Type = obj.TYPE_REG
684                                 p.To.Reg = v.Reg()
685                         }
686
687                 case *obj.LSym, *gc.Node:
688                         p := s.Prog(ppc64.AMOVD)
689                         p.From.Type = obj.TYPE_ADDR
690                         p.From.Reg = v.Args[0].Reg()
691                         p.To.Type = obj.TYPE_REG
692                         p.To.Reg = v.Reg()
693                         gc.AddAux(&p.From, v)
694
695                 }
696
697         case ssa.OpPPC64MOVDconst:
698                 p := s.Prog(v.Op.Asm())
699                 p.From.Type = obj.TYPE_CONST
700                 p.From.Offset = v.AuxInt
701                 p.To.Type = obj.TYPE_REG
702                 p.To.Reg = v.Reg()
703
704         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
705                 p := s.Prog(v.Op.Asm())
706                 p.From.Type = obj.TYPE_FCONST
707                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
708                 p.To.Type = obj.TYPE_REG
709                 p.To.Reg = v.Reg()
710
711         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
712                 p := s.Prog(v.Op.Asm())
713                 p.From.Type = obj.TYPE_REG
714                 p.From.Reg = v.Args[0].Reg()
715                 p.To.Type = obj.TYPE_REG
716                 p.To.Reg = v.Args[1].Reg()
717
718         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
719                 p := s.Prog(v.Op.Asm())
720                 p.From.Type = obj.TYPE_REG
721                 p.From.Reg = v.Args[0].Reg()
722                 p.To.Type = obj.TYPE_CONST
723                 p.To.Offset = v.AuxInt
724
725         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
726                 // Shift in register to required size
727                 p := s.Prog(v.Op.Asm())
728                 p.From.Type = obj.TYPE_REG
729                 p.From.Reg = v.Args[0].Reg()
730                 p.To.Reg = v.Reg()
731                 p.To.Type = obj.TYPE_REG
732
733         case ssa.OpPPC64MOVDload:
734
735                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
736                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
737                 // the offset is not known until link time. If the load of a go.string uses relocation for the
738                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
739                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
740                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
741                 // go.string types because other types will have proper alignment.
742
743                 gostring := false
744                 switch n := v.Aux.(type) {
745                 case *obj.LSym:
746                         gostring = strings.HasPrefix(n.Name, "go.string.")
747                 }
748                 if gostring {
749                         // Generate full addr of the go.string const
750                         // including AuxInt
751                         p := s.Prog(ppc64.AMOVD)
752                         p.From.Type = obj.TYPE_ADDR
753                         p.From.Reg = v.Args[0].Reg()
754                         gc.AddAux(&p.From, v)
755                         p.To.Type = obj.TYPE_REG
756                         p.To.Reg = v.Reg()
757                         // Load go.string using 0 offset
758                         p = s.Prog(v.Op.Asm())
759                         p.From.Type = obj.TYPE_MEM
760                         p.From.Reg = v.Reg()
761                         p.To.Type = obj.TYPE_REG
762                         p.To.Reg = v.Reg()
763                         break
764                 }
765                 // Not a go.string, generate a normal load
766                 fallthrough
767
768         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
769                 p := s.Prog(v.Op.Asm())
770                 p.From.Type = obj.TYPE_MEM
771                 p.From.Reg = v.Args[0].Reg()
772                 gc.AddAux(&p.From, v)
773                 p.To.Type = obj.TYPE_REG
774                 p.To.Reg = v.Reg()
775
776         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
777                 p := s.Prog(v.Op.Asm())
778                 p.From.Type = obj.TYPE_MEM
779                 p.From.Reg = v.Args[0].Reg()
780                 p.To.Type = obj.TYPE_REG
781                 p.To.Reg = v.Reg()
782
783         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
784                 p := s.Prog(v.Op.Asm())
785                 p.To.Type = obj.TYPE_MEM
786                 p.To.Reg = v.Args[0].Reg()
787                 p.From.Type = obj.TYPE_REG
788                 p.From.Reg = v.Args[1].Reg()
789
790         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
791                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
792                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
793                 p := s.Prog(v.Op.Asm())
794                 p.From.Type = obj.TYPE_MEM
795                 p.From.Reg = v.Args[0].Reg()
796                 p.From.Index = v.Args[1].Reg()
797                 p.To.Type = obj.TYPE_REG
798                 p.To.Reg = v.Reg()
799
800         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
801                 p := s.Prog(v.Op.Asm())
802                 p.From.Type = obj.TYPE_REG
803                 p.From.Reg = ppc64.REGZERO
804                 p.To.Type = obj.TYPE_MEM
805                 p.To.Reg = v.Args[0].Reg()
806                 gc.AddAux(&p.To, v)
807
808         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
809                 p := s.Prog(v.Op.Asm())
810                 p.From.Type = obj.TYPE_REG
811                 p.From.Reg = v.Args[1].Reg()
812                 p.To.Type = obj.TYPE_MEM
813                 p.To.Reg = v.Args[0].Reg()
814                 gc.AddAux(&p.To, v)
815
816         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
817                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
818                 ssa.OpPPC64MOVHBRstoreidx:
819                 p := s.Prog(v.Op.Asm())
820                 p.From.Type = obj.TYPE_REG
821                 p.From.Reg = v.Args[2].Reg()
822                 p.To.Index = v.Args[1].Reg()
823                 p.To.Type = obj.TYPE_MEM
824                 p.To.Reg = v.Args[0].Reg()
825
826         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
827                 // ISEL, ISELB
828                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
829                 // ISEL only accepts 0, 1, 2 condition values but the others can be
830                 // achieved by swapping operand order.
831                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
832                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
833                 // ISELB is used when a boolean result is needed, returning 0 or 1
834                 p := s.Prog(ppc64.AISEL)
835                 p.To.Type = obj.TYPE_REG
836                 p.To.Reg = v.Reg()
837                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
838                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
839                 if v.Op == ssa.OpPPC64ISEL {
840                         r.Reg = v.Args[1].Reg()
841                 }
842                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
843                 if v.AuxInt > 3 {
844                         p.Reg = r.Reg
845                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
846                 } else {
847                         p.Reg = v.Args[0].Reg()
848                         p.SetFrom3(r)
849                 }
850                 p.From.Type = obj.TYPE_CONST
851                 p.From.Offset = v.AuxInt & 3
852
853         case ssa.OpPPC64LoweredZero:
854
855                 // unaligned data doesn't hurt performance
856                 // for these instructions on power8 or later
857
858                 // for sizes >= 64 generate a loop as follows:
859
860                 // set up loop counter in CTR, used by BC
861                 //       XXLXOR VS32,VS32,VS32
862                 //       MOVD len/32,REG_TMP
863                 //       MOVD REG_TMP,CTR
864                 //       MOVD $16,REG_TMP
865                 //       loop:
866                 //       STXVD2X VS32,(R0)(R3)
867                 //       STXVD2X VS32,(R31)(R3)
868                 //       ADD  $32,R3
869                 //       BC   16, 0, loop
870                 //
871                 // any remainder is done as described below
872
873                 // for sizes < 64 bytes, first clear as many doublewords as possible,
874                 // then handle the remainder
875                 //      MOVD R0,(R3)
876                 //      MOVD R0,8(R3)
877                 // .... etc.
878                 //
879                 // the remainder bytes are cleared using one or more
880                 // of the following instructions with the appropriate
881                 // offsets depending which instructions are needed
882                 //
883                 //      MOVW R0,n1(R3)  4 bytes
884                 //      MOVH R0,n2(R3)  2 bytes
885                 //      MOVB R0,n3(R3)  1 byte
886                 //
887                 // 7 bytes: MOVW, MOVH, MOVB
888                 // 6 bytes: MOVW, MOVH
889                 // 5 bytes: MOVW, MOVB
890                 // 3 bytes: MOVH, MOVB
891
892                 // each loop iteration does 32 bytes
893                 ctr := v.AuxInt / 32
894
895                 // remainder bytes
896                 rem := v.AuxInt % 32
897
898                 // only generate a loop if there is more
899                 // than 1 iteration.
900                 if ctr > 1 {
901                         // Set up VS32 (V0) to hold 0s
902                         p := s.Prog(ppc64.AXXLXOR)
903                         p.From.Type = obj.TYPE_REG
904                         p.From.Reg = ppc64.REG_VS32
905                         p.To.Type = obj.TYPE_REG
906                         p.To.Reg = ppc64.REG_VS32
907                         p.Reg = ppc64.REG_VS32
908
909                         // Set up CTR loop counter
910                         p = s.Prog(ppc64.AMOVD)
911                         p.From.Type = obj.TYPE_CONST
912                         p.From.Offset = ctr
913                         p.To.Type = obj.TYPE_REG
914                         p.To.Reg = ppc64.REGTMP
915
916                         p = s.Prog(ppc64.AMOVD)
917                         p.From.Type = obj.TYPE_REG
918                         p.From.Reg = ppc64.REGTMP
919                         p.To.Type = obj.TYPE_REG
920                         p.To.Reg = ppc64.REG_CTR
921
922                         // Set up R31 to hold index value 16
923                         p = s.Prog(ppc64.AMOVD)
924                         p.From.Type = obj.TYPE_CONST
925                         p.From.Offset = 16
926                         p.To.Type = obj.TYPE_REG
927                         p.To.Reg = ppc64.REGTMP
928
929                         // generate 2 STXVD2Xs to store 16 bytes
930                         // when this is a loop then the top must be saved
931                         var top *obj.Prog
932                         // This is the top of loop
933                         p = s.Prog(ppc64.ASTXVD2X)
934                         p.From.Type = obj.TYPE_REG
935                         p.From.Reg = ppc64.REG_VS32
936                         p.To.Type = obj.TYPE_MEM
937                         p.To.Reg = v.Args[0].Reg()
938                         p.To.Index = ppc64.REGZERO
939                         // Save the top of loop
940                         if top == nil {
941                                 top = p
942                         }
943
944                         p = s.Prog(ppc64.ASTXVD2X)
945                         p.From.Type = obj.TYPE_REG
946                         p.From.Reg = ppc64.REG_VS32
947                         p.To.Type = obj.TYPE_MEM
948                         p.To.Reg = v.Args[0].Reg()
949                         p.To.Index = ppc64.REGTMP
950
951                         // Increment address for the
952                         // 4 doublewords just zeroed.
953                         p = s.Prog(ppc64.AADD)
954                         p.Reg = v.Args[0].Reg()
955                         p.From.Type = obj.TYPE_CONST
956                         p.From.Offset = 32
957                         p.To.Type = obj.TYPE_REG
958                         p.To.Reg = v.Args[0].Reg()
959
960                         // Branch back to top of loop
961                         // based on CTR
962                         // BC with BO_BCTR generates bdnz
963                         p = s.Prog(ppc64.ABC)
964                         p.From.Type = obj.TYPE_CONST
965                         p.From.Offset = ppc64.BO_BCTR
966                         p.Reg = ppc64.REG_R0
967                         p.To.Type = obj.TYPE_BRANCH
968                         gc.Patch(p, top)
969                 }
970
971                 // when ctr == 1 the loop was not generated but
972                 // there are at least 32 bytes to clear, so add
973                 // that to the remainder to generate the code
974                 // to clear those doublewords
975                 if ctr == 1 {
976                         rem += 32
977                 }
978
979                 // clear the remainder starting at offset zero
980                 offset := int64(0)
981
982                 // first clear as many doublewords as possible
983                 // then clear remaining sizes as available
984                 for rem > 0 {
985                         op, size := ppc64.AMOVB, int64(1)
986                         switch {
987                         case rem >= 8:
988                                 op, size = ppc64.AMOVD, 8
989                         case rem >= 4:
990                                 op, size = ppc64.AMOVW, 4
991                         case rem >= 2:
992                                 op, size = ppc64.AMOVH, 2
993                         }
994                         p := s.Prog(op)
995                         p.From.Type = obj.TYPE_REG
996                         p.From.Reg = ppc64.REG_R0
997                         p.To.Type = obj.TYPE_MEM
998                         p.To.Reg = v.Args[0].Reg()
999                         p.To.Offset = offset
1000                         rem -= size
1001                         offset += size
1002                 }
1003
1004         case ssa.OpPPC64LoweredMove:
1005
1006                 // This will be used when moving more
1007                 // than 8 bytes.  Moves start with
1008                 // as many 8 byte moves as possible, then
1009                 // 4, 2, or 1 byte(s) as remaining.  This will
1010                 // work and be efficient for power8 or later.
1011                 // If there are 64 or more bytes, then a
1012                 // loop is generated to move 32 bytes and
1013                 // update the src and dst addresses on each
1014                 // iteration. When < 64 bytes, the appropriate
1015                 // number of moves are generated based on the
1016                 // size.
1017                 // When moving >= 64 bytes a loop is used
1018                 //      MOVD len/32,REG_TMP
1019                 //      MOVD REG_TMP,CTR
1020                 //      MOVD $16,REG_TMP
1021                 // top:
1022                 //      LXVD2X (R0)(R4),VS32
1023                 //      LXVD2X (R31)(R4),VS33
1024                 //      ADD $32,R4
1025                 //      STXVD2X VS32,(R0)(R3)
1026                 //      STXVD2X VS33,(R31)(R3)
1027                 //      ADD $32,R3
1028                 //      BC 16,0,top
1029                 // Bytes not moved by this loop are moved
1030                 // with a combination of the following instructions,
1031                 // starting with the largest sizes and generating as
1032                 // many as needed, using the appropriate offset value.
1033                 //      MOVD  n(R4),R14
1034                 //      MOVD  R14,n(R3)
1035                 //      MOVW  n1(R4),R14
1036                 //      MOVW  R14,n1(R3)
1037                 //      MOVH  n2(R4),R14
1038                 //      MOVH  R14,n2(R3)
1039                 //      MOVB  n3(R4),R14
1040                 //      MOVB  R14,n3(R3)
1041
1042                 // Each loop iteration moves 32 bytes
1043                 ctr := v.AuxInt / 32
1044
1045                 // Remainder after the loop
1046                 rem := v.AuxInt % 32
1047
1048                 dst_reg := v.Args[0].Reg()
1049                 src_reg := v.Args[1].Reg()
1050
1051                 // The set of registers used here, must match the clobbered reg list
1052                 // in PPC64Ops.go.
1053                 offset := int64(0)
1054
1055                 // top of the loop
1056                 var top *obj.Prog
1057                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1058                 if ctr > 1 {
1059                         // Set up the CTR
1060                         p := s.Prog(ppc64.AMOVD)
1061                         p.From.Type = obj.TYPE_CONST
1062                         p.From.Offset = ctr
1063                         p.To.Type = obj.TYPE_REG
1064                         p.To.Reg = ppc64.REGTMP
1065
1066                         p = s.Prog(ppc64.AMOVD)
1067                         p.From.Type = obj.TYPE_REG
1068                         p.From.Reg = ppc64.REGTMP
1069                         p.To.Type = obj.TYPE_REG
1070                         p.To.Reg = ppc64.REG_CTR
1071
1072                         // Use REGTMP as index reg
1073                         p = s.Prog(ppc64.AMOVD)
1074                         p.From.Type = obj.TYPE_CONST
1075                         p.From.Offset = 16
1076                         p.To.Type = obj.TYPE_REG
1077                         p.To.Reg = ppc64.REGTMP
1078
1079                         // Generate 16 byte loads and stores.
1080                         // Use temp register for index (16)
1081                         // on the second one.
1082                         p = s.Prog(ppc64.ALXVD2X)
1083                         p.From.Type = obj.TYPE_MEM
1084                         p.From.Reg = src_reg
1085                         p.From.Index = ppc64.REGZERO
1086                         p.To.Type = obj.TYPE_REG
1087                         p.To.Reg = ppc64.REG_VS32
1088
1089                         if top == nil {
1090                                 top = p
1091                         }
1092
1093                         p = s.Prog(ppc64.ALXVD2X)
1094                         p.From.Type = obj.TYPE_MEM
1095                         p.From.Reg = src_reg
1096                         p.From.Index = ppc64.REGTMP
1097                         p.To.Type = obj.TYPE_REG
1098                         p.To.Reg = ppc64.REG_VS33
1099
1100                         // increment the src reg for next iteration
1101                         p = s.Prog(ppc64.AADD)
1102                         p.Reg = src_reg
1103                         p.From.Type = obj.TYPE_CONST
1104                         p.From.Offset = 32
1105                         p.To.Type = obj.TYPE_REG
1106                         p.To.Reg = src_reg
1107
1108                         // generate 16 byte stores
1109                         p = s.Prog(ppc64.ASTXVD2X)
1110                         p.From.Type = obj.TYPE_REG
1111                         p.From.Reg = ppc64.REG_VS32
1112                         p.To.Type = obj.TYPE_MEM
1113                         p.To.Reg = dst_reg
1114                         p.To.Index = ppc64.REGZERO
1115
1116                         p = s.Prog(ppc64.ASTXVD2X)
1117                         p.From.Type = obj.TYPE_REG
1118                         p.From.Reg = ppc64.REG_VS33
1119                         p.To.Type = obj.TYPE_MEM
1120                         p.To.Reg = dst_reg
1121                         p.To.Index = ppc64.REGTMP
1122
1123                         // increment the dst reg for next iteration
1124                         p = s.Prog(ppc64.AADD)
1125                         p.Reg = dst_reg
1126                         p.From.Type = obj.TYPE_CONST
1127                         p.From.Offset = 32
1128                         p.To.Type = obj.TYPE_REG
1129                         p.To.Reg = dst_reg
1130
1131                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1132                         // to loop top.
1133                         p = s.Prog(ppc64.ABC)
1134                         p.From.Type = obj.TYPE_CONST
1135                         p.From.Offset = ppc64.BO_BCTR
1136                         p.Reg = ppc64.REG_R0
1137                         p.To.Type = obj.TYPE_BRANCH
1138                         gc.Patch(p, top)
1139
1140                         // src_reg and dst_reg were incremented in the loop, so
1141                         // later instructions start with offset 0.
1142                         offset = int64(0)
1143                 }
1144
1145                 // No loop was generated for one iteration, so
1146                 // add 32 bytes to the remainder to move those bytes.
1147                 if ctr == 1 {
1148                         rem += 32
1149                 }
1150
1151                 if rem >= 16 {
1152                         // Generate 16 byte loads and stores.
1153                         // Use temp register for index (value 16)
1154                         // on the second one.
1155                         p := s.Prog(ppc64.ALXVD2X)
1156                         p.From.Type = obj.TYPE_MEM
1157                         p.From.Reg = src_reg
1158                         p.From.Index = ppc64.REGZERO
1159                         p.To.Type = obj.TYPE_REG
1160                         p.To.Reg = ppc64.REG_VS32
1161
1162                         p = s.Prog(ppc64.ASTXVD2X)
1163                         p.From.Type = obj.TYPE_REG
1164                         p.From.Reg = ppc64.REG_VS32
1165                         p.To.Type = obj.TYPE_MEM
1166                         p.To.Reg = dst_reg
1167                         p.To.Index = ppc64.REGZERO
1168
1169                         offset = 16
1170                         rem -= 16
1171
1172                         if rem >= 16 {
1173                                 // Use REGTMP as index reg
1174                                 p = s.Prog(ppc64.AMOVD)
1175                                 p.From.Type = obj.TYPE_CONST
1176                                 p.From.Offset = 16
1177                                 p.To.Type = obj.TYPE_REG
1178                                 p.To.Reg = ppc64.REGTMP
1179
1180                                 // Generate 16 byte loads and stores.
1181                                 // Use temp register for index (16)
1182                                 // on the second one.
1183                                 p = s.Prog(ppc64.ALXVD2X)
1184                                 p.From.Type = obj.TYPE_MEM
1185                                 p.From.Reg = src_reg
1186                                 p.From.Index = ppc64.REGTMP
1187                                 p.To.Type = obj.TYPE_REG
1188                                 p.To.Reg = ppc64.REG_VS32
1189
1190                                 p = s.Prog(ppc64.ASTXVD2X)
1191                                 p.From.Type = obj.TYPE_REG
1192                                 p.From.Reg = ppc64.REG_VS32
1193                                 p.To.Type = obj.TYPE_MEM
1194                                 p.To.Reg = dst_reg
1195                                 p.To.Index = ppc64.REGTMP
1196
1197                                 offset = 32
1198                                 rem -= 16
1199                         }
1200                 }
1201
1202                 // Generate all the remaining load and store pairs, starting with
1203                 // as many 8 byte moves as possible, then 4, 2, 1.
1204                 for rem > 0 {
1205                         op, size := ppc64.AMOVB, int64(1)
1206                         switch {
1207                         case rem >= 8:
1208                                 op, size = ppc64.AMOVD, 8
1209                         case rem >= 4:
1210                                 op, size = ppc64.AMOVW, 4
1211                         case rem >= 2:
1212                                 op, size = ppc64.AMOVH, 2
1213                         }
1214                         // Load
1215                         p := s.Prog(op)
1216                         p.To.Type = obj.TYPE_REG
1217                         p.To.Reg = ppc64.REG_R14
1218                         p.From.Type = obj.TYPE_MEM
1219                         p.From.Reg = src_reg
1220                         p.From.Offset = offset
1221
1222                         // Store
1223                         p = s.Prog(op)
1224                         p.From.Type = obj.TYPE_REG
1225                         p.From.Reg = ppc64.REG_R14
1226                         p.To.Type = obj.TYPE_MEM
1227                         p.To.Reg = dst_reg
1228                         p.To.Offset = offset
1229                         rem -= size
1230                         offset += size
1231                 }
1232
1233         case ssa.OpPPC64CALLstatic:
1234                 s.Call(v)
1235
1236         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1237                 p := s.Prog(ppc64.AMOVD)
1238                 p.From.Type = obj.TYPE_REG
1239                 p.From.Reg = v.Args[0].Reg()
1240                 p.To.Type = obj.TYPE_REG
1241                 p.To.Reg = ppc64.REG_LR
1242
1243                 if v.Args[0].Reg() != ppc64.REG_R12 {
1244                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1245                 }
1246
1247                 pp := s.Call(v)
1248                 pp.To.Reg = ppc64.REG_LR
1249
1250                 if gc.Ctxt.Flag_shared {
1251                         // When compiling Go into PIC, the function we just
1252                         // called via pointer might have been implemented in
1253                         // a separate module and so overwritten the TOC
1254                         // pointer in R2; reload it.
1255                         q := s.Prog(ppc64.AMOVD)
1256                         q.From.Type = obj.TYPE_MEM
1257                         q.From.Offset = 24
1258                         q.From.Reg = ppc64.REGSP
1259                         q.To.Type = obj.TYPE_REG
1260                         q.To.Reg = ppc64.REG_R2
1261                 }
1262
1263         case ssa.OpPPC64LoweredWB:
1264                 p := s.Prog(obj.ACALL)
1265                 p.To.Type = obj.TYPE_MEM
1266                 p.To.Name = obj.NAME_EXTERN
1267                 p.To.Sym = v.Aux.(*obj.LSym)
1268
1269         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1270                 p := s.Prog(obj.ACALL)
1271                 p.To.Type = obj.TYPE_MEM
1272                 p.To.Name = obj.NAME_EXTERN
1273                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1274                 s.UseArgs(16) // space used in callee args area by assembly stubs
1275
1276         case ssa.OpPPC64LoweredNilCheck:
1277                 if objabi.GOOS == "aix" {
1278                         // CMP Rarg0, R0
1279                         // BNE 2(PC)
1280                         // STW R0, 0(R0)
1281                         // NOP (so the BNE has somewhere to land)
1282
1283                         // CMP Rarg0, R0
1284                         p := s.Prog(ppc64.ACMP)
1285                         p.From.Type = obj.TYPE_REG
1286                         p.From.Reg = v.Args[0].Reg()
1287                         p.To.Type = obj.TYPE_REG
1288                         p.To.Reg = ppc64.REG_R0
1289
1290                         // BNE 2(PC)
1291                         p2 := s.Prog(ppc64.ABNE)
1292                         p2.To.Type = obj.TYPE_BRANCH
1293
1294                         // STW R0, 0(R0)
1295                         // Write at 0 is forbidden and will trigger a SIGSEGV
1296                         p = s.Prog(ppc64.AMOVW)
1297                         p.From.Type = obj.TYPE_REG
1298                         p.From.Reg = ppc64.REG_R0
1299                         p.To.Type = obj.TYPE_MEM
1300                         p.To.Reg = ppc64.REG_R0
1301
1302                         // NOP (so the BNE has somewhere to land)
1303                         nop := s.Prog(obj.ANOP)
1304                         gc.Patch(p2, nop)
1305
1306                 } else {
1307                         // Issue a load which will fault if arg is nil.
1308                         p := s.Prog(ppc64.AMOVBZ)
1309                         p.From.Type = obj.TYPE_MEM
1310                         p.From.Reg = v.Args[0].Reg()
1311                         gc.AddAux(&p.From, v)
1312                         p.To.Type = obj.TYPE_REG
1313                         p.To.Reg = ppc64.REGTMP
1314                 }
1315                 if logopt.Enabled() {
1316                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1317                 }
1318                 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1319                         gc.Warnl(v.Pos, "generated nil check")
1320                 }
1321
1322         // These should be resolved by rules and not make it here.
1323         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1324                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1325                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1326                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1327         case ssa.OpPPC64InvertFlags:
1328                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1329         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT, ssa.OpPPC64FlagCarrySet, ssa.OpPPC64FlagCarryClear:
1330                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1331         case ssa.OpClobber:
1332                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1333         default:
1334                 v.Fatalf("genValue not implemented: %s", v.LongString())
1335         }
1336 }
1337
1338 var blockJump = [...]struct {
1339         asm, invasm     obj.As
1340         asmeq, invasmun bool
1341 }{
1342         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1343         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1344
1345         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1346         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1347         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1348         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1349
1350         // TODO: need to work FP comparisons into block jumps
1351         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1352         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1353         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1354         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1355 }
1356
1357 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1358         switch b.Kind {
1359         case ssa.BlockDefer:
1360                 // defer returns in R3:
1361                 // 0 if we should continue executing
1362                 // 1 if we should jump to deferreturn call
1363                 p := s.Prog(ppc64.ACMP)
1364                 p.From.Type = obj.TYPE_REG
1365                 p.From.Reg = ppc64.REG_R3
1366                 p.To.Type = obj.TYPE_REG
1367                 p.To.Reg = ppc64.REG_R0
1368
1369                 p = s.Prog(ppc64.ABNE)
1370                 p.To.Type = obj.TYPE_BRANCH
1371                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1372                 if b.Succs[0].Block() != next {
1373                         p := s.Prog(obj.AJMP)
1374                         p.To.Type = obj.TYPE_BRANCH
1375                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1376                 }
1377
1378         case ssa.BlockPlain:
1379                 if b.Succs[0].Block() != next {
1380                         p := s.Prog(obj.AJMP)
1381                         p.To.Type = obj.TYPE_BRANCH
1382                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1383                 }
1384         case ssa.BlockExit:
1385         case ssa.BlockRet:
1386                 s.Prog(obj.ARET)
1387         case ssa.BlockRetJmp:
1388                 p := s.Prog(obj.AJMP)
1389                 p.To.Type = obj.TYPE_MEM
1390                 p.To.Name = obj.NAME_EXTERN
1391                 p.To.Sym = b.Aux.(*obj.LSym)
1392
1393         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1394                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1395                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1396                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1397                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1398                 jmp := blockJump[b.Kind]
1399                 switch next {
1400                 case b.Succs[0].Block():
1401                         s.Br(jmp.invasm, b.Succs[1].Block())
1402                         if jmp.invasmun {
1403                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1404                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1405                         }
1406                 case b.Succs[1].Block():
1407                         s.Br(jmp.asm, b.Succs[0].Block())
1408                         if jmp.asmeq {
1409                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1410                         }
1411                 default:
1412                         if b.Likely != ssa.BranchUnlikely {
1413                                 s.Br(jmp.asm, b.Succs[0].Block())
1414                                 if jmp.asmeq {
1415                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1416                                 }
1417                                 s.Br(obj.AJMP, b.Succs[1].Block())
1418                         } else {
1419                                 s.Br(jmp.invasm, b.Succs[1].Block())
1420                                 if jmp.invasmun {
1421                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1422                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1423                                 }
1424                                 s.Br(obj.AJMP, b.Succs[0].Block())
1425                         }
1426                 }
1427         default:
1428                 b.Fatalf("branch not implemented: %s", b.LongString())
1429         }
1430 }