Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
[dev.link] all: merge branch 'master' into dev.link
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/gc"
9         "cmd/compile/internal/logopt"
10         "cmd/compile/internal/ssa"
11         "cmd/compile/internal/types"
12         "cmd/internal/obj"
13         "cmd/internal/obj/ppc64"
14         "cmd/internal/objabi"
15         "math"
16         "strings"
17 )
18
19 // ssaMarkMoves would mark any MOVDconst ops that need to avoid clobbering flags; its body is commented out, so it is currently a no-op.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21         //      flive := b.FlagsLiveAtEnd
22         //      if b.Control != nil && b.Control.Type.IsFlags() {
23         //              flive = true
24         //      }
25         //      for i := len(b.Values) - 1; i >= 0; i-- {
26         //              v := b.Values[i]
27         //              if flive && (v.Op == ssa.OpPPC64MOVDconst) {
28         //                      // The "mark" is any non-nil Aux value.
29         //                      v.Aux = v
30         //              }
31         //              if v.Type.IsFlags() {
32         //                      flive = false
33         //              }
34         //              for _, a := range v.Args {
35         //                      if a.Type.IsFlags() {
36         //                              flive = true
37         //                      }
38         //              }
39         //      }
40 }
41
42 // loadByType maps a value's type to the PPC64 opcode used to load it:
43 // FMOVS/FMOVD for 4/8-byte floats, MOVD for any other 8-byte type, and
44 // for 1/2/4-byte non-floats MOVB/MOVH/MOVW when t.IsSigned() reports
45 // true, otherwise the Z variants MOVBZ/MOVHZ/MOVWZ.
46 // It panics with "bad load type" for every other size.
47 func loadByType(t *types.Type) obj.As {
48         isFloat := t.IsFloat()
49         size := t.Size()
50         switch {
51         case isFloat && size == 4:
52                 return ppc64.AFMOVS
53         case isFloat && size == 8:
54                 return ppc64.AFMOVD
55         case isFloat:
56                 panic("bad load type")
57         case size == 8:
58                 return ppc64.AMOVD
59         }
60         // Narrower non-float: pick the opcode pair by size, then choose by signedness.
61         var signedOp, unsignedOp obj.As
62         switch size {
63         case 1:
64                 signedOp, unsignedOp = ppc64.AMOVB, ppc64.AMOVBZ
65         case 2:
66                 signedOp, unsignedOp = ppc64.AMOVH, ppc64.AMOVHZ
67         case 4:
68                 signedOp, unsignedOp = ppc64.AMOVW, ppc64.AMOVWZ
69         default:
70                 panic("bad load type")
71         }
72         if t.IsSigned() {
73                 return signedOp
74         }
75         return unsignedOp
76 }
77
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
80         if t.IsFloat() {
81                 switch t.Size() {
82                 case 4:
83                         return ppc64.AFMOVS
84                 case 8:
85                         return ppc64.AFMOVD
86                 }
87         } else {
88                 switch t.Size() {
89                 case 1:
90                         return ppc64.AMOVB
91                 case 2:
92                         return ppc64.AMOVH
93                 case 4:
94                         return ppc64.AMOVW
95                 case 8:
96                         return ppc64.AMOVD
97                 }
98         }
99         panic("bad store type")
100 }
101
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
103         switch v.Op {
104         case ssa.OpCopy:
105                 t := v.Type
106                 if t.IsMemory() {
107                         return
108                 }
109                 x := v.Args[0].Reg()
110                 y := v.Reg()
111                 if x != y {
112                         rt := obj.TYPE_REG
113                         op := ppc64.AMOVD
114
115                         if t.IsFloat() {
116                                 op = ppc64.AFMOVD
117                         }
118                         p := s.Prog(op)
119                         p.From.Type = rt
120                         p.From.Reg = x
121                         p.To.Type = rt
122                         p.To.Reg = y
123                 }
124
125         case ssa.OpPPC64LoweredMuluhilo:
126                 // MULHDU       Rarg1, Rarg0, Reg0
127                 // MULLD        Rarg1, Rarg0, Reg1
128                 r0 := v.Args[0].Reg()
129                 r1 := v.Args[1].Reg()
130                 p := s.Prog(ppc64.AMULHDU)
131                 p.From.Type = obj.TYPE_REG
132                 p.From.Reg = r1
133                 p.Reg = r0
134                 p.To.Type = obj.TYPE_REG
135                 p.To.Reg = v.Reg0()
136                 p1 := s.Prog(ppc64.AMULLD)
137                 p1.From.Type = obj.TYPE_REG
138                 p1.From.Reg = r1
139                 p1.Reg = r0
140                 p1.To.Type = obj.TYPE_REG
141                 p1.To.Reg = v.Reg1()
142
143         case ssa.OpPPC64LoweredAdd64Carry:
144                 // ADDC         Rarg2, -1, Rtmp
145                 // ADDE         Rarg1, Rarg0, Reg0
146                 // ADDZE        Rzero, Reg1
147                 r0 := v.Args[0].Reg()
148                 r1 := v.Args[1].Reg()
149                 r2 := v.Args[2].Reg()
150                 p := s.Prog(ppc64.AADDC)
151                 p.From.Type = obj.TYPE_CONST
152                 p.From.Offset = -1
153                 p.Reg = r2
154                 p.To.Type = obj.TYPE_REG
155                 p.To.Reg = ppc64.REGTMP
156                 p1 := s.Prog(ppc64.AADDE)
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.Reg = r0
160                 p1.To.Type = obj.TYPE_REG
161                 p1.To.Reg = v.Reg0()
162                 p2 := s.Prog(ppc64.AADDZE)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGZERO
165                 p2.To.Type = obj.TYPE_REG
166                 p2.To.Reg = v.Reg1()
167
168         case ssa.OpPPC64LoweredAtomicAnd8,
169                 ssa.OpPPC64LoweredAtomicOr8:
170                 // LWSYNC
171                 // LBAR         (Rarg0), Rtmp
172                 // AND/OR       Rarg1, Rtmp
173                 // STBCCC       Rtmp, (Rarg0)
174                 // BNE          -3(PC)
175                 r0 := v.Args[0].Reg()
176                 r1 := v.Args[1].Reg()
177                 // LWSYNC - Assuming shared data not write-through-required nor
178                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179                 plwsync := s.Prog(ppc64.ALWSYNC)
180                 plwsync.To.Type = obj.TYPE_NONE
181                 p := s.Prog(ppc64.ALBAR)
182                 p.From.Type = obj.TYPE_MEM
183                 p.From.Reg = r0
184                 p.To.Type = obj.TYPE_REG
185                 p.To.Reg = ppc64.REGTMP
186                 p1 := s.Prog(v.Op.Asm())
187                 p1.From.Type = obj.TYPE_REG
188                 p1.From.Reg = r1
189                 p1.To.Type = obj.TYPE_REG
190                 p1.To.Reg = ppc64.REGTMP
191                 p2 := s.Prog(ppc64.ASTBCCC)
192                 p2.From.Type = obj.TYPE_REG
193                 p2.From.Reg = ppc64.REGTMP
194                 p2.To.Type = obj.TYPE_MEM
195                 p2.To.Reg = r0
196                 p2.RegTo2 = ppc64.REGTMP
197                 p3 := s.Prog(ppc64.ABNE)
198                 p3.To.Type = obj.TYPE_BRANCH
199                 gc.Patch(p3, p)
200
201         case ssa.OpPPC64LoweredAtomicAdd32,
202                 ssa.OpPPC64LoweredAtomicAdd64:
203                 // LWSYNC
204                 // LDAR/LWAR    (Rarg0), Rout
205                 // ADD          Rarg1, Rout
206                 // STDCCC/STWCCC Rout, (Rarg0)
207                 // BNE         -3(PC)
208                 // MOVWZ        Rout,Rout (if Add32)
209                 ld := ppc64.ALDAR
210                 st := ppc64.ASTDCCC
211                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
212                         ld = ppc64.ALWAR
213                         st = ppc64.ASTWCCC
214                 }
215                 r0 := v.Args[0].Reg()
216                 r1 := v.Args[1].Reg()
217                 out := v.Reg0()
218                 // LWSYNC - Assuming shared data not write-through-required nor
219                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220                 plwsync := s.Prog(ppc64.ALWSYNC)
221                 plwsync.To.Type = obj.TYPE_NONE
222                 // LDAR or LWAR
223                 p := s.Prog(ld)
224                 p.From.Type = obj.TYPE_MEM
225                 p.From.Reg = r0
226                 p.To.Type = obj.TYPE_REG
227                 p.To.Reg = out
228                 // ADD reg1,out
229                 p1 := s.Prog(ppc64.AADD)
230                 p1.From.Type = obj.TYPE_REG
231                 p1.From.Reg = r1
232                 p1.To.Reg = out
233                 p1.To.Type = obj.TYPE_REG
234                 // STDCCC or STWCCC
235                 p3 := s.Prog(st)
236                 p3.From.Type = obj.TYPE_REG
237                 p3.From.Reg = out
238                 p3.To.Type = obj.TYPE_MEM
239                 p3.To.Reg = r0
240                 // BNE retry
241                 p4 := s.Prog(ppc64.ABNE)
242                 p4.To.Type = obj.TYPE_BRANCH
243                 gc.Patch(p4, p)
244
245                 // Ensure a 32 bit result
246                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247                         p5 := s.Prog(ppc64.AMOVWZ)
248                         p5.To.Type = obj.TYPE_REG
249                         p5.To.Reg = out
250                         p5.From.Type = obj.TYPE_REG
251                         p5.From.Reg = out
252                 }
253
254         case ssa.OpPPC64LoweredAtomicExchange32,
255                 ssa.OpPPC64LoweredAtomicExchange64:
256                 // LWSYNC
257                 // LDAR/LWAR    (Rarg0), Rout
258                 // STDCCC/STWCCC Rout, (Rarg0)
259                 // BNE         -2(PC)
260                 // ISYNC
261                 ld := ppc64.ALDAR
262                 st := ppc64.ASTDCCC
263                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
264                         ld = ppc64.ALWAR
265                         st = ppc64.ASTWCCC
266                 }
267                 r0 := v.Args[0].Reg()
268                 r1 := v.Args[1].Reg()
269                 out := v.Reg0()
270                 // LWSYNC - Assuming shared data not write-through-required nor
271                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272                 plwsync := s.Prog(ppc64.ALWSYNC)
273                 plwsync.To.Type = obj.TYPE_NONE
274                 // LDAR or LWAR
275                 p := s.Prog(ld)
276                 p.From.Type = obj.TYPE_MEM
277                 p.From.Reg = r0
278                 p.To.Type = obj.TYPE_REG
279                 p.To.Reg = out
280                 // STDCCC or STWCCC
281                 p1 := s.Prog(st)
282                 p1.From.Type = obj.TYPE_REG
283                 p1.From.Reg = r1
284                 p1.To.Type = obj.TYPE_MEM
285                 p1.To.Reg = r0
286                 // BNE retry
287                 p2 := s.Prog(ppc64.ABNE)
288                 p2.To.Type = obj.TYPE_BRANCH
289                 gc.Patch(p2, p)
290                 // ISYNC
291                 pisync := s.Prog(ppc64.AISYNC)
292                 pisync.To.Type = obj.TYPE_NONE
293
294         case ssa.OpPPC64LoweredAtomicLoad8,
295                 ssa.OpPPC64LoweredAtomicLoad32,
296                 ssa.OpPPC64LoweredAtomicLoad64,
297                 ssa.OpPPC64LoweredAtomicLoadPtr:
298                 // SYNC (only when AuxInt == 1)
299                 // MOVBZ/MOVWZ/MOVD (Rarg0), Rout
300                 // CMP Rout,Rout
301                 // BNE 1(PC)
302                 // ISYNC
303                 ld := ppc64.AMOVD
304                 cmp := ppc64.ACMP
305                 switch v.Op {
306                 case ssa.OpPPC64LoweredAtomicLoad8:
307                         ld = ppc64.AMOVBZ
308                 case ssa.OpPPC64LoweredAtomicLoad32:
309                         ld = ppc64.AMOVWZ
310                         cmp = ppc64.ACMPW
311                 }
312                 arg0 := v.Args[0].Reg()
313                 out := v.Reg0()
314                 // SYNC when AuxInt == 1; otherwise, load-acquire
315                 if v.AuxInt == 1 {
316                         psync := s.Prog(ppc64.ASYNC)
317                         psync.To.Type = obj.TYPE_NONE
318                 }
319                 // Load
320                 p := s.Prog(ld)
321                 p.From.Type = obj.TYPE_MEM
322                 p.From.Reg = arg0
323                 p.To.Type = obj.TYPE_REG
324                 p.To.Reg = out
325                 // CMP
326                 p1 := s.Prog(cmp)
327                 p1.From.Type = obj.TYPE_REG
328                 p1.From.Reg = out
329                 p1.To.Type = obj.TYPE_REG
330                 p1.To.Reg = out
331                 // BNE
332                 p2 := s.Prog(ppc64.ABNE)
333                 p2.To.Type = obj.TYPE_BRANCH
334                 // ISYNC
335                 pisync := s.Prog(ppc64.AISYNC)
336                 pisync.To.Type = obj.TYPE_NONE
337                 gc.Patch(p2, pisync)
338
339         case ssa.OpPPC64LoweredAtomicStore8,
340                 ssa.OpPPC64LoweredAtomicStore32,
341                 ssa.OpPPC64LoweredAtomicStore64:
342                 // SYNC or LWSYNC
343                 // MOVB/MOVW/MOVD arg1,(arg0)
344                 st := ppc64.AMOVD
345                 switch v.Op {
346                 case ssa.OpPPC64LoweredAtomicStore8:
347                         st = ppc64.AMOVB
348                 case ssa.OpPPC64LoweredAtomicStore32:
349                         st = ppc64.AMOVW
350                 }
351                 arg0 := v.Args[0].Reg()
352                 arg1 := v.Args[1].Reg()
353                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
354                 // SYNC
355                 syncOp := ppc64.ASYNC
356                 if v.AuxInt == 0 {
357                         syncOp = ppc64.ALWSYNC
358                 }
359                 psync := s.Prog(syncOp)
360                 psync.To.Type = obj.TYPE_NONE
361                 // Store
362                 p := s.Prog(st)
363                 p.To.Type = obj.TYPE_MEM
364                 p.To.Reg = arg0
365                 p.From.Type = obj.TYPE_REG
366                 p.From.Reg = arg1
367
368         case ssa.OpPPC64LoweredAtomicCas64,
369                 ssa.OpPPC64LoweredAtomicCas32:
370                 // LWSYNC
371                 // loop:
372                 // LDAR        (Rarg0), MutexHint, Rtmp
373                 // CMP         Rarg1, Rtmp
374                 // BNE         fail
375                 // STDCCC      Rarg2, (Rarg0)
376                 // BNE         loop
377                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
378                 // MOVD        $1, Rout
379                 // BR          end
380                 // fail:
381                 // MOVD        $0, Rout
382                 // end:
383                 ld := ppc64.ALDAR
384                 st := ppc64.ASTDCCC
385                 cmp := ppc64.ACMP
386                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
387                         ld = ppc64.ALWAR
388                         st = ppc64.ASTWCCC
389                         cmp = ppc64.ACMPW
390                 }
391                 r0 := v.Args[0].Reg()
392                 r1 := v.Args[1].Reg()
393                 r2 := v.Args[2].Reg()
394                 out := v.Reg0()
395                 // LWSYNC - Assuming shared data not write-through-required nor
396                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397                 plwsync1 := s.Prog(ppc64.ALWSYNC)
398                 plwsync1.To.Type = obj.TYPE_NONE
399                 // LDAR or LWAR
400                 p := s.Prog(ld)
401                 p.From.Type = obj.TYPE_MEM
402                 p.From.Reg = r0
403                 p.To.Type = obj.TYPE_REG
404                 p.To.Reg = ppc64.REGTMP
405                 // If it is a Compare-and-Swap-Release operation, set the EH field with
406                 // the release hint.
407                 if v.AuxInt == 0 {
408                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
409                 }
410                 // CMP reg1,reg2
411                 p1 := s.Prog(cmp)
412                 p1.From.Type = obj.TYPE_REG
413                 p1.From.Reg = r1
414                 p1.To.Reg = ppc64.REGTMP
415                 p1.To.Type = obj.TYPE_REG
416                 // BNE cas_fail
417                 p2 := s.Prog(ppc64.ABNE)
418                 p2.To.Type = obj.TYPE_BRANCH
419                 // STDCCC or STWCCC
420                 p3 := s.Prog(st)
421                 p3.From.Type = obj.TYPE_REG
422                 p3.From.Reg = r2
423                 p3.To.Type = obj.TYPE_MEM
424                 p3.To.Reg = r0
425                 // BNE retry
426                 p4 := s.Prog(ppc64.ABNE)
427                 p4.To.Type = obj.TYPE_BRANCH
428                 gc.Patch(p4, p)
429                 // LWSYNC - Assuming shared data not write-through-required nor
430                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431                 // If the operation is a CAS-Release, then synchronization is not necessary.
432                 if v.AuxInt != 0 {
433                         plwsync2 := s.Prog(ppc64.ALWSYNC)
434                         plwsync2.To.Type = obj.TYPE_NONE
435                 }
436                 // return true
437                 p5 := s.Prog(ppc64.AMOVD)
438                 p5.From.Type = obj.TYPE_CONST
439                 p5.From.Offset = 1
440                 p5.To.Type = obj.TYPE_REG
441                 p5.To.Reg = out
442                 // BR done
443                 p6 := s.Prog(obj.AJMP)
444                 p6.To.Type = obj.TYPE_BRANCH
445                 // return false
446                 p7 := s.Prog(ppc64.AMOVD)
447                 p7.From.Type = obj.TYPE_CONST
448                 p7.From.Offset = 0
449                 p7.To.Type = obj.TYPE_REG
450                 p7.To.Reg = out
451                 gc.Patch(p2, p7)
452                 // done (label)
453                 p8 := s.Prog(obj.ANOP)
454                 gc.Patch(p6, p8)
455
456         case ssa.OpPPC64LoweredGetClosurePtr:
457                 // Closure pointer is R11 (already)
458                 gc.CheckLoweredGetClosurePtr(v)
459
460         case ssa.OpPPC64LoweredGetCallerSP:
461                 // caller's SP is FixedFrameSize below the address of the first arg
462                 p := s.Prog(ppc64.AMOVD)
463                 p.From.Type = obj.TYPE_ADDR
464                 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465                 p.From.Name = obj.NAME_PARAM
466                 p.To.Type = obj.TYPE_REG
467                 p.To.Reg = v.Reg()
468
469         case ssa.OpPPC64LoweredGetCallerPC:
470                 p := s.Prog(obj.AGETCALLERPC)
471                 p.To.Type = obj.TYPE_REG
472                 p.To.Reg = v.Reg()
473
474         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475                 // input is already rounded
476
477         case ssa.OpLoadReg:
478                 loadOp := loadByType(v.Type)
479                 p := s.Prog(loadOp)
480                 gc.AddrAuto(&p.From, v.Args[0])
481                 p.To.Type = obj.TYPE_REG
482                 p.To.Reg = v.Reg()
483
484         case ssa.OpStoreReg:
485                 storeOp := storeByType(v.Type)
486                 p := s.Prog(storeOp)
487                 p.From.Type = obj.TYPE_REG
488                 p.From.Reg = v.Args[0].Reg()
489                 gc.AddrAuto(&p.To, v)
490
491         case ssa.OpPPC64DIVD:
492                 // For now,
493                 //
494                 // cmp arg1, -1
495                 // be  ahead
496                 // v = arg0 / arg1
497                 // b over
498                 // ahead: v = - arg0
499                 // over: nop
500                 r := v.Reg()
501                 r0 := v.Args[0].Reg()
502                 r1 := v.Args[1].Reg()
503
504                 p := s.Prog(ppc64.ACMP)
505                 p.From.Type = obj.TYPE_REG
506                 p.From.Reg = r1
507                 p.To.Type = obj.TYPE_CONST
508                 p.To.Offset = -1
509
510                 pbahead := s.Prog(ppc64.ABEQ)
511                 pbahead.To.Type = obj.TYPE_BRANCH
512
513                 p = s.Prog(v.Op.Asm())
514                 p.From.Type = obj.TYPE_REG
515                 p.From.Reg = r1
516                 p.Reg = r0
517                 p.To.Type = obj.TYPE_REG
518                 p.To.Reg = r
519
520                 pbover := s.Prog(obj.AJMP)
521                 pbover.To.Type = obj.TYPE_BRANCH
522
523                 p = s.Prog(ppc64.ANEG)
524                 p.To.Type = obj.TYPE_REG
525                 p.To.Reg = r
526                 p.From.Type = obj.TYPE_REG
527                 p.From.Reg = r0
528                 gc.Patch(pbahead, p)
529
530                 p = s.Prog(obj.ANOP)
531                 gc.Patch(pbover, p)
532
533         case ssa.OpPPC64DIVW:
534                 // word-width version of above
535                 r := v.Reg()
536                 r0 := v.Args[0].Reg()
537                 r1 := v.Args[1].Reg()
538
539                 p := s.Prog(ppc64.ACMPW)
540                 p.From.Type = obj.TYPE_REG
541                 p.From.Reg = r1
542                 p.To.Type = obj.TYPE_CONST
543                 p.To.Offset = -1
544
545                 pbahead := s.Prog(ppc64.ABEQ)
546                 pbahead.To.Type = obj.TYPE_BRANCH
547
548                 p = s.Prog(v.Op.Asm())
549                 p.From.Type = obj.TYPE_REG
550                 p.From.Reg = r1
551                 p.Reg = r0
552                 p.To.Type = obj.TYPE_REG
553                 p.To.Reg = r
554
555                 pbover := s.Prog(obj.AJMP)
556                 pbover.To.Type = obj.TYPE_BRANCH
557
558                 p = s.Prog(ppc64.ANEG)
559                 p.To.Type = obj.TYPE_REG
560                 p.To.Reg = r
561                 p.From.Type = obj.TYPE_REG
562                 p.From.Reg = r0
563                 gc.Patch(pbahead, p)
564
565                 p = s.Prog(obj.ANOP)
566                 gc.Patch(pbover, p)
567
568         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
575                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
576                 r := v.Reg()
577                 r1 := v.Args[0].Reg()
578                 r2 := v.Args[1].Reg()
579                 p := s.Prog(v.Op.Asm())
580                 p.From.Type = obj.TYPE_REG
581                 p.From.Reg = r2
582                 p.Reg = r1
583                 p.To.Type = obj.TYPE_REG
584                 p.To.Reg = r
585
586         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
587                 r1 := v.Args[0].Reg()
588                 r2 := v.Args[1].Reg()
589                 p := s.Prog(v.Op.Asm())
590                 p.From.Type = obj.TYPE_REG
591                 p.From.Reg = r2
592                 p.Reg = r1
593                 p.To.Type = obj.TYPE_REG
594                 p.To.Reg = ppc64.REGTMP // result is not needed
595
596         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
597                 p := s.Prog(v.Op.Asm())
598                 p.From.Type = obj.TYPE_CONST
599                 p.From.Offset = v.AuxInt
600                 p.Reg = v.Args[0].Reg()
601                 p.To.Type = obj.TYPE_REG
602                 p.To.Reg = v.Reg()
603
604         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
605                 r := v.Reg()
606                 r1 := v.Args[0].Reg()
607                 r2 := v.Args[1].Reg()
608                 r3 := v.Args[2].Reg()
609                 // r = r1*r2 ± r3
610                 p := s.Prog(v.Op.Asm())
611                 p.From.Type = obj.TYPE_REG
612                 p.From.Reg = r1
613                 p.Reg = r3
614                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
615                 p.To.Type = obj.TYPE_REG
616                 p.To.Reg = r
617
618         case ssa.OpPPC64MaskIfNotCarry:
619                 r := v.Reg()
620                 p := s.Prog(v.Op.Asm())
621                 p.From.Type = obj.TYPE_REG
622                 p.From.Reg = ppc64.REGZERO
623                 p.To.Type = obj.TYPE_REG
624                 p.To.Reg = r
625
626         case ssa.OpPPC64ADDconstForCarry:
627                 r1 := v.Args[0].Reg()
628                 p := s.Prog(v.Op.Asm())
629                 p.Reg = r1
630                 p.From.Type = obj.TYPE_CONST
631                 p.From.Offset = v.AuxInt
632                 p.To.Type = obj.TYPE_REG
633                 p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
634
635         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
636                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
637                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
638                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
639                 r := v.Reg()
640                 p := s.Prog(v.Op.Asm())
641                 p.To.Type = obj.TYPE_REG
642                 p.To.Reg = r
643                 p.From.Type = obj.TYPE_REG
644                 p.From.Reg = v.Args[0].Reg()
645
646         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
647                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
648                 p := s.Prog(v.Op.Asm())
649                 p.Reg = v.Args[0].Reg()
650                 p.From.Type = obj.TYPE_CONST
651                 p.From.Offset = v.AuxInt
652                 p.To.Type = obj.TYPE_REG
653                 p.To.Reg = v.Reg()
654
655         case ssa.OpPPC64ANDCCconst:
656                 p := s.Prog(v.Op.Asm())
657                 p.Reg = v.Args[0].Reg()
658                 p.From.Type = obj.TYPE_CONST
659                 p.From.Offset = v.AuxInt
660                 p.To.Type = obj.TYPE_REG
661                 p.To.Reg = ppc64.REGTMP // discard result
662
663         case ssa.OpPPC64MOVDaddr:
664                 switch v.Aux.(type) {
665                 default:
666                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
667                 case nil:
668                         // If aux offset and aux int are both 0, and the same
669                         // input and output regs are used, no instruction
670                         // needs to be generated, since it would just be
671                         // addi rx, rx, 0.
672                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
673                                 p := s.Prog(ppc64.AMOVD)
674                                 p.From.Type = obj.TYPE_ADDR
675                                 p.From.Reg = v.Args[0].Reg()
676                                 p.From.Offset = v.AuxInt
677                                 p.To.Type = obj.TYPE_REG
678                                 p.To.Reg = v.Reg()
679                         }
680
681                 case *obj.LSym, *gc.Node:
682                         p := s.Prog(ppc64.AMOVD)
683                         p.From.Type = obj.TYPE_ADDR
684                         p.From.Reg = v.Args[0].Reg()
685                         p.To.Type = obj.TYPE_REG
686                         p.To.Reg = v.Reg()
687                         gc.AddAux(&p.From, v)
688
689                 }
690
691         case ssa.OpPPC64MOVDconst:
692                 p := s.Prog(v.Op.Asm())
693                 p.From.Type = obj.TYPE_CONST
694                 p.From.Offset = v.AuxInt
695                 p.To.Type = obj.TYPE_REG
696                 p.To.Reg = v.Reg()
697
698         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
699                 p := s.Prog(v.Op.Asm())
700                 p.From.Type = obj.TYPE_FCONST
701                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
702                 p.To.Type = obj.TYPE_REG
703                 p.To.Reg = v.Reg()
704
705         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
706                 p := s.Prog(v.Op.Asm())
707                 p.From.Type = obj.TYPE_REG
708                 p.From.Reg = v.Args[0].Reg()
709                 p.To.Type = obj.TYPE_REG
710                 p.To.Reg = v.Args[1].Reg()
711
712         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
713                 p := s.Prog(v.Op.Asm())
714                 p.From.Type = obj.TYPE_REG
715                 p.From.Reg = v.Args[0].Reg()
716                 p.To.Type = obj.TYPE_CONST
717                 p.To.Offset = v.AuxInt
718
719         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
720                 // Shift in register to required size
721                 p := s.Prog(v.Op.Asm())
722                 p.From.Type = obj.TYPE_REG
723                 p.From.Reg = v.Args[0].Reg()
724                 p.To.Reg = v.Reg()
725                 p.To.Type = obj.TYPE_REG
726
727         case ssa.OpPPC64MOVDload:
728
729                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
730                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
731                 // the offset is not known until link time. If the load of a go.string uses relocation for the
732                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
733                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
734                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
735                 // go.string types because other types will have proper alignment.
736
737                 gostring := false
738                 switch n := v.Aux.(type) {
739                 case *obj.LSym:
740                         gostring = strings.HasPrefix(n.Name, "go.string.")
741                 }
742                 if gostring {
743                         // Generate full addr of the go.string const
744                         // including AuxInt
745                         p := s.Prog(ppc64.AMOVD)
746                         p.From.Type = obj.TYPE_ADDR
747                         p.From.Reg = v.Args[0].Reg()
748                         gc.AddAux(&p.From, v)
749                         p.To.Type = obj.TYPE_REG
750                         p.To.Reg = v.Reg()
751                         // Load go.string using 0 offset
752                         p = s.Prog(v.Op.Asm())
753                         p.From.Type = obj.TYPE_MEM
754                         p.From.Reg = v.Reg()
755                         p.To.Type = obj.TYPE_REG
756                         p.To.Reg = v.Reg()
757                         break
758                 }
759                 // Not a go.string, generate a normal load
760                 fallthrough
761
762         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
763                 p := s.Prog(v.Op.Asm())
764                 p.From.Type = obj.TYPE_MEM
765                 p.From.Reg = v.Args[0].Reg()
766                 gc.AddAux(&p.From, v)
767                 p.To.Type = obj.TYPE_REG
768                 p.To.Reg = v.Reg()
769
770         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
771                 p := s.Prog(v.Op.Asm())
772                 p.From.Type = obj.TYPE_MEM
773                 p.From.Reg = v.Args[0].Reg()
774                 p.To.Type = obj.TYPE_REG
775                 p.To.Reg = v.Reg()
776
777         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
778                 p := s.Prog(v.Op.Asm())
779                 p.To.Type = obj.TYPE_MEM
780                 p.To.Reg = v.Args[0].Reg()
781                 p.From.Type = obj.TYPE_REG
782                 p.From.Reg = v.Args[1].Reg()
783
784         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
785                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
786                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
787                 p := s.Prog(v.Op.Asm())
788                 p.From.Type = obj.TYPE_MEM
789                 p.From.Reg = v.Args[0].Reg()
790                 p.From.Index = v.Args[1].Reg()
791                 p.To.Type = obj.TYPE_REG
792                 p.To.Reg = v.Reg()
793
794         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
795                 p := s.Prog(v.Op.Asm())
796                 p.From.Type = obj.TYPE_REG
797                 p.From.Reg = ppc64.REGZERO
798                 p.To.Type = obj.TYPE_MEM
799                 p.To.Reg = v.Args[0].Reg()
800                 gc.AddAux(&p.To, v)
801
802         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
803                 p := s.Prog(v.Op.Asm())
804                 p.From.Type = obj.TYPE_REG
805                 p.From.Reg = v.Args[1].Reg()
806                 p.To.Type = obj.TYPE_MEM
807                 p.To.Reg = v.Args[0].Reg()
808                 gc.AddAux(&p.To, v)
809
810         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
811                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
812                 ssa.OpPPC64MOVHBRstoreidx:
813                 p := s.Prog(v.Op.Asm())
814                 p.From.Type = obj.TYPE_REG
815                 p.From.Reg = v.Args[2].Reg()
816                 p.To.Index = v.Args[1].Reg()
817                 p.To.Type = obj.TYPE_MEM
818                 p.To.Reg = v.Args[0].Reg()
819
820         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
821                 // ISEL, ISELB
822                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
823                 // ISEL only accepts 0, 1, 2 condition values but the others can be
824                 // achieved by swapping operand order.
825                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
826                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
827                 // ISELB is used when a boolean result is needed, returning 0 or 1
828                 p := s.Prog(ppc64.AISEL)
829                 p.To.Type = obj.TYPE_REG
830                 p.To.Reg = v.Reg()
831                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
832                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
833                 if v.Op == ssa.OpPPC64ISEL {
834                         r.Reg = v.Args[1].Reg()
835                 }
836                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
837                 if v.AuxInt > 3 {
838                         p.Reg = r.Reg
839                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
840                 } else {
841                         p.Reg = v.Args[0].Reg()
842                         p.SetFrom3(r)
843                 }
844                 p.From.Type = obj.TYPE_CONST
845                 p.From.Offset = v.AuxInt & 3
846
847         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
848                 // The LoweredQuad code generation
849                 // generates STXV instructions on
850                 // power9. The Short variation is used
851                 // if no loop is generated.
852
853                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
854
855                 // Set up loop counter in CTR, used by BC
856                 // XXLXOR clears VS32
857                 //       XXLXOR VS32,VS32,VS32
858                 //       MOVD len/64,REG_TMP
859                 //       MOVD REG_TMP,CTR
860                 //       loop:
861                 //       STXV VS32,0(R20)
862                 //       STXV VS32,16(R20)
863                 //       STXV VS32,32(R20)
864                 //       STXV VS32,48(R20)
865                 //       ADD  $64,R20
866                 //       BC   16, 0, loop
867
868                 // Number of 64-byte loop iterations
869                 ctr := v.AuxInt / 64
870
871                 // Remainder bytes
872                 rem := v.AuxInt % 64
873
874                 // Only generate a loop if there is more
875                 // than 1 iteration.
876                 if ctr > 1 {
877                         // Set up VS32 (V0) to hold 0s
878                         p := s.Prog(ppc64.AXXLXOR)
879                         p.From.Type = obj.TYPE_REG
880                         p.From.Reg = ppc64.REG_VS32
881                         p.To.Type = obj.TYPE_REG
882                         p.To.Reg = ppc64.REG_VS32
883                         p.Reg = ppc64.REG_VS32
884
885                         // Set up CTR loop counter
886                         p = s.Prog(ppc64.AMOVD)
887                         p.From.Type = obj.TYPE_CONST
888                         p.From.Offset = ctr
889                         p.To.Type = obj.TYPE_REG
890                         p.To.Reg = ppc64.REGTMP
891
892                         p = s.Prog(ppc64.AMOVD)
893                         p.From.Type = obj.TYPE_REG
894                         p.From.Reg = ppc64.REGTMP
895                         p.To.Type = obj.TYPE_REG
896                         p.To.Reg = ppc64.REG_CTR
897
898                         // Don't generate padding for
899                         // loops with few iterations.
900                         if ctr > 3 {
901                                 p = s.Prog(obj.APCALIGN)
902                                 p.From.Type = obj.TYPE_CONST
903                                 p.From.Offset = 16
904                         }
905
906                         // generate 4 STXVs to zero 64 bytes
907                         var top *obj.Prog
908
909                         p = s.Prog(ppc64.ASTXV)
910                         p.From.Type = obj.TYPE_REG
911                         p.From.Reg = ppc64.REG_VS32
912                         p.To.Type = obj.TYPE_MEM
913                         p.To.Reg = v.Args[0].Reg()
914
915                         //  Save the top of loop
916                         if top == nil {
917                                 top = p
918                         }
919                         p = s.Prog(ppc64.ASTXV)
920                         p.From.Type = obj.TYPE_REG
921                         p.From.Reg = ppc64.REG_VS32
922                         p.To.Type = obj.TYPE_MEM
923                         p.To.Reg = v.Args[0].Reg()
924                         p.To.Offset = 16
925
926                         p = s.Prog(ppc64.ASTXV)
927                         p.From.Type = obj.TYPE_REG
928                         p.From.Reg = ppc64.REG_VS32
929                         p.To.Type = obj.TYPE_MEM
930                         p.To.Reg = v.Args[0].Reg()
931                         p.To.Offset = 32
932
933                         p = s.Prog(ppc64.ASTXV)
934                         p.From.Type = obj.TYPE_REG
935                         p.From.Reg = ppc64.REG_VS32
936                         p.To.Type = obj.TYPE_MEM
937                         p.To.Reg = v.Args[0].Reg()
938                         p.To.Offset = 48
939
940                         // Increment address for the
941                         // 64 bytes just zeroed.
942                         p = s.Prog(ppc64.AADD)
943                         p.Reg = v.Args[0].Reg()
944                         p.From.Type = obj.TYPE_CONST
945                         p.From.Offset = 64
946                         p.To.Type = obj.TYPE_REG
947                         p.To.Reg = v.Args[0].Reg()
948
949                         // Branch back to top of loop
950                         // based on CTR
951                         // BC with BO_BCTR generates bdnz
952                         p = s.Prog(ppc64.ABC)
953                         p.From.Type = obj.TYPE_CONST
954                         p.From.Offset = ppc64.BO_BCTR
955                         p.Reg = ppc64.REG_R0
956                         p.To.Type = obj.TYPE_BRANCH
957                         gc.Patch(p, top)
958                 }
959                 // When ctr == 1 the loop was not generated but
960                 // there are at least 64 bytes to clear, so add
961                 // that to the remainder to generate the code
962                 // to clear those doublewords
963                 if ctr == 1 {
964                         rem += 64
965                 }
966
967                 // Clear the remainder starting at offset zero
968                 offset := int64(0)
969
970                 if rem >= 16 && ctr <= 1 {
971                         // If the XXLXOR hasn't already been
972                         // generated, do it here to initialize
973                         // VS32 (V0) to 0.
974                         p := s.Prog(ppc64.AXXLXOR)
975                         p.From.Type = obj.TYPE_REG
976                         p.From.Reg = ppc64.REG_VS32
977                         p.To.Type = obj.TYPE_REG
978                         p.To.Reg = ppc64.REG_VS32
979                         p.Reg = ppc64.REG_VS32
980                 }
981                 // Generate STXV for 32 or 64
982                 // bytes.
983                 for rem >= 32 {
984                         p := s.Prog(ppc64.ASTXV)
985                         p.From.Type = obj.TYPE_REG
986                         p.From.Reg = ppc64.REG_VS32
987                         p.To.Type = obj.TYPE_MEM
988                         p.To.Reg = v.Args[0].Reg()
989                         p.To.Offset = offset
990
991                         p = s.Prog(ppc64.ASTXV)
992                         p.From.Type = obj.TYPE_REG
993                         p.From.Reg = ppc64.REG_VS32
994                         p.To.Type = obj.TYPE_MEM
995                         p.To.Reg = v.Args[0].Reg()
996                         p.To.Offset = offset + 16
997                         offset += 32
998                         rem -= 32
999                 }
1000                 // Generate 16 bytes
1001                 if rem >= 16 {
1002                         p := s.Prog(ppc64.ASTXV)
1003                         p.From.Type = obj.TYPE_REG
1004                         p.From.Reg = ppc64.REG_VS32
1005                         p.To.Type = obj.TYPE_MEM
1006                         p.To.Reg = v.Args[0].Reg()
1007                         p.To.Offset = offset
1008                         offset += 16
1009                         rem -= 16
1010                 }
1011
1012                 // first clear as many doublewords as possible
1013                 // then clear remaining sizes as available
1014                 for rem > 0 {
1015                         op, size := ppc64.AMOVB, int64(1)
1016                         switch {
1017                         case rem >= 8:
1018                                 op, size = ppc64.AMOVD, 8
1019                         case rem >= 4:
1020                                 op, size = ppc64.AMOVW, 4
1021                         case rem >= 2:
1022                                 op, size = ppc64.AMOVH, 2
1023                         }
1024                         p := s.Prog(op)
1025                         p.From.Type = obj.TYPE_REG
1026                         p.From.Reg = ppc64.REG_R0
1027                         p.To.Type = obj.TYPE_MEM
1028                         p.To.Reg = v.Args[0].Reg()
1029                         p.To.Offset = offset
1030                         rem -= size
1031                         offset += size
1032                 }
1033
1034         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1035
1036                 // Unaligned data doesn't hurt performance
1037                 // for these instructions on power8.
1038
1039                 // For sizes >= 64 generate a loop as follows:
1040
1041                 // Set up loop counter in CTR, used by BC
1042                 //       XXLXOR VS32,VS32,VS32
1043                 //       MOVD len/32,REG_TMP
1044                 //       MOVD REG_TMP,CTR
1045                 //       MOVD $16,REG_TMP
1046                 //       loop:
1047                 //       STXVD2X VS32,(R0)(R20)
1048                 //       STXVD2X VS32,(R31)(R20)
1049                 //       ADD  $32,R20
1050                 //       BC   16, 0, loop
1051                 //
1052                 // any remainder is done as described below
1053
1054                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1055                 // then handle the remainder
1056                 //      MOVD R0,(R20)
1057                 //      MOVD R0,8(R20)
1058                 // .... etc.
1059                 //
1060                 // the remainder bytes are cleared using one or more
1061                 // of the following instructions with the appropriate
1062                 // offsets depending which instructions are needed
1063                 //
1064                 //      MOVW R0,n1(R20) 4 bytes
1065                 //      MOVH R0,n2(R20) 2 bytes
1066                 //      MOVB R0,n3(R20) 1 byte
1067                 //
1068                 // 7 bytes: MOVW, MOVH, MOVB
1069                 // 6 bytes: MOVW, MOVH
1070                 // 5 bytes: MOVW, MOVB
1071                 // 3 bytes: MOVH, MOVB
1072
1073                 // each loop iteration does 32 bytes
1074                 ctr := v.AuxInt / 32
1075
1076                 // remainder bytes
1077                 rem := v.AuxInt % 32
1078
1079                 // only generate a loop if there is more
1080                 // than 1 iteration.
1081                 if ctr > 1 {
1082                         // Set up VS32 (V0) to hold 0s
1083                         p := s.Prog(ppc64.AXXLXOR)
1084                         p.From.Type = obj.TYPE_REG
1085                         p.From.Reg = ppc64.REG_VS32
1086                         p.To.Type = obj.TYPE_REG
1087                         p.To.Reg = ppc64.REG_VS32
1088                         p.Reg = ppc64.REG_VS32
1089
1090                         // Set up CTR loop counter
1091                         p = s.Prog(ppc64.AMOVD)
1092                         p.From.Type = obj.TYPE_CONST
1093                         p.From.Offset = ctr
1094                         p.To.Type = obj.TYPE_REG
1095                         p.To.Reg = ppc64.REGTMP
1096
1097                         p = s.Prog(ppc64.AMOVD)
1098                         p.From.Type = obj.TYPE_REG
1099                         p.From.Reg = ppc64.REGTMP
1100                         p.To.Type = obj.TYPE_REG
1101                         p.To.Reg = ppc64.REG_CTR
1102
1103                         // Set up R31 to hold index value 16
1104                         p = s.Prog(ppc64.AMOVD)
1105                         p.From.Type = obj.TYPE_CONST
1106                         p.From.Offset = 16
1107                         p.To.Type = obj.TYPE_REG
1108                         p.To.Reg = ppc64.REGTMP
1109
1110                         // Don't add padding for alignment
1111                         // with few loop iterations.
1112                         if ctr > 3 {
1113                                 p = s.Prog(obj.APCALIGN)
1114                                 p.From.Type = obj.TYPE_CONST
1115                                 p.From.Offset = 16
1116                         }
1117
1118                         // generate 2 STXVD2Xs, each storing 16 bytes
1119                         // when this is a loop then the top must be saved
1120                         var top *obj.Prog
1121                         // This is the top of loop
1122
1123                         p = s.Prog(ppc64.ASTXVD2X)
1124                         p.From.Type = obj.TYPE_REG
1125                         p.From.Reg = ppc64.REG_VS32
1126                         p.To.Type = obj.TYPE_MEM
1127                         p.To.Reg = v.Args[0].Reg()
1128                         p.To.Index = ppc64.REGZERO
1129                         // Save the top of loop
1130                         if top == nil {
1131                                 top = p
1132                         }
1133                         p = s.Prog(ppc64.ASTXVD2X)
1134                         p.From.Type = obj.TYPE_REG
1135                         p.From.Reg = ppc64.REG_VS32
1136                         p.To.Type = obj.TYPE_MEM
1137                         p.To.Reg = v.Args[0].Reg()
1138                         p.To.Index = ppc64.REGTMP
1139
1140                         // Increment address for the
1141                         // 4 doublewords just zeroed.
1142                         p = s.Prog(ppc64.AADD)
1143                         p.Reg = v.Args[0].Reg()
1144                         p.From.Type = obj.TYPE_CONST
1145                         p.From.Offset = 32
1146                         p.To.Type = obj.TYPE_REG
1147                         p.To.Reg = v.Args[0].Reg()
1148
1149                         // Branch back to top of loop
1150                         // based on CTR
1151                         // BC with BO_BCTR generates bdnz
1152                         p = s.Prog(ppc64.ABC)
1153                         p.From.Type = obj.TYPE_CONST
1154                         p.From.Offset = ppc64.BO_BCTR
1155                         p.Reg = ppc64.REG_R0
1156                         p.To.Type = obj.TYPE_BRANCH
1157                         gc.Patch(p, top)
1158                 }
1159
1160                 // when ctr == 1 the loop was not generated but
1161                 // there are at least 32 bytes to clear, so add
1162                 // that to the remainder to generate the code
1163                 // to clear those doublewords
1164                 if ctr == 1 {
1165                         rem += 32
1166                 }
1167
1168                 // clear the remainder starting at offset zero
1169                 offset := int64(0)
1170
1171                 // first clear as many doublewords as possible
1172                 // then clear remaining sizes as available
1173                 for rem > 0 {
1174                         op, size := ppc64.AMOVB, int64(1)
1175                         switch {
1176                         case rem >= 8:
1177                                 op, size = ppc64.AMOVD, 8
1178                         case rem >= 4:
1179                                 op, size = ppc64.AMOVW, 4
1180                         case rem >= 2:
1181                                 op, size = ppc64.AMOVH, 2
1182                         }
1183                         p := s.Prog(op)
1184                         p.From.Type = obj.TYPE_REG
1185                         p.From.Reg = ppc64.REG_R0
1186                         p.To.Type = obj.TYPE_MEM
1187                         p.To.Reg = v.Args[0].Reg()
1188                         p.To.Offset = offset
1189                         rem -= size
1190                         offset += size
1191                 }
1192
1193         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1194
1195                 bytesPerLoop := int64(32)
1196                 // This will be used when moving more
1197                 // than 8 bytes.  Moves start with
1198                 // as many 8 byte moves as possible, then
1199                 // 4, 2, or 1 byte(s) as remaining.  This will
1200                 // work and be efficient for power8 or later.
1201                 // If there are 64 or more bytes, then a
1202                 // loop is generated to move 32 bytes and
1203                 // update the src and dst addresses on each
1204                 // iteration. When < 64 bytes, the appropriate
1205                 // number of moves are generated based on the
1206                 // size.
1207                 // When moving >= 64 bytes a loop is used
1208                 //      MOVD len/32,REG_TMP
1209                 //      MOVD REG_TMP,CTR
1210                 //      MOVD $16,REG_TMP
1211                 // top:
1212                 //      LXVD2X (R0)(R21),VS32
1213                 //      LXVD2X (R31)(R21),VS33
1214                 //      ADD $32,R21
1215                 //      STXVD2X VS32,(R0)(R20)
1216                 //      STXVD2X VS33,(R31)(R20)
1217                 //      ADD $32,R20
1218                 //      BC 16,0,top
1219                 // Bytes not moved by this loop are moved
1220                 // with a combination of the following instructions,
1221                 // starting with the largest sizes and generating as
1222                 // many as needed, using the appropriate offset value.
1223                 //      MOVD  n(R21),R31
1224                 //      MOVD  R31,n(R20)
1225                 //      MOVW  n1(R21),R31
1226                 //      MOVW  R31,n1(R20)
1227                 //      MOVH  n2(R21),R31
1228                 //      MOVH  R31,n2(R20)
1229                 //      MOVB  n3(R21),R31
1230                 //      MOVB  R31,n3(R20)
1231
1232                 // Each loop iteration moves 32 bytes
1233                 ctr := v.AuxInt / bytesPerLoop
1234
1235                 // Remainder after the loop
1236                 rem := v.AuxInt % bytesPerLoop
1237
1238                 dstReg := v.Args[0].Reg()
1239                 srcReg := v.Args[1].Reg()
1240
1241                 // The set of registers used here must match the clobbered reg list
1242                 // in PPC64Ops.go.
1243                 offset := int64(0)
1244
1245                 // top of the loop
1246                 var top *obj.Prog
1247                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1248                 if ctr > 1 {
1249                         // Set up the CTR
1250                         p := s.Prog(ppc64.AMOVD)
1251                         p.From.Type = obj.TYPE_CONST
1252                         p.From.Offset = ctr
1253                         p.To.Type = obj.TYPE_REG
1254                         p.To.Reg = ppc64.REGTMP
1255
1256                         p = s.Prog(ppc64.AMOVD)
1257                         p.From.Type = obj.TYPE_REG
1258                         p.From.Reg = ppc64.REGTMP
1259                         p.To.Type = obj.TYPE_REG
1260                         p.To.Reg = ppc64.REG_CTR
1261
1262                         // Use REGTMP as index reg
1263                         p = s.Prog(ppc64.AMOVD)
1264                         p.From.Type = obj.TYPE_CONST
1265                         p.From.Offset = 16
1266                         p.To.Type = obj.TYPE_REG
1267                         p.To.Reg = ppc64.REGTMP
1268
1269                         // Don't add padding for
1270                         // alignment with small iteration
1271                         // counts.
1272                         if ctr > 3 {
1273                                 p = s.Prog(obj.APCALIGN)
1274                                 p.From.Type = obj.TYPE_CONST
1275                                 p.From.Offset = 16
1276                         }
1277
1278                         // Generate 16 byte loads and stores.
1279                         // Use temp register for index (16)
1280                         // on the second one.
1281
1282                         p = s.Prog(ppc64.ALXVD2X)
1283                         p.From.Type = obj.TYPE_MEM
1284                         p.From.Reg = srcReg
1285                         p.From.Index = ppc64.REGZERO
1286                         p.To.Type = obj.TYPE_REG
1287                         p.To.Reg = ppc64.REG_VS32
1288                         if top == nil {
1289                                 top = p
1290                         }
1291                         p = s.Prog(ppc64.ALXVD2X)
1292                         p.From.Type = obj.TYPE_MEM
1293                         p.From.Reg = srcReg
1294                         p.From.Index = ppc64.REGTMP
1295                         p.To.Type = obj.TYPE_REG
1296                         p.To.Reg = ppc64.REG_VS33
1297
1298                         // increment the src reg for next iteration
1299                         p = s.Prog(ppc64.AADD)
1300                         p.Reg = srcReg
1301                         p.From.Type = obj.TYPE_CONST
1302                         p.From.Offset = bytesPerLoop
1303                         p.To.Type = obj.TYPE_REG
1304                         p.To.Reg = srcReg
1305
1306                         // generate 16 byte stores
1307                         p = s.Prog(ppc64.ASTXVD2X)
1308                         p.From.Type = obj.TYPE_REG
1309                         p.From.Reg = ppc64.REG_VS32
1310                         p.To.Type = obj.TYPE_MEM
1311                         p.To.Reg = dstReg
1312                         p.To.Index = ppc64.REGZERO
1313
1314                         p = s.Prog(ppc64.ASTXVD2X)
1315                         p.From.Type = obj.TYPE_REG
1316                         p.From.Reg = ppc64.REG_VS33
1317                         p.To.Type = obj.TYPE_MEM
1318                         p.To.Reg = dstReg
1319                         p.To.Index = ppc64.REGTMP
1320
1321                         // increment the dst reg for next iteration
1322                         p = s.Prog(ppc64.AADD)
1323                         p.Reg = dstReg
1324                         p.From.Type = obj.TYPE_CONST
1325                         p.From.Offset = bytesPerLoop
1326                         p.To.Type = obj.TYPE_REG
1327                         p.To.Reg = dstReg
1328
1329                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1330                         // to loop top.
1331                         p = s.Prog(ppc64.ABC)
1332                         p.From.Type = obj.TYPE_CONST
1333                         p.From.Offset = ppc64.BO_BCTR
1334                         p.Reg = ppc64.REG_R0
1335                         p.To.Type = obj.TYPE_BRANCH
1336                         gc.Patch(p, top)
1337
1338                         // srcReg and dstReg were incremented in the loop, so
1339                         // later instructions start with offset 0.
1340                         offset = int64(0)
1341                 }
1342
1343                 // No loop was generated for one iteration, so
1344                 // add 32 bytes to the remainder to move those bytes.
1345                 if ctr == 1 {
1346                         rem += bytesPerLoop
1347                 }
1348
1349                 if rem >= 16 {
1350                         // Generate 16 byte loads and stores.
1351                         // Use temp register for index (value 16)
1352                         // on the second one.
1353                         p := s.Prog(ppc64.ALXVD2X)
1354                         p.From.Type = obj.TYPE_MEM
1355                         p.From.Reg = srcReg
1356                         p.From.Index = ppc64.REGZERO
1357                         p.To.Type = obj.TYPE_REG
1358                         p.To.Reg = ppc64.REG_VS32
1359
1360                         p = s.Prog(ppc64.ASTXVD2X)
1361                         p.From.Type = obj.TYPE_REG
1362                         p.From.Reg = ppc64.REG_VS32
1363                         p.To.Type = obj.TYPE_MEM
1364                         p.To.Reg = dstReg
1365                         p.To.Index = ppc64.REGZERO
1366
1367                         offset = 16
1368                         rem -= 16
1369
1370                         if rem >= 16 {
1371                                 // Use REGTMP as index reg
1372                                 p := s.Prog(ppc64.AMOVD)
1373                                 p.From.Type = obj.TYPE_CONST
1374                                 p.From.Offset = 16
1375                                 p.To.Type = obj.TYPE_REG
1376                                 p.To.Reg = ppc64.REGTMP
1377
1378                                 p = s.Prog(ppc64.ALXVD2X)
1379                                 p.From.Type = obj.TYPE_MEM
1380                                 p.From.Reg = srcReg
1381                                 p.From.Index = ppc64.REGTMP
1382                                 p.To.Type = obj.TYPE_REG
1383                                 p.To.Reg = ppc64.REG_VS32
1384
1385                                 p = s.Prog(ppc64.ASTXVD2X)
1386                                 p.From.Type = obj.TYPE_REG
1387                                 p.From.Reg = ppc64.REG_VS32
1388                                 p.To.Type = obj.TYPE_MEM
1389                                 p.To.Reg = dstReg
1390                                 p.To.Index = ppc64.REGTMP
1391
1392                                 offset = 32
1393                                 rem -= 16
1394                         }
1395                 }
1396
1397                 // Generate all the remaining load and store pairs, starting with
1398                 // as many 8 byte moves as possible, then 4, 2, 1.
1399                 for rem > 0 {
1400                         op, size := ppc64.AMOVB, int64(1)
1401                         switch {
1402                         case rem >= 8:
1403                                 op, size = ppc64.AMOVD, 8
1404                         case rem >= 4:
1405                                 op, size = ppc64.AMOVW, 4
1406                         case rem >= 2:
1407                                 op, size = ppc64.AMOVH, 2
1408                         }
1409                         // Load
1410                         p := s.Prog(op)
1411                         p.To.Type = obj.TYPE_REG
1412                         p.To.Reg = ppc64.REGTMP
1413                         p.From.Type = obj.TYPE_MEM
1414                         p.From.Reg = srcReg
1415                         p.From.Offset = offset
1416
1417                         // Store
1418                         p = s.Prog(op)
1419                         p.From.Type = obj.TYPE_REG
1420                         p.From.Reg = ppc64.REGTMP
1421                         p.To.Type = obj.TYPE_MEM
1422                         p.To.Reg = dstReg
1423                         p.To.Offset = offset
1424                         rem -= size
1425                         offset += size
1426                 }
1427
1428         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1429                 bytesPerLoop := int64(64)
1430                 // This is used when moving more
1431                 // than 8 bytes on power9.  Moves start with
1432                 // as many 8 byte moves as possible, then
1433                 // 4, 2, or 1 byte(s) as remaining.  This will
1434                 // work and be efficient for power8 or later.
		// If there are 128 or more bytes, then a
		// loop is generated to move 64 bytes and
		// update the src and dst addresses on each
		// iteration. When < 128 bytes, the appropriate
		// number of moves are generated based on the
		// size.
		// When moving >= 128 bytes a loop is used
		//      MOVD len/64,REG_TMP
		//      MOVD REG_TMP,CTR
		//      PCALIGN $16
		// top:
		//      LXV 0(R21),VS32
		//      LXV 16(R21),VS33
		//      STXV VS32,0(R20)
		//      STXV VS33,16(R20)
		//      LXV 32(R21),VS32
		//      LXV 48(R21),VS33
		//      STXV VS32,32(R20)
		//      STXV VS33,48(R20)
		//      ADD $64,R21
		//      ADD $64,R20
		//      BC 16,0,top
1452                 // Bytes not moved by this loop are moved
1453                 // with a combination of the following instructions,
1454                 // starting with the largest sizes and generating as
1455                 // many as needed, using the appropriate offset value.
1456                 //      MOVD  n(R21),R31
1457                 //      MOVD  R31,n(R20)
1458                 //      MOVW  n1(R21),R31
1459                 //      MOVW  R31,n1(R20)
1460                 //      MOVH  n2(R21),R31
1461                 //      MOVH  R31,n2(R20)
1462                 //      MOVB  n3(R21),R31
1463                 //      MOVB  R31,n3(R20)
1464
		// Each loop iteration moves 64 bytes (bytesPerLoop)
1466                 ctr := v.AuxInt / bytesPerLoop
1467
1468                 // Remainder after the loop
1469                 rem := v.AuxInt % bytesPerLoop
1470
1471                 dstReg := v.Args[0].Reg()
1472                 srcReg := v.Args[1].Reg()
1473
1474                 offset := int64(0)
1475
1476                 // top of the loop
1477                 var top *obj.Prog
1478
		// Only generate looping code when the loop counter is > 1,
		// i.e. when moving at least 128 bytes
1480                 if ctr > 1 {
1481                         // Set up the CTR
1482                         p := s.Prog(ppc64.AMOVD)
1483                         p.From.Type = obj.TYPE_CONST
1484                         p.From.Offset = ctr
1485                         p.To.Type = obj.TYPE_REG
1486                         p.To.Reg = ppc64.REGTMP
1487
1488                         p = s.Prog(ppc64.AMOVD)
1489                         p.From.Type = obj.TYPE_REG
1490                         p.From.Reg = ppc64.REGTMP
1491                         p.To.Type = obj.TYPE_REG
1492                         p.To.Reg = ppc64.REG_CTR
1493
1494                         p = s.Prog(obj.APCALIGN)
1495                         p.From.Type = obj.TYPE_CONST
1496                         p.From.Offset = 16
1497
1498                         // Generate 16 byte loads and stores.
1499                         p = s.Prog(ppc64.ALXV)
1500                         p.From.Type = obj.TYPE_MEM
1501                         p.From.Reg = srcReg
1502                         p.From.Offset = offset
1503                         p.To.Type = obj.TYPE_REG
1504                         p.To.Reg = ppc64.REG_VS32
1505                         if top == nil {
1506                                 top = p
1507                         }
1508                         p = s.Prog(ppc64.ALXV)
1509                         p.From.Type = obj.TYPE_MEM
1510                         p.From.Reg = srcReg
1511                         p.From.Offset = offset + 16
1512                         p.To.Type = obj.TYPE_REG
1513                         p.To.Reg = ppc64.REG_VS33
1514
1515                         // generate 16 byte stores
1516                         p = s.Prog(ppc64.ASTXV)
1517                         p.From.Type = obj.TYPE_REG
1518                         p.From.Reg = ppc64.REG_VS32
1519                         p.To.Type = obj.TYPE_MEM
1520                         p.To.Reg = dstReg
1521                         p.To.Offset = offset
1522
1523                         p = s.Prog(ppc64.ASTXV)
1524                         p.From.Type = obj.TYPE_REG
1525                         p.From.Reg = ppc64.REG_VS33
1526                         p.To.Type = obj.TYPE_MEM
1527                         p.To.Reg = dstReg
1528                         p.To.Offset = offset + 16
1529
1530                         // Generate 16 byte loads and stores.
1531                         p = s.Prog(ppc64.ALXV)
1532                         p.From.Type = obj.TYPE_MEM
1533                         p.From.Reg = srcReg
1534                         p.From.Offset = offset + 32
1535                         p.To.Type = obj.TYPE_REG
1536                         p.To.Reg = ppc64.REG_VS32
1537
1538                         p = s.Prog(ppc64.ALXV)
1539                         p.From.Type = obj.TYPE_MEM
1540                         p.From.Reg = srcReg
1541                         p.From.Offset = offset + 48
1542                         p.To.Type = obj.TYPE_REG
1543                         p.To.Reg = ppc64.REG_VS33
1544
1545                         // generate 16 byte stores
1546                         p = s.Prog(ppc64.ASTXV)
1547                         p.From.Type = obj.TYPE_REG
1548                         p.From.Reg = ppc64.REG_VS32
1549                         p.To.Type = obj.TYPE_MEM
1550                         p.To.Reg = dstReg
1551                         p.To.Offset = offset + 32
1552
1553                         p = s.Prog(ppc64.ASTXV)
1554                         p.From.Type = obj.TYPE_REG
1555                         p.From.Reg = ppc64.REG_VS33
1556                         p.To.Type = obj.TYPE_MEM
1557                         p.To.Reg = dstReg
1558                         p.To.Offset = offset + 48
1559
1560                         // increment the src reg for next iteration
1561                         p = s.Prog(ppc64.AADD)
1562                         p.Reg = srcReg
1563                         p.From.Type = obj.TYPE_CONST
1564                         p.From.Offset = bytesPerLoop
1565                         p.To.Type = obj.TYPE_REG
1566                         p.To.Reg = srcReg
1567
1568                         // increment the dst reg for next iteration
1569                         p = s.Prog(ppc64.AADD)
1570                         p.Reg = dstReg
1571                         p.From.Type = obj.TYPE_CONST
1572                         p.From.Offset = bytesPerLoop
1573                         p.To.Type = obj.TYPE_REG
1574                         p.To.Reg = dstReg
1575
1576                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1577                         // to loop top.
1578                         p = s.Prog(ppc64.ABC)
1579                         p.From.Type = obj.TYPE_CONST
1580                         p.From.Offset = ppc64.BO_BCTR
1581                         p.Reg = ppc64.REG_R0
1582                         p.To.Type = obj.TYPE_BRANCH
1583                         gc.Patch(p, top)
1584
1585                         // srcReg and dstReg were incremented in the loop, so
1586                         // later instructions start with offset 0.
1587                         offset = int64(0)
1588                 }
1589
		// No loop was generated for one iteration, so
		// add 64 bytes (bytesPerLoop) to the remainder to move those bytes.
1592                 if ctr == 1 {
1593                         rem += bytesPerLoop
1594                 }
1595                 if rem >= 32 {
1596                         p := s.Prog(ppc64.ALXV)
1597                         p.From.Type = obj.TYPE_MEM
1598                         p.From.Reg = srcReg
1599                         p.To.Type = obj.TYPE_REG
1600                         p.To.Reg = ppc64.REG_VS32
1601
1602                         p = s.Prog(ppc64.ALXV)
1603                         p.From.Type = obj.TYPE_MEM
1604                         p.From.Reg = srcReg
1605                         p.From.Offset = 16
1606                         p.To.Type = obj.TYPE_REG
1607                         p.To.Reg = ppc64.REG_VS33
1608
1609                         p = s.Prog(ppc64.ASTXV)
1610                         p.From.Type = obj.TYPE_REG
1611                         p.From.Reg = ppc64.REG_VS32
1612                         p.To.Type = obj.TYPE_MEM
1613                         p.To.Reg = dstReg
1614
1615                         p = s.Prog(ppc64.ASTXV)
1616                         p.From.Type = obj.TYPE_REG
1617                         p.From.Reg = ppc64.REG_VS33
1618                         p.To.Type = obj.TYPE_MEM
1619                         p.To.Reg = dstReg
1620                         p.To.Offset = 16
1621
1622                         offset = 32
1623                         rem -= 32
1624                 }
1625
1626                 if rem >= 16 {
1627                         // Generate 16 byte loads and stores.
1628                         p := s.Prog(ppc64.ALXV)
1629                         p.From.Type = obj.TYPE_MEM
1630                         p.From.Reg = srcReg
1631                         p.From.Offset = offset
1632                         p.To.Type = obj.TYPE_REG
1633                         p.To.Reg = ppc64.REG_VS32
1634
1635                         p = s.Prog(ppc64.ASTXV)
1636                         p.From.Type = obj.TYPE_REG
1637                         p.From.Reg = ppc64.REG_VS32
1638                         p.To.Type = obj.TYPE_MEM
1639                         p.To.Reg = dstReg
1640                         p.To.Offset = offset
1641
1642                         offset += 16
1643                         rem -= 16
1644
1645                         if rem >= 16 {
1646                                 p := s.Prog(ppc64.ALXV)
1647                                 p.From.Type = obj.TYPE_MEM
1648                                 p.From.Reg = srcReg
1649                                 p.From.Offset = offset
1650                                 p.To.Type = obj.TYPE_REG
1651                                 p.To.Reg = ppc64.REG_VS32
1652
1653                                 p = s.Prog(ppc64.ASTXV)
1654                                 p.From.Type = obj.TYPE_REG
1655                                 p.From.Reg = ppc64.REG_VS32
1656                                 p.To.Type = obj.TYPE_MEM
1657                                 p.To.Reg = dstReg
1658                                 p.To.Offset = offset
1659
1660                                 offset += 16
1661                                 rem -= 16
1662                         }
1663                 }
1664                 // Generate all the remaining load and store pairs, starting with
1665                 // as many 8 byte moves as possible, then 4, 2, 1.
1666                 for rem > 0 {
1667                         op, size := ppc64.AMOVB, int64(1)
1668                         switch {
1669                         case rem >= 8:
1670                                 op, size = ppc64.AMOVD, 8
1671                         case rem >= 4:
1672                                 op, size = ppc64.AMOVW, 4
1673                         case rem >= 2:
1674                                 op, size = ppc64.AMOVH, 2
1675                         }
1676                         // Load
1677                         p := s.Prog(op)
1678                         p.To.Type = obj.TYPE_REG
1679                         p.To.Reg = ppc64.REGTMP
1680                         p.From.Type = obj.TYPE_MEM
1681                         p.From.Reg = srcReg
1682                         p.From.Offset = offset
1683
1684                         // Store
1685                         p = s.Prog(op)
1686                         p.From.Type = obj.TYPE_REG
1687                         p.From.Reg = ppc64.REGTMP
1688                         p.To.Type = obj.TYPE_MEM
1689                         p.To.Reg = dstReg
1690                         p.To.Offset = offset
1691                         rem -= size
1692                         offset += size
1693                 }
1694
1695         case ssa.OpPPC64CALLstatic:
1696                 s.Call(v)
1697
1698         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1699                 p := s.Prog(ppc64.AMOVD)
1700                 p.From.Type = obj.TYPE_REG
1701                 p.From.Reg = v.Args[0].Reg()
1702                 p.To.Type = obj.TYPE_REG
1703                 p.To.Reg = ppc64.REG_LR
1704
1705                 if v.Args[0].Reg() != ppc64.REG_R12 {
1706                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1707                 }
1708
1709                 pp := s.Call(v)
1710                 pp.To.Reg = ppc64.REG_LR
1711
1712                 if gc.Ctxt.Flag_shared {
1713                         // When compiling Go into PIC, the function we just
1714                         // called via pointer might have been implemented in
1715                         // a separate module and so overwritten the TOC
1716                         // pointer in R2; reload it.
1717                         q := s.Prog(ppc64.AMOVD)
1718                         q.From.Type = obj.TYPE_MEM
1719                         q.From.Offset = 24
1720                         q.From.Reg = ppc64.REGSP
1721                         q.To.Type = obj.TYPE_REG
1722                         q.To.Reg = ppc64.REG_R2
1723                 }
1724
1725         case ssa.OpPPC64LoweredWB:
1726                 p := s.Prog(obj.ACALL)
1727                 p.To.Type = obj.TYPE_MEM
1728                 p.To.Name = obj.NAME_EXTERN
1729                 p.To.Sym = v.Aux.(*obj.LSym)
1730
1731         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1732                 p := s.Prog(obj.ACALL)
1733                 p.To.Type = obj.TYPE_MEM
1734                 p.To.Name = obj.NAME_EXTERN
1735                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1736                 s.UseArgs(16) // space used in callee args area by assembly stubs
1737
1738         case ssa.OpPPC64LoweredNilCheck:
1739                 if objabi.GOOS == "aix" {
1740                         // CMP Rarg0, R0
1741                         // BNE 2(PC)
1742                         // STW R0, 0(R0)
1743                         // NOP (so the BNE has somewhere to land)
1744
1745                         // CMP Rarg0, R0
1746                         p := s.Prog(ppc64.ACMP)
1747                         p.From.Type = obj.TYPE_REG
1748                         p.From.Reg = v.Args[0].Reg()
1749                         p.To.Type = obj.TYPE_REG
1750                         p.To.Reg = ppc64.REG_R0
1751
1752                         // BNE 2(PC)
1753                         p2 := s.Prog(ppc64.ABNE)
1754                         p2.To.Type = obj.TYPE_BRANCH
1755
1756                         // STW R0, 0(R0)
1757                         // Write at 0 is forbidden and will trigger a SIGSEGV
1758                         p = s.Prog(ppc64.AMOVW)
1759                         p.From.Type = obj.TYPE_REG
1760                         p.From.Reg = ppc64.REG_R0
1761                         p.To.Type = obj.TYPE_MEM
1762                         p.To.Reg = ppc64.REG_R0
1763
1764                         // NOP (so the BNE has somewhere to land)
1765                         nop := s.Prog(obj.ANOP)
1766                         gc.Patch(p2, nop)
1767
1768                 } else {
1769                         // Issue a load which will fault if arg is nil.
1770                         p := s.Prog(ppc64.AMOVBZ)
1771                         p.From.Type = obj.TYPE_MEM
1772                         p.From.Reg = v.Args[0].Reg()
1773                         gc.AddAux(&p.From, v)
1774                         p.To.Type = obj.TYPE_REG
1775                         p.To.Reg = ppc64.REGTMP
1776                 }
1777                 if logopt.Enabled() {
1778                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1779                 }
1780                 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1781                         gc.Warnl(v.Pos, "generated nil check")
1782                 }
1783
1784         // These should be resolved by rules and not make it here.
1785         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1786                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1787                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1788                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1789         case ssa.OpPPC64InvertFlags:
1790                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1791         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT, ssa.OpPPC64FlagCarrySet, ssa.OpPPC64FlagCarryClear:
1792                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1793         case ssa.OpClobber:
1794                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1795         default:
1796                 v.Fatalf("genValue not implemented: %s", v.LongString())
1797         }
1798 }
1799
1800 var blockJump = [...]struct {
1801         asm, invasm     obj.As
1802         asmeq, invasmun bool
1803 }{
1804         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1805         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1806
1807         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1808         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1809         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1810         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1811
1812         // TODO: need to work FP comparisons into block jumps
1813         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1814         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1815         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1816         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1817 }
1818
1819 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1820         switch b.Kind {
1821         case ssa.BlockDefer:
1822                 // defer returns in R3:
1823                 // 0 if we should continue executing
1824                 // 1 if we should jump to deferreturn call
1825                 p := s.Prog(ppc64.ACMP)
1826                 p.From.Type = obj.TYPE_REG
1827                 p.From.Reg = ppc64.REG_R3
1828                 p.To.Type = obj.TYPE_REG
1829                 p.To.Reg = ppc64.REG_R0
1830
1831                 p = s.Prog(ppc64.ABNE)
1832                 p.To.Type = obj.TYPE_BRANCH
1833                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1834                 if b.Succs[0].Block() != next {
1835                         p := s.Prog(obj.AJMP)
1836                         p.To.Type = obj.TYPE_BRANCH
1837                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1838                 }
1839
1840         case ssa.BlockPlain:
1841                 if b.Succs[0].Block() != next {
1842                         p := s.Prog(obj.AJMP)
1843                         p.To.Type = obj.TYPE_BRANCH
1844                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1845                 }
1846         case ssa.BlockExit:
1847         case ssa.BlockRet:
1848                 s.Prog(obj.ARET)
1849         case ssa.BlockRetJmp:
1850                 p := s.Prog(obj.AJMP)
1851                 p.To.Type = obj.TYPE_MEM
1852                 p.To.Name = obj.NAME_EXTERN
1853                 p.To.Sym = b.Aux.(*obj.LSym)
1854
1855         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1856                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1857                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1858                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1859                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1860                 jmp := blockJump[b.Kind]
1861                 switch next {
1862                 case b.Succs[0].Block():
1863                         s.Br(jmp.invasm, b.Succs[1].Block())
1864                         if jmp.invasmun {
1865                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1866                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1867                         }
1868                 case b.Succs[1].Block():
1869                         s.Br(jmp.asm, b.Succs[0].Block())
1870                         if jmp.asmeq {
1871                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1872                         }
1873                 default:
1874                         if b.Likely != ssa.BranchUnlikely {
1875                                 s.Br(jmp.asm, b.Succs[0].Block())
1876                                 if jmp.asmeq {
1877                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1878                                 }
1879                                 s.Br(obj.AJMP, b.Succs[1].Block())
1880                         } else {
1881                                 s.Br(jmp.invasm, b.Succs[1].Block())
1882                                 if jmp.invasmun {
1883                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1884                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1885                                 }
1886                                 s.Br(obj.AJMP, b.Succs[0].Block())
1887                         }
1888                 }
1889         default:
1890                 b.Fatalf("branch not implemented: %s", b.LongString())
1891         }
1892 }