]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
[dev.regabi] cmd/compile: introduce cmd/compile/internal/base [generated]
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/base"
9         "cmd/compile/internal/gc"
10         "cmd/compile/internal/logopt"
11         "cmd/compile/internal/ssa"
12         "cmd/compile/internal/types"
13         "cmd/internal/obj"
14         "cmd/internal/obj/ppc64"
15         "cmd/internal/objabi"
16         "math"
17         "strings"
18 )
19
20 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
21 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
22         //      flive := b.FlagsLiveAtEnd
23         //      if b.Control != nil && b.Control.Type.IsFlags() {
24         //              flive = true
25         //      }
26         //      for i := len(b.Values) - 1; i >= 0; i-- {
27         //              v := b.Values[i]
28         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
29         //                      // The "mark" is any non-nil Aux value.
30         //                      v.Aux = v
31         //              }
32         //              if v.Type.IsFlags() {
33         //                      flive = false
34         //              }
35         //              for _, a := range v.Args {
36         //                      if a.Type.IsFlags() {
37         //                              flive = true
38         //                      }
39         //              }
40         //      }
41 }
42
43 // loadByType returns the load instruction of the given type.
44 func loadByType(t *types.Type) obj.As {
45         if t.IsFloat() {
46                 switch t.Size() {
47                 case 4:
48                         return ppc64.AFMOVS
49                 case 8:
50                         return ppc64.AFMOVD
51                 }
52         } else {
53                 switch t.Size() {
54                 case 1:
55                         if t.IsSigned() {
56                                 return ppc64.AMOVB
57                         } else {
58                                 return ppc64.AMOVBZ
59                         }
60                 case 2:
61                         if t.IsSigned() {
62                                 return ppc64.AMOVH
63                         } else {
64                                 return ppc64.AMOVHZ
65                         }
66                 case 4:
67                         if t.IsSigned() {
68                                 return ppc64.AMOVW
69                         } else {
70                                 return ppc64.AMOVWZ
71                         }
72                 case 8:
73                         return ppc64.AMOVD
74                 }
75         }
76         panic("bad load type")
77 }
78
79 // storeByType returns the store instruction of the given type.
80 func storeByType(t *types.Type) obj.As {
81         if t.IsFloat() {
82                 switch t.Size() {
83                 case 4:
84                         return ppc64.AFMOVS
85                 case 8:
86                         return ppc64.AFMOVD
87                 }
88         } else {
89                 switch t.Size() {
90                 case 1:
91                         return ppc64.AMOVB
92                 case 2:
93                         return ppc64.AMOVH
94                 case 4:
95                         return ppc64.AMOVW
96                 case 8:
97                         return ppc64.AMOVD
98                 }
99         }
100         panic("bad store type")
101 }
102
103 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
104         switch v.Op {
105         case ssa.OpCopy:
106                 t := v.Type
107                 if t.IsMemory() {
108                         return
109                 }
110                 x := v.Args[0].Reg()
111                 y := v.Reg()
112                 if x != y {
113                         rt := obj.TYPE_REG
114                         op := ppc64.AMOVD
115
116                         if t.IsFloat() {
117                                 op = ppc64.AFMOVD
118                         }
119                         p := s.Prog(op)
120                         p.From.Type = rt
121                         p.From.Reg = x
122                         p.To.Type = rt
123                         p.To.Reg = y
124                 }
125
126         case ssa.OpPPC64LoweredMuluhilo:
127                 // MULHDU       Rarg1, Rarg0, Reg0
128                 // MULLD        Rarg1, Rarg0, Reg1
129                 r0 := v.Args[0].Reg()
130                 r1 := v.Args[1].Reg()
131                 p := s.Prog(ppc64.AMULHDU)
132                 p.From.Type = obj.TYPE_REG
133                 p.From.Reg = r1
134                 p.Reg = r0
135                 p.To.Type = obj.TYPE_REG
136                 p.To.Reg = v.Reg0()
137                 p1 := s.Prog(ppc64.AMULLD)
138                 p1.From.Type = obj.TYPE_REG
139                 p1.From.Reg = r1
140                 p1.Reg = r0
141                 p1.To.Type = obj.TYPE_REG
142                 p1.To.Reg = v.Reg1()
143
144         case ssa.OpPPC64LoweredAdd64Carry:
145                 // ADDC         Rarg2, -1, Rtmp
146                 // ADDE         Rarg1, Rarg0, Reg0
147                 // ADDZE        Rzero, Reg1
148                 r0 := v.Args[0].Reg()
149                 r1 := v.Args[1].Reg()
150                 r2 := v.Args[2].Reg()
151                 p := s.Prog(ppc64.AADDC)
152                 p.From.Type = obj.TYPE_CONST
153                 p.From.Offset = -1
154                 p.Reg = r2
155                 p.To.Type = obj.TYPE_REG
156                 p.To.Reg = ppc64.REGTMP
157                 p1 := s.Prog(ppc64.AADDE)
158                 p1.From.Type = obj.TYPE_REG
159                 p1.From.Reg = r1
160                 p1.Reg = r0
161                 p1.To.Type = obj.TYPE_REG
162                 p1.To.Reg = v.Reg0()
163                 p2 := s.Prog(ppc64.AADDZE)
164                 p2.From.Type = obj.TYPE_REG
165                 p2.From.Reg = ppc64.REGZERO
166                 p2.To.Type = obj.TYPE_REG
167                 p2.To.Reg = v.Reg1()
168
169         case ssa.OpPPC64LoweredAtomicAnd8,
170                 ssa.OpPPC64LoweredAtomicAnd32,
171                 ssa.OpPPC64LoweredAtomicOr8,
172                 ssa.OpPPC64LoweredAtomicOr32:
173                 // LWSYNC
174                 // LBAR/LWAR    (Rarg0), Rtmp
175                 // AND/OR       Rarg1, Rtmp
176                 // STBCCC/STWCCC Rtmp, (Rarg0)
177                 // BNE          -3(PC)
178                 ld := ppc64.ALBAR
179                 st := ppc64.ASTBCCC
180                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
181                         ld = ppc64.ALWAR
182                         st = ppc64.ASTWCCC
183                 }
184                 r0 := v.Args[0].Reg()
185                 r1 := v.Args[1].Reg()
186                 // LWSYNC - Assuming shared data not write-through-required nor
187                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
188                 plwsync := s.Prog(ppc64.ALWSYNC)
189                 plwsync.To.Type = obj.TYPE_NONE
190                 // LBAR or LWAR
191                 p := s.Prog(ld)
192                 p.From.Type = obj.TYPE_MEM
193                 p.From.Reg = r0
194                 p.To.Type = obj.TYPE_REG
195                 p.To.Reg = ppc64.REGTMP
196                 // AND/OR reg1,out
197                 p1 := s.Prog(v.Op.Asm())
198                 p1.From.Type = obj.TYPE_REG
199                 p1.From.Reg = r1
200                 p1.To.Type = obj.TYPE_REG
201                 p1.To.Reg = ppc64.REGTMP
202                 // STBCCC or STWCCC
203                 p2 := s.Prog(st)
204                 p2.From.Type = obj.TYPE_REG
205                 p2.From.Reg = ppc64.REGTMP
206                 p2.To.Type = obj.TYPE_MEM
207                 p2.To.Reg = r0
208                 p2.RegTo2 = ppc64.REGTMP
209                 // BNE retry
210                 p3 := s.Prog(ppc64.ABNE)
211                 p3.To.Type = obj.TYPE_BRANCH
212                 gc.Patch(p3, p)
213
214         case ssa.OpPPC64LoweredAtomicAdd32,
215                 ssa.OpPPC64LoweredAtomicAdd64:
216                 // LWSYNC
217                 // LDAR/LWAR    (Rarg0), Rout
218                 // ADD          Rarg1, Rout
219                 // STDCCC/STWCCC Rout, (Rarg0)
220                 // BNE         -3(PC)
221                 // MOVW         Rout,Rout (if Add32)
222                 ld := ppc64.ALDAR
223                 st := ppc64.ASTDCCC
224                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
225                         ld = ppc64.ALWAR
226                         st = ppc64.ASTWCCC
227                 }
228                 r0 := v.Args[0].Reg()
229                 r1 := v.Args[1].Reg()
230                 out := v.Reg0()
231                 // LWSYNC - Assuming shared data not write-through-required nor
232                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
233                 plwsync := s.Prog(ppc64.ALWSYNC)
234                 plwsync.To.Type = obj.TYPE_NONE
235                 // LDAR or LWAR
236                 p := s.Prog(ld)
237                 p.From.Type = obj.TYPE_MEM
238                 p.From.Reg = r0
239                 p.To.Type = obj.TYPE_REG
240                 p.To.Reg = out
241                 // ADD reg1,out
242                 p1 := s.Prog(ppc64.AADD)
243                 p1.From.Type = obj.TYPE_REG
244                 p1.From.Reg = r1
245                 p1.To.Reg = out
246                 p1.To.Type = obj.TYPE_REG
247                 // STDCCC or STWCCC
248                 p3 := s.Prog(st)
249                 p3.From.Type = obj.TYPE_REG
250                 p3.From.Reg = out
251                 p3.To.Type = obj.TYPE_MEM
252                 p3.To.Reg = r0
253                 // BNE retry
254                 p4 := s.Prog(ppc64.ABNE)
255                 p4.To.Type = obj.TYPE_BRANCH
256                 gc.Patch(p4, p)
257
258                 // Ensure a 32 bit result
259                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
260                         p5 := s.Prog(ppc64.AMOVWZ)
261                         p5.To.Type = obj.TYPE_REG
262                         p5.To.Reg = out
263                         p5.From.Type = obj.TYPE_REG
264                         p5.From.Reg = out
265                 }
266
267         case ssa.OpPPC64LoweredAtomicExchange32,
268                 ssa.OpPPC64LoweredAtomicExchange64:
269                 // LWSYNC
270                 // LDAR/LWAR    (Rarg0), Rout
271                 // STDCCC/STWCCC Rout, (Rarg0)
272                 // BNE         -2(PC)
273                 // ISYNC
274                 ld := ppc64.ALDAR
275                 st := ppc64.ASTDCCC
276                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
277                         ld = ppc64.ALWAR
278                         st = ppc64.ASTWCCC
279                 }
280                 r0 := v.Args[0].Reg()
281                 r1 := v.Args[1].Reg()
282                 out := v.Reg0()
283                 // LWSYNC - Assuming shared data not write-through-required nor
284                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
285                 plwsync := s.Prog(ppc64.ALWSYNC)
286                 plwsync.To.Type = obj.TYPE_NONE
287                 // LDAR or LWAR
288                 p := s.Prog(ld)
289                 p.From.Type = obj.TYPE_MEM
290                 p.From.Reg = r0
291                 p.To.Type = obj.TYPE_REG
292                 p.To.Reg = out
293                 // STDCCC or STWCCC
294                 p1 := s.Prog(st)
295                 p1.From.Type = obj.TYPE_REG
296                 p1.From.Reg = r1
297                 p1.To.Type = obj.TYPE_MEM
298                 p1.To.Reg = r0
299                 // BNE retry
300                 p2 := s.Prog(ppc64.ABNE)
301                 p2.To.Type = obj.TYPE_BRANCH
302                 gc.Patch(p2, p)
303                 // ISYNC
304                 pisync := s.Prog(ppc64.AISYNC)
305                 pisync.To.Type = obj.TYPE_NONE
306
307         case ssa.OpPPC64LoweredAtomicLoad8,
308                 ssa.OpPPC64LoweredAtomicLoad32,
309                 ssa.OpPPC64LoweredAtomicLoad64,
310                 ssa.OpPPC64LoweredAtomicLoadPtr:
311                 // SYNC
312                 // MOVB/MOVD/MOVW (Rarg0), Rout
313                 // CMP Rout,Rout
314                 // BNE 1(PC)
315                 // ISYNC
316                 ld := ppc64.AMOVD
317                 cmp := ppc64.ACMP
318                 switch v.Op {
319                 case ssa.OpPPC64LoweredAtomicLoad8:
320                         ld = ppc64.AMOVBZ
321                 case ssa.OpPPC64LoweredAtomicLoad32:
322                         ld = ppc64.AMOVWZ
323                         cmp = ppc64.ACMPW
324                 }
325                 arg0 := v.Args[0].Reg()
326                 out := v.Reg0()
327                 // SYNC when AuxInt == 1; otherwise, load-acquire
328                 if v.AuxInt == 1 {
329                         psync := s.Prog(ppc64.ASYNC)
330                         psync.To.Type = obj.TYPE_NONE
331                 }
332                 // Load
333                 p := s.Prog(ld)
334                 p.From.Type = obj.TYPE_MEM
335                 p.From.Reg = arg0
336                 p.To.Type = obj.TYPE_REG
337                 p.To.Reg = out
338                 // CMP
339                 p1 := s.Prog(cmp)
340                 p1.From.Type = obj.TYPE_REG
341                 p1.From.Reg = out
342                 p1.To.Type = obj.TYPE_REG
343                 p1.To.Reg = out
344                 // BNE
345                 p2 := s.Prog(ppc64.ABNE)
346                 p2.To.Type = obj.TYPE_BRANCH
347                 // ISYNC
348                 pisync := s.Prog(ppc64.AISYNC)
349                 pisync.To.Type = obj.TYPE_NONE
350                 gc.Patch(p2, pisync)
351
352         case ssa.OpPPC64LoweredAtomicStore8,
353                 ssa.OpPPC64LoweredAtomicStore32,
354                 ssa.OpPPC64LoweredAtomicStore64:
355                 // SYNC or LWSYNC
356                 // MOVB/MOVW/MOVD arg1,(arg0)
357                 st := ppc64.AMOVD
358                 switch v.Op {
359                 case ssa.OpPPC64LoweredAtomicStore8:
360                         st = ppc64.AMOVB
361                 case ssa.OpPPC64LoweredAtomicStore32:
362                         st = ppc64.AMOVW
363                 }
364                 arg0 := v.Args[0].Reg()
365                 arg1 := v.Args[1].Reg()
366                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
367                 // SYNC
368                 syncOp := ppc64.ASYNC
369                 if v.AuxInt == 0 {
370                         syncOp = ppc64.ALWSYNC
371                 }
372                 psync := s.Prog(syncOp)
373                 psync.To.Type = obj.TYPE_NONE
374                 // Store
375                 p := s.Prog(st)
376                 p.To.Type = obj.TYPE_MEM
377                 p.To.Reg = arg0
378                 p.From.Type = obj.TYPE_REG
379                 p.From.Reg = arg1
380
381         case ssa.OpPPC64LoweredAtomicCas64,
382                 ssa.OpPPC64LoweredAtomicCas32:
383                 // LWSYNC
384                 // loop:
385                 // LDAR        (Rarg0), MutexHint, Rtmp
386                 // CMP         Rarg1, Rtmp
387                 // BNE         fail
388                 // STDCCC      Rarg2, (Rarg0)
389                 // BNE         loop
390                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
391                 // MOVD        $1, Rout
392                 // BR          end
393                 // fail:
394                 // MOVD        $0, Rout
395                 // end:
396                 ld := ppc64.ALDAR
397                 st := ppc64.ASTDCCC
398                 cmp := ppc64.ACMP
399                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
400                         ld = ppc64.ALWAR
401                         st = ppc64.ASTWCCC
402                         cmp = ppc64.ACMPW
403                 }
404                 r0 := v.Args[0].Reg()
405                 r1 := v.Args[1].Reg()
406                 r2 := v.Args[2].Reg()
407                 out := v.Reg0()
408                 // LWSYNC - Assuming shared data not write-through-required nor
409                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
410                 plwsync1 := s.Prog(ppc64.ALWSYNC)
411                 plwsync1.To.Type = obj.TYPE_NONE
412                 // LDAR or LWAR
413                 p := s.Prog(ld)
414                 p.From.Type = obj.TYPE_MEM
415                 p.From.Reg = r0
416                 p.To.Type = obj.TYPE_REG
417                 p.To.Reg = ppc64.REGTMP
418                 // If it is a Compare-and-Swap-Release operation, set the EH field with
419                 // the release hint.
420                 if v.AuxInt == 0 {
421                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
422                 }
423                 // CMP reg1,reg2
424                 p1 := s.Prog(cmp)
425                 p1.From.Type = obj.TYPE_REG
426                 p1.From.Reg = r1
427                 p1.To.Reg = ppc64.REGTMP
428                 p1.To.Type = obj.TYPE_REG
429                 // BNE cas_fail
430                 p2 := s.Prog(ppc64.ABNE)
431                 p2.To.Type = obj.TYPE_BRANCH
432                 // STDCCC or STWCCC
433                 p3 := s.Prog(st)
434                 p3.From.Type = obj.TYPE_REG
435                 p3.From.Reg = r2
436                 p3.To.Type = obj.TYPE_MEM
437                 p3.To.Reg = r0
438                 // BNE retry
439                 p4 := s.Prog(ppc64.ABNE)
440                 p4.To.Type = obj.TYPE_BRANCH
441                 gc.Patch(p4, p)
442                 // LWSYNC - Assuming shared data not write-through-required nor
443                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
444                 // If the operation is a CAS-Release, then synchronization is not necessary.
445                 if v.AuxInt != 0 {
446                         plwsync2 := s.Prog(ppc64.ALWSYNC)
447                         plwsync2.To.Type = obj.TYPE_NONE
448                 }
449                 // return true
450                 p5 := s.Prog(ppc64.AMOVD)
451                 p5.From.Type = obj.TYPE_CONST
452                 p5.From.Offset = 1
453                 p5.To.Type = obj.TYPE_REG
454                 p5.To.Reg = out
455                 // BR done
456                 p6 := s.Prog(obj.AJMP)
457                 p6.To.Type = obj.TYPE_BRANCH
458                 // return false
459                 p7 := s.Prog(ppc64.AMOVD)
460                 p7.From.Type = obj.TYPE_CONST
461                 p7.From.Offset = 0
462                 p7.To.Type = obj.TYPE_REG
463                 p7.To.Reg = out
464                 gc.Patch(p2, p7)
465                 // done (label)
466                 p8 := s.Prog(obj.ANOP)
467                 gc.Patch(p6, p8)
468
469         case ssa.OpPPC64LoweredGetClosurePtr:
470                 // Closure pointer is R11 (already)
471                 gc.CheckLoweredGetClosurePtr(v)
472
473         case ssa.OpPPC64LoweredGetCallerSP:
474                 // caller's SP is FixedFrameSize below the address of the first arg
475                 p := s.Prog(ppc64.AMOVD)
476                 p.From.Type = obj.TYPE_ADDR
477                 p.From.Offset = -base.Ctxt.FixedFrameSize()
478                 p.From.Name = obj.NAME_PARAM
479                 p.To.Type = obj.TYPE_REG
480                 p.To.Reg = v.Reg()
481
482         case ssa.OpPPC64LoweredGetCallerPC:
483                 p := s.Prog(obj.AGETCALLERPC)
484                 p.To.Type = obj.TYPE_REG
485                 p.To.Reg = v.Reg()
486
487         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
488                 // input is already rounded
489
490         case ssa.OpLoadReg:
491                 loadOp := loadByType(v.Type)
492                 p := s.Prog(loadOp)
493                 gc.AddrAuto(&p.From, v.Args[0])
494                 p.To.Type = obj.TYPE_REG
495                 p.To.Reg = v.Reg()
496
497         case ssa.OpStoreReg:
498                 storeOp := storeByType(v.Type)
499                 p := s.Prog(storeOp)
500                 p.From.Type = obj.TYPE_REG
501                 p.From.Reg = v.Args[0].Reg()
502                 gc.AddrAuto(&p.To, v)
503
504         case ssa.OpPPC64DIVD:
505                 // For now,
506                 //
507                 // cmp arg1, -1
508                 // be  ahead
509                 // v = arg0 / arg1
510                 // b over
511                 // ahead: v = - arg0
512                 // over: nop
513                 r := v.Reg()
514                 r0 := v.Args[0].Reg()
515                 r1 := v.Args[1].Reg()
516
517                 p := s.Prog(ppc64.ACMP)
518                 p.From.Type = obj.TYPE_REG
519                 p.From.Reg = r1
520                 p.To.Type = obj.TYPE_CONST
521                 p.To.Offset = -1
522
523                 pbahead := s.Prog(ppc64.ABEQ)
524                 pbahead.To.Type = obj.TYPE_BRANCH
525
526                 p = s.Prog(v.Op.Asm())
527                 p.From.Type = obj.TYPE_REG
528                 p.From.Reg = r1
529                 p.Reg = r0
530                 p.To.Type = obj.TYPE_REG
531                 p.To.Reg = r
532
533                 pbover := s.Prog(obj.AJMP)
534                 pbover.To.Type = obj.TYPE_BRANCH
535
536                 p = s.Prog(ppc64.ANEG)
537                 p.To.Type = obj.TYPE_REG
538                 p.To.Reg = r
539                 p.From.Type = obj.TYPE_REG
540                 p.From.Reg = r0
541                 gc.Patch(pbahead, p)
542
543                 p = s.Prog(obj.ANOP)
544                 gc.Patch(pbover, p)
545
546         case ssa.OpPPC64DIVW:
547                 // word-width version of above
548                 r := v.Reg()
549                 r0 := v.Args[0].Reg()
550                 r1 := v.Args[1].Reg()
551
552                 p := s.Prog(ppc64.ACMPW)
553                 p.From.Type = obj.TYPE_REG
554                 p.From.Reg = r1
555                 p.To.Type = obj.TYPE_CONST
556                 p.To.Offset = -1
557
558                 pbahead := s.Prog(ppc64.ABEQ)
559                 pbahead.To.Type = obj.TYPE_BRANCH
560
561                 p = s.Prog(v.Op.Asm())
562                 p.From.Type = obj.TYPE_REG
563                 p.From.Reg = r1
564                 p.Reg = r0
565                 p.To.Type = obj.TYPE_REG
566                 p.To.Reg = r
567
568                 pbover := s.Prog(obj.AJMP)
569                 pbover.To.Type = obj.TYPE_BRANCH
570
571                 p = s.Prog(ppc64.ANEG)
572                 p.To.Type = obj.TYPE_REG
573                 p.To.Reg = r
574                 p.From.Type = obj.TYPE_REG
575                 p.From.Reg = r0
576                 gc.Patch(pbahead, p)
577
578                 p = s.Prog(obj.ANOP)
579                 gc.Patch(pbover, p)
580
581         case ssa.OpPPC64CLRLSLWI:
582                 r := v.Reg()
583                 r1 := v.Args[0].Reg()
584                 shifts := v.AuxInt
585                 p := s.Prog(v.Op.Asm())
586                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
587                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
588                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
589                 p.Reg = r1
590                 p.To.Type = obj.TYPE_REG
591                 p.To.Reg = r
592
593         case ssa.OpPPC64CLRLSLDI:
594                 r := v.Reg()
595                 r1 := v.Args[0].Reg()
596                 shifts := v.AuxInt
597                 p := s.Prog(v.Op.Asm())
598                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
599                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
600                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
601                 p.Reg = r1
602                 p.To.Type = obj.TYPE_REG
603                 p.To.Reg = r
604
605                 // Mask has been set as sh
606         case ssa.OpPPC64RLDICL:
607                 r := v.Reg()
608                 r1 := v.Args[0].Reg()
609                 shifts := v.AuxInt
610                 p := s.Prog(v.Op.Asm())
611                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
612                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)})
613                 p.Reg = r1
614                 p.To.Type = obj.TYPE_REG
615                 p.To.Reg = r
616
617         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
618                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
619                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
620                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
621                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
622                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
623                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
624                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
625                 r := v.Reg()
626                 r1 := v.Args[0].Reg()
627                 r2 := v.Args[1].Reg()
628                 p := s.Prog(v.Op.Asm())
629                 p.From.Type = obj.TYPE_REG
630                 p.From.Reg = r2
631                 p.Reg = r1
632                 p.To.Type = obj.TYPE_REG
633                 p.To.Reg = r
634
635         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
636                 r1 := v.Args[0].Reg()
637                 r2 := v.Args[1].Reg()
638                 p := s.Prog(v.Op.Asm())
639                 p.From.Type = obj.TYPE_REG
640                 p.From.Reg = r2
641                 p.Reg = r1
642                 p.To.Type = obj.TYPE_REG
643                 p.To.Reg = ppc64.REGTMP // result is not needed
644
645         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
646                 p := s.Prog(v.Op.Asm())
647                 p.From.Type = obj.TYPE_CONST
648                 p.From.Offset = v.AuxInt
649                 p.Reg = v.Args[0].Reg()
650                 p.To.Type = obj.TYPE_REG
651                 p.To.Reg = v.Reg()
652
653                 // Auxint holds encoded rotate + mask
654         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
655                 rot, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
656                 p := s.Prog(v.Op.Asm())
657                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
658                 p.Reg = v.Args[0].Reg()
659                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
660                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
661
662                 // Auxint holds mask
663         case ssa.OpPPC64RLWNM:
664                 _, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
665                 p := s.Prog(v.Op.Asm())
666                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
667                 p.Reg = v.Args[0].Reg()
668                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
669                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
670
671         case ssa.OpPPC64MADDLD:
672                 r := v.Reg()
673                 r1 := v.Args[0].Reg()
674                 r2 := v.Args[1].Reg()
675                 r3 := v.Args[2].Reg()
676                 // r = r1*r2 ± r3
677                 p := s.Prog(v.Op.Asm())
678                 p.From.Type = obj.TYPE_REG
679                 p.From.Reg = r1
680                 p.Reg = r2
681                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
682                 p.To.Type = obj.TYPE_REG
683                 p.To.Reg = r
684
685         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
686                 r := v.Reg()
687                 r1 := v.Args[0].Reg()
688                 r2 := v.Args[1].Reg()
689                 r3 := v.Args[2].Reg()
690                 // r = r1*r2 ± r3
691                 p := s.Prog(v.Op.Asm())
692                 p.From.Type = obj.TYPE_REG
693                 p.From.Reg = r1
694                 p.Reg = r3
695                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
696                 p.To.Type = obj.TYPE_REG
697                 p.To.Reg = r
698
699         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
700                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
701                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
702                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
703                 r := v.Reg()
704                 p := s.Prog(v.Op.Asm())
705                 p.To.Type = obj.TYPE_REG
706                 p.To.Reg = r
707                 p.From.Type = obj.TYPE_REG
708                 p.From.Reg = v.Args[0].Reg()
709
710         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
711                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
712                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
713                 p := s.Prog(v.Op.Asm())
714                 p.Reg = v.Args[0].Reg()
715                 p.From.Type = obj.TYPE_CONST
716                 p.From.Offset = v.AuxInt
717                 p.To.Type = obj.TYPE_REG
718                 p.To.Reg = v.Reg()
719
720         case ssa.OpPPC64SUBFCconst:
721                 p := s.Prog(v.Op.Asm())
722                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
723                 p.From.Type = obj.TYPE_REG
724                 p.From.Reg = v.Args[0].Reg()
725                 p.To.Type = obj.TYPE_REG
726                 p.To.Reg = v.Reg()
727
728         case ssa.OpPPC64ANDCCconst:
729                 p := s.Prog(v.Op.Asm())
730                 p.Reg = v.Args[0].Reg()
731                 p.From.Type = obj.TYPE_CONST
732                 p.From.Offset = v.AuxInt
733                 p.To.Type = obj.TYPE_REG
734                 p.To.Reg = ppc64.REGTMP // discard result
735
736         case ssa.OpPPC64MOVDaddr:
737                 switch v.Aux.(type) {
738                 default:
739                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
740                 case nil:
741                         // If aux offset and aux int are both 0, and the same
742                         // input and output regs are used, no instruction
743                         // needs to be generated, since it would just be
744                         // addi rx, rx, 0.
745                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
746                                 p := s.Prog(ppc64.AMOVD)
747                                 p.From.Type = obj.TYPE_ADDR
748                                 p.From.Reg = v.Args[0].Reg()
749                                 p.From.Offset = v.AuxInt
750                                 p.To.Type = obj.TYPE_REG
751                                 p.To.Reg = v.Reg()
752                         }
753
754                 case *obj.LSym, *gc.Node:
755                         p := s.Prog(ppc64.AMOVD)
756                         p.From.Type = obj.TYPE_ADDR
757                         p.From.Reg = v.Args[0].Reg()
758                         p.To.Type = obj.TYPE_REG
759                         p.To.Reg = v.Reg()
760                         gc.AddAux(&p.From, v)
761
762                 }
763
764         case ssa.OpPPC64MOVDconst:
765                 p := s.Prog(v.Op.Asm())
766                 p.From.Type = obj.TYPE_CONST
767                 p.From.Offset = v.AuxInt
768                 p.To.Type = obj.TYPE_REG
769                 p.To.Reg = v.Reg()
770
771         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
772                 p := s.Prog(v.Op.Asm())
773                 p.From.Type = obj.TYPE_FCONST
774                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
775                 p.To.Type = obj.TYPE_REG
776                 p.To.Reg = v.Reg()
777
778         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
779                 p := s.Prog(v.Op.Asm())
780                 p.From.Type = obj.TYPE_REG
781                 p.From.Reg = v.Args[0].Reg()
782                 p.To.Type = obj.TYPE_REG
783                 p.To.Reg = v.Args[1].Reg()
784
785         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
786                 p := s.Prog(v.Op.Asm())
787                 p.From.Type = obj.TYPE_REG
788                 p.From.Reg = v.Args[0].Reg()
789                 p.To.Type = obj.TYPE_CONST
790                 p.To.Offset = v.AuxInt
791
792         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
793                 // Shift in register to required size
794                 p := s.Prog(v.Op.Asm())
795                 p.From.Type = obj.TYPE_REG
796                 p.From.Reg = v.Args[0].Reg()
797                 p.To.Reg = v.Reg()
798                 p.To.Type = obj.TYPE_REG
799
800         case ssa.OpPPC64MOVDload:
801
802                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
803                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
804                 // the offset is not known until link time. If the load of a go.string uses relocation for the
805                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
806                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
807                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
808                 // go.string types because other types will have proper alignment.
809
810                 gostring := false
811                 switch n := v.Aux.(type) {
812                 case *obj.LSym:
813                         gostring = strings.HasPrefix(n.Name, "go.string.")
814                 }
815                 if gostring {
816                         // Generate full addr of the go.string const
817                         // including AuxInt
818                         p := s.Prog(ppc64.AMOVD)
819                         p.From.Type = obj.TYPE_ADDR
820                         p.From.Reg = v.Args[0].Reg()
821                         gc.AddAux(&p.From, v)
822                         p.To.Type = obj.TYPE_REG
823                         p.To.Reg = v.Reg()
824                         // Load go.string using 0 offset
825                         p = s.Prog(v.Op.Asm())
826                         p.From.Type = obj.TYPE_MEM
827                         p.From.Reg = v.Reg()
828                         p.To.Type = obj.TYPE_REG
829                         p.To.Reg = v.Reg()
830                         break
831                 }
832                 // Not a go.string, generate a normal load
833                 fallthrough
834
835         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
836                 p := s.Prog(v.Op.Asm())
837                 p.From.Type = obj.TYPE_MEM
838                 p.From.Reg = v.Args[0].Reg()
839                 gc.AddAux(&p.From, v)
840                 p.To.Type = obj.TYPE_REG
841                 p.To.Reg = v.Reg()
842
843         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
844                 p := s.Prog(v.Op.Asm())
845                 p.From.Type = obj.TYPE_MEM
846                 p.From.Reg = v.Args[0].Reg()
847                 p.To.Type = obj.TYPE_REG
848                 p.To.Reg = v.Reg()
849
850         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
851                 p := s.Prog(v.Op.Asm())
852                 p.To.Type = obj.TYPE_MEM
853                 p.To.Reg = v.Args[0].Reg()
854                 p.From.Type = obj.TYPE_REG
855                 p.From.Reg = v.Args[1].Reg()
856
857         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
858                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
859                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
860                 p := s.Prog(v.Op.Asm())
861                 p.From.Type = obj.TYPE_MEM
862                 p.From.Reg = v.Args[0].Reg()
863                 p.From.Index = v.Args[1].Reg()
864                 p.To.Type = obj.TYPE_REG
865                 p.To.Reg = v.Reg()
866
867         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
868                 p := s.Prog(v.Op.Asm())
869                 p.From.Type = obj.TYPE_REG
870                 p.From.Reg = ppc64.REGZERO
871                 p.To.Type = obj.TYPE_MEM
872                 p.To.Reg = v.Args[0].Reg()
873                 gc.AddAux(&p.To, v)
874
875         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
876                 p := s.Prog(v.Op.Asm())
877                 p.From.Type = obj.TYPE_REG
878                 p.From.Reg = v.Args[1].Reg()
879                 p.To.Type = obj.TYPE_MEM
880                 p.To.Reg = v.Args[0].Reg()
881                 gc.AddAux(&p.To, v)
882
883         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
884                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
885                 ssa.OpPPC64MOVHBRstoreidx:
886                 p := s.Prog(v.Op.Asm())
887                 p.From.Type = obj.TYPE_REG
888                 p.From.Reg = v.Args[2].Reg()
889                 p.To.Index = v.Args[1].Reg()
890                 p.To.Type = obj.TYPE_MEM
891                 p.To.Reg = v.Args[0].Reg()
892
893         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
894                 // ISEL, ISELB
895                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
896                 // ISEL only accepts 0, 1, 2 condition values but the others can be
897                 // achieved by swapping operand order.
898                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
899                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
900                 // ISELB is used when a boolean result is needed, returning 0 or 1
901                 p := s.Prog(ppc64.AISEL)
902                 p.To.Type = obj.TYPE_REG
903                 p.To.Reg = v.Reg()
904                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
905                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
906                 if v.Op == ssa.OpPPC64ISEL {
907                         r.Reg = v.Args[1].Reg()
908                 }
909                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
910                 if v.AuxInt > 3 {
911                         p.Reg = r.Reg
912                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
913                 } else {
914                         p.Reg = v.Args[0].Reg()
915                         p.SetFrom3(r)
916                 }
917                 p.From.Type = obj.TYPE_CONST
918                 p.From.Offset = v.AuxInt & 3
919
920         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
921                 // The LoweredQuad code generation
922                 // generates STXV instructions on
923                 // power9. The Short variation is used
924                 // if no loop is generated.
925
926                 // sizes >= 64 generate a loop as follows:
927
928                 // Set up loop counter in CTR, used by BC
929                 // XXLXOR clears VS32
930                 //       XXLXOR VS32,VS32,VS32
931                 //       MOVD len/64,REG_TMP
932                 //       MOVD REG_TMP,CTR
933                 //       loop:
934                 //       STXV VS32,0(R20)
935                 //       STXV VS32,16(R20)
936                 //       STXV VS32,32(R20)
937                 //       STXV VS32,48(R20)
938                 //       ADD  $64,R20
939                 //       BC   16, 0, loop
940
941                 // Number of loop iterations (64 bytes each)
942                 ctr := v.AuxInt / 64
943
944                 // Remainder bytes
945                 rem := v.AuxInt % 64
946
947                 // Only generate a loop if there is more
948                 // than 1 iteration.
949                 if ctr > 1 {
950                         // Set up VS32 (V0) to hold 0s
951                         p := s.Prog(ppc64.AXXLXOR)
952                         p.From.Type = obj.TYPE_REG
953                         p.From.Reg = ppc64.REG_VS32
954                         p.To.Type = obj.TYPE_REG
955                         p.To.Reg = ppc64.REG_VS32
956                         p.Reg = ppc64.REG_VS32
957
958                         // Set up CTR loop counter
959                         p = s.Prog(ppc64.AMOVD)
960                         p.From.Type = obj.TYPE_CONST
961                         p.From.Offset = ctr
962                         p.To.Type = obj.TYPE_REG
963                         p.To.Reg = ppc64.REGTMP
964
965                         p = s.Prog(ppc64.AMOVD)
966                         p.From.Type = obj.TYPE_REG
967                         p.From.Reg = ppc64.REGTMP
968                         p.To.Type = obj.TYPE_REG
969                         p.To.Reg = ppc64.REG_CTR
970
971                         // Don't generate padding for
972                         // loops with few iterations.
973                         if ctr > 3 {
974                                 p = s.Prog(obj.APCALIGN)
975                                 p.From.Type = obj.TYPE_CONST
976                                 p.From.Offset = 16
977                         }
978
979                         // generate 4 STXVs to zero 64 bytes
980                         var top *obj.Prog
981
982                         p = s.Prog(ppc64.ASTXV)
983                         p.From.Type = obj.TYPE_REG
984                         p.From.Reg = ppc64.REG_VS32
985                         p.To.Type = obj.TYPE_MEM
986                         p.To.Reg = v.Args[0].Reg()
987
988                         //  Save the top of loop
989                         if top == nil {
990                                 top = p
991                         }
992                         p = s.Prog(ppc64.ASTXV)
993                         p.From.Type = obj.TYPE_REG
994                         p.From.Reg = ppc64.REG_VS32
995                         p.To.Type = obj.TYPE_MEM
996                         p.To.Reg = v.Args[0].Reg()
997                         p.To.Offset = 16
998
999                         p = s.Prog(ppc64.ASTXV)
1000                         p.From.Type = obj.TYPE_REG
1001                         p.From.Reg = ppc64.REG_VS32
1002                         p.To.Type = obj.TYPE_MEM
1003                         p.To.Reg = v.Args[0].Reg()
1004                         p.To.Offset = 32
1005
1006                         p = s.Prog(ppc64.ASTXV)
1007                         p.From.Type = obj.TYPE_REG
1008                         p.From.Reg = ppc64.REG_VS32
1009                         p.To.Type = obj.TYPE_MEM
1010                         p.To.Reg = v.Args[0].Reg()
1011                         p.To.Offset = 48
1012
1013                         // Increment address for the
1014                         // 64 bytes just zeroed.
1015                         p = s.Prog(ppc64.AADD)
1016                         p.Reg = v.Args[0].Reg()
1017                         p.From.Type = obj.TYPE_CONST
1018                         p.From.Offset = 64
1019                         p.To.Type = obj.TYPE_REG
1020                         p.To.Reg = v.Args[0].Reg()
1021
1022                         // Branch back to top of loop
1023                         // based on CTR
1024                         // BC with BO_BCTR generates bdnz
1025                         p = s.Prog(ppc64.ABC)
1026                         p.From.Type = obj.TYPE_CONST
1027                         p.From.Offset = ppc64.BO_BCTR
1028                         p.Reg = ppc64.REG_R0
1029                         p.To.Type = obj.TYPE_BRANCH
1030                         gc.Patch(p, top)
1031                 }
1032                 // When ctr == 1 the loop was not generated but
1033                 // there are at least 64 bytes to clear, so add
1034                 // that to the remainder to generate the code
1035                 // to clear those doublewords
1036                 if ctr == 1 {
1037                         rem += 64
1038                 }
1039
1040                 // Clear the remainder starting at offset zero
1041                 offset := int64(0)
1042
1043                 if rem >= 16 && ctr <= 1 {
1044                         // If the XXLXOR hasn't already been
1045                         // generated, do it here to initialize
1046                         // VS32 (V0) to 0.
1047                         p := s.Prog(ppc64.AXXLXOR)
1048                         p.From.Type = obj.TYPE_REG
1049                         p.From.Reg = ppc64.REG_VS32
1050                         p.To.Type = obj.TYPE_REG
1051                         p.To.Reg = ppc64.REG_VS32
1052                         p.Reg = ppc64.REG_VS32
1053                 }
1054                 // Generate STXV for 32 or 64
1055                 // bytes.
1056                 for rem >= 32 {
1057                         p := s.Prog(ppc64.ASTXV)
1058                         p.From.Type = obj.TYPE_REG
1059                         p.From.Reg = ppc64.REG_VS32
1060                         p.To.Type = obj.TYPE_MEM
1061                         p.To.Reg = v.Args[0].Reg()
1062                         p.To.Offset = offset
1063
1064                         p = s.Prog(ppc64.ASTXV)
1065                         p.From.Type = obj.TYPE_REG
1066                         p.From.Reg = ppc64.REG_VS32
1067                         p.To.Type = obj.TYPE_MEM
1068                         p.To.Reg = v.Args[0].Reg()
1069                         p.To.Offset = offset + 16
1070                         offset += 32
1071                         rem -= 32
1072                 }
1073                 // Generate 16 bytes
1074                 if rem >= 16 {
1075                         p := s.Prog(ppc64.ASTXV)
1076                         p.From.Type = obj.TYPE_REG
1077                         p.From.Reg = ppc64.REG_VS32
1078                         p.To.Type = obj.TYPE_MEM
1079                         p.To.Reg = v.Args[0].Reg()
1080                         p.To.Offset = offset
1081                         offset += 16
1082                         rem -= 16
1083                 }
1084
1085                 // first clear as many doublewords as possible
1086                 // then clear remaining sizes as available
1087                 for rem > 0 {
1088                         op, size := ppc64.AMOVB, int64(1)
1089                         switch {
1090                         case rem >= 8:
1091                                 op, size = ppc64.AMOVD, 8
1092                         case rem >= 4:
1093                                 op, size = ppc64.AMOVW, 4
1094                         case rem >= 2:
1095                                 op, size = ppc64.AMOVH, 2
1096                         }
1097                         p := s.Prog(op)
1098                         p.From.Type = obj.TYPE_REG
1099                         p.From.Reg = ppc64.REG_R0
1100                         p.To.Type = obj.TYPE_MEM
1101                         p.To.Reg = v.Args[0].Reg()
1102                         p.To.Offset = offset
1103                         rem -= size
1104                         offset += size
1105                 }
1106
1107         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1108
1109                 // Unaligned data doesn't hurt performance
1110                 // for these instructions on power8.
1111
1112                 // For sizes >= 64 generate a loop as follows:
1113
1114                 // Set up loop counter in CTR, used by BC
1115                 //       XXLXOR VS32,VS32,VS32
1116                 //       MOVD len/32,REG_TMP
1117                 //       MOVD REG_TMP,CTR
1118                 //       MOVD $16,REG_TMP
1119                 //       loop:
1120                 //       STXVD2X VS32,(R0)(R20)
1121                 //       STXVD2X VS32,(R31)(R20)
1122                 //       ADD  $32,R20
1123                 //       BC   16, 0, loop
1124                 //
1125                 // any remainder is done as described below
1126
1127                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1128                 // then handle the remainder
1129                 //      MOVD R0,(R20)
1130                 //      MOVD R0,8(R20)
1131                 // .... etc.
1132                 //
1133                 // the remainder bytes are cleared using one or more
1134                 // of the following instructions with the appropriate
1135                 // offsets depending which instructions are needed
1136                 //
1137                 //      MOVW R0,n1(R20) 4 bytes
1138                 //      MOVH R0,n2(R20) 2 bytes
1139                 //      MOVB R0,n3(R20) 1 byte
1140                 //
1141                 // 7 bytes: MOVW, MOVH, MOVB
1142                 // 6 bytes: MOVW, MOVH
1143                 // 5 bytes: MOVW, MOVB
1144                 // 3 bytes: MOVH, MOVB
1145
1146                 // each loop iteration does 32 bytes
1147                 ctr := v.AuxInt / 32
1148
1149                 // remainder bytes
1150                 rem := v.AuxInt % 32
1151
1152                 // only generate a loop if there is more
1153                 // than 1 iteration.
1154                 if ctr > 1 {
1155                         // Set up VS32 (V0) to hold 0s
1156                         p := s.Prog(ppc64.AXXLXOR)
1157                         p.From.Type = obj.TYPE_REG
1158                         p.From.Reg = ppc64.REG_VS32
1159                         p.To.Type = obj.TYPE_REG
1160                         p.To.Reg = ppc64.REG_VS32
1161                         p.Reg = ppc64.REG_VS32
1162
1163                         // Set up CTR loop counter
1164                         p = s.Prog(ppc64.AMOVD)
1165                         p.From.Type = obj.TYPE_CONST
1166                         p.From.Offset = ctr
1167                         p.To.Type = obj.TYPE_REG
1168                         p.To.Reg = ppc64.REGTMP
1169
1170                         p = s.Prog(ppc64.AMOVD)
1171                         p.From.Type = obj.TYPE_REG
1172                         p.From.Reg = ppc64.REGTMP
1173                         p.To.Type = obj.TYPE_REG
1174                         p.To.Reg = ppc64.REG_CTR
1175
1176                         // Set up R31 to hold index value 16
1177                         p = s.Prog(ppc64.AMOVD)
1178                         p.From.Type = obj.TYPE_CONST
1179                         p.From.Offset = 16
1180                         p.To.Type = obj.TYPE_REG
1181                         p.To.Reg = ppc64.REGTMP
1182
1183                         // Don't add padding for alignment
1184                         // with few loop iterations.
1185                         if ctr > 3 {
1186                                 p = s.Prog(obj.APCALIGN)
1187                                 p.From.Type = obj.TYPE_CONST
1188                                 p.From.Offset = 16
1189                         }
1190
1191                         // generate 2 STXVD2Xs, each storing 16 bytes
1192                         // when this is a loop then the top must be saved
1193                         var top *obj.Prog
1194                         // This is the top of loop
1195
1196                         p = s.Prog(ppc64.ASTXVD2X)
1197                         p.From.Type = obj.TYPE_REG
1198                         p.From.Reg = ppc64.REG_VS32
1199                         p.To.Type = obj.TYPE_MEM
1200                         p.To.Reg = v.Args[0].Reg()
1201                         p.To.Index = ppc64.REGZERO
1202                         // Save the top of loop
1203                         if top == nil {
1204                                 top = p
1205                         }
1206                         p = s.Prog(ppc64.ASTXVD2X)
1207                         p.From.Type = obj.TYPE_REG
1208                         p.From.Reg = ppc64.REG_VS32
1209                         p.To.Type = obj.TYPE_MEM
1210                         p.To.Reg = v.Args[0].Reg()
1211                         p.To.Index = ppc64.REGTMP
1212
1213                         // Increment address for the
1214                         // 4 doublewords just zeroed.
1215                         p = s.Prog(ppc64.AADD)
1216                         p.Reg = v.Args[0].Reg()
1217                         p.From.Type = obj.TYPE_CONST
1218                         p.From.Offset = 32
1219                         p.To.Type = obj.TYPE_REG
1220                         p.To.Reg = v.Args[0].Reg()
1221
1222                         // Branch back to top of loop
1223                         // based on CTR
1224                         // BC with BO_BCTR generates bdnz
1225                         p = s.Prog(ppc64.ABC)
1226                         p.From.Type = obj.TYPE_CONST
1227                         p.From.Offset = ppc64.BO_BCTR
1228                         p.Reg = ppc64.REG_R0
1229                         p.To.Type = obj.TYPE_BRANCH
1230                         gc.Patch(p, top)
1231                 }
1232
1233                 // when ctr == 1 the loop was not generated but
1234                 // there are at least 32 bytes to clear, so add
1235                 // that to the remainder to generate the code
1236                 // to clear those doublewords
1237                 if ctr == 1 {
1238                         rem += 32
1239                 }
1240
1241                 // clear the remainder starting at offset zero
1242                 offset := int64(0)
1243
1244                 // first clear as many doublewords as possible
1245                 // then clear remaining sizes as available
1246                 for rem > 0 {
1247                         op, size := ppc64.AMOVB, int64(1)
1248                         switch {
1249                         case rem >= 8:
1250                                 op, size = ppc64.AMOVD, 8
1251                         case rem >= 4:
1252                                 op, size = ppc64.AMOVW, 4
1253                         case rem >= 2:
1254                                 op, size = ppc64.AMOVH, 2
1255                         }
1256                         p := s.Prog(op)
1257                         p.From.Type = obj.TYPE_REG
1258                         p.From.Reg = ppc64.REG_R0
1259                         p.To.Type = obj.TYPE_MEM
1260                         p.To.Reg = v.Args[0].Reg()
1261                         p.To.Offset = offset
1262                         rem -= size
1263                         offset += size
1264                 }
1265
1266         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1267
1268                 bytesPerLoop := int64(32)
1269                 // This will be used when moving more
1270                 // than 8 bytes.  Moves start with
1271                 // as many 8 byte moves as possible, then
1272                 // 4, 2, or 1 byte(s) as remaining.  This will
1273                 // work and be efficient for power8 or later.
1274                 // If there are 64 or more bytes, then a
1275                 // loop is generated to move 32 bytes and
1276                 // update the src and dst addresses on each
1277                 // iteration. When < 64 bytes, the appropriate
1278                 // number of moves are generated based on the
1279                 // size.
1280                 // When moving >= 64 bytes a loop is used
1281                 //      MOVD len/32,REG_TMP
1282                 //      MOVD REG_TMP,CTR
1283                 //      MOVD $16,REG_TMP
1284                 // top:
1285                 //      LXVD2X (R0)(R21),VS32
1286                 //      LXVD2X (R31)(R21),VS33
1287                 //      ADD $32,R21
1288                 //      STXVD2X VS32,(R0)(R20)
1289                 //      STXVD2X VS33,(R31)(R20)
1290                 //      ADD $32,R20
1291                 //      BC 16,0,top
1292                 // Bytes not moved by this loop are moved
1293                 // with a combination of the following instructions,
1294                 // starting with the largest sizes and generating as
1295                 // many as needed, using the appropriate offset value.
1296                 //      MOVD  n(R21),R31
1297                 //      MOVD  R31,n(R20)
1298                 //      MOVW  n1(R21),R31
1299                 //      MOVW  R31,n1(R20)
1300                 //      MOVH  n2(R21),R31
1301                 //      MOVH  R31,n2(R20)
1302                 //      MOVB  n3(R21),R31
1303                 //      MOVB  R31,n3(R20)
1304
1305                 // Each loop iteration moves 32 bytes
1306                 ctr := v.AuxInt / bytesPerLoop
1307
1308                 // Remainder after the loop
1309                 rem := v.AuxInt % bytesPerLoop
1310
1311                 dstReg := v.Args[0].Reg()
1312                 srcReg := v.Args[1].Reg()
1313
1314                 // The set of registers used here, must match the clobbered reg list
1315                 // in PPC64Ops.go.
1316                 offset := int64(0)
1317
1318                 // top of the loop
1319                 var top *obj.Prog
1320                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1321                 if ctr > 1 {
1322                         // Set up the CTR
1323                         p := s.Prog(ppc64.AMOVD)
1324                         p.From.Type = obj.TYPE_CONST
1325                         p.From.Offset = ctr
1326                         p.To.Type = obj.TYPE_REG
1327                         p.To.Reg = ppc64.REGTMP
1328
1329                         p = s.Prog(ppc64.AMOVD)
1330                         p.From.Type = obj.TYPE_REG
1331                         p.From.Reg = ppc64.REGTMP
1332                         p.To.Type = obj.TYPE_REG
1333                         p.To.Reg = ppc64.REG_CTR
1334
1335                         // Use REGTMP as index reg
1336                         p = s.Prog(ppc64.AMOVD)
1337                         p.From.Type = obj.TYPE_CONST
1338                         p.From.Offset = 16
1339                         p.To.Type = obj.TYPE_REG
1340                         p.To.Reg = ppc64.REGTMP
1341
1342                         // Don't add padding for
1343                         // alignment with small iteration
1344                         // counts.
1345                         if ctr > 3 {
1346                                 p = s.Prog(obj.APCALIGN)
1347                                 p.From.Type = obj.TYPE_CONST
1348                                 p.From.Offset = 16
1349                         }
1350
1351                         // Generate 16 byte loads and stores.
1352                         // Use temp register for index (16)
1353                         // on the second one.
1354
1355                         p = s.Prog(ppc64.ALXVD2X)
1356                         p.From.Type = obj.TYPE_MEM
1357                         p.From.Reg = srcReg
1358                         p.From.Index = ppc64.REGZERO
1359                         p.To.Type = obj.TYPE_REG
1360                         p.To.Reg = ppc64.REG_VS32
1361                         if top == nil {
1362                                 top = p
1363                         }
1364                         p = s.Prog(ppc64.ALXVD2X)
1365                         p.From.Type = obj.TYPE_MEM
1366                         p.From.Reg = srcReg
1367                         p.From.Index = ppc64.REGTMP
1368                         p.To.Type = obj.TYPE_REG
1369                         p.To.Reg = ppc64.REG_VS33
1370
1371                         // increment the src reg for next iteration
1372                         p = s.Prog(ppc64.AADD)
1373                         p.Reg = srcReg
1374                         p.From.Type = obj.TYPE_CONST
1375                         p.From.Offset = bytesPerLoop
1376                         p.To.Type = obj.TYPE_REG
1377                         p.To.Reg = srcReg
1378
1379                         // generate 16 byte stores
1380                         p = s.Prog(ppc64.ASTXVD2X)
1381                         p.From.Type = obj.TYPE_REG
1382                         p.From.Reg = ppc64.REG_VS32
1383                         p.To.Type = obj.TYPE_MEM
1384                         p.To.Reg = dstReg
1385                         p.To.Index = ppc64.REGZERO
1386
1387                         p = s.Prog(ppc64.ASTXVD2X)
1388                         p.From.Type = obj.TYPE_REG
1389                         p.From.Reg = ppc64.REG_VS33
1390                         p.To.Type = obj.TYPE_MEM
1391                         p.To.Reg = dstReg
1392                         p.To.Index = ppc64.REGTMP
1393
1394                         // increment the dst reg for next iteration
1395                         p = s.Prog(ppc64.AADD)
1396                         p.Reg = dstReg
1397                         p.From.Type = obj.TYPE_CONST
1398                         p.From.Offset = bytesPerLoop
1399                         p.To.Type = obj.TYPE_REG
1400                         p.To.Reg = dstReg
1401
1402                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1403                         // to loop top.
1404                         p = s.Prog(ppc64.ABC)
1405                         p.From.Type = obj.TYPE_CONST
1406                         p.From.Offset = ppc64.BO_BCTR
1407                         p.Reg = ppc64.REG_R0
1408                         p.To.Type = obj.TYPE_BRANCH
1409                         gc.Patch(p, top)
1410
1411                         // srcReg and dstReg were incremented in the loop, so
1412                         // later instructions start with offset 0.
1413                         offset = int64(0)
1414                 }
1415
1416                 // No loop was generated for one iteration, so
1417                 // add 32 bytes to the remainder to move those bytes.
1418                 if ctr == 1 {
1419                         rem += bytesPerLoop
1420                 }
1421
1422                 if rem >= 16 {
1423                         // Generate 16 byte loads and stores.
1424                         // Use temp register for index (value 16)
1425                         // on the second one.
1426                         p := s.Prog(ppc64.ALXVD2X)
1427                         p.From.Type = obj.TYPE_MEM
1428                         p.From.Reg = srcReg
1429                         p.From.Index = ppc64.REGZERO
1430                         p.To.Type = obj.TYPE_REG
1431                         p.To.Reg = ppc64.REG_VS32
1432
1433                         p = s.Prog(ppc64.ASTXVD2X)
1434                         p.From.Type = obj.TYPE_REG
1435                         p.From.Reg = ppc64.REG_VS32
1436                         p.To.Type = obj.TYPE_MEM
1437                         p.To.Reg = dstReg
1438                         p.To.Index = ppc64.REGZERO
1439
1440                         offset = 16
1441                         rem -= 16
1442
1443                         if rem >= 16 {
1444                                 // Use REGTMP as index reg
1445                                 p := s.Prog(ppc64.AMOVD)
1446                                 p.From.Type = obj.TYPE_CONST
1447                                 p.From.Offset = 16
1448                                 p.To.Type = obj.TYPE_REG
1449                                 p.To.Reg = ppc64.REGTMP
1450
1451                                 p = s.Prog(ppc64.ALXVD2X)
1452                                 p.From.Type = obj.TYPE_MEM
1453                                 p.From.Reg = srcReg
1454                                 p.From.Index = ppc64.REGTMP
1455                                 p.To.Type = obj.TYPE_REG
1456                                 p.To.Reg = ppc64.REG_VS32
1457
1458                                 p = s.Prog(ppc64.ASTXVD2X)
1459                                 p.From.Type = obj.TYPE_REG
1460                                 p.From.Reg = ppc64.REG_VS32
1461                                 p.To.Type = obj.TYPE_MEM
1462                                 p.To.Reg = dstReg
1463                                 p.To.Index = ppc64.REGTMP
1464
1465                                 offset = 32
1466                                 rem -= 16
1467                         }
1468                 }
1469
1470                 // Generate all the remaining load and store pairs, starting with
1471                 // as many 8 byte moves as possible, then 4, 2, 1.
1472                 for rem > 0 {
1473                         op, size := ppc64.AMOVB, int64(1)
1474                         switch {
1475                         case rem >= 8:
1476                                 op, size = ppc64.AMOVD, 8
1477                         case rem >= 4:
1478                                 op, size = ppc64.AMOVW, 4
1479                         case rem >= 2:
1480                                 op, size = ppc64.AMOVH, 2
1481                         }
1482                         // Load
1483                         p := s.Prog(op)
1484                         p.To.Type = obj.TYPE_REG
1485                         p.To.Reg = ppc64.REGTMP
1486                         p.From.Type = obj.TYPE_MEM
1487                         p.From.Reg = srcReg
1488                         p.From.Offset = offset
1489
1490                         // Store
1491                         p = s.Prog(op)
1492                         p.From.Type = obj.TYPE_REG
1493                         p.From.Reg = ppc64.REGTMP
1494                         p.To.Type = obj.TYPE_MEM
1495                         p.To.Reg = dstReg
1496                         p.To.Offset = offset
1497                         rem -= size
1498                         offset += size
1499                 }
1500
1501         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1502                 bytesPerLoop := int64(64)
1503                 // This is used when moving more
1504                 // than 8 bytes on power9.  Moves start with
1505                 // as many 8 byte moves as possible, then
1506                 // 4, 2, or 1 byte(s) as remaining.  This will
1507                 // work and be efficient for power8 or later.
1508                 // If there are 64 or more bytes, then a
1509                 // loop is generated to move 64 bytes and
1510                 // update the src and dst addresses on each
1511                 // iteration. When < 64 bytes, the appropriate
1512                 // number of moves are generated based on the
1513                 // size.
1514                 // When moving >= 128 bytes (loop counter > 1) a loop is used
1515                 //      MOVD len/64,REG_TMP
1516                 //      MOVD REG_TMP,CTR
1517                 // top:
1518                 //      LXV 0(R21),VS32;   LXV 16(R21),VS33
1519                 //      STXV VS32,0(R20);  STXV VS33,16(R20)
1520                 //      LXV 32(R21),VS32;  LXV 48(R21),VS33
1521                 //      STXV VS32,32(R20); STXV VS33,48(R20)
1522                 //      ADD $64,R21
1523                 //      ADD $64,R20
1524                 //      BC 16,0,top
1525                 // Bytes not moved by this loop are moved
1526                 // with a combination of the following instructions,
1527                 // starting with the largest sizes and generating as
1528                 // many as needed, using the appropriate offset value.
1529                 //      MOVD  n(R21),R31
1530                 //      MOVD  R31,n(R20)
1531                 //      MOVW  n1(R21),R31
1532                 //      MOVW  R31,n1(R20)
1533                 //      MOVH  n2(R21),R31
1534                 //      MOVH  R31,n2(R20)
1535                 //      MOVB  n3(R21),R31
1536                 //      MOVB  R31,n3(R20)
1537
1538                 // Each loop iteration moves 64 bytes
1539                 ctr := v.AuxInt / bytesPerLoop
1540
1541                 // Remainder after the loop
1542                 rem := v.AuxInt % bytesPerLoop
1543
1544                 dstReg := v.Args[0].Reg()
1545                 srcReg := v.Args[1].Reg()
1546
1547                 offset := int64(0)
1548
1549                 // top of the loop
1550                 var top *obj.Prog
1551
1552                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1553                 if ctr > 1 {
1554                         // Set up the CTR
1555                         p := s.Prog(ppc64.AMOVD)
1556                         p.From.Type = obj.TYPE_CONST
1557                         p.From.Offset = ctr
1558                         p.To.Type = obj.TYPE_REG
1559                         p.To.Reg = ppc64.REGTMP
1560
1561                         p = s.Prog(ppc64.AMOVD)
1562                         p.From.Type = obj.TYPE_REG
1563                         p.From.Reg = ppc64.REGTMP
1564                         p.To.Type = obj.TYPE_REG
1565                         p.To.Reg = ppc64.REG_CTR
1566
1567                         p = s.Prog(obj.APCALIGN)
1568                         p.From.Type = obj.TYPE_CONST
1569                         p.From.Offset = 16
1570
1571                         // Generate 16 byte loads and stores.
1572                         p = s.Prog(ppc64.ALXV)
1573                         p.From.Type = obj.TYPE_MEM
1574                         p.From.Reg = srcReg
1575                         p.From.Offset = offset
1576                         p.To.Type = obj.TYPE_REG
1577                         p.To.Reg = ppc64.REG_VS32
1578                         if top == nil {
1579                                 top = p
1580                         }
1581                         p = s.Prog(ppc64.ALXV)
1582                         p.From.Type = obj.TYPE_MEM
1583                         p.From.Reg = srcReg
1584                         p.From.Offset = offset + 16
1585                         p.To.Type = obj.TYPE_REG
1586                         p.To.Reg = ppc64.REG_VS33
1587
1588                         // generate 16 byte stores
1589                         p = s.Prog(ppc64.ASTXV)
1590                         p.From.Type = obj.TYPE_REG
1591                         p.From.Reg = ppc64.REG_VS32
1592                         p.To.Type = obj.TYPE_MEM
1593                         p.To.Reg = dstReg
1594                         p.To.Offset = offset
1595
1596                         p = s.Prog(ppc64.ASTXV)
1597                         p.From.Type = obj.TYPE_REG
1598                         p.From.Reg = ppc64.REG_VS33
1599                         p.To.Type = obj.TYPE_MEM
1600                         p.To.Reg = dstReg
1601                         p.To.Offset = offset + 16
1602
1603                         // Generate 16 byte loads and stores.
1604                         p = s.Prog(ppc64.ALXV)
1605                         p.From.Type = obj.TYPE_MEM
1606                         p.From.Reg = srcReg
1607                         p.From.Offset = offset + 32
1608                         p.To.Type = obj.TYPE_REG
1609                         p.To.Reg = ppc64.REG_VS32
1610
1611                         p = s.Prog(ppc64.ALXV)
1612                         p.From.Type = obj.TYPE_MEM
1613                         p.From.Reg = srcReg
1614                         p.From.Offset = offset + 48
1615                         p.To.Type = obj.TYPE_REG
1616                         p.To.Reg = ppc64.REG_VS33
1617
1618                         // generate 16 byte stores
1619                         p = s.Prog(ppc64.ASTXV)
1620                         p.From.Type = obj.TYPE_REG
1621                         p.From.Reg = ppc64.REG_VS32
1622                         p.To.Type = obj.TYPE_MEM
1623                         p.To.Reg = dstReg
1624                         p.To.Offset = offset + 32
1625
1626                         p = s.Prog(ppc64.ASTXV)
1627                         p.From.Type = obj.TYPE_REG
1628                         p.From.Reg = ppc64.REG_VS33
1629                         p.To.Type = obj.TYPE_MEM
1630                         p.To.Reg = dstReg
1631                         p.To.Offset = offset + 48
1632
1633                         // increment the src reg for next iteration
1634                         p = s.Prog(ppc64.AADD)
1635                         p.Reg = srcReg
1636                         p.From.Type = obj.TYPE_CONST
1637                         p.From.Offset = bytesPerLoop
1638                         p.To.Type = obj.TYPE_REG
1639                         p.To.Reg = srcReg
1640
1641                         // increment the dst reg for next iteration
1642                         p = s.Prog(ppc64.AADD)
1643                         p.Reg = dstReg
1644                         p.From.Type = obj.TYPE_CONST
1645                         p.From.Offset = bytesPerLoop
1646                         p.To.Type = obj.TYPE_REG
1647                         p.To.Reg = dstReg
1648
1649                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1650                         // to loop top.
1651                         p = s.Prog(ppc64.ABC)
1652                         p.From.Type = obj.TYPE_CONST
1653                         p.From.Offset = ppc64.BO_BCTR
1654                         p.Reg = ppc64.REG_R0
1655                         p.To.Type = obj.TYPE_BRANCH
1656                         gc.Patch(p, top)
1657
1658                         // srcReg and dstReg were incremented in the loop, so
1659                         // later instructions start with offset 0.
1660                         offset = int64(0)
1661                 }
1662
1663                 // No loop was generated for one iteration, so
1664                 // add 64 bytes to the remainder to move those bytes.
1665                 if ctr == 1 {
1666                         rem += bytesPerLoop
1667                 }
1668                 if rem >= 32 {
1669                         p := s.Prog(ppc64.ALXV)
1670                         p.From.Type = obj.TYPE_MEM
1671                         p.From.Reg = srcReg
1672                         p.To.Type = obj.TYPE_REG
1673                         p.To.Reg = ppc64.REG_VS32
1674
1675                         p = s.Prog(ppc64.ALXV)
1676                         p.From.Type = obj.TYPE_MEM
1677                         p.From.Reg = srcReg
1678                         p.From.Offset = 16
1679                         p.To.Type = obj.TYPE_REG
1680                         p.To.Reg = ppc64.REG_VS33
1681
1682                         p = s.Prog(ppc64.ASTXV)
1683                         p.From.Type = obj.TYPE_REG
1684                         p.From.Reg = ppc64.REG_VS32
1685                         p.To.Type = obj.TYPE_MEM
1686                         p.To.Reg = dstReg
1687
1688                         p = s.Prog(ppc64.ASTXV)
1689                         p.From.Type = obj.TYPE_REG
1690                         p.From.Reg = ppc64.REG_VS33
1691                         p.To.Type = obj.TYPE_MEM
1692                         p.To.Reg = dstReg
1693                         p.To.Offset = 16
1694
1695                         offset = 32
1696                         rem -= 32
1697                 }
1698
1699                 if rem >= 16 {
1700                         // Generate 16 byte loads and stores.
1701                         p := s.Prog(ppc64.ALXV)
1702                         p.From.Type = obj.TYPE_MEM
1703                         p.From.Reg = srcReg
1704                         p.From.Offset = offset
1705                         p.To.Type = obj.TYPE_REG
1706                         p.To.Reg = ppc64.REG_VS32
1707
1708                         p = s.Prog(ppc64.ASTXV)
1709                         p.From.Type = obj.TYPE_REG
1710                         p.From.Reg = ppc64.REG_VS32
1711                         p.To.Type = obj.TYPE_MEM
1712                         p.To.Reg = dstReg
1713                         p.To.Offset = offset
1714
1715                         offset += 16
1716                         rem -= 16
1717
1718                         if rem >= 16 {
1719                                 p := s.Prog(ppc64.ALXV)
1720                                 p.From.Type = obj.TYPE_MEM
1721                                 p.From.Reg = srcReg
1722                                 p.From.Offset = offset
1723                                 p.To.Type = obj.TYPE_REG
1724                                 p.To.Reg = ppc64.REG_VS32
1725
1726                                 p = s.Prog(ppc64.ASTXV)
1727                                 p.From.Type = obj.TYPE_REG
1728                                 p.From.Reg = ppc64.REG_VS32
1729                                 p.To.Type = obj.TYPE_MEM
1730                                 p.To.Reg = dstReg
1731                                 p.To.Offset = offset
1732
1733                                 offset += 16
1734                                 rem -= 16
1735                         }
1736                 }
1737                 // Generate all the remaining load and store pairs, starting with
1738                 // as many 8 byte moves as possible, then 4, 2, 1.
1739                 for rem > 0 {
1740                         op, size := ppc64.AMOVB, int64(1)
1741                         switch {
1742                         case rem >= 8:
1743                                 op, size = ppc64.AMOVD, 8
1744                         case rem >= 4:
1745                                 op, size = ppc64.AMOVW, 4
1746                         case rem >= 2:
1747                                 op, size = ppc64.AMOVH, 2
1748                         }
1749                         // Load
1750                         p := s.Prog(op)
1751                         p.To.Type = obj.TYPE_REG
1752                         p.To.Reg = ppc64.REGTMP
1753                         p.From.Type = obj.TYPE_MEM
1754                         p.From.Reg = srcReg
1755                         p.From.Offset = offset
1756
1757                         // Store
1758                         p = s.Prog(op)
1759                         p.From.Type = obj.TYPE_REG
1760                         p.From.Reg = ppc64.REGTMP
1761                         p.To.Type = obj.TYPE_MEM
1762                         p.To.Reg = dstReg
1763                         p.To.Offset = offset
1764                         rem -= size
1765                         offset += size
1766                 }
1767
1768         case ssa.OpPPC64CALLstatic:
1769                 s.Call(v)
1770
1771         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1772                 p := s.Prog(ppc64.AMOVD)
1773                 p.From.Type = obj.TYPE_REG
1774                 p.From.Reg = v.Args[0].Reg()
1775                 p.To.Type = obj.TYPE_REG
1776                 p.To.Reg = ppc64.REG_LR
1777
1778                 if v.Args[0].Reg() != ppc64.REG_R12 {
1779                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1780                 }
1781
1782                 pp := s.Call(v)
1783                 pp.To.Reg = ppc64.REG_LR
1784
1785                 // Insert a hint this is not a subroutine return.
1786                 pp.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 1})
1787
1788                 if base.Ctxt.Flag_shared {
1789                         // When compiling Go into PIC, the function we just
1790                         // called via pointer might have been implemented in
1791                         // a separate module and so overwritten the TOC
1792                         // pointer in R2; reload it.
1793                         q := s.Prog(ppc64.AMOVD)
1794                         q.From.Type = obj.TYPE_MEM
1795                         q.From.Offset = 24
1796                         q.From.Reg = ppc64.REGSP
1797                         q.To.Type = obj.TYPE_REG
1798                         q.To.Reg = ppc64.REG_R2
1799                 }
1800
1801         case ssa.OpPPC64LoweredWB:
1802                 p := s.Prog(obj.ACALL)
1803                 p.To.Type = obj.TYPE_MEM
1804                 p.To.Name = obj.NAME_EXTERN
1805                 p.To.Sym = v.Aux.(*obj.LSym)
1806
1807         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1808                 p := s.Prog(obj.ACALL)
1809                 p.To.Type = obj.TYPE_MEM
1810                 p.To.Name = obj.NAME_EXTERN
1811                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1812                 s.UseArgs(16) // space used in callee args area by assembly stubs
1813
1814         case ssa.OpPPC64LoweredNilCheck:
1815                 if objabi.GOOS == "aix" {
1816                         // CMP Rarg0, R0
1817                         // BNE 2(PC)
1818                         // STW R0, 0(R0)
1819                         // NOP (so the BNE has somewhere to land)
1820
1821                         // CMP Rarg0, R0
1822                         p := s.Prog(ppc64.ACMP)
1823                         p.From.Type = obj.TYPE_REG
1824                         p.From.Reg = v.Args[0].Reg()
1825                         p.To.Type = obj.TYPE_REG
1826                         p.To.Reg = ppc64.REG_R0
1827
1828                         // BNE 2(PC)
1829                         p2 := s.Prog(ppc64.ABNE)
1830                         p2.To.Type = obj.TYPE_BRANCH
1831
1832                         // STW R0, 0(R0)
1833                         // Write at 0 is forbidden and will trigger a SIGSEGV
1834                         p = s.Prog(ppc64.AMOVW)
1835                         p.From.Type = obj.TYPE_REG
1836                         p.From.Reg = ppc64.REG_R0
1837                         p.To.Type = obj.TYPE_MEM
1838                         p.To.Reg = ppc64.REG_R0
1839
1840                         // NOP (so the BNE has somewhere to land)
1841                         nop := s.Prog(obj.ANOP)
1842                         gc.Patch(p2, nop)
1843
1844                 } else {
1845                         // Issue a load which will fault if arg is nil.
1846                         p := s.Prog(ppc64.AMOVBZ)
1847                         p.From.Type = obj.TYPE_MEM
1848                         p.From.Reg = v.Args[0].Reg()
1849                         gc.AddAux(&p.From, v)
1850                         p.To.Type = obj.TYPE_REG
1851                         p.To.Reg = ppc64.REGTMP
1852                 }
1853                 if logopt.Enabled() {
1854                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1855                 }
1856                 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1857                         base.WarnfAt(v.Pos, "generated nil check")
1858                 }
1859
1860         // These should be resolved by rules and not make it here.
1861         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1862                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1863                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1864                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1865         case ssa.OpPPC64InvertFlags:
1866                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1867         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1868                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1869         case ssa.OpClobber:
1870                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1871         default:
1872                 v.Fatalf("genValue not implemented: %s", v.LongString())
1873         }
1874 }
1875
1876 var blockJump = [...]struct {
1877         asm, invasm     obj.As
1878         asmeq, invasmun bool
1879 }{
1880         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1881         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1882
1883         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1884         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1885         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1886         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1887
1888         // TODO: need to work FP comparisons into block jumps
1889         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1890         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1891         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1892         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1893 }
1894
1895 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1896         switch b.Kind {
1897         case ssa.BlockDefer:
1898                 // defer returns in R3:
1899                 // 0 if we should continue executing
1900                 // 1 if we should jump to deferreturn call
1901                 p := s.Prog(ppc64.ACMP)
1902                 p.From.Type = obj.TYPE_REG
1903                 p.From.Reg = ppc64.REG_R3
1904                 p.To.Type = obj.TYPE_REG
1905                 p.To.Reg = ppc64.REG_R0
1906
1907                 p = s.Prog(ppc64.ABNE)
1908                 p.To.Type = obj.TYPE_BRANCH
1909                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1910                 if b.Succs[0].Block() != next {
1911                         p := s.Prog(obj.AJMP)
1912                         p.To.Type = obj.TYPE_BRANCH
1913                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1914                 }
1915
1916         case ssa.BlockPlain:
1917                 if b.Succs[0].Block() != next {
1918                         p := s.Prog(obj.AJMP)
1919                         p.To.Type = obj.TYPE_BRANCH
1920                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1921                 }
1922         case ssa.BlockExit:
1923         case ssa.BlockRet:
1924                 s.Prog(obj.ARET)
1925         case ssa.BlockRetJmp:
1926                 p := s.Prog(obj.AJMP)
1927                 p.To.Type = obj.TYPE_MEM
1928                 p.To.Name = obj.NAME_EXTERN
1929                 p.To.Sym = b.Aux.(*obj.LSym)
1930
1931         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1932                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1933                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1934                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1935                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1936                 jmp := blockJump[b.Kind]
1937                 switch next {
1938                 case b.Succs[0].Block():
1939                         s.Br(jmp.invasm, b.Succs[1].Block())
1940                         if jmp.invasmun {
1941                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1942                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1943                         }
1944                 case b.Succs[1].Block():
1945                         s.Br(jmp.asm, b.Succs[0].Block())
1946                         if jmp.asmeq {
1947                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1948                         }
1949                 default:
1950                         if b.Likely != ssa.BranchUnlikely {
1951                                 s.Br(jmp.asm, b.Succs[0].Block())
1952                                 if jmp.asmeq {
1953                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1954                                 }
1955                                 s.Br(obj.AJMP, b.Succs[1].Block())
1956                         } else {
1957                                 s.Br(jmp.invasm, b.Succs[1].Block())
1958                                 if jmp.invasmun {
1959                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1960                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1961                                 }
1962                                 s.Br(obj.AJMP, b.Succs[0].Block())
1963                         }
1964                 }
1965         default:
1966                 b.Fatalf("branch not implemented: %s", b.LongString())
1967         }
1968 }