]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
cmd/compile: combine more 32 bit shift and mask operations on ppc64
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/gc"
9         "cmd/compile/internal/logopt"
10         "cmd/compile/internal/ssa"
11         "cmd/compile/internal/types"
12         "cmd/internal/obj"
13         "cmd/internal/obj/ppc64"
14         "cmd/internal/objabi"
15         "math"
16         "strings"
17 )
18
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21         //      flive := b.FlagsLiveAtEnd
22         //      if b.Control != nil && b.Control.Type.IsFlags() {
23         //              flive = true
24         //      }
25         //      for i := len(b.Values) - 1; i >= 0; i-- {
26         //              v := b.Values[i]
27         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
28         //                      // The "mark" is any non-nil Aux value.
29         //                      v.Aux = v
30         //              }
31         //              if v.Type.IsFlags() {
32         //                      flive = false
33         //              }
34         //              for _, a := range v.Args {
35         //                      if a.Type.IsFlags() {
36         //                              flive = true
37         //                      }
38         //              }
39         //      }
40 }
41
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
44         if t.IsFloat() {
45                 switch t.Size() {
46                 case 4:
47                         return ppc64.AFMOVS
48                 case 8:
49                         return ppc64.AFMOVD
50                 }
51         } else {
52                 switch t.Size() {
53                 case 1:
54                         if t.IsSigned() {
55                                 return ppc64.AMOVB
56                         } else {
57                                 return ppc64.AMOVBZ
58                         }
59                 case 2:
60                         if t.IsSigned() {
61                                 return ppc64.AMOVH
62                         } else {
63                                 return ppc64.AMOVHZ
64                         }
65                 case 4:
66                         if t.IsSigned() {
67                                 return ppc64.AMOVW
68                         } else {
69                                 return ppc64.AMOVWZ
70                         }
71                 case 8:
72                         return ppc64.AMOVD
73                 }
74         }
75         panic("bad load type")
76 }
77
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
80         if t.IsFloat() {
81                 switch t.Size() {
82                 case 4:
83                         return ppc64.AFMOVS
84                 case 8:
85                         return ppc64.AFMOVD
86                 }
87         } else {
88                 switch t.Size() {
89                 case 1:
90                         return ppc64.AMOVB
91                 case 2:
92                         return ppc64.AMOVH
93                 case 4:
94                         return ppc64.AMOVW
95                 case 8:
96                         return ppc64.AMOVD
97                 }
98         }
99         panic("bad store type")
100 }
101
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
103         switch v.Op {
104         case ssa.OpCopy:
105                 t := v.Type
106                 if t.IsMemory() {
107                         return
108                 }
109                 x := v.Args[0].Reg()
110                 y := v.Reg()
111                 if x != y {
112                         rt := obj.TYPE_REG
113                         op := ppc64.AMOVD
114
115                         if t.IsFloat() {
116                                 op = ppc64.AFMOVD
117                         }
118                         p := s.Prog(op)
119                         p.From.Type = rt
120                         p.From.Reg = x
121                         p.To.Type = rt
122                         p.To.Reg = y
123                 }
124
125         case ssa.OpPPC64LoweredMuluhilo:
126                 // MULHDU       Rarg1, Rarg0, Reg0
127                 // MULLD        Rarg1, Rarg0, Reg1
128                 r0 := v.Args[0].Reg()
129                 r1 := v.Args[1].Reg()
130                 p := s.Prog(ppc64.AMULHDU)
131                 p.From.Type = obj.TYPE_REG
132                 p.From.Reg = r1
133                 p.Reg = r0
134                 p.To.Type = obj.TYPE_REG
135                 p.To.Reg = v.Reg0()
136                 p1 := s.Prog(ppc64.AMULLD)
137                 p1.From.Type = obj.TYPE_REG
138                 p1.From.Reg = r1
139                 p1.Reg = r0
140                 p1.To.Type = obj.TYPE_REG
141                 p1.To.Reg = v.Reg1()
142
143         case ssa.OpPPC64LoweredAdd64Carry:
144                 // ADDC         Rarg2, -1, Rtmp
145                 // ADDE         Rarg1, Rarg0, Reg0
146                 // ADDZE        Rzero, Reg1
147                 r0 := v.Args[0].Reg()
148                 r1 := v.Args[1].Reg()
149                 r2 := v.Args[2].Reg()
150                 p := s.Prog(ppc64.AADDC)
151                 p.From.Type = obj.TYPE_CONST
152                 p.From.Offset = -1
153                 p.Reg = r2
154                 p.To.Type = obj.TYPE_REG
155                 p.To.Reg = ppc64.REGTMP
156                 p1 := s.Prog(ppc64.AADDE)
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.Reg = r0
160                 p1.To.Type = obj.TYPE_REG
161                 p1.To.Reg = v.Reg0()
162                 p2 := s.Prog(ppc64.AADDZE)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGZERO
165                 p2.To.Type = obj.TYPE_REG
166                 p2.To.Reg = v.Reg1()
167
168         case ssa.OpPPC64LoweredAtomicAnd8,
169                 ssa.OpPPC64LoweredAtomicAnd32,
170                 ssa.OpPPC64LoweredAtomicOr8,
171                 ssa.OpPPC64LoweredAtomicOr32:
172                 // LWSYNC
173                 // LBAR/LWAR    (Rarg0), Rtmp
174                 // AND/OR       Rarg1, Rtmp
175                 // STBCCC/STWCCC Rtmp, (Rarg0)
176                 // BNE          -3(PC)
177                 ld := ppc64.ALBAR
178                 st := ppc64.ASTBCCC
179                 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
180                         ld = ppc64.ALWAR
181                         st = ppc64.ASTWCCC
182                 }
183                 r0 := v.Args[0].Reg()
184                 r1 := v.Args[1].Reg()
185                 // LWSYNC - Assuming shared data not write-through-required nor
186                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
187                 plwsync := s.Prog(ppc64.ALWSYNC)
188                 plwsync.To.Type = obj.TYPE_NONE
189                 // LBAR or LWAR
190                 p := s.Prog(ld)
191                 p.From.Type = obj.TYPE_MEM
192                 p.From.Reg = r0
193                 p.To.Type = obj.TYPE_REG
194                 p.To.Reg = ppc64.REGTMP
195                 // AND/OR reg1,out
196                 p1 := s.Prog(v.Op.Asm())
197                 p1.From.Type = obj.TYPE_REG
198                 p1.From.Reg = r1
199                 p1.To.Type = obj.TYPE_REG
200                 p1.To.Reg = ppc64.REGTMP
201                 // STBCCC or STWCCC
202                 p2 := s.Prog(st)
203                 p2.From.Type = obj.TYPE_REG
204                 p2.From.Reg = ppc64.REGTMP
205                 p2.To.Type = obj.TYPE_MEM
206                 p2.To.Reg = r0
207                 p2.RegTo2 = ppc64.REGTMP
208                 // BNE retry
209                 p3 := s.Prog(ppc64.ABNE)
210                 p3.To.Type = obj.TYPE_BRANCH
211                 gc.Patch(p3, p)
212
213         case ssa.OpPPC64LoweredAtomicAdd32,
214                 ssa.OpPPC64LoweredAtomicAdd64:
215                 // LWSYNC
216                 // LDAR/LWAR    (Rarg0), Rout
217                 // ADD          Rarg1, Rout
218                 // STDCCC/STWCCC Rout, (Rarg0)
219                 // BNE         -3(PC)
220                 // MOVW         Rout,Rout (if Add32)
221                 ld := ppc64.ALDAR
222                 st := ppc64.ASTDCCC
223                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
224                         ld = ppc64.ALWAR
225                         st = ppc64.ASTWCCC
226                 }
227                 r0 := v.Args[0].Reg()
228                 r1 := v.Args[1].Reg()
229                 out := v.Reg0()
230                 // LWSYNC - Assuming shared data not write-through-required nor
231                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
232                 plwsync := s.Prog(ppc64.ALWSYNC)
233                 plwsync.To.Type = obj.TYPE_NONE
234                 // LDAR or LWAR
235                 p := s.Prog(ld)
236                 p.From.Type = obj.TYPE_MEM
237                 p.From.Reg = r0
238                 p.To.Type = obj.TYPE_REG
239                 p.To.Reg = out
240                 // ADD reg1,out
241                 p1 := s.Prog(ppc64.AADD)
242                 p1.From.Type = obj.TYPE_REG
243                 p1.From.Reg = r1
244                 p1.To.Reg = out
245                 p1.To.Type = obj.TYPE_REG
246                 // STDCCC or STWCCC
247                 p3 := s.Prog(st)
248                 p3.From.Type = obj.TYPE_REG
249                 p3.From.Reg = out
250                 p3.To.Type = obj.TYPE_MEM
251                 p3.To.Reg = r0
252                 // BNE retry
253                 p4 := s.Prog(ppc64.ABNE)
254                 p4.To.Type = obj.TYPE_BRANCH
255                 gc.Patch(p4, p)
256
257                 // Ensure a 32 bit result
258                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
259                         p5 := s.Prog(ppc64.AMOVWZ)
260                         p5.To.Type = obj.TYPE_REG
261                         p5.To.Reg = out
262                         p5.From.Type = obj.TYPE_REG
263                         p5.From.Reg = out
264                 }
265
266         case ssa.OpPPC64LoweredAtomicExchange32,
267                 ssa.OpPPC64LoweredAtomicExchange64:
268                 // LWSYNC
269                 // LDAR/LWAR    (Rarg0), Rout
270                 // STDCCC/STWCCC Rout, (Rarg0)
271                 // BNE         -2(PC)
272                 // ISYNC
273                 ld := ppc64.ALDAR
274                 st := ppc64.ASTDCCC
275                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
276                         ld = ppc64.ALWAR
277                         st = ppc64.ASTWCCC
278                 }
279                 r0 := v.Args[0].Reg()
280                 r1 := v.Args[1].Reg()
281                 out := v.Reg0()
282                 // LWSYNC - Assuming shared data not write-through-required nor
283                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
284                 plwsync := s.Prog(ppc64.ALWSYNC)
285                 plwsync.To.Type = obj.TYPE_NONE
286                 // LDAR or LWAR
287                 p := s.Prog(ld)
288                 p.From.Type = obj.TYPE_MEM
289                 p.From.Reg = r0
290                 p.To.Type = obj.TYPE_REG
291                 p.To.Reg = out
292                 // STDCCC or STWCCC
293                 p1 := s.Prog(st)
294                 p1.From.Type = obj.TYPE_REG
295                 p1.From.Reg = r1
296                 p1.To.Type = obj.TYPE_MEM
297                 p1.To.Reg = r0
298                 // BNE retry
299                 p2 := s.Prog(ppc64.ABNE)
300                 p2.To.Type = obj.TYPE_BRANCH
301                 gc.Patch(p2, p)
302                 // ISYNC
303                 pisync := s.Prog(ppc64.AISYNC)
304                 pisync.To.Type = obj.TYPE_NONE
305
306         case ssa.OpPPC64LoweredAtomicLoad8,
307                 ssa.OpPPC64LoweredAtomicLoad32,
308                 ssa.OpPPC64LoweredAtomicLoad64,
309                 ssa.OpPPC64LoweredAtomicLoadPtr:
310                 // SYNC
311                 // MOVB/MOVD/MOVW (Rarg0), Rout
312                 // CMP Rout,Rout
313                 // BNE 1(PC)
314                 // ISYNC
315                 ld := ppc64.AMOVD
316                 cmp := ppc64.ACMP
317                 switch v.Op {
318                 case ssa.OpPPC64LoweredAtomicLoad8:
319                         ld = ppc64.AMOVBZ
320                 case ssa.OpPPC64LoweredAtomicLoad32:
321                         ld = ppc64.AMOVWZ
322                         cmp = ppc64.ACMPW
323                 }
324                 arg0 := v.Args[0].Reg()
325                 out := v.Reg0()
326                 // SYNC when AuxInt == 1; otherwise, load-acquire
327                 if v.AuxInt == 1 {
328                         psync := s.Prog(ppc64.ASYNC)
329                         psync.To.Type = obj.TYPE_NONE
330                 }
331                 // Load
332                 p := s.Prog(ld)
333                 p.From.Type = obj.TYPE_MEM
334                 p.From.Reg = arg0
335                 p.To.Type = obj.TYPE_REG
336                 p.To.Reg = out
337                 // CMP
338                 p1 := s.Prog(cmp)
339                 p1.From.Type = obj.TYPE_REG
340                 p1.From.Reg = out
341                 p1.To.Type = obj.TYPE_REG
342                 p1.To.Reg = out
343                 // BNE
344                 p2 := s.Prog(ppc64.ABNE)
345                 p2.To.Type = obj.TYPE_BRANCH
346                 // ISYNC
347                 pisync := s.Prog(ppc64.AISYNC)
348                 pisync.To.Type = obj.TYPE_NONE
349                 gc.Patch(p2, pisync)
350
351         case ssa.OpPPC64LoweredAtomicStore8,
352                 ssa.OpPPC64LoweredAtomicStore32,
353                 ssa.OpPPC64LoweredAtomicStore64:
354                 // SYNC or LWSYNC
355                 // MOVB/MOVW/MOVD arg1,(arg0)
356                 st := ppc64.AMOVD
357                 switch v.Op {
358                 case ssa.OpPPC64LoweredAtomicStore8:
359                         st = ppc64.AMOVB
360                 case ssa.OpPPC64LoweredAtomicStore32:
361                         st = ppc64.AMOVW
362                 }
363                 arg0 := v.Args[0].Reg()
364                 arg1 := v.Args[1].Reg()
365                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
366                 // SYNC
367                 syncOp := ppc64.ASYNC
368                 if v.AuxInt == 0 {
369                         syncOp = ppc64.ALWSYNC
370                 }
371                 psync := s.Prog(syncOp)
372                 psync.To.Type = obj.TYPE_NONE
373                 // Store
374                 p := s.Prog(st)
375                 p.To.Type = obj.TYPE_MEM
376                 p.To.Reg = arg0
377                 p.From.Type = obj.TYPE_REG
378                 p.From.Reg = arg1
379
380         case ssa.OpPPC64LoweredAtomicCas64,
381                 ssa.OpPPC64LoweredAtomicCas32:
382                 // LWSYNC
383                 // loop:
384                 // LDAR        (Rarg0), MutexHint, Rtmp
385                 // CMP         Rarg1, Rtmp
386                 // BNE         fail
387                 // STDCCC      Rarg2, (Rarg0)
388                 // BNE         loop
389                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
390                 // MOVD        $1, Rout
391                 // BR          end
392                 // fail:
393                 // MOVD        $0, Rout
394                 // end:
395                 ld := ppc64.ALDAR
396                 st := ppc64.ASTDCCC
397                 cmp := ppc64.ACMP
398                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
399                         ld = ppc64.ALWAR
400                         st = ppc64.ASTWCCC
401                         cmp = ppc64.ACMPW
402                 }
403                 r0 := v.Args[0].Reg()
404                 r1 := v.Args[1].Reg()
405                 r2 := v.Args[2].Reg()
406                 out := v.Reg0()
407                 // LWSYNC - Assuming shared data not write-through-required nor
408                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
409                 plwsync1 := s.Prog(ppc64.ALWSYNC)
410                 plwsync1.To.Type = obj.TYPE_NONE
411                 // LDAR or LWAR
412                 p := s.Prog(ld)
413                 p.From.Type = obj.TYPE_MEM
414                 p.From.Reg = r0
415                 p.To.Type = obj.TYPE_REG
416                 p.To.Reg = ppc64.REGTMP
417                 // If it is a Compare-and-Swap-Release operation, set the EH field with
418                 // the release hint.
419                 if v.AuxInt == 0 {
420                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
421                 }
422                 // CMP reg1,reg2
423                 p1 := s.Prog(cmp)
424                 p1.From.Type = obj.TYPE_REG
425                 p1.From.Reg = r1
426                 p1.To.Reg = ppc64.REGTMP
427                 p1.To.Type = obj.TYPE_REG
428                 // BNE cas_fail
429                 p2 := s.Prog(ppc64.ABNE)
430                 p2.To.Type = obj.TYPE_BRANCH
431                 // STDCCC or STWCCC
432                 p3 := s.Prog(st)
433                 p3.From.Type = obj.TYPE_REG
434                 p3.From.Reg = r2
435                 p3.To.Type = obj.TYPE_MEM
436                 p3.To.Reg = r0
437                 // BNE retry
438                 p4 := s.Prog(ppc64.ABNE)
439                 p4.To.Type = obj.TYPE_BRANCH
440                 gc.Patch(p4, p)
441                 // LWSYNC - Assuming shared data not write-through-required nor
442                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
443                 // If the operation is a CAS-Release, then synchronization is not necessary.
444                 if v.AuxInt != 0 {
445                         plwsync2 := s.Prog(ppc64.ALWSYNC)
446                         plwsync2.To.Type = obj.TYPE_NONE
447                 }
448                 // return true
449                 p5 := s.Prog(ppc64.AMOVD)
450                 p5.From.Type = obj.TYPE_CONST
451                 p5.From.Offset = 1
452                 p5.To.Type = obj.TYPE_REG
453                 p5.To.Reg = out
454                 // BR done
455                 p6 := s.Prog(obj.AJMP)
456                 p6.To.Type = obj.TYPE_BRANCH
457                 // return false
458                 p7 := s.Prog(ppc64.AMOVD)
459                 p7.From.Type = obj.TYPE_CONST
460                 p7.From.Offset = 0
461                 p7.To.Type = obj.TYPE_REG
462                 p7.To.Reg = out
463                 gc.Patch(p2, p7)
464                 // done (label)
465                 p8 := s.Prog(obj.ANOP)
466                 gc.Patch(p6, p8)
467
468         case ssa.OpPPC64LoweredGetClosurePtr:
469                 // Closure pointer is R11 (already)
470                 gc.CheckLoweredGetClosurePtr(v)
471
472         case ssa.OpPPC64LoweredGetCallerSP:
473                 // caller's SP is FixedFrameSize below the address of the first arg
474                 p := s.Prog(ppc64.AMOVD)
475                 p.From.Type = obj.TYPE_ADDR
476                 p.From.Offset = -gc.Ctxt.FixedFrameSize()
477                 p.From.Name = obj.NAME_PARAM
478                 p.To.Type = obj.TYPE_REG
479                 p.To.Reg = v.Reg()
480
481         case ssa.OpPPC64LoweredGetCallerPC:
482                 p := s.Prog(obj.AGETCALLERPC)
483                 p.To.Type = obj.TYPE_REG
484                 p.To.Reg = v.Reg()
485
486         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
487                 // input is already rounded
488
489         case ssa.OpLoadReg:
490                 loadOp := loadByType(v.Type)
491                 p := s.Prog(loadOp)
492                 gc.AddrAuto(&p.From, v.Args[0])
493                 p.To.Type = obj.TYPE_REG
494                 p.To.Reg = v.Reg()
495
496         case ssa.OpStoreReg:
497                 storeOp := storeByType(v.Type)
498                 p := s.Prog(storeOp)
499                 p.From.Type = obj.TYPE_REG
500                 p.From.Reg = v.Args[0].Reg()
501                 gc.AddrAuto(&p.To, v)
502
503         case ssa.OpPPC64DIVD:
504                 // For now,
505                 //
506                 // cmp arg1, -1
507                 // be  ahead
508                 // v = arg0 / arg1
509                 // b over
510                 // ahead: v = - arg0
511                 // over: nop
512                 r := v.Reg()
513                 r0 := v.Args[0].Reg()
514                 r1 := v.Args[1].Reg()
515
516                 p := s.Prog(ppc64.ACMP)
517                 p.From.Type = obj.TYPE_REG
518                 p.From.Reg = r1
519                 p.To.Type = obj.TYPE_CONST
520                 p.To.Offset = -1
521
522                 pbahead := s.Prog(ppc64.ABEQ)
523                 pbahead.To.Type = obj.TYPE_BRANCH
524
525                 p = s.Prog(v.Op.Asm())
526                 p.From.Type = obj.TYPE_REG
527                 p.From.Reg = r1
528                 p.Reg = r0
529                 p.To.Type = obj.TYPE_REG
530                 p.To.Reg = r
531
532                 pbover := s.Prog(obj.AJMP)
533                 pbover.To.Type = obj.TYPE_BRANCH
534
535                 p = s.Prog(ppc64.ANEG)
536                 p.To.Type = obj.TYPE_REG
537                 p.To.Reg = r
538                 p.From.Type = obj.TYPE_REG
539                 p.From.Reg = r0
540                 gc.Patch(pbahead, p)
541
542                 p = s.Prog(obj.ANOP)
543                 gc.Patch(pbover, p)
544
545         case ssa.OpPPC64DIVW:
546                 // word-width version of above
547                 r := v.Reg()
548                 r0 := v.Args[0].Reg()
549                 r1 := v.Args[1].Reg()
550
551                 p := s.Prog(ppc64.ACMPW)
552                 p.From.Type = obj.TYPE_REG
553                 p.From.Reg = r1
554                 p.To.Type = obj.TYPE_CONST
555                 p.To.Offset = -1
556
557                 pbahead := s.Prog(ppc64.ABEQ)
558                 pbahead.To.Type = obj.TYPE_BRANCH
559
560                 p = s.Prog(v.Op.Asm())
561                 p.From.Type = obj.TYPE_REG
562                 p.From.Reg = r1
563                 p.Reg = r0
564                 p.To.Type = obj.TYPE_REG
565                 p.To.Reg = r
566
567                 pbover := s.Prog(obj.AJMP)
568                 pbover.To.Type = obj.TYPE_BRANCH
569
570                 p = s.Prog(ppc64.ANEG)
571                 p.To.Type = obj.TYPE_REG
572                 p.To.Reg = r
573                 p.From.Type = obj.TYPE_REG
574                 p.From.Reg = r0
575                 gc.Patch(pbahead, p)
576
577                 p = s.Prog(obj.ANOP)
578                 gc.Patch(pbover, p)
579
580         case ssa.OpPPC64CLRLSLWI:
581                 r := v.Reg()
582                 r1 := v.Args[0].Reg()
583                 shifts := v.AuxInt
584                 p := s.Prog(v.Op.Asm())
585                 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
586                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
587                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
588                 p.Reg = r1
589                 p.To.Type = obj.TYPE_REG
590                 p.To.Reg = r
591
592         case ssa.OpPPC64CLRLSLDI:
593                 r := v.Reg()
594                 r1 := v.Args[0].Reg()
595                 shifts := v.AuxInt
596                 p := s.Prog(v.Op.Asm())
597                 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
598                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
599                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
600                 p.Reg = r1
601                 p.To.Type = obj.TYPE_REG
602                 p.To.Reg = r
603
604                 // Mask has been set as sh
605         case ssa.OpPPC64RLDICL:
606                 r := v.Reg()
607                 r1 := v.Args[0].Reg()
608                 shifts := v.AuxInt
609                 p := s.Prog(v.Op.Asm())
610                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
611                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)})
612                 p.Reg = r1
613                 p.To.Type = obj.TYPE_REG
614                 p.To.Reg = r
615
616         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
617                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
618                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
619                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
620                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
621                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
622                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
623                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
624                 r := v.Reg()
625                 r1 := v.Args[0].Reg()
626                 r2 := v.Args[1].Reg()
627                 p := s.Prog(v.Op.Asm())
628                 p.From.Type = obj.TYPE_REG
629                 p.From.Reg = r2
630                 p.Reg = r1
631                 p.To.Type = obj.TYPE_REG
632                 p.To.Reg = r
633
634         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
635                 r1 := v.Args[0].Reg()
636                 r2 := v.Args[1].Reg()
637                 p := s.Prog(v.Op.Asm())
638                 p.From.Type = obj.TYPE_REG
639                 p.From.Reg = r2
640                 p.Reg = r1
641                 p.To.Type = obj.TYPE_REG
642                 p.To.Reg = ppc64.REGTMP // result is not needed
643
644         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
645                 p := s.Prog(v.Op.Asm())
646                 p.From.Type = obj.TYPE_CONST
647                 p.From.Offset = v.AuxInt
648                 p.Reg = v.Args[0].Reg()
649                 p.To.Type = obj.TYPE_REG
650                 p.To.Reg = v.Reg()
651
652                 // Auxint holds encoded rotate + mask
653         case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
654                 rot, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
655                 p := s.Prog(v.Op.Asm())
656                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
657                 p.Reg = v.Args[0].Reg()
658                 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
659                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
660
661                 // Auxint holds mask
662         case ssa.OpPPC64RLWNM:
663                 _, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
664                 p := s.Prog(v.Op.Asm())
665                 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
666                 p.Reg = v.Args[0].Reg()
667                 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
668                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
669
670         case ssa.OpPPC64MADDLD:
671                 r := v.Reg()
672                 r1 := v.Args[0].Reg()
673                 r2 := v.Args[1].Reg()
674                 r3 := v.Args[2].Reg()
675                 // r = r1*r2 ± r3
676                 p := s.Prog(v.Op.Asm())
677                 p.From.Type = obj.TYPE_REG
678                 p.From.Reg = r1
679                 p.Reg = r2
680                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
681                 p.To.Type = obj.TYPE_REG
682                 p.To.Reg = r
683
684         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
685                 r := v.Reg()
686                 r1 := v.Args[0].Reg()
687                 r2 := v.Args[1].Reg()
688                 r3 := v.Args[2].Reg()
689                 // r = r1*r2 ± r3
690                 p := s.Prog(v.Op.Asm())
691                 p.From.Type = obj.TYPE_REG
692                 p.From.Reg = r1
693                 p.Reg = r3
694                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
695                 p.To.Type = obj.TYPE_REG
696                 p.To.Reg = r
697
698         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
699                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
700                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
701                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
702                 r := v.Reg()
703                 p := s.Prog(v.Op.Asm())
704                 p.To.Type = obj.TYPE_REG
705                 p.To.Reg = r
706                 p.From.Type = obj.TYPE_REG
707                 p.From.Reg = v.Args[0].Reg()
708
709         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
710                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
711                 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
712                 p := s.Prog(v.Op.Asm())
713                 p.Reg = v.Args[0].Reg()
714                 p.From.Type = obj.TYPE_CONST
715                 p.From.Offset = v.AuxInt
716                 p.To.Type = obj.TYPE_REG
717                 p.To.Reg = v.Reg()
718
719         case ssa.OpPPC64SUBFCconst:
720                 p := s.Prog(v.Op.Asm())
721                 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
722                 p.From.Type = obj.TYPE_REG
723                 p.From.Reg = v.Args[0].Reg()
724                 p.To.Type = obj.TYPE_REG
725                 p.To.Reg = v.Reg()
726
727         case ssa.OpPPC64ANDCCconst:
728                 p := s.Prog(v.Op.Asm())
729                 p.Reg = v.Args[0].Reg()
730                 p.From.Type = obj.TYPE_CONST
731                 p.From.Offset = v.AuxInt
732                 p.To.Type = obj.TYPE_REG
733                 p.To.Reg = ppc64.REGTMP // discard result
734
735         case ssa.OpPPC64MOVDaddr:
736                 switch v.Aux.(type) {
737                 default:
738                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
739                 case nil:
740                         // If aux offset and aux int are both 0, and the same
741                         // input and output regs are used, no instruction
742                         // needs to be generated, since it would just be
743                         // addi rx, rx, 0.
744                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
745                                 p := s.Prog(ppc64.AMOVD)
746                                 p.From.Type = obj.TYPE_ADDR
747                                 p.From.Reg = v.Args[0].Reg()
748                                 p.From.Offset = v.AuxInt
749                                 p.To.Type = obj.TYPE_REG
750                                 p.To.Reg = v.Reg()
751                         }
752
753                 case *obj.LSym, *gc.Node:
754                         p := s.Prog(ppc64.AMOVD)
755                         p.From.Type = obj.TYPE_ADDR
756                         p.From.Reg = v.Args[0].Reg()
757                         p.To.Type = obj.TYPE_REG
758                         p.To.Reg = v.Reg()
759                         gc.AddAux(&p.From, v)
760
761                 }
762
763         case ssa.OpPPC64MOVDconst:
764                 p := s.Prog(v.Op.Asm())
765                 p.From.Type = obj.TYPE_CONST
766                 p.From.Offset = v.AuxInt
767                 p.To.Type = obj.TYPE_REG
768                 p.To.Reg = v.Reg()
769
770         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
771                 p := s.Prog(v.Op.Asm())
772                 p.From.Type = obj.TYPE_FCONST
773                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
774                 p.To.Type = obj.TYPE_REG
775                 p.To.Reg = v.Reg()
776
777         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
778                 p := s.Prog(v.Op.Asm())
779                 p.From.Type = obj.TYPE_REG
780                 p.From.Reg = v.Args[0].Reg()
781                 p.To.Type = obj.TYPE_REG
782                 p.To.Reg = v.Args[1].Reg()
783
784         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
785                 p := s.Prog(v.Op.Asm())
786                 p.From.Type = obj.TYPE_REG
787                 p.From.Reg = v.Args[0].Reg()
788                 p.To.Type = obj.TYPE_CONST
789                 p.To.Offset = v.AuxInt
790
791         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
792                 // Shift in register to required size
793                 p := s.Prog(v.Op.Asm())
794                 p.From.Type = obj.TYPE_REG
795                 p.From.Reg = v.Args[0].Reg()
796                 p.To.Reg = v.Reg()
797                 p.To.Type = obj.TYPE_REG
798
799         case ssa.OpPPC64MOVDload:
800
801                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
802                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
803                 // the offset is not known until link time. If the load of a go.string uses relocation for the
804                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
805                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
806                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
807                 // go.string types because other types will have proper alignment.
808
809                 gostring := false
810                 switch n := v.Aux.(type) {
811                 case *obj.LSym:
812                         gostring = strings.HasPrefix(n.Name, "go.string.")
813                 }
814                 if gostring {
815                         // Generate full addr of the go.string const
816                         // including AuxInt
817                         p := s.Prog(ppc64.AMOVD)
818                         p.From.Type = obj.TYPE_ADDR
819                         p.From.Reg = v.Args[0].Reg()
820                         gc.AddAux(&p.From, v)
821                         p.To.Type = obj.TYPE_REG
822                         p.To.Reg = v.Reg()
823                         // Load go.string using 0 offset
824                         p = s.Prog(v.Op.Asm())
825                         p.From.Type = obj.TYPE_MEM
826                         p.From.Reg = v.Reg()
827                         p.To.Type = obj.TYPE_REG
828                         p.To.Reg = v.Reg()
829                         break
830                 }
831                 // Not a go.string, generate a normal load
832                 fallthrough
833
834         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
835                 p := s.Prog(v.Op.Asm())
836                 p.From.Type = obj.TYPE_MEM
837                 p.From.Reg = v.Args[0].Reg()
838                 gc.AddAux(&p.From, v)
839                 p.To.Type = obj.TYPE_REG
840                 p.To.Reg = v.Reg()
841
842         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
843                 p := s.Prog(v.Op.Asm())
844                 p.From.Type = obj.TYPE_MEM
845                 p.From.Reg = v.Args[0].Reg()
846                 p.To.Type = obj.TYPE_REG
847                 p.To.Reg = v.Reg()
848
849         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
850                 p := s.Prog(v.Op.Asm())
851                 p.To.Type = obj.TYPE_MEM
852                 p.To.Reg = v.Args[0].Reg()
853                 p.From.Type = obj.TYPE_REG
854                 p.From.Reg = v.Args[1].Reg()
855
856         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
857                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
858                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
859                 p := s.Prog(v.Op.Asm())
860                 p.From.Type = obj.TYPE_MEM
861                 p.From.Reg = v.Args[0].Reg()
862                 p.From.Index = v.Args[1].Reg()
863                 p.To.Type = obj.TYPE_REG
864                 p.To.Reg = v.Reg()
865
866         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
867                 p := s.Prog(v.Op.Asm())
868                 p.From.Type = obj.TYPE_REG
869                 p.From.Reg = ppc64.REGZERO
870                 p.To.Type = obj.TYPE_MEM
871                 p.To.Reg = v.Args[0].Reg()
872                 gc.AddAux(&p.To, v)
873
874         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
875                 p := s.Prog(v.Op.Asm())
876                 p.From.Type = obj.TYPE_REG
877                 p.From.Reg = v.Args[1].Reg()
878                 p.To.Type = obj.TYPE_MEM
879                 p.To.Reg = v.Args[0].Reg()
880                 gc.AddAux(&p.To, v)
881
882         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
883                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
884                 ssa.OpPPC64MOVHBRstoreidx:
885                 p := s.Prog(v.Op.Asm())
886                 p.From.Type = obj.TYPE_REG
887                 p.From.Reg = v.Args[2].Reg()
888                 p.To.Index = v.Args[1].Reg()
889                 p.To.Type = obj.TYPE_MEM
890                 p.To.Reg = v.Args[0].Reg()
891
892         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
893                 // ISEL, ISELB
894                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
895                 // ISEL only accepts 0, 1, 2 condition values but the others can be
896                 // achieved by swapping operand order.
897                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
898                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
899                 // ISELB is used when a boolean result is needed, returning 0 or 1
900                 p := s.Prog(ppc64.AISEL)
901                 p.To.Type = obj.TYPE_REG
902                 p.To.Reg = v.Reg()
903                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
904                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
905                 if v.Op == ssa.OpPPC64ISEL {
906                         r.Reg = v.Args[1].Reg()
907                 }
908                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
909                 if v.AuxInt > 3 {
910                         p.Reg = r.Reg
911                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
912                 } else {
913                         p.Reg = v.Args[0].Reg()
914                         p.SetFrom3(r)
915                 }
916                 p.From.Type = obj.TYPE_CONST
917                 p.From.Offset = v.AuxInt & 3
918
919         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
920                 // The LoweredQuad code generation
921                 // generates STXV instructions on
922                 // power9. The Short variation is used
923                 // if no loop is generated.
924
925                 // sizes >= 128 generate a loop as follows:
926
927                 // Set up loop counter in CTR, used by BC
928                 // XXLXOR clears VS32
929                 //       XXLXOR VS32,VS32,VS32
930                 //       MOVD len/64,REG_TMP
931                 //       MOVD REG_TMP,CTR
932                 //       loop:
933                 //       STXV VS32,0(R20)
934                 //       STXV VS32,16(R20)
935                 //       STXV VS32,32(R20)
936                 //       STXV VS32,48(R20)
937                 //       ADD  $64,R20
938                 //       BC   16, 0, loop
939
940                 // Bytes per iteration
941                 // Number of 64-byte loop iterations
942
943                 // Remainder bytes
944                 rem := v.AuxInt % 64
945
946                 // Only generate a loop if there is more
947                 // than 1 iteration.
948                 if ctr > 1 {
949                         // Set up VS32 (V0) to hold 0s
950                         p := s.Prog(ppc64.AXXLXOR)
951                         p.From.Type = obj.TYPE_REG
952                         p.From.Reg = ppc64.REG_VS32
953                         p.To.Type = obj.TYPE_REG
954                         p.To.Reg = ppc64.REG_VS32
955                         p.Reg = ppc64.REG_VS32
956
957                         // Set up CTR loop counter
958                         p = s.Prog(ppc64.AMOVD)
959                         p.From.Type = obj.TYPE_CONST
960                         p.From.Offset = ctr
961                         p.To.Type = obj.TYPE_REG
962                         p.To.Reg = ppc64.REGTMP
963
964                         p = s.Prog(ppc64.AMOVD)
965                         p.From.Type = obj.TYPE_REG
966                         p.From.Reg = ppc64.REGTMP
967                         p.To.Type = obj.TYPE_REG
968                         p.To.Reg = ppc64.REG_CTR
969
970                         // Don't generate padding for
971                         // loops with few iterations.
972                         if ctr > 3 {
973                                 p = s.Prog(obj.APCALIGN)
974                                 p.From.Type = obj.TYPE_CONST
975                                 p.From.Offset = 16
976                         }
977
978                         // generate 4 STXVs to zero 64 bytes
979                         var top *obj.Prog
980
981                         p = s.Prog(ppc64.ASTXV)
982                         p.From.Type = obj.TYPE_REG
983                         p.From.Reg = ppc64.REG_VS32
984                         p.To.Type = obj.TYPE_MEM
985                         p.To.Reg = v.Args[0].Reg()
986
987                         //  Save the top of loop
988                         if top == nil {
989                                 top = p
990                         }
991                         p = s.Prog(ppc64.ASTXV)
992                         p.From.Type = obj.TYPE_REG
993                         p.From.Reg = ppc64.REG_VS32
994                         p.To.Type = obj.TYPE_MEM
995                         p.To.Reg = v.Args[0].Reg()
996                         p.To.Offset = 16
997
998                         p = s.Prog(ppc64.ASTXV)
999                         p.From.Type = obj.TYPE_REG
1000                         p.From.Reg = ppc64.REG_VS32
1001                         p.To.Type = obj.TYPE_MEM
1002                         p.To.Reg = v.Args[0].Reg()
1003                         p.To.Offset = 32
1004
1005                         p = s.Prog(ppc64.ASTXV)
1006                         p.From.Type = obj.TYPE_REG
1007                         p.From.Reg = ppc64.REG_VS32
1008                         p.To.Type = obj.TYPE_MEM
1009                         p.To.Reg = v.Args[0].Reg()
1010                         p.To.Offset = 48
1011
1012                         // Increment address for the
1013                         // 64 bytes just zeroed.
1014                         p = s.Prog(ppc64.AADD)
1015                         p.Reg = v.Args[0].Reg()
1016                         p.From.Type = obj.TYPE_CONST
1017                         p.From.Offset = 64
1018                         p.To.Type = obj.TYPE_REG
1019                         p.To.Reg = v.Args[0].Reg()
1020
1021                         // Branch back to top of loop
1022                         // based on CTR
1023                         // BC with BO_BCTR generates bdnz
1024                         p = s.Prog(ppc64.ABC)
1025                         p.From.Type = obj.TYPE_CONST
1026                         p.From.Offset = ppc64.BO_BCTR
1027                         p.Reg = ppc64.REG_R0
1028                         p.To.Type = obj.TYPE_BRANCH
1029                         gc.Patch(p, top)
1030                 }
1031                 // When ctr == 1 the loop was not generated but
1032                 // there are at least 64 bytes to clear, so add
1033                 // that to the remainder to generate the code
1034                 // to clear those doublewords
1035                 if ctr == 1 {
1036                         rem += 64
1037                 }
1038
1039                 // Clear the remainder starting at offset zero
1040                 offset := int64(0)
1041
1042                 if rem >= 16 && ctr <= 1 {
1043                         // If the XXLXOR hasn't already been
1044                         // generated, do it here to initialize
1045                         // VS32 (V0) to 0.
1046                         p := s.Prog(ppc64.AXXLXOR)
1047                         p.From.Type = obj.TYPE_REG
1048                         p.From.Reg = ppc64.REG_VS32
1049                         p.To.Type = obj.TYPE_REG
1050                         p.To.Reg = ppc64.REG_VS32
1051                         p.Reg = ppc64.REG_VS32
1052                 }
1053                 // Generate STXV for 32 or 64
1054                 // bytes.
1055                 for rem >= 32 {
1056                         p := s.Prog(ppc64.ASTXV)
1057                         p.From.Type = obj.TYPE_REG
1058                         p.From.Reg = ppc64.REG_VS32
1059                         p.To.Type = obj.TYPE_MEM
1060                         p.To.Reg = v.Args[0].Reg()
1061                         p.To.Offset = offset
1062
1063                         p = s.Prog(ppc64.ASTXV)
1064                         p.From.Type = obj.TYPE_REG
1065                         p.From.Reg = ppc64.REG_VS32
1066                         p.To.Type = obj.TYPE_MEM
1067                         p.To.Reg = v.Args[0].Reg()
1068                         p.To.Offset = offset + 16
1069                         offset += 32
1070                         rem -= 32
1071                 }
1072                 // Generate 16 bytes
1073                 if rem >= 16 {
1074                         p := s.Prog(ppc64.ASTXV)
1075                         p.From.Type = obj.TYPE_REG
1076                         p.From.Reg = ppc64.REG_VS32
1077                         p.To.Type = obj.TYPE_MEM
1078                         p.To.Reg = v.Args[0].Reg()
1079                         p.To.Offset = offset
1080                         offset += 16
1081                         rem -= 16
1082                 }
1083
1084                 // first clear as many doublewords as possible
1085                 // then clear remaining sizes as available
1086                 for rem > 0 {
1087                         op, size := ppc64.AMOVB, int64(1)
1088                         switch {
1089                         case rem >= 8:
1090                                 op, size = ppc64.AMOVD, 8
1091                         case rem >= 4:
1092                                 op, size = ppc64.AMOVW, 4
1093                         case rem >= 2:
1094                                 op, size = ppc64.AMOVH, 2
1095                         }
1096                         p := s.Prog(op)
1097                         p.From.Type = obj.TYPE_REG
1098                         p.From.Reg = ppc64.REG_R0
1099                         p.To.Type = obj.TYPE_MEM
1100                         p.To.Reg = v.Args[0].Reg()
1101                         p.To.Offset = offset
1102                         rem -= size
1103                         offset += size
1104                 }
1105
1106         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1107
1108                 // Unaligned data doesn't hurt performance
1109                 // for these instructions on power8.
1110
1111                 // For sizes >= 64 generate a loop as follows:
1112
1113                 // Set up loop counter in CTR, used by BC
1114                 //       XXLXOR VS32,VS32,VS32
1115                 //       MOVD len/32,REG_TMP
1116                 //       MOVD REG_TMP,CTR
1117                 //       MOVD $16,REG_TMP
1118                 //       loop:
1119                 //       STXVD2X VS32,(R0)(R20)
1120                 //       STXVD2X VS32,(R31)(R20)
1121                 //       ADD  $32,R20
1122                 //       BC   16, 0, loop
1123                 //
1124                 // any remainder is done as described below
1125
1126                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1127                 // then handle the remainder
1128                 //      MOVD R0,(R20)
1129                 //      MOVD R0,8(R20)
1130                 // .... etc.
1131                 //
1132                 // the remainder bytes are cleared using one or more
1133                 // of the following instructions with the appropriate
1134                 // offsets depending which instructions are needed
1135                 //
1136                 //      MOVW R0,n1(R20) 4 bytes
1137                 //      MOVH R0,n2(R20) 2 bytes
1138                 //      MOVB R0,n3(R20) 1 byte
1139                 //
1140                 // 7 bytes: MOVW, MOVH, MOVB
1141                 // 6 bytes: MOVW, MOVH
1142                 // 5 bytes: MOVW, MOVB
1143                 // 3 bytes: MOVH, MOVB
1144
1145                 // each loop iteration does 32 bytes
1146                 ctr := v.AuxInt / 32
1147
1148                 // remainder bytes
1149                 rem := v.AuxInt % 32
1150
1151                 // only generate a loop if there is more
1152                 // than 1 iteration.
1153                 if ctr > 1 {
1154                         // Set up VS32 (V0) to hold 0s
1155                         p := s.Prog(ppc64.AXXLXOR)
1156                         p.From.Type = obj.TYPE_REG
1157                         p.From.Reg = ppc64.REG_VS32
1158                         p.To.Type = obj.TYPE_REG
1159                         p.To.Reg = ppc64.REG_VS32
1160                         p.Reg = ppc64.REG_VS32
1161
1162                         // Set up CTR loop counter
1163                         p = s.Prog(ppc64.AMOVD)
1164                         p.From.Type = obj.TYPE_CONST
1165                         p.From.Offset = ctr
1166                         p.To.Type = obj.TYPE_REG
1167                         p.To.Reg = ppc64.REGTMP
1168
1169                         p = s.Prog(ppc64.AMOVD)
1170                         p.From.Type = obj.TYPE_REG
1171                         p.From.Reg = ppc64.REGTMP
1172                         p.To.Type = obj.TYPE_REG
1173                         p.To.Reg = ppc64.REG_CTR
1174
1175                         // Set up R31 to hold index value 16
1176                         p = s.Prog(ppc64.AMOVD)
1177                         p.From.Type = obj.TYPE_CONST
1178                         p.From.Offset = 16
1179                         p.To.Type = obj.TYPE_REG
1180                         p.To.Reg = ppc64.REGTMP
1181
1182                         // Don't add padding for alignment
1183                         // with few loop iterations.
1184                         if ctr > 3 {
1185                                 p = s.Prog(obj.APCALIGN)
1186                                 p.From.Type = obj.TYPE_CONST
1187                                 p.From.Offset = 16
1188                         }
1189
1190                         // Generate 2 STXVD2Xs, each storing 16 bytes.
1191                         // Since this is a loop, the top must be saved.
1192                         var top *obj.Prog
1193                         // This is the top of loop
1194
1195                         p = s.Prog(ppc64.ASTXVD2X)
1196                         p.From.Type = obj.TYPE_REG
1197                         p.From.Reg = ppc64.REG_VS32
1198                         p.To.Type = obj.TYPE_MEM
1199                         p.To.Reg = v.Args[0].Reg()
1200                         p.To.Index = ppc64.REGZERO
1201                         // Save the top of loop
1202                         if top == nil {
1203                                 top = p
1204                         }
1205                         p = s.Prog(ppc64.ASTXVD2X)
1206                         p.From.Type = obj.TYPE_REG
1207                         p.From.Reg = ppc64.REG_VS32
1208                         p.To.Type = obj.TYPE_MEM
1209                         p.To.Reg = v.Args[0].Reg()
1210                         p.To.Index = ppc64.REGTMP
1211
1212                         // Increment address for the
1213                         // 4 doublewords just zeroed.
1214                         p = s.Prog(ppc64.AADD)
1215                         p.Reg = v.Args[0].Reg()
1216                         p.From.Type = obj.TYPE_CONST
1217                         p.From.Offset = 32
1218                         p.To.Type = obj.TYPE_REG
1219                         p.To.Reg = v.Args[0].Reg()
1220
1221                         // Branch back to top of loop
1222                         // based on CTR
1223                         // BC with BO_BCTR generates bdnz
1224                         p = s.Prog(ppc64.ABC)
1225                         p.From.Type = obj.TYPE_CONST
1226                         p.From.Offset = ppc64.BO_BCTR
1227                         p.Reg = ppc64.REG_R0
1228                         p.To.Type = obj.TYPE_BRANCH
1229                         gc.Patch(p, top)
1230                 }
1231
1232                 // when ctr == 1 the loop was not generated but
1233                 // there are at least 32 bytes to clear, so add
1234                 // that to the remainder to generate the code
1235                 // to clear those doublewords
1236                 if ctr == 1 {
1237                         rem += 32
1238                 }
1239
1240                 // clear the remainder starting at offset zero
1241                 offset := int64(0)
1242
1243                 // first clear as many doublewords as possible
1244                 // then clear remaining sizes as available
1245                 for rem > 0 {
1246                         op, size := ppc64.AMOVB, int64(1)
1247                         switch {
1248                         case rem >= 8:
1249                                 op, size = ppc64.AMOVD, 8
1250                         case rem >= 4:
1251                                 op, size = ppc64.AMOVW, 4
1252                         case rem >= 2:
1253                                 op, size = ppc64.AMOVH, 2
1254                         }
1255                         p := s.Prog(op)
1256                         p.From.Type = obj.TYPE_REG
1257                         p.From.Reg = ppc64.REG_R0
1258                         p.To.Type = obj.TYPE_MEM
1259                         p.To.Reg = v.Args[0].Reg()
1260                         p.To.Offset = offset
1261                         rem -= size
1262                         offset += size
1263                 }
1264
1265         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1266
1267                 bytesPerLoop := int64(32)
1268                 // This will be used when moving more
1269                 // than 8 bytes.  Moves start with
1270                 // as many 8 byte moves as possible, then
1271                 // 4, 2, or 1 byte(s) as remaining.  This will
1272                 // work and be efficient for power8 or later.
1273                 // If there are 64 or more bytes, then a
1274                 // loop is generated to move 32 bytes and
1275                 // update the src and dst addresses on each
1276                 // iteration. When < 64 bytes, the appropriate
1277                 // number of moves are generated based on the
1278                 // size.
1279                 // When moving >= 64 bytes a loop is used
1280                 //      MOVD len/32,REG_TMP
1281                 //      MOVD REG_TMP,CTR
1282                 //      MOVD $16,REG_TMP
1283                 // top:
1284                 //      LXVD2X (R0)(R21),VS32
1285                 //      LXVD2X (R31)(R21),VS33
1286                 //      ADD $32,R21
1287                 //      STXVD2X VS32,(R0)(R20)
1288                 //      STXVD2X VS33,(R31)(R20)
1289                 //      ADD $32,R20
1290                 //      BC 16,0,top
1291                 // Bytes not moved by this loop are moved
1292                 // with a combination of the following instructions,
1293                 // starting with the largest sizes and generating as
1294                 // many as needed, using the appropriate offset value.
1295                 //      MOVD  n(R21),R31
1296                 //      MOVD  R31,n(R20)
1297                 //      MOVW  n1(R21),R31
1298                 //      MOVW  R31,n1(R20)
1299                 //      MOVH  n2(R21),R31
1300                 //      MOVH  R31,n2(R20)
1301                 //      MOVB  n3(R21),R31
1302                 //      MOVB  R31,n3(R20)
1303
1304                 // Each loop iteration moves 32 bytes
1305                 ctr := v.AuxInt / bytesPerLoop
1306
1307                 // Remainder after the loop
1308                 rem := v.AuxInt % bytesPerLoop
1309
1310                 dstReg := v.Args[0].Reg()
1311                 srcReg := v.Args[1].Reg()
1312
1313                 // The set of registers used here must match the clobbered reg list
1314                 // in PPC64Ops.go.
1315                 offset := int64(0)
1316
1317                 // top of the loop
1318                 var top *obj.Prog
1319                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1320                 if ctr > 1 {
1321                         // Set up the CTR
1322                         p := s.Prog(ppc64.AMOVD)
1323                         p.From.Type = obj.TYPE_CONST
1324                         p.From.Offset = ctr
1325                         p.To.Type = obj.TYPE_REG
1326                         p.To.Reg = ppc64.REGTMP
1327
1328                         p = s.Prog(ppc64.AMOVD)
1329                         p.From.Type = obj.TYPE_REG
1330                         p.From.Reg = ppc64.REGTMP
1331                         p.To.Type = obj.TYPE_REG
1332                         p.To.Reg = ppc64.REG_CTR
1333
1334                         // Use REGTMP as index reg
1335                         p = s.Prog(ppc64.AMOVD)
1336                         p.From.Type = obj.TYPE_CONST
1337                         p.From.Offset = 16
1338                         p.To.Type = obj.TYPE_REG
1339                         p.To.Reg = ppc64.REGTMP
1340
1341                         // Don't add padding for
1342                         // alignment with small iteration
1343                         // counts.
1344                         if ctr > 3 {
1345                                 p = s.Prog(obj.APCALIGN)
1346                                 p.From.Type = obj.TYPE_CONST
1347                                 p.From.Offset = 16
1348                         }
1349
1350                         // Generate 16 byte loads and stores.
1351                         // Use temp register for index (16)
1352                         // on the second one.
1353
1354                         p = s.Prog(ppc64.ALXVD2X)
1355                         p.From.Type = obj.TYPE_MEM
1356                         p.From.Reg = srcReg
1357                         p.From.Index = ppc64.REGZERO
1358                         p.To.Type = obj.TYPE_REG
1359                         p.To.Reg = ppc64.REG_VS32
1360                         if top == nil {
1361                                 top = p
1362                         }
1363                         p = s.Prog(ppc64.ALXVD2X)
1364                         p.From.Type = obj.TYPE_MEM
1365                         p.From.Reg = srcReg
1366                         p.From.Index = ppc64.REGTMP
1367                         p.To.Type = obj.TYPE_REG
1368                         p.To.Reg = ppc64.REG_VS33
1369
1370                         // increment the src reg for next iteration
1371                         p = s.Prog(ppc64.AADD)
1372                         p.Reg = srcReg
1373                         p.From.Type = obj.TYPE_CONST
1374                         p.From.Offset = bytesPerLoop
1375                         p.To.Type = obj.TYPE_REG
1376                         p.To.Reg = srcReg
1377
1378                         // generate 16 byte stores
1379                         p = s.Prog(ppc64.ASTXVD2X)
1380                         p.From.Type = obj.TYPE_REG
1381                         p.From.Reg = ppc64.REG_VS32
1382                         p.To.Type = obj.TYPE_MEM
1383                         p.To.Reg = dstReg
1384                         p.To.Index = ppc64.REGZERO
1385
1386                         p = s.Prog(ppc64.ASTXVD2X)
1387                         p.From.Type = obj.TYPE_REG
1388                         p.From.Reg = ppc64.REG_VS33
1389                         p.To.Type = obj.TYPE_MEM
1390                         p.To.Reg = dstReg
1391                         p.To.Index = ppc64.REGTMP
1392
1393                         // increment the dst reg for next iteration
1394                         p = s.Prog(ppc64.AADD)
1395                         p.Reg = dstReg
1396                         p.From.Type = obj.TYPE_CONST
1397                         p.From.Offset = bytesPerLoop
1398                         p.To.Type = obj.TYPE_REG
1399                         p.To.Reg = dstReg
1400
1401                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1402                         // to loop top.
1403                         p = s.Prog(ppc64.ABC)
1404                         p.From.Type = obj.TYPE_CONST
1405                         p.From.Offset = ppc64.BO_BCTR
1406                         p.Reg = ppc64.REG_R0
1407                         p.To.Type = obj.TYPE_BRANCH
1408                         gc.Patch(p, top)
1409
1410                         // srcReg and dstReg were incremented in the loop, so
1411                         // later instructions start with offset 0.
1412                         offset = int64(0)
1413                 }
1414
1415                 // No loop was generated for one iteration, so
1416                 // add 32 bytes to the remainder to move those bytes.
1417                 if ctr == 1 {
1418                         rem += bytesPerLoop
1419                 }
1420
1421                 if rem >= 16 {
1422                         // Generate 16 byte loads and stores.
1423                         // Use temp register for index (value 16)
1424                         // on the second one.
1425                         p := s.Prog(ppc64.ALXVD2X)
1426                         p.From.Type = obj.TYPE_MEM
1427                         p.From.Reg = srcReg
1428                         p.From.Index = ppc64.REGZERO
1429                         p.To.Type = obj.TYPE_REG
1430                         p.To.Reg = ppc64.REG_VS32
1431
1432                         p = s.Prog(ppc64.ASTXVD2X)
1433                         p.From.Type = obj.TYPE_REG
1434                         p.From.Reg = ppc64.REG_VS32
1435                         p.To.Type = obj.TYPE_MEM
1436                         p.To.Reg = dstReg
1437                         p.To.Index = ppc64.REGZERO
1438
1439                         offset = 16
1440                         rem -= 16
1441
1442                         if rem >= 16 {
1443                                 // Use REGTMP as index reg
1444                                 p := s.Prog(ppc64.AMOVD)
1445                                 p.From.Type = obj.TYPE_CONST
1446                                 p.From.Offset = 16
1447                                 p.To.Type = obj.TYPE_REG
1448                                 p.To.Reg = ppc64.REGTMP
1449
1450                                 p = s.Prog(ppc64.ALXVD2X)
1451                                 p.From.Type = obj.TYPE_MEM
1452                                 p.From.Reg = srcReg
1453                                 p.From.Index = ppc64.REGTMP
1454                                 p.To.Type = obj.TYPE_REG
1455                                 p.To.Reg = ppc64.REG_VS32
1456
1457                                 p = s.Prog(ppc64.ASTXVD2X)
1458                                 p.From.Type = obj.TYPE_REG
1459                                 p.From.Reg = ppc64.REG_VS32
1460                                 p.To.Type = obj.TYPE_MEM
1461                                 p.To.Reg = dstReg
1462                                 p.To.Index = ppc64.REGTMP
1463
1464                                 offset = 32
1465                                 rem -= 16
1466                         }
1467                 }
1468
1469                 // Generate all the remaining load and store pairs, starting with
1470                 // as many 8 byte moves as possible, then 4, 2, 1.
1471                 for rem > 0 {
1472                         op, size := ppc64.AMOVB, int64(1)
1473                         switch {
1474                         case rem >= 8:
1475                                 op, size = ppc64.AMOVD, 8
1476                         case rem >= 4:
1477                                 op, size = ppc64.AMOVW, 4
1478                         case rem >= 2:
1479                                 op, size = ppc64.AMOVH, 2
1480                         }
1481                         // Load
1482                         p := s.Prog(op)
1483                         p.To.Type = obj.TYPE_REG
1484                         p.To.Reg = ppc64.REGTMP
1485                         p.From.Type = obj.TYPE_MEM
1486                         p.From.Reg = srcReg
1487                         p.From.Offset = offset
1488
1489                         // Store
1490                         p = s.Prog(op)
1491                         p.From.Type = obj.TYPE_REG
1492                         p.From.Reg = ppc64.REGTMP
1493                         p.To.Type = obj.TYPE_MEM
1494                         p.To.Reg = dstReg
1495                         p.To.Offset = offset
1496                         rem -= size
1497                         offset += size
1498                 }
1499
1500         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1501                 bytesPerLoop := int64(64)
1502                 // This is used when moving more
1503                 // than 8 bytes on power9.  Moves start with
1504                 // as many 8 byte moves as possible, then
1505                 // 4, 2, or 1 byte(s) as remaining.  This will
1506                 // work and be efficient for power8 or later.
1507                 // If there are 128 or more bytes, then a
1508                 // loop is generated to move 64 bytes and
1509                 // update the src and dst addresses on each
1510                 // iteration. When < 128 bytes, the appropriate
1511                 // number of moves are generated based on the
1512                 // size.
1513                 // When moving >= 128 bytes a loop is used
1514                 //      MOVD len/64,REG_TMP
1515                 //      MOVD REG_TMP,CTR
1516                 // top:
1517                 //      LXV 0(R21),VS32; LXV 16(R21),VS33
1518                 //      STXV VS32,0(R20); STXV VS33,16(R20)
1519                 //      LXV 32(R21),VS32; LXV 48(R21),VS33
1520                 //      STXV VS32,32(R20); STXV VS33,48(R20)
1521                 //      ADD $64,R21
1522                 //      ADD $64,R20
1523                 //      BC 16,0,top
1524                 // Bytes not moved by this loop are moved
1525                 // with a combination of the following instructions,
1526                 // starting with the largest sizes and generating as
1527                 // many as needed, using the appropriate offset value.
1528                 //      MOVD  n(R21),R31
1529                 //      MOVD  R31,n(R20)
1530                 //      MOVW  n1(R21),R31
1531                 //      MOVW  R31,n1(R20)
1532                 //      MOVH  n2(R21),R31
1533                 //      MOVH  R31,n2(R20)
1534                 //      MOVB  n3(R21),R31
1535                 //      MOVB  R31,n3(R20)
1536
1537                 // Each loop iteration moves 64 bytes
1538                 ctr := v.AuxInt / bytesPerLoop
1539
1540                 // Remainder after the loop
1541                 rem := v.AuxInt % bytesPerLoop
1542
1543                 dstReg := v.Args[0].Reg()
1544                 srcReg := v.Args[1].Reg()
1545
1546                 offset := int64(0)
1547
1548                 // top of the loop
1549                 var top *obj.Prog
1550
1551                 // Only generate looping code when loop counter is > 1, i.e. for >= 128 bytes
1552                 if ctr > 1 {
1553                         // Set up the CTR
1554                         p := s.Prog(ppc64.AMOVD)
1555                         p.From.Type = obj.TYPE_CONST
1556                         p.From.Offset = ctr
1557                         p.To.Type = obj.TYPE_REG
1558                         p.To.Reg = ppc64.REGTMP
1559
1560                         p = s.Prog(ppc64.AMOVD)
1561                         p.From.Type = obj.TYPE_REG
1562                         p.From.Reg = ppc64.REGTMP
1563                         p.To.Type = obj.TYPE_REG
1564                         p.To.Reg = ppc64.REG_CTR
1565
1566                         p = s.Prog(obj.APCALIGN)
1567                         p.From.Type = obj.TYPE_CONST
1568                         p.From.Offset = 16
1569
1570                         // Generate 16 byte loads and stores.
1571                         p = s.Prog(ppc64.ALXV)
1572                         p.From.Type = obj.TYPE_MEM
1573                         p.From.Reg = srcReg
1574                         p.From.Offset = offset
1575                         p.To.Type = obj.TYPE_REG
1576                         p.To.Reg = ppc64.REG_VS32
1577                         if top == nil {
1578                                 top = p
1579                         }
1580                         p = s.Prog(ppc64.ALXV)
1581                         p.From.Type = obj.TYPE_MEM
1582                         p.From.Reg = srcReg
1583                         p.From.Offset = offset + 16
1584                         p.To.Type = obj.TYPE_REG
1585                         p.To.Reg = ppc64.REG_VS33
1586
1587                         // generate 16 byte stores
1588                         p = s.Prog(ppc64.ASTXV)
1589                         p.From.Type = obj.TYPE_REG
1590                         p.From.Reg = ppc64.REG_VS32
1591                         p.To.Type = obj.TYPE_MEM
1592                         p.To.Reg = dstReg
1593                         p.To.Offset = offset
1594
1595                         p = s.Prog(ppc64.ASTXV)
1596                         p.From.Type = obj.TYPE_REG
1597                         p.From.Reg = ppc64.REG_VS33
1598                         p.To.Type = obj.TYPE_MEM
1599                         p.To.Reg = dstReg
1600                         p.To.Offset = offset + 16
1601
1602                         // Generate 16 byte loads and stores.
1603                         p = s.Prog(ppc64.ALXV)
1604                         p.From.Type = obj.TYPE_MEM
1605                         p.From.Reg = srcReg
1606                         p.From.Offset = offset + 32
1607                         p.To.Type = obj.TYPE_REG
1608                         p.To.Reg = ppc64.REG_VS32
1609
1610                         p = s.Prog(ppc64.ALXV)
1611                         p.From.Type = obj.TYPE_MEM
1612                         p.From.Reg = srcReg
1613                         p.From.Offset = offset + 48
1614                         p.To.Type = obj.TYPE_REG
1615                         p.To.Reg = ppc64.REG_VS33
1616
1617                         // generate 16 byte stores
1618                         p = s.Prog(ppc64.ASTXV)
1619                         p.From.Type = obj.TYPE_REG
1620                         p.From.Reg = ppc64.REG_VS32
1621                         p.To.Type = obj.TYPE_MEM
1622                         p.To.Reg = dstReg
1623                         p.To.Offset = offset + 32
1624
1625                         p = s.Prog(ppc64.ASTXV)
1626                         p.From.Type = obj.TYPE_REG
1627                         p.From.Reg = ppc64.REG_VS33
1628                         p.To.Type = obj.TYPE_MEM
1629                         p.To.Reg = dstReg
1630                         p.To.Offset = offset + 48
1631
1632                         // increment the src reg for next iteration
1633                         p = s.Prog(ppc64.AADD)
1634                         p.Reg = srcReg
1635                         p.From.Type = obj.TYPE_CONST
1636                         p.From.Offset = bytesPerLoop
1637                         p.To.Type = obj.TYPE_REG
1638                         p.To.Reg = srcReg
1639
1640                         // increment the dst reg for next iteration
1641                         p = s.Prog(ppc64.AADD)
1642                         p.Reg = dstReg
1643                         p.From.Type = obj.TYPE_CONST
1644                         p.From.Offset = bytesPerLoop
1645                         p.To.Type = obj.TYPE_REG
1646                         p.To.Reg = dstReg
1647
1648                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1649                         // to loop top.
1650                         p = s.Prog(ppc64.ABC)
1651                         p.From.Type = obj.TYPE_CONST
1652                         p.From.Offset = ppc64.BO_BCTR
1653                         p.Reg = ppc64.REG_R0
1654                         p.To.Type = obj.TYPE_BRANCH
1655                         gc.Patch(p, top)
1656
1657                         // srcReg and dstReg were incremented in the loop, so
1658                         // later instructions start with offset 0.
1659                         offset = int64(0)
1660                 }
1661
1662                 // No loop was generated for one iteration, so
1663                 // add 64 bytes to the remainder to move those bytes.
1664                 if ctr == 1 {
1665                         rem += bytesPerLoop
1666                 }
1667                 if rem >= 32 {
1668                         p := s.Prog(ppc64.ALXV)
1669                         p.From.Type = obj.TYPE_MEM
1670                         p.From.Reg = srcReg
1671                         p.To.Type = obj.TYPE_REG
1672                         p.To.Reg = ppc64.REG_VS32
1673
1674                         p = s.Prog(ppc64.ALXV)
1675                         p.From.Type = obj.TYPE_MEM
1676                         p.From.Reg = srcReg
1677                         p.From.Offset = 16
1678                         p.To.Type = obj.TYPE_REG
1679                         p.To.Reg = ppc64.REG_VS33
1680
1681                         p = s.Prog(ppc64.ASTXV)
1682                         p.From.Type = obj.TYPE_REG
1683                         p.From.Reg = ppc64.REG_VS32
1684                         p.To.Type = obj.TYPE_MEM
1685                         p.To.Reg = dstReg
1686
1687                         p = s.Prog(ppc64.ASTXV)
1688                         p.From.Type = obj.TYPE_REG
1689                         p.From.Reg = ppc64.REG_VS33
1690                         p.To.Type = obj.TYPE_MEM
1691                         p.To.Reg = dstReg
1692                         p.To.Offset = 16
1693
1694                         offset = 32
1695                         rem -= 32
1696                 }
1697
1698                 if rem >= 16 {
1699                         // Generate 16 byte loads and stores.
1700                         p := s.Prog(ppc64.ALXV)
1701                         p.From.Type = obj.TYPE_MEM
1702                         p.From.Reg = srcReg
1703                         p.From.Offset = offset
1704                         p.To.Type = obj.TYPE_REG
1705                         p.To.Reg = ppc64.REG_VS32
1706
1707                         p = s.Prog(ppc64.ASTXV)
1708                         p.From.Type = obj.TYPE_REG
1709                         p.From.Reg = ppc64.REG_VS32
1710                         p.To.Type = obj.TYPE_MEM
1711                         p.To.Reg = dstReg
1712                         p.To.Offset = offset
1713
1714                         offset += 16
1715                         rem -= 16
1716
1717                         if rem >= 16 {
1718                                 p := s.Prog(ppc64.ALXV)
1719                                 p.From.Type = obj.TYPE_MEM
1720                                 p.From.Reg = srcReg
1721                                 p.From.Offset = offset
1722                                 p.To.Type = obj.TYPE_REG
1723                                 p.To.Reg = ppc64.REG_VS32
1724
1725                                 p = s.Prog(ppc64.ASTXV)
1726                                 p.From.Type = obj.TYPE_REG
1727                                 p.From.Reg = ppc64.REG_VS32
1728                                 p.To.Type = obj.TYPE_MEM
1729                                 p.To.Reg = dstReg
1730                                 p.To.Offset = offset
1731
1732                                 offset += 16
1733                                 rem -= 16
1734                         }
1735                 }
1736                 // Generate all the remaining load and store pairs, starting with
1737                 // as many 8 byte moves as possible, then 4, 2, 1.
1738                 for rem > 0 {
1739                         op, size := ppc64.AMOVB, int64(1)
1740                         switch {
1741                         case rem >= 8:
1742                                 op, size = ppc64.AMOVD, 8
1743                         case rem >= 4:
1744                                 op, size = ppc64.AMOVW, 4
1745                         case rem >= 2:
1746                                 op, size = ppc64.AMOVH, 2
1747                         }
1748                         // Load
1749                         p := s.Prog(op)
1750                         p.To.Type = obj.TYPE_REG
1751                         p.To.Reg = ppc64.REGTMP
1752                         p.From.Type = obj.TYPE_MEM
1753                         p.From.Reg = srcReg
1754                         p.From.Offset = offset
1755
1756                         // Store
1757                         p = s.Prog(op)
1758                         p.From.Type = obj.TYPE_REG
1759                         p.From.Reg = ppc64.REGTMP
1760                         p.To.Type = obj.TYPE_MEM
1761                         p.To.Reg = dstReg
1762                         p.To.Offset = offset
1763                         rem -= size
1764                         offset += size
1765                 }
1766
1767         case ssa.OpPPC64CALLstatic:
1768                 s.Call(v)
1769
1770         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1771                 p := s.Prog(ppc64.AMOVD)
1772                 p.From.Type = obj.TYPE_REG
1773                 p.From.Reg = v.Args[0].Reg()
1774                 p.To.Type = obj.TYPE_REG
1775                 p.To.Reg = ppc64.REG_LR
1776
1777                 if v.Args[0].Reg() != ppc64.REG_R12 {
1778                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1779                 }
1780
1781                 pp := s.Call(v)
1782                 pp.To.Reg = ppc64.REG_LR
1783
1784                 if gc.Ctxt.Flag_shared {
1785                         // When compiling Go into PIC, the function we just
1786                         // called via pointer might have been implemented in
1787                         // a separate module and so overwritten the TOC
1788                         // pointer in R2; reload it.
1789                         q := s.Prog(ppc64.AMOVD)
1790                         q.From.Type = obj.TYPE_MEM
1791                         q.From.Offset = 24
1792                         q.From.Reg = ppc64.REGSP
1793                         q.To.Type = obj.TYPE_REG
1794                         q.To.Reg = ppc64.REG_R2
1795                 }
1796
1797         case ssa.OpPPC64LoweredWB:
1798                 p := s.Prog(obj.ACALL)
1799                 p.To.Type = obj.TYPE_MEM
1800                 p.To.Name = obj.NAME_EXTERN
1801                 p.To.Sym = v.Aux.(*obj.LSym)
1802
1803         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1804                 p := s.Prog(obj.ACALL)
1805                 p.To.Type = obj.TYPE_MEM
1806                 p.To.Name = obj.NAME_EXTERN
1807                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1808                 s.UseArgs(16) // space used in callee args area by assembly stubs
1809
1810         case ssa.OpPPC64LoweredNilCheck:
1811                 if objabi.GOOS == "aix" {
1812                         // CMP Rarg0, R0
1813                         // BNE 2(PC)
1814                         // STW R0, 0(R0)
1815                         // NOP (so the BNE has somewhere to land)
1816
1817                         // CMP Rarg0, R0
1818                         p := s.Prog(ppc64.ACMP)
1819                         p.From.Type = obj.TYPE_REG
1820                         p.From.Reg = v.Args[0].Reg()
1821                         p.To.Type = obj.TYPE_REG
1822                         p.To.Reg = ppc64.REG_R0
1823
1824                         // BNE 2(PC)
1825                         p2 := s.Prog(ppc64.ABNE)
1826                         p2.To.Type = obj.TYPE_BRANCH
1827
1828                         // STW R0, 0(R0)
1829                         // Write at 0 is forbidden and will trigger a SIGSEGV
1830                         p = s.Prog(ppc64.AMOVW)
1831                         p.From.Type = obj.TYPE_REG
1832                         p.From.Reg = ppc64.REG_R0
1833                         p.To.Type = obj.TYPE_MEM
1834                         p.To.Reg = ppc64.REG_R0
1835
1836                         // NOP (so the BNE has somewhere to land)
1837                         nop := s.Prog(obj.ANOP)
1838                         gc.Patch(p2, nop)
1839
1840                 } else {
1841                         // Issue a load which will fault if arg is nil.
1842                         p := s.Prog(ppc64.AMOVBZ)
1843                         p.From.Type = obj.TYPE_MEM
1844                         p.From.Reg = v.Args[0].Reg()
1845                         gc.AddAux(&p.From, v)
1846                         p.To.Type = obj.TYPE_REG
1847                         p.To.Reg = ppc64.REGTMP
1848                 }
1849                 if logopt.Enabled() {
1850                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1851                 }
1852                 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1853                         gc.Warnl(v.Pos, "generated nil check")
1854                 }
1855
1856         // These should be resolved by rules and not make it here.
1857         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1858                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1859                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1860                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1861         case ssa.OpPPC64InvertFlags:
1862                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1863         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1864                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1865         case ssa.OpClobber:
1866                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1867         default:
1868                 v.Fatalf("genValue not implemented: %s", v.LongString())
1869         }
1870 }
1871
1872 var blockJump = [...]struct {
1873         asm, invasm     obj.As
1874         asmeq, invasmun bool
1875 }{
1876         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1877         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1878
1879         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1880         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1881         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1882         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1883
1884         // TODO: need to work FP comparisons into block jumps
1885         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1886         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1887         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1888         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1889 }
1890
1891 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1892         switch b.Kind {
1893         case ssa.BlockDefer:
1894                 // defer returns in R3:
1895                 // 0 if we should continue executing
1896                 // 1 if we should jump to deferreturn call
1897                 p := s.Prog(ppc64.ACMP)
1898                 p.From.Type = obj.TYPE_REG
1899                 p.From.Reg = ppc64.REG_R3
1900                 p.To.Type = obj.TYPE_REG
1901                 p.To.Reg = ppc64.REG_R0
1902
1903                 p = s.Prog(ppc64.ABNE)
1904                 p.To.Type = obj.TYPE_BRANCH
1905                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1906                 if b.Succs[0].Block() != next {
1907                         p := s.Prog(obj.AJMP)
1908                         p.To.Type = obj.TYPE_BRANCH
1909                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1910                 }
1911
1912         case ssa.BlockPlain:
1913                 if b.Succs[0].Block() != next {
1914                         p := s.Prog(obj.AJMP)
1915                         p.To.Type = obj.TYPE_BRANCH
1916                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1917                 }
1918         case ssa.BlockExit:
1919         case ssa.BlockRet:
1920                 s.Prog(obj.ARET)
1921         case ssa.BlockRetJmp:
1922                 p := s.Prog(obj.AJMP)
1923                 p.To.Type = obj.TYPE_MEM
1924                 p.To.Name = obj.NAME_EXTERN
1925                 p.To.Sym = b.Aux.(*obj.LSym)
1926
1927         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1928                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1929                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1930                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1931                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1932                 jmp := blockJump[b.Kind]
1933                 switch next {
1934                 case b.Succs[0].Block():
1935                         s.Br(jmp.invasm, b.Succs[1].Block())
1936                         if jmp.invasmun {
1937                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1938                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1939                         }
1940                 case b.Succs[1].Block():
1941                         s.Br(jmp.asm, b.Succs[0].Block())
1942                         if jmp.asmeq {
1943                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1944                         }
1945                 default:
1946                         if b.Likely != ssa.BranchUnlikely {
1947                                 s.Br(jmp.asm, b.Succs[0].Block())
1948                                 if jmp.asmeq {
1949                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1950                                 }
1951                                 s.Br(obj.AJMP, b.Succs[1].Block())
1952                         } else {
1953                                 s.Br(jmp.invasm, b.Succs[1].Block())
1954                                 if jmp.invasmun {
1955                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1956                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1957                                 }
1958                                 s.Br(obj.AJMP, b.Succs[0].Block())
1959                         }
1960                 }
1961         default:
1962                 b.Fatalf("branch not implemented: %s", b.LongString())
1963         }
1964 }