Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
cmd/compile: remove unused carry related ssa ops in ppc64
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/gc"
9         "cmd/compile/internal/logopt"
10         "cmd/compile/internal/ssa"
11         "cmd/compile/internal/types"
12         "cmd/internal/obj"
13         "cmd/internal/obj/ppc64"
14         "cmd/internal/objabi"
15         "math"
16         "strings"
17 )
18
19 // ssaMarkMoves is meant to mark any MOVDconst ops that need to avoid clobbering flags.
// The flag-liveness scan in its body is entirely commented out, so the
// function is currently a no-op: neither s nor b is read or modified.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21         //      flive := b.FlagsLiveAtEnd
22         //      if b.Control != nil && b.Control.Type.IsFlags() {
23         //              flive = true
24         //      }
25         //      for i := len(b.Values) - 1; i >= 0; i-- {
26         //              v := b.Values[i]
27         //              if flive && (v.Op == ssa.OpPPC64MOVDconst) {
28         //                      // The "mark" is any non-nil Aux value.
29         //                      v.Aux = v
30         //              }
31         //              if v.Type.IsFlags() {
32         //                      flive = false
33         //              }
34         //              for _, a := range v.Args {
35         //                      if a.Type.IsFlags() {
36         //                              flive = true
37         //                      }
38         //              }
39         //      }
40 }
41
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
44         if t.IsFloat() {
45                 switch t.Size() {
46                 case 4:
47                         return ppc64.AFMOVS
48                 case 8:
49                         return ppc64.AFMOVD
50                 }
51         } else {
52                 switch t.Size() {
53                 case 1:
54                         if t.IsSigned() {
55                                 return ppc64.AMOVB
56                         } else {
57                                 return ppc64.AMOVBZ
58                         }
59                 case 2:
60                         if t.IsSigned() {
61                                 return ppc64.AMOVH
62                         } else {
63                                 return ppc64.AMOVHZ
64                         }
65                 case 4:
66                         if t.IsSigned() {
67                                 return ppc64.AMOVW
68                         } else {
69                                 return ppc64.AMOVWZ
70                         }
71                 case 8:
72                         return ppc64.AMOVD
73                 }
74         }
75         panic("bad load type")
76 }
77
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
80         if t.IsFloat() {
81                 switch t.Size() {
82                 case 4:
83                         return ppc64.AFMOVS
84                 case 8:
85                         return ppc64.AFMOVD
86                 }
87         } else {
88                 switch t.Size() {
89                 case 1:
90                         return ppc64.AMOVB
91                 case 2:
92                         return ppc64.AMOVH
93                 case 4:
94                         return ppc64.AMOVW
95                 case 8:
96                         return ppc64.AMOVD
97                 }
98         }
99         panic("bad store type")
100 }
101
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
103         switch v.Op {
104         case ssa.OpCopy:
105                 t := v.Type
106                 if t.IsMemory() {
107                         return
108                 }
109                 x := v.Args[0].Reg()
110                 y := v.Reg()
111                 if x != y {
112                         rt := obj.TYPE_REG
113                         op := ppc64.AMOVD
114
115                         if t.IsFloat() {
116                                 op = ppc64.AFMOVD
117                         }
118                         p := s.Prog(op)
119                         p.From.Type = rt
120                         p.From.Reg = x
121                         p.To.Type = rt
122                         p.To.Reg = y
123                 }
124
125         case ssa.OpPPC64LoweredMuluhilo:
126                 // MULHDU       Rarg1, Rarg0, Reg0
127                 // MULLD        Rarg1, Rarg0, Reg1
128                 r0 := v.Args[0].Reg()
129                 r1 := v.Args[1].Reg()
130                 p := s.Prog(ppc64.AMULHDU)
131                 p.From.Type = obj.TYPE_REG
132                 p.From.Reg = r1
133                 p.Reg = r0
134                 p.To.Type = obj.TYPE_REG
135                 p.To.Reg = v.Reg0()
136                 p1 := s.Prog(ppc64.AMULLD)
137                 p1.From.Type = obj.TYPE_REG
138                 p1.From.Reg = r1
139                 p1.Reg = r0
140                 p1.To.Type = obj.TYPE_REG
141                 p1.To.Reg = v.Reg1()
142
143         case ssa.OpPPC64LoweredAdd64Carry:
144                 // ADDC         Rarg2, -1, Rtmp
145                 // ADDE         Rarg1, Rarg0, Reg0
146                 // ADDZE        Rzero, Reg1
147                 r0 := v.Args[0].Reg()
148                 r1 := v.Args[1].Reg()
149                 r2 := v.Args[2].Reg()
150                 p := s.Prog(ppc64.AADDC)
151                 p.From.Type = obj.TYPE_CONST
152                 p.From.Offset = -1
153                 p.Reg = r2
154                 p.To.Type = obj.TYPE_REG
155                 p.To.Reg = ppc64.REGTMP
156                 p1 := s.Prog(ppc64.AADDE)
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.Reg = r0
160                 p1.To.Type = obj.TYPE_REG
161                 p1.To.Reg = v.Reg0()
162                 p2 := s.Prog(ppc64.AADDZE)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGZERO
165                 p2.To.Type = obj.TYPE_REG
166                 p2.To.Reg = v.Reg1()
167
168         case ssa.OpPPC64LoweredAtomicAnd8,
169                 ssa.OpPPC64LoweredAtomicOr8:
170                 // LWSYNC
171                 // LBAR         (Rarg0), Rtmp
172                 // AND/OR       Rarg1, Rtmp
173                 // STBCCC       Rtmp, (Rarg0)
174                 // BNE          -3(PC)
175                 r0 := v.Args[0].Reg()
176                 r1 := v.Args[1].Reg()
177                 // LWSYNC - Assuming shared data not write-through-required nor
178                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179                 plwsync := s.Prog(ppc64.ALWSYNC)
180                 plwsync.To.Type = obj.TYPE_NONE
181                 p := s.Prog(ppc64.ALBAR)
182                 p.From.Type = obj.TYPE_MEM
183                 p.From.Reg = r0
184                 p.To.Type = obj.TYPE_REG
185                 p.To.Reg = ppc64.REGTMP
186                 p1 := s.Prog(v.Op.Asm())
187                 p1.From.Type = obj.TYPE_REG
188                 p1.From.Reg = r1
189                 p1.To.Type = obj.TYPE_REG
190                 p1.To.Reg = ppc64.REGTMP
191                 p2 := s.Prog(ppc64.ASTBCCC)
192                 p2.From.Type = obj.TYPE_REG
193                 p2.From.Reg = ppc64.REGTMP
194                 p2.To.Type = obj.TYPE_MEM
195                 p2.To.Reg = r0
196                 p2.RegTo2 = ppc64.REGTMP
197                 p3 := s.Prog(ppc64.ABNE)
198                 p3.To.Type = obj.TYPE_BRANCH
199                 gc.Patch(p3, p)
200
201         case ssa.OpPPC64LoweredAtomicAdd32,
202                 ssa.OpPPC64LoweredAtomicAdd64:
203                 // LWSYNC
204                 // LDAR/LWAR    (Rarg0), Rout
205                 // ADD          Rarg1, Rout
206                 // STDCCC/STWCCC Rout, (Rarg0)
207                 // BNE         -3(PC)
208                 // MOVW         Rout,Rout (if Add32)
209                 ld := ppc64.ALDAR
210                 st := ppc64.ASTDCCC
211                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
212                         ld = ppc64.ALWAR
213                         st = ppc64.ASTWCCC
214                 }
215                 r0 := v.Args[0].Reg()
216                 r1 := v.Args[1].Reg()
217                 out := v.Reg0()
218                 // LWSYNC - Assuming shared data not write-through-required nor
219                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220                 plwsync := s.Prog(ppc64.ALWSYNC)
221                 plwsync.To.Type = obj.TYPE_NONE
222                 // LDAR or LWAR
223                 p := s.Prog(ld)
224                 p.From.Type = obj.TYPE_MEM
225                 p.From.Reg = r0
226                 p.To.Type = obj.TYPE_REG
227                 p.To.Reg = out
228                 // ADD reg1,out
229                 p1 := s.Prog(ppc64.AADD)
230                 p1.From.Type = obj.TYPE_REG
231                 p1.From.Reg = r1
232                 p1.To.Reg = out
233                 p1.To.Type = obj.TYPE_REG
234                 // STDCCC or STWCCC
235                 p3 := s.Prog(st)
236                 p3.From.Type = obj.TYPE_REG
237                 p3.From.Reg = out
238                 p3.To.Type = obj.TYPE_MEM
239                 p3.To.Reg = r0
240                 // BNE retry
241                 p4 := s.Prog(ppc64.ABNE)
242                 p4.To.Type = obj.TYPE_BRANCH
243                 gc.Patch(p4, p)
244
245                 // Ensure a 32 bit result
246                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247                         p5 := s.Prog(ppc64.AMOVWZ)
248                         p5.To.Type = obj.TYPE_REG
249                         p5.To.Reg = out
250                         p5.From.Type = obj.TYPE_REG
251                         p5.From.Reg = out
252                 }
253
254         case ssa.OpPPC64LoweredAtomicExchange32,
255                 ssa.OpPPC64LoweredAtomicExchange64:
256                 // LWSYNC
257                 // LDAR/LWAR    (Rarg0), Rout
258                 // STDCCC/STWCCC Rout, (Rarg0)
259                 // BNE         -2(PC)
260                 // ISYNC
261                 ld := ppc64.ALDAR
262                 st := ppc64.ASTDCCC
263                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
264                         ld = ppc64.ALWAR
265                         st = ppc64.ASTWCCC
266                 }
267                 r0 := v.Args[0].Reg()
268                 r1 := v.Args[1].Reg()
269                 out := v.Reg0()
270                 // LWSYNC - Assuming shared data not write-through-required nor
271                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272                 plwsync := s.Prog(ppc64.ALWSYNC)
273                 plwsync.To.Type = obj.TYPE_NONE
274                 // LDAR or LWAR
275                 p := s.Prog(ld)
276                 p.From.Type = obj.TYPE_MEM
277                 p.From.Reg = r0
278                 p.To.Type = obj.TYPE_REG
279                 p.To.Reg = out
280                 // STDCCC or STWCCC
281                 p1 := s.Prog(st)
282                 p1.From.Type = obj.TYPE_REG
283                 p1.From.Reg = r1
284                 p1.To.Type = obj.TYPE_MEM
285                 p1.To.Reg = r0
286                 // BNE retry
287                 p2 := s.Prog(ppc64.ABNE)
288                 p2.To.Type = obj.TYPE_BRANCH
289                 gc.Patch(p2, p)
290                 // ISYNC
291                 pisync := s.Prog(ppc64.AISYNC)
292                 pisync.To.Type = obj.TYPE_NONE
293
294         case ssa.OpPPC64LoweredAtomicLoad8,
295                 ssa.OpPPC64LoweredAtomicLoad32,
296                 ssa.OpPPC64LoweredAtomicLoad64,
297                 ssa.OpPPC64LoweredAtomicLoadPtr:
298                 // SYNC
299                 // MOVB/MOVD/MOVW (Rarg0), Rout
300                 // CMP Rout,Rout
301                 // BNE 1(PC)
302                 // ISYNC
303                 ld := ppc64.AMOVD
304                 cmp := ppc64.ACMP
305                 switch v.Op {
306                 case ssa.OpPPC64LoweredAtomicLoad8:
307                         ld = ppc64.AMOVBZ
308                 case ssa.OpPPC64LoweredAtomicLoad32:
309                         ld = ppc64.AMOVWZ
310                         cmp = ppc64.ACMPW
311                 }
312                 arg0 := v.Args[0].Reg()
313                 out := v.Reg0()
314                 // SYNC when AuxInt == 1; otherwise, load-acquire
315                 if v.AuxInt == 1 {
316                         psync := s.Prog(ppc64.ASYNC)
317                         psync.To.Type = obj.TYPE_NONE
318                 }
319                 // Load
320                 p := s.Prog(ld)
321                 p.From.Type = obj.TYPE_MEM
322                 p.From.Reg = arg0
323                 p.To.Type = obj.TYPE_REG
324                 p.To.Reg = out
325                 // CMP
326                 p1 := s.Prog(cmp)
327                 p1.From.Type = obj.TYPE_REG
328                 p1.From.Reg = out
329                 p1.To.Type = obj.TYPE_REG
330                 p1.To.Reg = out
331                 // BNE
332                 p2 := s.Prog(ppc64.ABNE)
333                 p2.To.Type = obj.TYPE_BRANCH
334                 // ISYNC
335                 pisync := s.Prog(ppc64.AISYNC)
336                 pisync.To.Type = obj.TYPE_NONE
337                 gc.Patch(p2, pisync)
338
339         case ssa.OpPPC64LoweredAtomicStore8,
340                 ssa.OpPPC64LoweredAtomicStore32,
341                 ssa.OpPPC64LoweredAtomicStore64:
342                 // SYNC or LWSYNC
343                 // MOVB/MOVW/MOVD arg1,(arg0)
344                 st := ppc64.AMOVD
345                 switch v.Op {
346                 case ssa.OpPPC64LoweredAtomicStore8:
347                         st = ppc64.AMOVB
348                 case ssa.OpPPC64LoweredAtomicStore32:
349                         st = ppc64.AMOVW
350                 }
351                 arg0 := v.Args[0].Reg()
352                 arg1 := v.Args[1].Reg()
353                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
354                 // SYNC
355                 syncOp := ppc64.ASYNC
356                 if v.AuxInt == 0 {
357                         syncOp = ppc64.ALWSYNC
358                 }
359                 psync := s.Prog(syncOp)
360                 psync.To.Type = obj.TYPE_NONE
361                 // Store
362                 p := s.Prog(st)
363                 p.To.Type = obj.TYPE_MEM
364                 p.To.Reg = arg0
365                 p.From.Type = obj.TYPE_REG
366                 p.From.Reg = arg1
367
368         case ssa.OpPPC64LoweredAtomicCas64,
369                 ssa.OpPPC64LoweredAtomicCas32:
370                 // LWSYNC
371                 // loop:
372                 // LDAR        (Rarg0), MutexHint, Rtmp
373                 // CMP         Rarg1, Rtmp
374                 // BNE         fail
375                 // STDCCC      Rarg2, (Rarg0)
376                 // BNE         loop
377                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
378                 // MOVD        $1, Rout
379                 // BR          end
380                 // fail:
381                 // MOVD        $0, Rout
382                 // end:
383                 ld := ppc64.ALDAR
384                 st := ppc64.ASTDCCC
385                 cmp := ppc64.ACMP
386                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
387                         ld = ppc64.ALWAR
388                         st = ppc64.ASTWCCC
389                         cmp = ppc64.ACMPW
390                 }
391                 r0 := v.Args[0].Reg()
392                 r1 := v.Args[1].Reg()
393                 r2 := v.Args[2].Reg()
394                 out := v.Reg0()
395                 // LWSYNC - Assuming shared data not write-through-required nor
396                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397                 plwsync1 := s.Prog(ppc64.ALWSYNC)
398                 plwsync1.To.Type = obj.TYPE_NONE
399                 // LDAR or LWAR
400                 p := s.Prog(ld)
401                 p.From.Type = obj.TYPE_MEM
402                 p.From.Reg = r0
403                 p.To.Type = obj.TYPE_REG
404                 p.To.Reg = ppc64.REGTMP
405                 // If it is a Compare-and-Swap-Release operation, set the EH field with
406                 // the release hint.
407                 if v.AuxInt == 0 {
408                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
409                 }
410                 // CMP reg1,reg2
411                 p1 := s.Prog(cmp)
412                 p1.From.Type = obj.TYPE_REG
413                 p1.From.Reg = r1
414                 p1.To.Reg = ppc64.REGTMP
415                 p1.To.Type = obj.TYPE_REG
416                 // BNE cas_fail
417                 p2 := s.Prog(ppc64.ABNE)
418                 p2.To.Type = obj.TYPE_BRANCH
419                 // STDCCC or STWCCC
420                 p3 := s.Prog(st)
421                 p3.From.Type = obj.TYPE_REG
422                 p3.From.Reg = r2
423                 p3.To.Type = obj.TYPE_MEM
424                 p3.To.Reg = r0
425                 // BNE retry
426                 p4 := s.Prog(ppc64.ABNE)
427                 p4.To.Type = obj.TYPE_BRANCH
428                 gc.Patch(p4, p)
429                 // LWSYNC - Assuming shared data not write-through-required nor
430                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431                 // If the operation is a CAS-Release, then synchronization is not necessary.
432                 if v.AuxInt != 0 {
433                         plwsync2 := s.Prog(ppc64.ALWSYNC)
434                         plwsync2.To.Type = obj.TYPE_NONE
435                 }
436                 // return true
437                 p5 := s.Prog(ppc64.AMOVD)
438                 p5.From.Type = obj.TYPE_CONST
439                 p5.From.Offset = 1
440                 p5.To.Type = obj.TYPE_REG
441                 p5.To.Reg = out
442                 // BR done
443                 p6 := s.Prog(obj.AJMP)
444                 p6.To.Type = obj.TYPE_BRANCH
445                 // return false
446                 p7 := s.Prog(ppc64.AMOVD)
447                 p7.From.Type = obj.TYPE_CONST
448                 p7.From.Offset = 0
449                 p7.To.Type = obj.TYPE_REG
450                 p7.To.Reg = out
451                 gc.Patch(p2, p7)
452                 // done (label)
453                 p8 := s.Prog(obj.ANOP)
454                 gc.Patch(p6, p8)
455
456         case ssa.OpPPC64LoweredGetClosurePtr:
457                 // Closure pointer is R11 (already)
458                 gc.CheckLoweredGetClosurePtr(v)
459
460         case ssa.OpPPC64LoweredGetCallerSP:
461                 // caller's SP is FixedFrameSize below the address of the first arg
462                 p := s.Prog(ppc64.AMOVD)
463                 p.From.Type = obj.TYPE_ADDR
464                 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465                 p.From.Name = obj.NAME_PARAM
466                 p.To.Type = obj.TYPE_REG
467                 p.To.Reg = v.Reg()
468
469         case ssa.OpPPC64LoweredGetCallerPC:
470                 p := s.Prog(obj.AGETCALLERPC)
471                 p.To.Type = obj.TYPE_REG
472                 p.To.Reg = v.Reg()
473
474         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475                 // input is already rounded
476
477         case ssa.OpLoadReg:
478                 loadOp := loadByType(v.Type)
479                 p := s.Prog(loadOp)
480                 gc.AddrAuto(&p.From, v.Args[0])
481                 p.To.Type = obj.TYPE_REG
482                 p.To.Reg = v.Reg()
483
484         case ssa.OpStoreReg:
485                 storeOp := storeByType(v.Type)
486                 p := s.Prog(storeOp)
487                 p.From.Type = obj.TYPE_REG
488                 p.From.Reg = v.Args[0].Reg()
489                 gc.AddrAuto(&p.To, v)
490
491         case ssa.OpPPC64DIVD:
492                 // For now,
493                 //
494                 // cmp arg1, -1
495                 // be  ahead
496                 // v = arg0 / arg1
497                 // b over
498                 // ahead: v = - arg0
499                 // over: nop
500                 r := v.Reg()
501                 r0 := v.Args[0].Reg()
502                 r1 := v.Args[1].Reg()
503
504                 p := s.Prog(ppc64.ACMP)
505                 p.From.Type = obj.TYPE_REG
506                 p.From.Reg = r1
507                 p.To.Type = obj.TYPE_CONST
508                 p.To.Offset = -1
509
510                 pbahead := s.Prog(ppc64.ABEQ)
511                 pbahead.To.Type = obj.TYPE_BRANCH
512
513                 p = s.Prog(v.Op.Asm())
514                 p.From.Type = obj.TYPE_REG
515                 p.From.Reg = r1
516                 p.Reg = r0
517                 p.To.Type = obj.TYPE_REG
518                 p.To.Reg = r
519
520                 pbover := s.Prog(obj.AJMP)
521                 pbover.To.Type = obj.TYPE_BRANCH
522
523                 p = s.Prog(ppc64.ANEG)
524                 p.To.Type = obj.TYPE_REG
525                 p.To.Reg = r
526                 p.From.Type = obj.TYPE_REG
527                 p.From.Reg = r0
528                 gc.Patch(pbahead, p)
529
530                 p = s.Prog(obj.ANOP)
531                 gc.Patch(pbover, p)
532
533         case ssa.OpPPC64DIVW:
534                 // word-width version of above
535                 r := v.Reg()
536                 r0 := v.Args[0].Reg()
537                 r1 := v.Args[1].Reg()
538
539                 p := s.Prog(ppc64.ACMPW)
540                 p.From.Type = obj.TYPE_REG
541                 p.From.Reg = r1
542                 p.To.Type = obj.TYPE_CONST
543                 p.To.Offset = -1
544
545                 pbahead := s.Prog(ppc64.ABEQ)
546                 pbahead.To.Type = obj.TYPE_BRANCH
547
548                 p = s.Prog(v.Op.Asm())
549                 p.From.Type = obj.TYPE_REG
550                 p.From.Reg = r1
551                 p.Reg = r0
552                 p.To.Type = obj.TYPE_REG
553                 p.To.Reg = r
554
555                 pbover := s.Prog(obj.AJMP)
556                 pbover.To.Type = obj.TYPE_BRANCH
557
558                 p = s.Prog(ppc64.ANEG)
559                 p.To.Type = obj.TYPE_REG
560                 p.To.Reg = r
561                 p.From.Type = obj.TYPE_REG
562                 p.From.Reg = r0
563                 gc.Patch(pbahead, p)
564
565                 p = s.Prog(obj.ANOP)
566                 gc.Patch(pbover, p)
567
568         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
575                 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
576                 r := v.Reg()
577                 r1 := v.Args[0].Reg()
578                 r2 := v.Args[1].Reg()
579                 p := s.Prog(v.Op.Asm())
580                 p.From.Type = obj.TYPE_REG
581                 p.From.Reg = r2
582                 p.Reg = r1
583                 p.To.Type = obj.TYPE_REG
584                 p.To.Reg = r
585
586         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
587                 r1 := v.Args[0].Reg()
588                 r2 := v.Args[1].Reg()
589                 p := s.Prog(v.Op.Asm())
590                 p.From.Type = obj.TYPE_REG
591                 p.From.Reg = r2
592                 p.Reg = r1
593                 p.To.Type = obj.TYPE_REG
594                 p.To.Reg = ppc64.REGTMP // result is not needed
595
596         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
597                 p := s.Prog(v.Op.Asm())
598                 p.From.Type = obj.TYPE_CONST
599                 p.From.Offset = v.AuxInt
600                 p.Reg = v.Args[0].Reg()
601                 p.To.Type = obj.TYPE_REG
602                 p.To.Reg = v.Reg()
603
604         case ssa.OpPPC64MADDLD:
605                 r := v.Reg()
606                 r1 := v.Args[0].Reg()
607                 r2 := v.Args[1].Reg()
608                 r3 := v.Args[2].Reg()
609                 // r = r1*r2 ± r3
610                 p := s.Prog(v.Op.Asm())
611                 p.From.Type = obj.TYPE_REG
612                 p.From.Reg = r1
613                 p.Reg = r2
614                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
615                 p.To.Type = obj.TYPE_REG
616                 p.To.Reg = r
617
618         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
619                 r := v.Reg()
620                 r1 := v.Args[0].Reg()
621                 r2 := v.Args[1].Reg()
622                 r3 := v.Args[2].Reg()
623                 // r = r1*r2 ± r3
624                 p := s.Prog(v.Op.Asm())
625                 p.From.Type = obj.TYPE_REG
626                 p.From.Reg = r1
627                 p.Reg = r3
628                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
629                 p.To.Type = obj.TYPE_REG
630                 p.To.Reg = r
631
632         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
633                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
634                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
635                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
636                 r := v.Reg()
637                 p := s.Prog(v.Op.Asm())
638                 p.To.Type = obj.TYPE_REG
639                 p.To.Reg = r
640                 p.From.Type = obj.TYPE_REG
641                 p.From.Reg = v.Args[0].Reg()
642
643         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
644                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
645                 p := s.Prog(v.Op.Asm())
646                 p.Reg = v.Args[0].Reg()
647                 p.From.Type = obj.TYPE_CONST
648                 p.From.Offset = v.AuxInt
649                 p.To.Type = obj.TYPE_REG
650                 p.To.Reg = v.Reg()
651
652         case ssa.OpPPC64ANDCCconst:
653                 p := s.Prog(v.Op.Asm())
654                 p.Reg = v.Args[0].Reg()
655                 p.From.Type = obj.TYPE_CONST
656                 p.From.Offset = v.AuxInt
657                 p.To.Type = obj.TYPE_REG
658                 p.To.Reg = ppc64.REGTMP // discard result
659
660         case ssa.OpPPC64MOVDaddr:
661                 switch v.Aux.(type) {
662                 default:
663                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
664                 case nil:
665                         // If aux offset and aux int are both 0, and the same
666                         // input and output regs are used, no instruction
667                         // needs to be generated, since it would just be
668                         // addi rx, rx, 0.
669                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
670                                 p := s.Prog(ppc64.AMOVD)
671                                 p.From.Type = obj.TYPE_ADDR
672                                 p.From.Reg = v.Args[0].Reg()
673                                 p.From.Offset = v.AuxInt
674                                 p.To.Type = obj.TYPE_REG
675                                 p.To.Reg = v.Reg()
676                         }
677
678                 case *obj.LSym, *gc.Node:
679                         p := s.Prog(ppc64.AMOVD)
680                         p.From.Type = obj.TYPE_ADDR
681                         p.From.Reg = v.Args[0].Reg()
682                         p.To.Type = obj.TYPE_REG
683                         p.To.Reg = v.Reg()
684                         gc.AddAux(&p.From, v)
685
686                 }
687
688         case ssa.OpPPC64MOVDconst:
689                 p := s.Prog(v.Op.Asm())
690                 p.From.Type = obj.TYPE_CONST
691                 p.From.Offset = v.AuxInt
692                 p.To.Type = obj.TYPE_REG
693                 p.To.Reg = v.Reg()
694
695         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
696                 p := s.Prog(v.Op.Asm())
697                 p.From.Type = obj.TYPE_FCONST
698                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
699                 p.To.Type = obj.TYPE_REG
700                 p.To.Reg = v.Reg()
701
702         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
703                 p := s.Prog(v.Op.Asm())
704                 p.From.Type = obj.TYPE_REG
705                 p.From.Reg = v.Args[0].Reg()
706                 p.To.Type = obj.TYPE_REG
707                 p.To.Reg = v.Args[1].Reg()
708
709         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
710                 p := s.Prog(v.Op.Asm())
711                 p.From.Type = obj.TYPE_REG
712                 p.From.Reg = v.Args[0].Reg()
713                 p.To.Type = obj.TYPE_CONST
714                 p.To.Offset = v.AuxInt
715
716         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
717                 // Shift in register to required size
718                 p := s.Prog(v.Op.Asm())
719                 p.From.Type = obj.TYPE_REG
720                 p.From.Reg = v.Args[0].Reg()
721                 p.To.Reg = v.Reg()
722                 p.To.Type = obj.TYPE_REG
723
724         case ssa.OpPPC64MOVDload:
725
726                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
727                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
728                 // the offset is not known until link time. If the load of a go.string uses relocation for the
729                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
730                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
731                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
732                 // go.string types because other types will have proper alignment.
733
734                 gostring := false
735                 switch n := v.Aux.(type) {
736                 case *obj.LSym:
737                         gostring = strings.HasPrefix(n.Name, "go.string.")
738                 }
739                 if gostring {
740                         // Generate full addr of the go.string const
741                         // including AuxInt
742                         p := s.Prog(ppc64.AMOVD)
743                         p.From.Type = obj.TYPE_ADDR
744                         p.From.Reg = v.Args[0].Reg()
745                         gc.AddAux(&p.From, v)
746                         p.To.Type = obj.TYPE_REG
747                         p.To.Reg = v.Reg()
748                         // Load go.string using 0 offset
749                         p = s.Prog(v.Op.Asm())
750                         p.From.Type = obj.TYPE_MEM
751                         p.From.Reg = v.Reg()
752                         p.To.Type = obj.TYPE_REG
753                         p.To.Reg = v.Reg()
754                         break
755                 }
756                 // Not a go.string, generate a normal load
757                 fallthrough
758
759         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
760                 p := s.Prog(v.Op.Asm())
761                 p.From.Type = obj.TYPE_MEM
762                 p.From.Reg = v.Args[0].Reg()
763                 gc.AddAux(&p.From, v)
764                 p.To.Type = obj.TYPE_REG
765                 p.To.Reg = v.Reg()
766
767         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
768                 p := s.Prog(v.Op.Asm())
769                 p.From.Type = obj.TYPE_MEM
770                 p.From.Reg = v.Args[0].Reg()
771                 p.To.Type = obj.TYPE_REG
772                 p.To.Reg = v.Reg()
773
774         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
775                 p := s.Prog(v.Op.Asm())
776                 p.To.Type = obj.TYPE_MEM
777                 p.To.Reg = v.Args[0].Reg()
778                 p.From.Type = obj.TYPE_REG
779                 p.From.Reg = v.Args[1].Reg()
780
781         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
782                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
783                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
784                 p := s.Prog(v.Op.Asm())
785                 p.From.Type = obj.TYPE_MEM
786                 p.From.Reg = v.Args[0].Reg()
787                 p.From.Index = v.Args[1].Reg()
788                 p.To.Type = obj.TYPE_REG
789                 p.To.Reg = v.Reg()
790
791         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
792                 p := s.Prog(v.Op.Asm())
793                 p.From.Type = obj.TYPE_REG
794                 p.From.Reg = ppc64.REGZERO
795                 p.To.Type = obj.TYPE_MEM
796                 p.To.Reg = v.Args[0].Reg()
797                 gc.AddAux(&p.To, v)
798
799         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
800                 p := s.Prog(v.Op.Asm())
801                 p.From.Type = obj.TYPE_REG
802                 p.From.Reg = v.Args[1].Reg()
803                 p.To.Type = obj.TYPE_MEM
804                 p.To.Reg = v.Args[0].Reg()
805                 gc.AddAux(&p.To, v)
806
807         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
808                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
809                 ssa.OpPPC64MOVHBRstoreidx:
810                 p := s.Prog(v.Op.Asm())
811                 p.From.Type = obj.TYPE_REG
812                 p.From.Reg = v.Args[2].Reg()
813                 p.To.Index = v.Args[1].Reg()
814                 p.To.Type = obj.TYPE_MEM
815                 p.To.Reg = v.Args[0].Reg()
816
817         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
818                 // ISEL, ISELB
819                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
820                 // ISEL only accepts 0, 1, 2 condition values but the others can be
821                 // achieved by swapping operand order.
822                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
823                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
824                 // ISELB is used when a boolean result is needed, returning 0 or 1
825                 p := s.Prog(ppc64.AISEL)
826                 p.To.Type = obj.TYPE_REG
827                 p.To.Reg = v.Reg()
828                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
829                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
830                 if v.Op == ssa.OpPPC64ISEL {
831                         r.Reg = v.Args[1].Reg()
832                 }
833                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
834                 if v.AuxInt > 3 {
835                         p.Reg = r.Reg
836                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
837                 } else {
838                         p.Reg = v.Args[0].Reg()
839                         p.SetFrom3(r)
840                 }
841                 p.From.Type = obj.TYPE_CONST
842                 p.From.Offset = v.AuxInt & 3
843
844         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
845                 // The LoweredQuad code generation
846                 // generates STXV instructions on
847                 // power9. The Short variation is used
848                 // if no loop is generated.
849
850                 // sizes >= 128 generate a loop as follows:
851
852                 // Set up loop counter in CTR, used by BC
853                 // XXLXOR clears VS32
854                 //       XXLXOR VS32,VS32,VS32
855                 //       MOVD len/64,REG_TMP
856                 //       MOVD REG_TMP,CTR
857                 //       loop:
858                 //       STXV VS32,0(R20)
859                 //       STXV VS32,16(R20)
860                 //       STXV VS32,32(R20)
861                 //       STXV VS32,48(R20)
862                 //       ADD  $64,R20
863                 //       BC   16, 0, loop
864
865                 // Loop iterations; each iteration clears 64 bytes
866                 ctr := v.AuxInt / 64
867
868                 // Remainder bytes
869                 rem := v.AuxInt % 64
870
871                 // Only generate a loop if there is more
872                 // than 1 iteration.
873                 if ctr > 1 {
874                         // Set up VS32 (V0) to hold 0s
875                         p := s.Prog(ppc64.AXXLXOR)
876                         p.From.Type = obj.TYPE_REG
877                         p.From.Reg = ppc64.REG_VS32
878                         p.To.Type = obj.TYPE_REG
879                         p.To.Reg = ppc64.REG_VS32
880                         p.Reg = ppc64.REG_VS32
881
882                         // Set up CTR loop counter
883                         p = s.Prog(ppc64.AMOVD)
884                         p.From.Type = obj.TYPE_CONST
885                         p.From.Offset = ctr
886                         p.To.Type = obj.TYPE_REG
887                         p.To.Reg = ppc64.REGTMP
888
889                         p = s.Prog(ppc64.AMOVD)
890                         p.From.Type = obj.TYPE_REG
891                         p.From.Reg = ppc64.REGTMP
892                         p.To.Type = obj.TYPE_REG
893                         p.To.Reg = ppc64.REG_CTR
894
895                         // Don't generate padding for
896                         // loops with few iterations.
897                         if ctr > 3 {
898                                 p = s.Prog(obj.APCALIGN)
899                                 p.From.Type = obj.TYPE_CONST
900                                 p.From.Offset = 16
901                         }
902
903                         // generate 4 STXVs to zero 64 bytes
904                         var top *obj.Prog
905
906                         p = s.Prog(ppc64.ASTXV)
907                         p.From.Type = obj.TYPE_REG
908                         p.From.Reg = ppc64.REG_VS32
909                         p.To.Type = obj.TYPE_MEM
910                         p.To.Reg = v.Args[0].Reg()
911
912                         //  Save the top of loop
913                         if top == nil {
914                                 top = p
915                         }
916                         p = s.Prog(ppc64.ASTXV)
917                         p.From.Type = obj.TYPE_REG
918                         p.From.Reg = ppc64.REG_VS32
919                         p.To.Type = obj.TYPE_MEM
920                         p.To.Reg = v.Args[0].Reg()
921                         p.To.Offset = 16
922
923                         p = s.Prog(ppc64.ASTXV)
924                         p.From.Type = obj.TYPE_REG
925                         p.From.Reg = ppc64.REG_VS32
926                         p.To.Type = obj.TYPE_MEM
927                         p.To.Reg = v.Args[0].Reg()
928                         p.To.Offset = 32
929
930                         p = s.Prog(ppc64.ASTXV)
931                         p.From.Type = obj.TYPE_REG
932                         p.From.Reg = ppc64.REG_VS32
933                         p.To.Type = obj.TYPE_MEM
934                         p.To.Reg = v.Args[0].Reg()
935                         p.To.Offset = 48
936
937                         // Increment address for the
938                         // 64 bytes just zeroed.
939                         p = s.Prog(ppc64.AADD)
940                         p.Reg = v.Args[0].Reg()
941                         p.From.Type = obj.TYPE_CONST
942                         p.From.Offset = 64
943                         p.To.Type = obj.TYPE_REG
944                         p.To.Reg = v.Args[0].Reg()
945
946                         // Branch back to top of loop
947                         // based on CTR
948                         // BC with BO_BCTR generates bdnz
949                         p = s.Prog(ppc64.ABC)
950                         p.From.Type = obj.TYPE_CONST
951                         p.From.Offset = ppc64.BO_BCTR
952                         p.Reg = ppc64.REG_R0
953                         p.To.Type = obj.TYPE_BRANCH
954                         gc.Patch(p, top)
955                 }
956                 // When ctr == 1 the loop was not generated but
957                 // there are at least 64 bytes to clear, so add
958                 // that to the remainder to generate the code
959                 // to clear those doublewords
960                 if ctr == 1 {
961                         rem += 64
962                 }
963
964                 // Clear the remainder starting at offset zero
965                 offset := int64(0)
966
967                 if rem >= 16 && ctr <= 1 {
968                         // If the XXLXOR hasn't already been
969                         // generated, do it here to initialize
970                         // VS32 (V0) to 0.
971                         p := s.Prog(ppc64.AXXLXOR)
972                         p.From.Type = obj.TYPE_REG
973                         p.From.Reg = ppc64.REG_VS32
974                         p.To.Type = obj.TYPE_REG
975                         p.To.Reg = ppc64.REG_VS32
976                         p.Reg = ppc64.REG_VS32
977                 }
978                 // Generate STXV for 32 or 64
979                 // bytes.
980                 for rem >= 32 {
981                         p := s.Prog(ppc64.ASTXV)
982                         p.From.Type = obj.TYPE_REG
983                         p.From.Reg = ppc64.REG_VS32
984                         p.To.Type = obj.TYPE_MEM
985                         p.To.Reg = v.Args[0].Reg()
986                         p.To.Offset = offset
987
988                         p = s.Prog(ppc64.ASTXV)
989                         p.From.Type = obj.TYPE_REG
990                         p.From.Reg = ppc64.REG_VS32
991                         p.To.Type = obj.TYPE_MEM
992                         p.To.Reg = v.Args[0].Reg()
993                         p.To.Offset = offset + 16
994                         offset += 32
995                         rem -= 32
996                 }
997                 // Generate 16 bytes
998                 if rem >= 16 {
999                         p := s.Prog(ppc64.ASTXV)
1000                         p.From.Type = obj.TYPE_REG
1001                         p.From.Reg = ppc64.REG_VS32
1002                         p.To.Type = obj.TYPE_MEM
1003                         p.To.Reg = v.Args[0].Reg()
1004                         p.To.Offset = offset
1005                         offset += 16
1006                         rem -= 16
1007                 }
1008
1009                 // first clear as many doublewords as possible
1010                 // then clear remaining sizes as available
1011                 for rem > 0 {
1012                         op, size := ppc64.AMOVB, int64(1)
1013                         switch {
1014                         case rem >= 8:
1015                                 op, size = ppc64.AMOVD, 8
1016                         case rem >= 4:
1017                                 op, size = ppc64.AMOVW, 4
1018                         case rem >= 2:
1019                                 op, size = ppc64.AMOVH, 2
1020                         }
1021                         p := s.Prog(op)
1022                         p.From.Type = obj.TYPE_REG
1023                         p.From.Reg = ppc64.REG_R0
1024                         p.To.Type = obj.TYPE_MEM
1025                         p.To.Reg = v.Args[0].Reg()
1026                         p.To.Offset = offset
1027                         rem -= size
1028                         offset += size
1029                 }
1030
1031         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1032
1033                 // Unaligned data doesn't hurt performance
1034                 // for these instructions on power8.
1035
1036                 // For sizes >= 64 generate a loop as follows:
1037
1038                 // Set up loop counter in CTR, used by BC
1039                 //       XXLXOR VS32,VS32,VS32
1040                 //       MOVD len/32,REG_TMP
1041                 //       MOVD REG_TMP,CTR
1042                 //       MOVD $16,REG_TMP
1043                 //       loop:
1044                 //       STXVD2X VS32,(R0)(R20)
1045                 //       STXVD2X VS32,(R31)(R20)
1046                 //       ADD  $32,R20
1047                 //       BC   16, 0, loop
1048                 //
1049                 // any remainder is done as described below
1050
1051                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1052                 // then handle the remainder
1053                 //      MOVD R0,(R20)
1054                 //      MOVD R0,8(R20)
1055                 // .... etc.
1056                 //
1057                 // the remainder bytes are cleared using one or more
1058                 // of the following instructions with the appropriate
1059                 // offsets depending which instructions are needed
1060                 //
1061                 //      MOVW R0,n1(R20) 4 bytes
1062                 //      MOVH R0,n2(R20) 2 bytes
1063                 //      MOVB R0,n3(R20) 1 byte
1064                 //
1065                 // 7 bytes: MOVW, MOVH, MOVB
1066                 // 6 bytes: MOVW, MOVH
1067                 // 5 bytes: MOVW, MOVB
1068                 // 3 bytes: MOVH, MOVB
1069
1070                 // each loop iteration does 32 bytes
1071                 ctr := v.AuxInt / 32
1072
1073                 // remainder bytes
1074                 rem := v.AuxInt % 32
1075
1076                 // only generate a loop if there is more
1077                 // than 1 iteration.
1078                 if ctr > 1 {
1079                         // Set up VS32 (V0) to hold 0s
1080                         p := s.Prog(ppc64.AXXLXOR)
1081                         p.From.Type = obj.TYPE_REG
1082                         p.From.Reg = ppc64.REG_VS32
1083                         p.To.Type = obj.TYPE_REG
1084                         p.To.Reg = ppc64.REG_VS32
1085                         p.Reg = ppc64.REG_VS32
1086
1087                         // Set up CTR loop counter
1088                         p = s.Prog(ppc64.AMOVD)
1089                         p.From.Type = obj.TYPE_CONST
1090                         p.From.Offset = ctr
1091                         p.To.Type = obj.TYPE_REG
1092                         p.To.Reg = ppc64.REGTMP
1093
1094                         p = s.Prog(ppc64.AMOVD)
1095                         p.From.Type = obj.TYPE_REG
1096                         p.From.Reg = ppc64.REGTMP
1097                         p.To.Type = obj.TYPE_REG
1098                         p.To.Reg = ppc64.REG_CTR
1099
1100                         // Set up R31 to hold index value 16
1101                         p = s.Prog(ppc64.AMOVD)
1102                         p.From.Type = obj.TYPE_CONST
1103                         p.From.Offset = 16
1104                         p.To.Type = obj.TYPE_REG
1105                         p.To.Reg = ppc64.REGTMP
1106
1107                         // Don't add padding for alignment
1108                         // with few loop iterations.
1109                         if ctr > 3 {
1110                                 p = s.Prog(obj.APCALIGN)
1111                                 p.From.Type = obj.TYPE_CONST
1112                                 p.From.Offset = 16
1113                         }
1114
1115                         // generate 2 STXVD2Xs, each storing 16 bytes
1116                         // when this is a loop then the top must be saved
1117                         var top *obj.Prog
1118                         // This is the top of loop
1119
1120                         p = s.Prog(ppc64.ASTXVD2X)
1121                         p.From.Type = obj.TYPE_REG
1122                         p.From.Reg = ppc64.REG_VS32
1123                         p.To.Type = obj.TYPE_MEM
1124                         p.To.Reg = v.Args[0].Reg()
1125                         p.To.Index = ppc64.REGZERO
1126                         // Save the top of loop
1127                         if top == nil {
1128                                 top = p
1129                         }
1130                         p = s.Prog(ppc64.ASTXVD2X)
1131                         p.From.Type = obj.TYPE_REG
1132                         p.From.Reg = ppc64.REG_VS32
1133                         p.To.Type = obj.TYPE_MEM
1134                         p.To.Reg = v.Args[0].Reg()
1135                         p.To.Index = ppc64.REGTMP
1136
1137                         // Increment address for the
1138                         // 4 doublewords just zeroed.
1139                         p = s.Prog(ppc64.AADD)
1140                         p.Reg = v.Args[0].Reg()
1141                         p.From.Type = obj.TYPE_CONST
1142                         p.From.Offset = 32
1143                         p.To.Type = obj.TYPE_REG
1144                         p.To.Reg = v.Args[0].Reg()
1145
1146                         // Branch back to top of loop
1147                         // based on CTR
1148                         // BC with BO_BCTR generates bdnz
1149                         p = s.Prog(ppc64.ABC)
1150                         p.From.Type = obj.TYPE_CONST
1151                         p.From.Offset = ppc64.BO_BCTR
1152                         p.Reg = ppc64.REG_R0
1153                         p.To.Type = obj.TYPE_BRANCH
1154                         gc.Patch(p, top)
1155                 }
1156
1157                 // when ctr == 1 the loop was not generated but
1158                 // there are at least 32 bytes to clear, so add
1159                 // that to the remainder to generate the code
1160                 // to clear those doublewords
1161                 if ctr == 1 {
1162                         rem += 32
1163                 }
1164
1165                 // clear the remainder starting at offset zero
1166                 offset := int64(0)
1167
1168                 // first clear as many doublewords as possible
1169                 // then clear remaining sizes as available
1170                 for rem > 0 {
1171                         op, size := ppc64.AMOVB, int64(1)
1172                         switch {
1173                         case rem >= 8:
1174                                 op, size = ppc64.AMOVD, 8
1175                         case rem >= 4:
1176                                 op, size = ppc64.AMOVW, 4
1177                         case rem >= 2:
1178                                 op, size = ppc64.AMOVH, 2
1179                         }
1180                         p := s.Prog(op)
1181                         p.From.Type = obj.TYPE_REG
1182                         p.From.Reg = ppc64.REG_R0
1183                         p.To.Type = obj.TYPE_MEM
1184                         p.To.Reg = v.Args[0].Reg()
1185                         p.To.Offset = offset
1186                         rem -= size
1187                         offset += size
1188                 }
1189
1190         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1191
1192                 bytesPerLoop := int64(32)
1193                 // This will be used when moving more
1194                 // than 8 bytes.  Moves start with
1195                 // as many 8 byte moves as possible, then
1196                 // 4, 2, or 1 byte(s) as remaining.  This will
1197                 // work and be efficient for power8 or later.
1198                 // If there are 64 or more bytes, then a
1199                 // loop is generated to move 32 bytes and
1200                 // update the src and dst addresses on each
1201                 // iteration. When < 64 bytes, the appropriate
1202                 // number of moves are generated based on the
1203                 // size.
1204                 // When moving >= 64 bytes a loop is used
1205                 //      MOVD len/32,REG_TMP
1206                 //      MOVD REG_TMP,CTR
1207                 //      MOVD $16,REG_TMP
1208                 // top:
1209                 //      LXVD2X (R0)(R21),VS32
1210                 //      LXVD2X (R31)(R21),VS33
1211                 //      ADD $32,R21
1212                 //      STXVD2X VS32,(R0)(R20)
1213                 //      STXVD2X VS33,(R31)(R20)
1214                 //      ADD $32,R20
1215                 //      BC 16,0,top
1216                 // Bytes not moved by this loop are moved
1217                 // with a combination of the following instructions,
1218                 // starting with the largest sizes and generating as
1219                 // many as needed, using the appropriate offset value.
1220                 //      MOVD  n(R21),R31
1221                 //      MOVD  R31,n(R20)
1222                 //      MOVW  n1(R21),R31
1223                 //      MOVW  R31,n1(R20)
1224                 //      MOVH  n2(R21),R31
1225                 //      MOVH  R31,n2(R20)
1226                 //      MOVB  n3(R21),R31
1227                 //      MOVB  R31,n3(R20)
1228
1229                 // Each loop iteration moves 32 bytes
1230                 ctr := v.AuxInt / bytesPerLoop
1231
1232                 // Remainder after the loop
1233                 rem := v.AuxInt % bytesPerLoop
1234
1235                 dstReg := v.Args[0].Reg()
1236                 srcReg := v.Args[1].Reg()
1237
1238                 // The set of registers used here must match the clobbered reg list
1239                 // in PPC64Ops.go.
1240                 offset := int64(0)
1241
1242                 // top of the loop
1243                 var top *obj.Prog
1244                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1245                 if ctr > 1 {
1246                         // Set up the CTR
1247                         p := s.Prog(ppc64.AMOVD)
1248                         p.From.Type = obj.TYPE_CONST
1249                         p.From.Offset = ctr
1250                         p.To.Type = obj.TYPE_REG
1251                         p.To.Reg = ppc64.REGTMP
1252
1253                         p = s.Prog(ppc64.AMOVD)
1254                         p.From.Type = obj.TYPE_REG
1255                         p.From.Reg = ppc64.REGTMP
1256                         p.To.Type = obj.TYPE_REG
1257                         p.To.Reg = ppc64.REG_CTR
1258
1259                         // Use REGTMP as index reg
1260                         p = s.Prog(ppc64.AMOVD)
1261                         p.From.Type = obj.TYPE_CONST
1262                         p.From.Offset = 16
1263                         p.To.Type = obj.TYPE_REG
1264                         p.To.Reg = ppc64.REGTMP
1265
1266                         // Don't add padding for
1267                         // alignment with small iteration
1268                         // counts.
1269                         if ctr > 3 {
1270                                 p = s.Prog(obj.APCALIGN)
1271                                 p.From.Type = obj.TYPE_CONST
1272                                 p.From.Offset = 16
1273                         }
1274
1275                         // Generate 16 byte loads and stores.
1276                         // Use temp register for index (16)
1277                         // on the second one.
1278
1279                         p = s.Prog(ppc64.ALXVD2X)
1280                         p.From.Type = obj.TYPE_MEM
1281                         p.From.Reg = srcReg
1282                         p.From.Index = ppc64.REGZERO
1283                         p.To.Type = obj.TYPE_REG
1284                         p.To.Reg = ppc64.REG_VS32
1285                         if top == nil {
1286                                 top = p
1287                         }
1288                         p = s.Prog(ppc64.ALXVD2X)
1289                         p.From.Type = obj.TYPE_MEM
1290                         p.From.Reg = srcReg
1291                         p.From.Index = ppc64.REGTMP
1292                         p.To.Type = obj.TYPE_REG
1293                         p.To.Reg = ppc64.REG_VS33
1294
1295                         // increment the src reg for next iteration
1296                         p = s.Prog(ppc64.AADD)
1297                         p.Reg = srcReg
1298                         p.From.Type = obj.TYPE_CONST
1299                         p.From.Offset = bytesPerLoop
1300                         p.To.Type = obj.TYPE_REG
1301                         p.To.Reg = srcReg
1302
1303                         // generate 16 byte stores
1304                         p = s.Prog(ppc64.ASTXVD2X)
1305                         p.From.Type = obj.TYPE_REG
1306                         p.From.Reg = ppc64.REG_VS32
1307                         p.To.Type = obj.TYPE_MEM
1308                         p.To.Reg = dstReg
1309                         p.To.Index = ppc64.REGZERO
1310
1311                         p = s.Prog(ppc64.ASTXVD2X)
1312                         p.From.Type = obj.TYPE_REG
1313                         p.From.Reg = ppc64.REG_VS33
1314                         p.To.Type = obj.TYPE_MEM
1315                         p.To.Reg = dstReg
1316                         p.To.Index = ppc64.REGTMP
1317
1318                         // increment the dst reg for next iteration
1319                         p = s.Prog(ppc64.AADD)
1320                         p.Reg = dstReg
1321                         p.From.Type = obj.TYPE_CONST
1322                         p.From.Offset = bytesPerLoop
1323                         p.To.Type = obj.TYPE_REG
1324                         p.To.Reg = dstReg
1325
1326                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1327                         // to loop top.
1328                         p = s.Prog(ppc64.ABC)
1329                         p.From.Type = obj.TYPE_CONST
1330                         p.From.Offset = ppc64.BO_BCTR
1331                         p.Reg = ppc64.REG_R0
1332                         p.To.Type = obj.TYPE_BRANCH
1333                         gc.Patch(p, top)
1334
1335                         // srcReg and dstReg were incremented in the loop, so
1336                         // later instructions start with offset 0.
1337                         offset = int64(0)
1338                 }
1339
1340                 // No loop was generated for one iteration, so
1341                 // add 32 bytes to the remainder to move those bytes.
1342                 if ctr == 1 {
1343                         rem += bytesPerLoop
1344                 }
1345
1346                 if rem >= 16 {
1347                         // Generate 16 byte loads and stores.
1348                         // Use temp register for index (value 16)
1349                         // on the second one.
1350                         p := s.Prog(ppc64.ALXVD2X)
1351                         p.From.Type = obj.TYPE_MEM
1352                         p.From.Reg = srcReg
1353                         p.From.Index = ppc64.REGZERO
1354                         p.To.Type = obj.TYPE_REG
1355                         p.To.Reg = ppc64.REG_VS32
1356
1357                         p = s.Prog(ppc64.ASTXVD2X)
1358                         p.From.Type = obj.TYPE_REG
1359                         p.From.Reg = ppc64.REG_VS32
1360                         p.To.Type = obj.TYPE_MEM
1361                         p.To.Reg = dstReg
1362                         p.To.Index = ppc64.REGZERO
1363
1364                         offset = 16
1365                         rem -= 16
1366
1367                         if rem >= 16 {
1368                                 // Use REGTMP as index reg
1369                                 p := s.Prog(ppc64.AMOVD)
1370                                 p.From.Type = obj.TYPE_CONST
1371                                 p.From.Offset = 16
1372                                 p.To.Type = obj.TYPE_REG
1373                                 p.To.Reg = ppc64.REGTMP
1374
1375                                 p = s.Prog(ppc64.ALXVD2X)
1376                                 p.From.Type = obj.TYPE_MEM
1377                                 p.From.Reg = srcReg
1378                                 p.From.Index = ppc64.REGTMP
1379                                 p.To.Type = obj.TYPE_REG
1380                                 p.To.Reg = ppc64.REG_VS32
1381
1382                                 p = s.Prog(ppc64.ASTXVD2X)
1383                                 p.From.Type = obj.TYPE_REG
1384                                 p.From.Reg = ppc64.REG_VS32
1385                                 p.To.Type = obj.TYPE_MEM
1386                                 p.To.Reg = dstReg
1387                                 p.To.Index = ppc64.REGTMP
1388
1389                                 offset = 32
1390                                 rem -= 16
1391                         }
1392                 }
1393
1394                 // Generate all the remaining load and store pairs, starting with
1395                 // as many 8 byte moves as possible, then 4, 2, 1.
1396                 for rem > 0 {
1397                         op, size := ppc64.AMOVB, int64(1)
1398                         switch {
1399                         case rem >= 8:
1400                                 op, size = ppc64.AMOVD, 8
1401                         case rem >= 4:
1402                                 op, size = ppc64.AMOVW, 4
1403                         case rem >= 2:
1404                                 op, size = ppc64.AMOVH, 2
1405                         }
1406                         // Load
1407                         p := s.Prog(op)
1408                         p.To.Type = obj.TYPE_REG
1409                         p.To.Reg = ppc64.REGTMP
1410                         p.From.Type = obj.TYPE_MEM
1411                         p.From.Reg = srcReg
1412                         p.From.Offset = offset
1413
1414                         // Store
1415                         p = s.Prog(op)
1416                         p.From.Type = obj.TYPE_REG
1417                         p.From.Reg = ppc64.REGTMP
1418                         p.To.Type = obj.TYPE_MEM
1419                         p.To.Reg = dstReg
1420                         p.To.Offset = offset
1421                         rem -= size
1422                         offset += size
1423                 }
1424
1425         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1426                 bytesPerLoop := int64(64)
1427                 // This is used when moving more
1428                 // than 8 bytes on power9.  Moves start with
1429                 // as many 8 byte moves as possible, then
1430                 // 4, 2, or 1 byte(s) as remaining.  This will
1431                 // work and be efficient for power8 or later.
1432                 // If there are 128 or more bytes, then a
1433                 // loop is generated to move 64 bytes and
1434                 // update the src and dst addresses on each
1435                 // iteration. When < 128 bytes, the appropriate
1436                 // number of moves are generated based on the
1437                 // size.
1438                 // When moving >= 128 bytes a loop is used
1439                 //      MOVD len/64,REG_TMP
1440                 //      MOVD REG_TMP,CTR
1441                 // top:
1442                 //      LXV 0(R21),VS32;   LXV 16(R21),VS33
1443                 //      STXV VS32,0(R20);  STXV VS33,16(R20)
1444                 //      LXV 32(R21),VS32;  LXV 48(R21),VS33
1445                 //      STXV VS32,32(R20); STXV VS33,48(R20)
1446                 //      ADD $64,R21
1447                 //      ADD $64,R20
1448                 //      BC 16,0,top
1449                 // Bytes not moved by this loop are moved
1450                 // with a combination of the following instructions,
1451                 // starting with the largest sizes and generating as
1452                 // many as needed, using the appropriate offset value.
1453                 //      MOVD  n(R21),R31
1454                 //      MOVD  R31,n(R20)
1455                 //      MOVW  n1(R21),R31
1456                 //      MOVW  R31,n1(R20)
1457                 //      MOVH  n2(R21),R31
1458                 //      MOVH  R31,n2(R20)
1459                 //      MOVB  n3(R21),R31
1460                 //      MOVB  R31,n3(R20)
1461
1462                 // Each loop iteration moves 64 bytes (bytesPerLoop)
1463                 ctr := v.AuxInt / bytesPerLoop
1464
1465                 // Remainder after the loop
1466                 rem := v.AuxInt % bytesPerLoop
1467
1468                 dstReg := v.Args[0].Reg()
1469                 srcReg := v.Args[1].Reg()
1470
1471                 offset := int64(0)
1472
1473                 // top of the loop
1474                 var top *obj.Prog
1475
1476                 // Only generate looping code when the loop counter is > 1, i.e. for >= 128 bytes
1477                 if ctr > 1 {
1478                         // Set up the CTR
1479                         p := s.Prog(ppc64.AMOVD)
1480                         p.From.Type = obj.TYPE_CONST
1481                         p.From.Offset = ctr
1482                         p.To.Type = obj.TYPE_REG
1483                         p.To.Reg = ppc64.REGTMP
1484
1485                         p = s.Prog(ppc64.AMOVD)
1486                         p.From.Type = obj.TYPE_REG
1487                         p.From.Reg = ppc64.REGTMP
1488                         p.To.Type = obj.TYPE_REG
1489                         p.To.Reg = ppc64.REG_CTR
1490
1491                         p = s.Prog(obj.APCALIGN)
1492                         p.From.Type = obj.TYPE_CONST
1493                         p.From.Offset = 16
1494
1495                         // Generate 16 byte loads and stores.
1496                         p = s.Prog(ppc64.ALXV)
1497                         p.From.Type = obj.TYPE_MEM
1498                         p.From.Reg = srcReg
1499                         p.From.Offset = offset
1500                         p.To.Type = obj.TYPE_REG
1501                         p.To.Reg = ppc64.REG_VS32
1502                         if top == nil {
1503                                 top = p
1504                         }
1505                         p = s.Prog(ppc64.ALXV)
1506                         p.From.Type = obj.TYPE_MEM
1507                         p.From.Reg = srcReg
1508                         p.From.Offset = offset + 16
1509                         p.To.Type = obj.TYPE_REG
1510                         p.To.Reg = ppc64.REG_VS33
1511
1512                         // generate 16 byte stores
1513                         p = s.Prog(ppc64.ASTXV)
1514                         p.From.Type = obj.TYPE_REG
1515                         p.From.Reg = ppc64.REG_VS32
1516                         p.To.Type = obj.TYPE_MEM
1517                         p.To.Reg = dstReg
1518                         p.To.Offset = offset
1519
1520                         p = s.Prog(ppc64.ASTXV)
1521                         p.From.Type = obj.TYPE_REG
1522                         p.From.Reg = ppc64.REG_VS33
1523                         p.To.Type = obj.TYPE_MEM
1524                         p.To.Reg = dstReg
1525                         p.To.Offset = offset + 16
1526
1527                         // Generate 16 byte loads and stores.
1528                         p = s.Prog(ppc64.ALXV)
1529                         p.From.Type = obj.TYPE_MEM
1530                         p.From.Reg = srcReg
1531                         p.From.Offset = offset + 32
1532                         p.To.Type = obj.TYPE_REG
1533                         p.To.Reg = ppc64.REG_VS32
1534
1535                         p = s.Prog(ppc64.ALXV)
1536                         p.From.Type = obj.TYPE_MEM
1537                         p.From.Reg = srcReg
1538                         p.From.Offset = offset + 48
1539                         p.To.Type = obj.TYPE_REG
1540                         p.To.Reg = ppc64.REG_VS33
1541
1542                         // generate 16 byte stores
1543                         p = s.Prog(ppc64.ASTXV)
1544                         p.From.Type = obj.TYPE_REG
1545                         p.From.Reg = ppc64.REG_VS32
1546                         p.To.Type = obj.TYPE_MEM
1547                         p.To.Reg = dstReg
1548                         p.To.Offset = offset + 32
1549
1550                         p = s.Prog(ppc64.ASTXV)
1551                         p.From.Type = obj.TYPE_REG
1552                         p.From.Reg = ppc64.REG_VS33
1553                         p.To.Type = obj.TYPE_MEM
1554                         p.To.Reg = dstReg
1555                         p.To.Offset = offset + 48
1556
1557                         // increment the src reg for next iteration
1558                         p = s.Prog(ppc64.AADD)
1559                         p.Reg = srcReg
1560                         p.From.Type = obj.TYPE_CONST
1561                         p.From.Offset = bytesPerLoop
1562                         p.To.Type = obj.TYPE_REG
1563                         p.To.Reg = srcReg
1564
1565                         // increment the dst reg for next iteration
1566                         p = s.Prog(ppc64.AADD)
1567                         p.Reg = dstReg
1568                         p.From.Type = obj.TYPE_CONST
1569                         p.From.Offset = bytesPerLoop
1570                         p.To.Type = obj.TYPE_REG
1571                         p.To.Reg = dstReg
1572
1573                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1574                         // to loop top.
1575                         p = s.Prog(ppc64.ABC)
1576                         p.From.Type = obj.TYPE_CONST
1577                         p.From.Offset = ppc64.BO_BCTR
1578                         p.Reg = ppc64.REG_R0
1579                         p.To.Type = obj.TYPE_BRANCH
1580                         gc.Patch(p, top)
1581
1582                         // srcReg and dstReg were incremented in the loop, so
1583                         // later instructions start with offset 0.
1584                         offset = int64(0)
1585                 }
1586
1587                 // No loop was generated for one iteration, so
1588                 // add its 64 bytes (bytesPerLoop) to the remainder to move those bytes.
1589                 if ctr == 1 {
1590                         rem += bytesPerLoop
1591                 }
1592                 if rem >= 32 {
1593                         p := s.Prog(ppc64.ALXV)
1594                         p.From.Type = obj.TYPE_MEM
1595                         p.From.Reg = srcReg
1596                         p.To.Type = obj.TYPE_REG
1597                         p.To.Reg = ppc64.REG_VS32
1598
1599                         p = s.Prog(ppc64.ALXV)
1600                         p.From.Type = obj.TYPE_MEM
1601                         p.From.Reg = srcReg
1602                         p.From.Offset = 16
1603                         p.To.Type = obj.TYPE_REG
1604                         p.To.Reg = ppc64.REG_VS33
1605
1606                         p = s.Prog(ppc64.ASTXV)
1607                         p.From.Type = obj.TYPE_REG
1608                         p.From.Reg = ppc64.REG_VS32
1609                         p.To.Type = obj.TYPE_MEM
1610                         p.To.Reg = dstReg
1611
1612                         p = s.Prog(ppc64.ASTXV)
1613                         p.From.Type = obj.TYPE_REG
1614                         p.From.Reg = ppc64.REG_VS33
1615                         p.To.Type = obj.TYPE_MEM
1616                         p.To.Reg = dstReg
1617                         p.To.Offset = 16
1618
1619                         offset = 32
1620                         rem -= 32
1621                 }
1622
1623                 if rem >= 16 {
1624                         // Generate 16 byte loads and stores.
1625                         p := s.Prog(ppc64.ALXV)
1626                         p.From.Type = obj.TYPE_MEM
1627                         p.From.Reg = srcReg
1628                         p.From.Offset = offset
1629                         p.To.Type = obj.TYPE_REG
1630                         p.To.Reg = ppc64.REG_VS32
1631
1632                         p = s.Prog(ppc64.ASTXV)
1633                         p.From.Type = obj.TYPE_REG
1634                         p.From.Reg = ppc64.REG_VS32
1635                         p.To.Type = obj.TYPE_MEM
1636                         p.To.Reg = dstReg
1637                         p.To.Offset = offset
1638
1639                         offset += 16
1640                         rem -= 16
1641
1642                         if rem >= 16 {
1643                                 p := s.Prog(ppc64.ALXV)
1644                                 p.From.Type = obj.TYPE_MEM
1645                                 p.From.Reg = srcReg
1646                                 p.From.Offset = offset
1647                                 p.To.Type = obj.TYPE_REG
1648                                 p.To.Reg = ppc64.REG_VS32
1649
1650                                 p = s.Prog(ppc64.ASTXV)
1651                                 p.From.Type = obj.TYPE_REG
1652                                 p.From.Reg = ppc64.REG_VS32
1653                                 p.To.Type = obj.TYPE_MEM
1654                                 p.To.Reg = dstReg
1655                                 p.To.Offset = offset
1656
1657                                 offset += 16
1658                                 rem -= 16
1659                         }
1660                 }
1661                 // Generate all the remaining load and store pairs, starting with
1662                 // as many 8 byte moves as possible, then 4, 2, 1.
1663                 for rem > 0 {
1664                         op, size := ppc64.AMOVB, int64(1)
1665                         switch {
1666                         case rem >= 8:
1667                                 op, size = ppc64.AMOVD, 8
1668                         case rem >= 4:
1669                                 op, size = ppc64.AMOVW, 4
1670                         case rem >= 2:
1671                                 op, size = ppc64.AMOVH, 2
1672                         }
1673                         // Load
1674                         p := s.Prog(op)
1675                         p.To.Type = obj.TYPE_REG
1676                         p.To.Reg = ppc64.REGTMP
1677                         p.From.Type = obj.TYPE_MEM
1678                         p.From.Reg = srcReg
1679                         p.From.Offset = offset
1680
1681                         // Store
1682                         p = s.Prog(op)
1683                         p.From.Type = obj.TYPE_REG
1684                         p.From.Reg = ppc64.REGTMP
1685                         p.To.Type = obj.TYPE_MEM
1686                         p.To.Reg = dstReg
1687                         p.To.Offset = offset
1688                         rem -= size
1689                         offset += size
1690                 }
1691
1692         case ssa.OpPPC64CALLstatic:
1693                 s.Call(v)
1694
1695         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1696                 p := s.Prog(ppc64.AMOVD)
1697                 p.From.Type = obj.TYPE_REG
1698                 p.From.Reg = v.Args[0].Reg()
1699                 p.To.Type = obj.TYPE_REG
1700                 p.To.Reg = ppc64.REG_LR
1701
1702                 if v.Args[0].Reg() != ppc64.REG_R12 {
1703                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1704                 }
1705
1706                 pp := s.Call(v)
1707                 pp.To.Reg = ppc64.REG_LR
1708
1709                 if gc.Ctxt.Flag_shared {
1710                         // When compiling Go into PIC, the function we just
1711                         // called via pointer might have been implemented in
1712                         // a separate module and so overwritten the TOC
1713                         // pointer in R2; reload it.
1714                         q := s.Prog(ppc64.AMOVD)
1715                         q.From.Type = obj.TYPE_MEM
1716                         q.From.Offset = 24
1717                         q.From.Reg = ppc64.REGSP
1718                         q.To.Type = obj.TYPE_REG
1719                         q.To.Reg = ppc64.REG_R2
1720                 }
1721
1722         case ssa.OpPPC64LoweredWB:
1723                 p := s.Prog(obj.ACALL)
1724                 p.To.Type = obj.TYPE_MEM
1725                 p.To.Name = obj.NAME_EXTERN
1726                 p.To.Sym = v.Aux.(*obj.LSym)
1727
1728         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1729                 p := s.Prog(obj.ACALL)
1730                 p.To.Type = obj.TYPE_MEM
1731                 p.To.Name = obj.NAME_EXTERN
1732                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1733                 s.UseArgs(16) // space used in callee args area by assembly stubs
1734
1735         case ssa.OpPPC64LoweredNilCheck:
1736                 if objabi.GOOS == "aix" {
1737                         // CMP Rarg0, R0
1738                         // BNE 2(PC)
1739                         // STW R0, 0(R0)
1740                         // NOP (so the BNE has somewhere to land)
1741
1742                         // CMP Rarg0, R0
1743                         p := s.Prog(ppc64.ACMP)
1744                         p.From.Type = obj.TYPE_REG
1745                         p.From.Reg = v.Args[0].Reg()
1746                         p.To.Type = obj.TYPE_REG
1747                         p.To.Reg = ppc64.REG_R0
1748
1749                         // BNE 2(PC)
1750                         p2 := s.Prog(ppc64.ABNE)
1751                         p2.To.Type = obj.TYPE_BRANCH
1752
1753                         // STW R0, 0(R0)
1754                         // Write at 0 is forbidden and will trigger a SIGSEGV
1755                         p = s.Prog(ppc64.AMOVW)
1756                         p.From.Type = obj.TYPE_REG
1757                         p.From.Reg = ppc64.REG_R0
1758                         p.To.Type = obj.TYPE_MEM
1759                         p.To.Reg = ppc64.REG_R0
1760
1761                         // NOP (so the BNE has somewhere to land)
1762                         nop := s.Prog(obj.ANOP)
1763                         gc.Patch(p2, nop)
1764
1765                 } else {
1766                         // Issue a load which will fault if arg is nil.
1767                         p := s.Prog(ppc64.AMOVBZ)
1768                         p.From.Type = obj.TYPE_MEM
1769                         p.From.Reg = v.Args[0].Reg()
1770                         gc.AddAux(&p.From, v)
1771                         p.To.Type = obj.TYPE_REG
1772                         p.To.Reg = ppc64.REGTMP
1773                 }
1774                 if logopt.Enabled() {
1775                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1776                 }
1777                 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1778                         gc.Warnl(v.Pos, "generated nil check")
1779                 }
1780
1781         // These should be resolved by rules and not make it here.
1782         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1783                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1784                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1785                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1786         case ssa.OpPPC64InvertFlags:
1787                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1788         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1789                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1790         case ssa.OpClobber:
1791                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1792         default:
1793                 v.Fatalf("genValue not implemented: %s", v.LongString())
1794         }
1795 }
1796
1797 var blockJump = [...]struct {
1798         asm, invasm     obj.As
1799         asmeq, invasmun bool
1800 }{
1801         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1802         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1803
1804         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1805         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1806         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1807         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1808
1809         // TODO: need to work FP comparisons into block jumps
1810         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1811         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1812         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1813         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1814 }
1815
1816 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1817         switch b.Kind {
1818         case ssa.BlockDefer:
1819                 // defer returns in R3:
1820                 // 0 if we should continue executing
1821                 // 1 if we should jump to deferreturn call
1822                 p := s.Prog(ppc64.ACMP)
1823                 p.From.Type = obj.TYPE_REG
1824                 p.From.Reg = ppc64.REG_R3
1825                 p.To.Type = obj.TYPE_REG
1826                 p.To.Reg = ppc64.REG_R0
1827
1828                 p = s.Prog(ppc64.ABNE)
1829                 p.To.Type = obj.TYPE_BRANCH
1830                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1831                 if b.Succs[0].Block() != next {
1832                         p := s.Prog(obj.AJMP)
1833                         p.To.Type = obj.TYPE_BRANCH
1834                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1835                 }
1836
1837         case ssa.BlockPlain:
1838                 if b.Succs[0].Block() != next {
1839                         p := s.Prog(obj.AJMP)
1840                         p.To.Type = obj.TYPE_BRANCH
1841                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1842                 }
1843         case ssa.BlockExit:
1844         case ssa.BlockRet:
1845                 s.Prog(obj.ARET)
1846         case ssa.BlockRetJmp:
1847                 p := s.Prog(obj.AJMP)
1848                 p.To.Type = obj.TYPE_MEM
1849                 p.To.Name = obj.NAME_EXTERN
1850                 p.To.Sym = b.Aux.(*obj.LSym)
1851
1852         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1853                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1854                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1855                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1856                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1857                 jmp := blockJump[b.Kind]
1858                 switch next {
1859                 case b.Succs[0].Block():
1860                         s.Br(jmp.invasm, b.Succs[1].Block())
1861                         if jmp.invasmun {
1862                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1863                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1864                         }
1865                 case b.Succs[1].Block():
1866                         s.Br(jmp.asm, b.Succs[0].Block())
1867                         if jmp.asmeq {
1868                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1869                         }
1870                 default:
1871                         if b.Likely != ssa.BranchUnlikely {
1872                                 s.Br(jmp.asm, b.Succs[0].Block())
1873                                 if jmp.asmeq {
1874                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1875                                 }
1876                                 s.Br(obj.AJMP, b.Succs[1].Block())
1877                         } else {
1878                                 s.Br(jmp.invasm, b.Succs[1].Block())
1879                                 if jmp.invasmun {
1880                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1881                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1882                                 }
1883                                 s.Br(obj.AJMP, b.Succs[0].Block())
1884                         }
1885                 }
1886         default:
1887                 b.Fatalf("branch not implemented: %s", b.LongString())
1888         }
1889 }