]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/compile/internal/ppc64/ssa.go
[dev.link] all: merge branch 'master' into dev.link
[gostls13.git] / src / cmd / compile / internal / ppc64 / ssa.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package ppc64
6
7 import (
8         "cmd/compile/internal/gc"
9         "cmd/compile/internal/logopt"
10         "cmd/compile/internal/ssa"
11         "cmd/compile/internal/types"
12         "cmd/internal/obj"
13         "cmd/internal/obj/ppc64"
14         "cmd/internal/objabi"
15         "math"
16         "strings"
17 )
18
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21         //      flive := b.FlagsLiveAtEnd
22         //      if b.Control != nil && b.Control.Type.IsFlags() {
23         //              flive = true
24         //      }
25         //      for i := len(b.Values) - 1; i >= 0; i-- {
26         //              v := b.Values[i]
27         //              if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
28         //                      // The "mark" is any non-nil Aux value.
29         //                      v.Aux = v
30         //              }
31         //              if v.Type.IsFlags() {
32         //                      flive = false
33         //              }
34         //              for _, a := range v.Args {
35         //                      if a.Type.IsFlags() {
36         //                              flive = true
37         //                      }
38         //              }
39         //      }
40 }
41
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
44         if t.IsFloat() {
45                 switch t.Size() {
46                 case 4:
47                         return ppc64.AFMOVS
48                 case 8:
49                         return ppc64.AFMOVD
50                 }
51         } else {
52                 switch t.Size() {
53                 case 1:
54                         if t.IsSigned() {
55                                 return ppc64.AMOVB
56                         } else {
57                                 return ppc64.AMOVBZ
58                         }
59                 case 2:
60                         if t.IsSigned() {
61                                 return ppc64.AMOVH
62                         } else {
63                                 return ppc64.AMOVHZ
64                         }
65                 case 4:
66                         if t.IsSigned() {
67                                 return ppc64.AMOVW
68                         } else {
69                                 return ppc64.AMOVWZ
70                         }
71                 case 8:
72                         return ppc64.AMOVD
73                 }
74         }
75         panic("bad load type")
76 }
77
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
80         if t.IsFloat() {
81                 switch t.Size() {
82                 case 4:
83                         return ppc64.AFMOVS
84                 case 8:
85                         return ppc64.AFMOVD
86                 }
87         } else {
88                 switch t.Size() {
89                 case 1:
90                         return ppc64.AMOVB
91                 case 2:
92                         return ppc64.AMOVH
93                 case 4:
94                         return ppc64.AMOVW
95                 case 8:
96                         return ppc64.AMOVD
97                 }
98         }
99         panic("bad store type")
100 }
101
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
103         switch v.Op {
104         case ssa.OpCopy:
105                 t := v.Type
106                 if t.IsMemory() {
107                         return
108                 }
109                 x := v.Args[0].Reg()
110                 y := v.Reg()
111                 if x != y {
112                         rt := obj.TYPE_REG
113                         op := ppc64.AMOVD
114
115                         if t.IsFloat() {
116                                 op = ppc64.AFMOVD
117                         }
118                         p := s.Prog(op)
119                         p.From.Type = rt
120                         p.From.Reg = x
121                         p.To.Type = rt
122                         p.To.Reg = y
123                 }
124
125         case ssa.OpPPC64LoweredMuluhilo:
126                 // MULHDU       Rarg1, Rarg0, Reg0
127                 // MULLD        Rarg1, Rarg0, Reg1
128                 r0 := v.Args[0].Reg()
129                 r1 := v.Args[1].Reg()
130                 p := s.Prog(ppc64.AMULHDU)
131                 p.From.Type = obj.TYPE_REG
132                 p.From.Reg = r1
133                 p.Reg = r0
134                 p.To.Type = obj.TYPE_REG
135                 p.To.Reg = v.Reg0()
136                 p1 := s.Prog(ppc64.AMULLD)
137                 p1.From.Type = obj.TYPE_REG
138                 p1.From.Reg = r1
139                 p1.Reg = r0
140                 p1.To.Type = obj.TYPE_REG
141                 p1.To.Reg = v.Reg1()
142
143         case ssa.OpPPC64LoweredAdd64Carry:
144                 // ADDC         Rarg2, -1, Rtmp
145                 // ADDE         Rarg1, Rarg0, Reg0
146                 // ADDZE        Rzero, Reg1
147                 r0 := v.Args[0].Reg()
148                 r1 := v.Args[1].Reg()
149                 r2 := v.Args[2].Reg()
150                 p := s.Prog(ppc64.AADDC)
151                 p.From.Type = obj.TYPE_CONST
152                 p.From.Offset = -1
153                 p.Reg = r2
154                 p.To.Type = obj.TYPE_REG
155                 p.To.Reg = ppc64.REGTMP
156                 p1 := s.Prog(ppc64.AADDE)
157                 p1.From.Type = obj.TYPE_REG
158                 p1.From.Reg = r1
159                 p1.Reg = r0
160                 p1.To.Type = obj.TYPE_REG
161                 p1.To.Reg = v.Reg0()
162                 p2 := s.Prog(ppc64.AADDZE)
163                 p2.From.Type = obj.TYPE_REG
164                 p2.From.Reg = ppc64.REGZERO
165                 p2.To.Type = obj.TYPE_REG
166                 p2.To.Reg = v.Reg1()
167
168         case ssa.OpPPC64LoweredAtomicAnd8,
169                 ssa.OpPPC64LoweredAtomicOr8:
170                 // LWSYNC
171                 // LBAR         (Rarg0), Rtmp
172                 // AND/OR       Rarg1, Rtmp
173                 // STBCCC       Rtmp, (Rarg0)
174                 // BNE          -3(PC)
175                 r0 := v.Args[0].Reg()
176                 r1 := v.Args[1].Reg()
177                 // LWSYNC - Assuming shared data not write-through-required nor
178                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179                 plwsync := s.Prog(ppc64.ALWSYNC)
180                 plwsync.To.Type = obj.TYPE_NONE
181                 p := s.Prog(ppc64.ALBAR)
182                 p.From.Type = obj.TYPE_MEM
183                 p.From.Reg = r0
184                 p.To.Type = obj.TYPE_REG
185                 p.To.Reg = ppc64.REGTMP
186                 p1 := s.Prog(v.Op.Asm())
187                 p1.From.Type = obj.TYPE_REG
188                 p1.From.Reg = r1
189                 p1.To.Type = obj.TYPE_REG
190                 p1.To.Reg = ppc64.REGTMP
191                 p2 := s.Prog(ppc64.ASTBCCC)
192                 p2.From.Type = obj.TYPE_REG
193                 p2.From.Reg = ppc64.REGTMP
194                 p2.To.Type = obj.TYPE_MEM
195                 p2.To.Reg = r0
196                 p2.RegTo2 = ppc64.REGTMP
197                 p3 := s.Prog(ppc64.ABNE)
198                 p3.To.Type = obj.TYPE_BRANCH
199                 gc.Patch(p3, p)
200
201         case ssa.OpPPC64LoweredAtomicAdd32,
202                 ssa.OpPPC64LoweredAtomicAdd64:
203                 // LWSYNC
204                 // LDAR/LWAR    (Rarg0), Rout
205                 // ADD          Rarg1, Rout
206                 // STDCCC/STWCCC Rout, (Rarg0)
207                 // BNE         -3(PC)
208                 // MOVW         Rout,Rout (if Add32)
209                 ld := ppc64.ALDAR
210                 st := ppc64.ASTDCCC
211                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
212                         ld = ppc64.ALWAR
213                         st = ppc64.ASTWCCC
214                 }
215                 r0 := v.Args[0].Reg()
216                 r1 := v.Args[1].Reg()
217                 out := v.Reg0()
218                 // LWSYNC - Assuming shared data not write-through-required nor
219                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220                 plwsync := s.Prog(ppc64.ALWSYNC)
221                 plwsync.To.Type = obj.TYPE_NONE
222                 // LDAR or LWAR
223                 p := s.Prog(ld)
224                 p.From.Type = obj.TYPE_MEM
225                 p.From.Reg = r0
226                 p.To.Type = obj.TYPE_REG
227                 p.To.Reg = out
228                 // ADD reg1,out
229                 p1 := s.Prog(ppc64.AADD)
230                 p1.From.Type = obj.TYPE_REG
231                 p1.From.Reg = r1
232                 p1.To.Reg = out
233                 p1.To.Type = obj.TYPE_REG
234                 // STDCCC or STWCCC
235                 p3 := s.Prog(st)
236                 p3.From.Type = obj.TYPE_REG
237                 p3.From.Reg = out
238                 p3.To.Type = obj.TYPE_MEM
239                 p3.To.Reg = r0
240                 // BNE retry
241                 p4 := s.Prog(ppc64.ABNE)
242                 p4.To.Type = obj.TYPE_BRANCH
243                 gc.Patch(p4, p)
244
245                 // Ensure a 32 bit result
246                 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247                         p5 := s.Prog(ppc64.AMOVWZ)
248                         p5.To.Type = obj.TYPE_REG
249                         p5.To.Reg = out
250                         p5.From.Type = obj.TYPE_REG
251                         p5.From.Reg = out
252                 }
253
254         case ssa.OpPPC64LoweredAtomicExchange32,
255                 ssa.OpPPC64LoweredAtomicExchange64:
256                 // LWSYNC
257                 // LDAR/LWAR    (Rarg0), Rout
258                 // STDCCC/STWCCC Rout, (Rarg0)
259                 // BNE         -2(PC)
260                 // ISYNC
261                 ld := ppc64.ALDAR
262                 st := ppc64.ASTDCCC
263                 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
264                         ld = ppc64.ALWAR
265                         st = ppc64.ASTWCCC
266                 }
267                 r0 := v.Args[0].Reg()
268                 r1 := v.Args[1].Reg()
269                 out := v.Reg0()
270                 // LWSYNC - Assuming shared data not write-through-required nor
271                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272                 plwsync := s.Prog(ppc64.ALWSYNC)
273                 plwsync.To.Type = obj.TYPE_NONE
274                 // LDAR or LWAR
275                 p := s.Prog(ld)
276                 p.From.Type = obj.TYPE_MEM
277                 p.From.Reg = r0
278                 p.To.Type = obj.TYPE_REG
279                 p.To.Reg = out
280                 // STDCCC or STWCCC
281                 p1 := s.Prog(st)
282                 p1.From.Type = obj.TYPE_REG
283                 p1.From.Reg = r1
284                 p1.To.Type = obj.TYPE_MEM
285                 p1.To.Reg = r0
286                 // BNE retry
287                 p2 := s.Prog(ppc64.ABNE)
288                 p2.To.Type = obj.TYPE_BRANCH
289                 gc.Patch(p2, p)
290                 // ISYNC
291                 pisync := s.Prog(ppc64.AISYNC)
292                 pisync.To.Type = obj.TYPE_NONE
293
294         case ssa.OpPPC64LoweredAtomicLoad8,
295                 ssa.OpPPC64LoweredAtomicLoad32,
296                 ssa.OpPPC64LoweredAtomicLoad64,
297                 ssa.OpPPC64LoweredAtomicLoadPtr:
298                 // SYNC
299                 // MOVB/MOVD/MOVW (Rarg0), Rout
300                 // CMP Rout,Rout
301                 // BNE 1(PC)
302                 // ISYNC
303                 ld := ppc64.AMOVD
304                 cmp := ppc64.ACMP
305                 switch v.Op {
306                 case ssa.OpPPC64LoweredAtomicLoad8:
307                         ld = ppc64.AMOVBZ
308                 case ssa.OpPPC64LoweredAtomicLoad32:
309                         ld = ppc64.AMOVWZ
310                         cmp = ppc64.ACMPW
311                 }
312                 arg0 := v.Args[0].Reg()
313                 out := v.Reg0()
314                 // SYNC when AuxInt == 1; otherwise, load-acquire
315                 if v.AuxInt == 1 {
316                         psync := s.Prog(ppc64.ASYNC)
317                         psync.To.Type = obj.TYPE_NONE
318                 }
319                 // Load
320                 p := s.Prog(ld)
321                 p.From.Type = obj.TYPE_MEM
322                 p.From.Reg = arg0
323                 p.To.Type = obj.TYPE_REG
324                 p.To.Reg = out
325                 // CMP
326                 p1 := s.Prog(cmp)
327                 p1.From.Type = obj.TYPE_REG
328                 p1.From.Reg = out
329                 p1.To.Type = obj.TYPE_REG
330                 p1.To.Reg = out
331                 // BNE
332                 p2 := s.Prog(ppc64.ABNE)
333                 p2.To.Type = obj.TYPE_BRANCH
334                 // ISYNC
335                 pisync := s.Prog(ppc64.AISYNC)
336                 pisync.To.Type = obj.TYPE_NONE
337                 gc.Patch(p2, pisync)
338
339         case ssa.OpPPC64LoweredAtomicStore8,
340                 ssa.OpPPC64LoweredAtomicStore32,
341                 ssa.OpPPC64LoweredAtomicStore64:
342                 // SYNC or LWSYNC
343                 // MOVB/MOVW/MOVD arg1,(arg0)
344                 st := ppc64.AMOVD
345                 switch v.Op {
346                 case ssa.OpPPC64LoweredAtomicStore8:
347                         st = ppc64.AMOVB
348                 case ssa.OpPPC64LoweredAtomicStore32:
349                         st = ppc64.AMOVW
350                 }
351                 arg0 := v.Args[0].Reg()
352                 arg1 := v.Args[1].Reg()
353                 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
354                 // SYNC
355                 syncOp := ppc64.ASYNC
356                 if v.AuxInt == 0 {
357                         syncOp = ppc64.ALWSYNC
358                 }
359                 psync := s.Prog(syncOp)
360                 psync.To.Type = obj.TYPE_NONE
361                 // Store
362                 p := s.Prog(st)
363                 p.To.Type = obj.TYPE_MEM
364                 p.To.Reg = arg0
365                 p.From.Type = obj.TYPE_REG
366                 p.From.Reg = arg1
367
368         case ssa.OpPPC64LoweredAtomicCas64,
369                 ssa.OpPPC64LoweredAtomicCas32:
370                 // LWSYNC
371                 // loop:
372                 // LDAR        (Rarg0), MutexHint, Rtmp
373                 // CMP         Rarg1, Rtmp
374                 // BNE         fail
375                 // STDCCC      Rarg2, (Rarg0)
376                 // BNE         loop
377                 // LWSYNC      // Only for sequential consistency; not required in CasRel.
378                 // MOVD        $1, Rout
379                 // BR          end
380                 // fail:
381                 // MOVD        $0, Rout
382                 // end:
383                 ld := ppc64.ALDAR
384                 st := ppc64.ASTDCCC
385                 cmp := ppc64.ACMP
386                 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
387                         ld = ppc64.ALWAR
388                         st = ppc64.ASTWCCC
389                         cmp = ppc64.ACMPW
390                 }
391                 r0 := v.Args[0].Reg()
392                 r1 := v.Args[1].Reg()
393                 r2 := v.Args[2].Reg()
394                 out := v.Reg0()
395                 // LWSYNC - Assuming shared data not write-through-required nor
396                 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397                 plwsync1 := s.Prog(ppc64.ALWSYNC)
398                 plwsync1.To.Type = obj.TYPE_NONE
399                 // LDAR or LWAR
400                 p := s.Prog(ld)
401                 p.From.Type = obj.TYPE_MEM
402                 p.From.Reg = r0
403                 p.To.Type = obj.TYPE_REG
404                 p.To.Reg = ppc64.REGTMP
405                 // If it is a Compare-and-Swap-Release operation, set the EH field with
406                 // the release hint.
407                 if v.AuxInt == 0 {
408                         p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
409                 }
410                 // CMP reg1,reg2
411                 p1 := s.Prog(cmp)
412                 p1.From.Type = obj.TYPE_REG
413                 p1.From.Reg = r1
414                 p1.To.Reg = ppc64.REGTMP
415                 p1.To.Type = obj.TYPE_REG
416                 // BNE cas_fail
417                 p2 := s.Prog(ppc64.ABNE)
418                 p2.To.Type = obj.TYPE_BRANCH
419                 // STDCCC or STWCCC
420                 p3 := s.Prog(st)
421                 p3.From.Type = obj.TYPE_REG
422                 p3.From.Reg = r2
423                 p3.To.Type = obj.TYPE_MEM
424                 p3.To.Reg = r0
425                 // BNE retry
426                 p4 := s.Prog(ppc64.ABNE)
427                 p4.To.Type = obj.TYPE_BRANCH
428                 gc.Patch(p4, p)
429                 // LWSYNC - Assuming shared data not write-through-required nor
430                 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431                 // If the operation is a CAS-Release, then synchronization is not necessary.
432                 if v.AuxInt != 0 {
433                         plwsync2 := s.Prog(ppc64.ALWSYNC)
434                         plwsync2.To.Type = obj.TYPE_NONE
435                 }
436                 // return true
437                 p5 := s.Prog(ppc64.AMOVD)
438                 p5.From.Type = obj.TYPE_CONST
439                 p5.From.Offset = 1
440                 p5.To.Type = obj.TYPE_REG
441                 p5.To.Reg = out
442                 // BR done
443                 p6 := s.Prog(obj.AJMP)
444                 p6.To.Type = obj.TYPE_BRANCH
445                 // return false
446                 p7 := s.Prog(ppc64.AMOVD)
447                 p7.From.Type = obj.TYPE_CONST
448                 p7.From.Offset = 0
449                 p7.To.Type = obj.TYPE_REG
450                 p7.To.Reg = out
451                 gc.Patch(p2, p7)
452                 // done (label)
453                 p8 := s.Prog(obj.ANOP)
454                 gc.Patch(p6, p8)
455
456         case ssa.OpPPC64LoweredGetClosurePtr:
457                 // Closure pointer is R11 (already)
458                 gc.CheckLoweredGetClosurePtr(v)
459
460         case ssa.OpPPC64LoweredGetCallerSP:
461                 // caller's SP is FixedFrameSize below the address of the first arg
462                 p := s.Prog(ppc64.AMOVD)
463                 p.From.Type = obj.TYPE_ADDR
464                 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465                 p.From.Name = obj.NAME_PARAM
466                 p.To.Type = obj.TYPE_REG
467                 p.To.Reg = v.Reg()
468
469         case ssa.OpPPC64LoweredGetCallerPC:
470                 p := s.Prog(obj.AGETCALLERPC)
471                 p.To.Type = obj.TYPE_REG
472                 p.To.Reg = v.Reg()
473
474         case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475                 // input is already rounded
476
477         case ssa.OpLoadReg:
478                 loadOp := loadByType(v.Type)
479                 p := s.Prog(loadOp)
480                 gc.AddrAuto(&p.From, v.Args[0])
481                 p.To.Type = obj.TYPE_REG
482                 p.To.Reg = v.Reg()
483
484         case ssa.OpStoreReg:
485                 storeOp := storeByType(v.Type)
486                 p := s.Prog(storeOp)
487                 p.From.Type = obj.TYPE_REG
488                 p.From.Reg = v.Args[0].Reg()
489                 gc.AddrAuto(&p.To, v)
490
491         case ssa.OpPPC64DIVD:
492                 // For now,
493                 //
494                 // cmp arg1, -1
495                 // be  ahead
496                 // v = arg0 / arg1
497                 // b over
498                 // ahead: v = - arg0
499                 // over: nop
500                 r := v.Reg()
501                 r0 := v.Args[0].Reg()
502                 r1 := v.Args[1].Reg()
503
504                 p := s.Prog(ppc64.ACMP)
505                 p.From.Type = obj.TYPE_REG
506                 p.From.Reg = r1
507                 p.To.Type = obj.TYPE_CONST
508                 p.To.Offset = -1
509
510                 pbahead := s.Prog(ppc64.ABEQ)
511                 pbahead.To.Type = obj.TYPE_BRANCH
512
513                 p = s.Prog(v.Op.Asm())
514                 p.From.Type = obj.TYPE_REG
515                 p.From.Reg = r1
516                 p.Reg = r0
517                 p.To.Type = obj.TYPE_REG
518                 p.To.Reg = r
519
520                 pbover := s.Prog(obj.AJMP)
521                 pbover.To.Type = obj.TYPE_BRANCH
522
523                 p = s.Prog(ppc64.ANEG)
524                 p.To.Type = obj.TYPE_REG
525                 p.To.Reg = r
526                 p.From.Type = obj.TYPE_REG
527                 p.From.Reg = r0
528                 gc.Patch(pbahead, p)
529
530                 p = s.Prog(obj.ANOP)
531                 gc.Patch(pbover, p)
532
533         case ssa.OpPPC64DIVW:
534                 // word-width version of above
535                 r := v.Reg()
536                 r0 := v.Args[0].Reg()
537                 r1 := v.Args[1].Reg()
538
539                 p := s.Prog(ppc64.ACMPW)
540                 p.From.Type = obj.TYPE_REG
541                 p.From.Reg = r1
542                 p.To.Type = obj.TYPE_CONST
543                 p.To.Offset = -1
544
545                 pbahead := s.Prog(ppc64.ABEQ)
546                 pbahead.To.Type = obj.TYPE_BRANCH
547
548                 p = s.Prog(v.Op.Asm())
549                 p.From.Type = obj.TYPE_REG
550                 p.From.Reg = r1
551                 p.Reg = r0
552                 p.To.Type = obj.TYPE_REG
553                 p.To.Reg = r
554
555                 pbover := s.Prog(obj.AJMP)
556                 pbover.To.Type = obj.TYPE_BRANCH
557
558                 p = s.Prog(ppc64.ANEG)
559                 p.To.Type = obj.TYPE_REG
560                 p.To.Reg = r
561                 p.From.Type = obj.TYPE_REG
562                 p.From.Reg = r0
563                 gc.Patch(pbahead, p)
564
565                 p = s.Prog(obj.ANOP)
566                 gc.Patch(pbover, p)
567
568         case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569                 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570                 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571                 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572                 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573                 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574                 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
575                 r := v.Reg()
576                 r1 := v.Args[0].Reg()
577                 r2 := v.Args[1].Reg()
578                 p := s.Prog(v.Op.Asm())
579                 p.From.Type = obj.TYPE_REG
580                 p.From.Reg = r2
581                 p.Reg = r1
582                 p.To.Type = obj.TYPE_REG
583                 p.To.Reg = r
584
585         case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
586                 r1 := v.Args[0].Reg()
587                 r2 := v.Args[1].Reg()
588                 p := s.Prog(v.Op.Asm())
589                 p.From.Type = obj.TYPE_REG
590                 p.From.Reg = r2
591                 p.Reg = r1
592                 p.To.Type = obj.TYPE_REG
593                 p.To.Reg = ppc64.REGTMP // result is not needed
594
595         case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
596                 p := s.Prog(v.Op.Asm())
597                 p.From.Type = obj.TYPE_CONST
598                 p.From.Offset = v.AuxInt
599                 p.Reg = v.Args[0].Reg()
600                 p.To.Type = obj.TYPE_REG
601                 p.To.Reg = v.Reg()
602
603         case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
604                 r := v.Reg()
605                 r1 := v.Args[0].Reg()
606                 r2 := v.Args[1].Reg()
607                 r3 := v.Args[2].Reg()
608                 // r = r1*r2 ± r3
609                 p := s.Prog(v.Op.Asm())
610                 p.From.Type = obj.TYPE_REG
611                 p.From.Reg = r1
612                 p.Reg = r3
613                 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
614                 p.To.Type = obj.TYPE_REG
615                 p.To.Reg = r
616
617         case ssa.OpPPC64MaskIfNotCarry:
618                 r := v.Reg()
619                 p := s.Prog(v.Op.Asm())
620                 p.From.Type = obj.TYPE_REG
621                 p.From.Reg = ppc64.REGZERO
622                 p.To.Type = obj.TYPE_REG
623                 p.To.Reg = r
624
625         case ssa.OpPPC64ADDconstForCarry:
626                 r1 := v.Args[0].Reg()
627                 p := s.Prog(v.Op.Asm())
628                 p.Reg = r1
629                 p.From.Type = obj.TYPE_CONST
630                 p.From.Offset = v.AuxInt
631                 p.To.Type = obj.TYPE_REG
632                 p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
633
634         case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
635                 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
636                 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
637                 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
638                 r := v.Reg()
639                 p := s.Prog(v.Op.Asm())
640                 p.To.Type = obj.TYPE_REG
641                 p.To.Reg = r
642                 p.From.Type = obj.TYPE_REG
643                 p.From.Reg = v.Args[0].Reg()
644
645         case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
646                 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
647                 p := s.Prog(v.Op.Asm())
648                 p.Reg = v.Args[0].Reg()
649                 p.From.Type = obj.TYPE_CONST
650                 p.From.Offset = v.AuxInt
651                 p.To.Type = obj.TYPE_REG
652                 p.To.Reg = v.Reg()
653
654         case ssa.OpPPC64ANDCCconst:
655                 p := s.Prog(v.Op.Asm())
656                 p.Reg = v.Args[0].Reg()
657
658                 if v.Aux != nil {
659                         p.From.Type = obj.TYPE_CONST
660                         p.From.Offset = gc.AuxOffset(v)
661                 } else {
662                         p.From.Type = obj.TYPE_CONST
663                         p.From.Offset = v.AuxInt
664                 }
665
666                 p.To.Type = obj.TYPE_REG
667                 p.To.Reg = ppc64.REGTMP // discard result
668
669         case ssa.OpPPC64MOVDaddr:
670                 switch v.Aux.(type) {
671                 default:
672                         v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
673                 case nil:
674                         // If aux offset and aux int are both 0, and the same
675                         // input and output regs are used, no instruction
676                         // needs to be generated, since it would just be
677                         // addi rx, rx, 0.
678                         if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
679                                 p := s.Prog(ppc64.AMOVD)
680                                 p.From.Type = obj.TYPE_ADDR
681                                 p.From.Reg = v.Args[0].Reg()
682                                 p.From.Offset = v.AuxInt
683                                 p.To.Type = obj.TYPE_REG
684                                 p.To.Reg = v.Reg()
685                         }
686
687                 case *obj.LSym, *gc.Node:
688                         p := s.Prog(ppc64.AMOVD)
689                         p.From.Type = obj.TYPE_ADDR
690                         p.From.Reg = v.Args[0].Reg()
691                         p.To.Type = obj.TYPE_REG
692                         p.To.Reg = v.Reg()
693                         gc.AddAux(&p.From, v)
694
695                 }
696
697         case ssa.OpPPC64MOVDconst:
698                 p := s.Prog(v.Op.Asm())
699                 p.From.Type = obj.TYPE_CONST
700                 p.From.Offset = v.AuxInt
701                 p.To.Type = obj.TYPE_REG
702                 p.To.Reg = v.Reg()
703
704         case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
705                 p := s.Prog(v.Op.Asm())
706                 p.From.Type = obj.TYPE_FCONST
707                 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
708                 p.To.Type = obj.TYPE_REG
709                 p.To.Reg = v.Reg()
710
711         case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
712                 p := s.Prog(v.Op.Asm())
713                 p.From.Type = obj.TYPE_REG
714                 p.From.Reg = v.Args[0].Reg()
715                 p.To.Type = obj.TYPE_REG
716                 p.To.Reg = v.Args[1].Reg()
717
718         case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
719                 p := s.Prog(v.Op.Asm())
720                 p.From.Type = obj.TYPE_REG
721                 p.From.Reg = v.Args[0].Reg()
722                 p.To.Type = obj.TYPE_CONST
723                 p.To.Offset = v.AuxInt
724
725         case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
726                 // Shift in register to required size
727                 p := s.Prog(v.Op.Asm())
728                 p.From.Type = obj.TYPE_REG
729                 p.From.Reg = v.Args[0].Reg()
730                 p.To.Reg = v.Reg()
731                 p.To.Type = obj.TYPE_REG
732
733         case ssa.OpPPC64MOVDload:
734
735                 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
736                 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
737                 // the offset is not known until link time. If the load of a go.string uses relocation for the
738                 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
739                 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
740                 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
741                 // go.string types because other types will have proper alignment.
742
743                 gostring := false
744                 switch n := v.Aux.(type) {
745                 case *obj.LSym:
746                         gostring = strings.HasPrefix(n.Name, "go.string.")
747                 }
748                 if gostring {
749                         // Generate full addr of the go.string const
750                         // including AuxInt
751                         p := s.Prog(ppc64.AMOVD)
752                         p.From.Type = obj.TYPE_ADDR
753                         p.From.Reg = v.Args[0].Reg()
754                         gc.AddAux(&p.From, v)
755                         p.To.Type = obj.TYPE_REG
756                         p.To.Reg = v.Reg()
757                         // Load go.string using 0 offset
758                         p = s.Prog(v.Op.Asm())
759                         p.From.Type = obj.TYPE_MEM
760                         p.From.Reg = v.Reg()
761                         p.To.Type = obj.TYPE_REG
762                         p.To.Reg = v.Reg()
763                         break
764                 }
765                 // Not a go.string, generate a normal load
766                 fallthrough
767
768         case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
769                 p := s.Prog(v.Op.Asm())
770                 p.From.Type = obj.TYPE_MEM
771                 p.From.Reg = v.Args[0].Reg()
772                 gc.AddAux(&p.From, v)
773                 p.To.Type = obj.TYPE_REG
774                 p.To.Reg = v.Reg()
775
776         case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
777                 p := s.Prog(v.Op.Asm())
778                 p.From.Type = obj.TYPE_MEM
779                 p.From.Reg = v.Args[0].Reg()
780                 p.To.Type = obj.TYPE_REG
781                 p.To.Reg = v.Reg()
782
783         case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
784                 p := s.Prog(v.Op.Asm())
785                 p.To.Type = obj.TYPE_MEM
786                 p.To.Reg = v.Args[0].Reg()
787                 p.From.Type = obj.TYPE_REG
788                 p.From.Reg = v.Args[1].Reg()
789
790         case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
791                 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
792                 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
793                 p := s.Prog(v.Op.Asm())
794                 p.From.Type = obj.TYPE_MEM
795                 p.From.Reg = v.Args[0].Reg()
796                 p.From.Index = v.Args[1].Reg()
797                 p.To.Type = obj.TYPE_REG
798                 p.To.Reg = v.Reg()
799
800         case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
801                 p := s.Prog(v.Op.Asm())
802                 p.From.Type = obj.TYPE_REG
803                 p.From.Reg = ppc64.REGZERO
804                 p.To.Type = obj.TYPE_MEM
805                 p.To.Reg = v.Args[0].Reg()
806                 gc.AddAux(&p.To, v)
807
808         case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
809                 p := s.Prog(v.Op.Asm())
810                 p.From.Type = obj.TYPE_REG
811                 p.From.Reg = v.Args[1].Reg()
812                 p.To.Type = obj.TYPE_MEM
813                 p.To.Reg = v.Args[0].Reg()
814                 gc.AddAux(&p.To, v)
815
816         case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
817                 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
818                 ssa.OpPPC64MOVHBRstoreidx:
819                 p := s.Prog(v.Op.Asm())
820                 p.From.Type = obj.TYPE_REG
821                 p.From.Reg = v.Args[2].Reg()
822                 p.To.Index = v.Args[1].Reg()
823                 p.To.Type = obj.TYPE_MEM
824                 p.To.Reg = v.Args[0].Reg()
825
826         case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
827                 // ISEL, ISELB
828                 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
829                 // ISEL only accepts 0, 1, 2 condition values but the others can be
830                 // achieved by swapping operand order.
831                 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
832                 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
833                 // ISELB is used when a boolean result is needed, returning 0 or 1
834                 p := s.Prog(ppc64.AISEL)
835                 p.To.Type = obj.TYPE_REG
836                 p.To.Reg = v.Reg()
837                 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
838                 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
839                 if v.Op == ssa.OpPPC64ISEL {
840                         r.Reg = v.Args[1].Reg()
841                 }
842                 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
843                 if v.AuxInt > 3 {
844                         p.Reg = r.Reg
845                         p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
846                 } else {
847                         p.Reg = v.Args[0].Reg()
848                         p.SetFrom3(r)
849                 }
850                 p.From.Type = obj.TYPE_CONST
851                 p.From.Offset = v.AuxInt & 3
852
853         case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
854                 // The LoweredQuad code generation
855                 // generates STXV instructions on
856                 // power9. The Short variation is used
857                 // if no loop is generated.
858
859                 // sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
860
861                 // Set up loop counter in CTR, used by BC
862                 // XXLXOR clears VS32
863                 //       XXLXOR VS32,VS32,VS32
864                 //       MOVD len/64,REG_TMP
865                 //       MOVD REG_TMP,CTR
866                 //       loop:
867                 //       STXV VS32,0(R20)
868                 //       STXV VS32,16(R20)
869                 //       STXV VS32,32(R20)
870                 //       STXV VS32,48(R20)
871                 //       ADD  $64,R20
872                 //       BC   16, 0, loop
873
874                 // Number of loop iterations; each iteration zeroes 64 bytes
875                 ctr := v.AuxInt / 64
876
877                 // Remainder bytes
878                 rem := v.AuxInt % 64
879
880                 // Only generate a loop if there is more
881                 // than 1 iteration.
882                 if ctr > 1 {
883                         // Set up VS32 (V0) to hold 0s
884                         p := s.Prog(ppc64.AXXLXOR)
885                         p.From.Type = obj.TYPE_REG
886                         p.From.Reg = ppc64.REG_VS32
887                         p.To.Type = obj.TYPE_REG
888                         p.To.Reg = ppc64.REG_VS32
889                         p.Reg = ppc64.REG_VS32
890
891                         // Set up CTR loop counter
892                         p = s.Prog(ppc64.AMOVD)
893                         p.From.Type = obj.TYPE_CONST
894                         p.From.Offset = ctr
895                         p.To.Type = obj.TYPE_REG
896                         p.To.Reg = ppc64.REGTMP
897
898                         p = s.Prog(ppc64.AMOVD)
899                         p.From.Type = obj.TYPE_REG
900                         p.From.Reg = ppc64.REGTMP
901                         p.To.Type = obj.TYPE_REG
902                         p.To.Reg = ppc64.REG_CTR
903
904                         // Don't generate padding for
905                         // loops with few iterations.
906                         if ctr > 3 {
907                                 p = s.Prog(obj.APCALIGN)
908                                 p.From.Type = obj.TYPE_CONST
909                                 p.From.Offset = 16
910                         }
911
912                         // generate 4 STXVs to zero 64 bytes
913                         var top *obj.Prog
914
915                         p = s.Prog(ppc64.ASTXV)
916                         p.From.Type = obj.TYPE_REG
917                         p.From.Reg = ppc64.REG_VS32
918                         p.To.Type = obj.TYPE_MEM
919                         p.To.Reg = v.Args[0].Reg()
920
921                         //  Save the top of loop
922                         if top == nil {
923                                 top = p
924                         }
925                         p = s.Prog(ppc64.ASTXV)
926                         p.From.Type = obj.TYPE_REG
927                         p.From.Reg = ppc64.REG_VS32
928                         p.To.Type = obj.TYPE_MEM
929                         p.To.Reg = v.Args[0].Reg()
930                         p.To.Offset = 16
931
932                         p = s.Prog(ppc64.ASTXV)
933                         p.From.Type = obj.TYPE_REG
934                         p.From.Reg = ppc64.REG_VS32
935                         p.To.Type = obj.TYPE_MEM
936                         p.To.Reg = v.Args[0].Reg()
937                         p.To.Offset = 32
938
939                         p = s.Prog(ppc64.ASTXV)
940                         p.From.Type = obj.TYPE_REG
941                         p.From.Reg = ppc64.REG_VS32
942                         p.To.Type = obj.TYPE_MEM
943                         p.To.Reg = v.Args[0].Reg()
944                         p.To.Offset = 48
945
946                         // Increment address for the
947                         // 64 bytes just zeroed.
948                         p = s.Prog(ppc64.AADD)
949                         p.Reg = v.Args[0].Reg()
950                         p.From.Type = obj.TYPE_CONST
951                         p.From.Offset = 64
952                         p.To.Type = obj.TYPE_REG
953                         p.To.Reg = v.Args[0].Reg()
954
955                         // Branch back to top of loop
956                         // based on CTR
957                         // BC with BO_BCTR generates bdnz
958                         p = s.Prog(ppc64.ABC)
959                         p.From.Type = obj.TYPE_CONST
960                         p.From.Offset = ppc64.BO_BCTR
961                         p.Reg = ppc64.REG_R0
962                         p.To.Type = obj.TYPE_BRANCH
963                         gc.Patch(p, top)
964                 }
965                 // When ctr == 1 the loop was not generated but
966                 // there are at least 64 bytes to clear, so add
967                 // that to the remainder to generate the code
968                 // to clear those doublewords
969                 if ctr == 1 {
970                         rem += 64
971                 }
972
973                 // Clear the remainder starting at offset zero
974                 offset := int64(0)
975
976                 if rem >= 16 && ctr <= 1 {
977                         // If the XXLXOR hasn't already been
978                         // generated, do it here to initialize
979                         // VS32 (V0) to 0.
980                         p := s.Prog(ppc64.AXXLXOR)
981                         p.From.Type = obj.TYPE_REG
982                         p.From.Reg = ppc64.REG_VS32
983                         p.To.Type = obj.TYPE_REG
984                         p.To.Reg = ppc64.REG_VS32
985                         p.Reg = ppc64.REG_VS32
986                 }
987                 // Generate STXV for 32 or 64
988                 // bytes.
989                 for rem >= 32 {
990                         p := s.Prog(ppc64.ASTXV)
991                         p.From.Type = obj.TYPE_REG
992                         p.From.Reg = ppc64.REG_VS32
993                         p.To.Type = obj.TYPE_MEM
994                         p.To.Reg = v.Args[0].Reg()
995                         p.To.Offset = offset
996
997                         p = s.Prog(ppc64.ASTXV)
998                         p.From.Type = obj.TYPE_REG
999                         p.From.Reg = ppc64.REG_VS32
1000                         p.To.Type = obj.TYPE_MEM
1001                         p.To.Reg = v.Args[0].Reg()
1002                         p.To.Offset = offset + 16
1003                         offset += 32
1004                         rem -= 32
1005                 }
1006                 // Generate 16 bytes
1007                 if rem >= 16 {
1008                         p := s.Prog(ppc64.ASTXV)
1009                         p.From.Type = obj.TYPE_REG
1010                         p.From.Reg = ppc64.REG_VS32
1011                         p.To.Type = obj.TYPE_MEM
1012                         p.To.Reg = v.Args[0].Reg()
1013                         p.To.Offset = offset
1014                         offset += 16
1015                         rem -= 16
1016                 }
1017
1018                 // first clear as many doublewords as possible
1019                 // then clear remaining sizes as available
1020                 for rem > 0 {
1021                         op, size := ppc64.AMOVB, int64(1)
1022                         switch {
1023                         case rem >= 8:
1024                                 op, size = ppc64.AMOVD, 8
1025                         case rem >= 4:
1026                                 op, size = ppc64.AMOVW, 4
1027                         case rem >= 2:
1028                                 op, size = ppc64.AMOVH, 2
1029                         }
1030                         p := s.Prog(op)
1031                         p.From.Type = obj.TYPE_REG
1032                         p.From.Reg = ppc64.REG_R0
1033                         p.To.Type = obj.TYPE_MEM
1034                         p.To.Reg = v.Args[0].Reg()
1035                         p.To.Offset = offset
1036                         rem -= size
1037                         offset += size
1038                 }
1039
1040         case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1041
1042                 // Unaligned data doesn't hurt performance
1043                 // for these instructions on power8.
1044
1045                 // For sizes >= 64 generate a loop as follows:
1046
1047                 // Set up loop counter in CTR, used by BC
1048                 //       XXLXOR VS32,VS32,VS32
1049                 //       MOVD len/32,REG_TMP
1050                 //       MOVD REG_TMP,CTR
1051                 //       MOVD $16,REG_TMP
1052                 //       loop:
1053                 //       STXVD2X VS32,(R0)(R20)
1054                 //       STXVD2X VS32,(R31)(R20)
1055                 //       ADD  $32,R20
1056                 //       BC   16, 0, loop
1057                 //
1058                 // any remainder is done as described below
1059
1060                 // for sizes < 64 bytes, first clear as many doublewords as possible,
1061                 // then handle the remainder
1062                 //      MOVD R0,(R20)
1063                 //      MOVD R0,8(R20)
1064                 // .... etc.
1065                 //
1066                 // the remainder bytes are cleared using one or more
1067                 // of the following instructions with the appropriate
1068                 // offsets depending which instructions are needed
1069                 //
1070                 //      MOVW R0,n1(R20) 4 bytes
1071                 //      MOVH R0,n2(R20) 2 bytes
1072                 //      MOVB R0,n3(R20) 1 byte
1073                 //
1074                 // 7 bytes: MOVW, MOVH, MOVB
1075                 // 6 bytes: MOVW, MOVH
1076                 // 5 bytes: MOVW, MOVB
1077                 // 3 bytes: MOVH, MOVB
1078
1079                 // each loop iteration does 32 bytes
1080                 ctr := v.AuxInt / 32
1081
1082                 // remainder bytes
1083                 rem := v.AuxInt % 32
1084
1085                 // only generate a loop if there is more
1086                 // than 1 iteration.
1087                 if ctr > 1 {
1088                         // Set up VS32 (V0) to hold 0s
1089                         p := s.Prog(ppc64.AXXLXOR)
1090                         p.From.Type = obj.TYPE_REG
1091                         p.From.Reg = ppc64.REG_VS32
1092                         p.To.Type = obj.TYPE_REG
1093                         p.To.Reg = ppc64.REG_VS32
1094                         p.Reg = ppc64.REG_VS32
1095
1096                         // Set up CTR loop counter
1097                         p = s.Prog(ppc64.AMOVD)
1098                         p.From.Type = obj.TYPE_CONST
1099                         p.From.Offset = ctr
1100                         p.To.Type = obj.TYPE_REG
1101                         p.To.Reg = ppc64.REGTMP
1102
1103                         p = s.Prog(ppc64.AMOVD)
1104                         p.From.Type = obj.TYPE_REG
1105                         p.From.Reg = ppc64.REGTMP
1106                         p.To.Type = obj.TYPE_REG
1107                         p.To.Reg = ppc64.REG_CTR
1108
1109                         // Set up R31 to hold index value 16
1110                         p = s.Prog(ppc64.AMOVD)
1111                         p.From.Type = obj.TYPE_CONST
1112                         p.From.Offset = 16
1113                         p.To.Type = obj.TYPE_REG
1114                         p.To.Reg = ppc64.REGTMP
1115
1116                         // Don't add padding for alignment
1117                         // with few loop iterations.
1118                         if ctr > 3 {
1119                                 p = s.Prog(obj.APCALIGN)
1120                                 p.From.Type = obj.TYPE_CONST
1121                                 p.From.Offset = 16
1122                         }
1123
1124                         // generate 2 STXVD2Xs, each storing 16 bytes (32 bytes per iteration)
1125                         // when this is a loop then the top must be saved
1126                         var top *obj.Prog
1127                         // This is the top of loop
1128
1129                         p = s.Prog(ppc64.ASTXVD2X)
1130                         p.From.Type = obj.TYPE_REG
1131                         p.From.Reg = ppc64.REG_VS32
1132                         p.To.Type = obj.TYPE_MEM
1133                         p.To.Reg = v.Args[0].Reg()
1134                         p.To.Index = ppc64.REGZERO
1135                         // Save the top of loop
1136                         if top == nil {
1137                                 top = p
1138                         }
1139                         p = s.Prog(ppc64.ASTXVD2X)
1140                         p.From.Type = obj.TYPE_REG
1141                         p.From.Reg = ppc64.REG_VS32
1142                         p.To.Type = obj.TYPE_MEM
1143                         p.To.Reg = v.Args[0].Reg()
1144                         p.To.Index = ppc64.REGTMP
1145
1146                         // Increment address for the
1147                         // 4 doublewords just zeroed.
1148                         p = s.Prog(ppc64.AADD)
1149                         p.Reg = v.Args[0].Reg()
1150                         p.From.Type = obj.TYPE_CONST
1151                         p.From.Offset = 32
1152                         p.To.Type = obj.TYPE_REG
1153                         p.To.Reg = v.Args[0].Reg()
1154
1155                         // Branch back to top of loop
1156                         // based on CTR
1157                         // BC with BO_BCTR generates bdnz
1158                         p = s.Prog(ppc64.ABC)
1159                         p.From.Type = obj.TYPE_CONST
1160                         p.From.Offset = ppc64.BO_BCTR
1161                         p.Reg = ppc64.REG_R0
1162                         p.To.Type = obj.TYPE_BRANCH
1163                         gc.Patch(p, top)
1164                 }
1165
1166                 // when ctr == 1 the loop was not generated but
1167                 // there are at least 32 bytes to clear, so add
1168                 // that to the remainder to generate the code
1169                 // to clear those doublewords
1170                 if ctr == 1 {
1171                         rem += 32
1172                 }
1173
1174                 // clear the remainder starting at offset zero
1175                 offset := int64(0)
1176
1177                 // first clear as many doublewords as possible
1178                 // then clear remaining sizes as available
1179                 for rem > 0 {
1180                         op, size := ppc64.AMOVB, int64(1)
1181                         switch {
1182                         case rem >= 8:
1183                                 op, size = ppc64.AMOVD, 8
1184                         case rem >= 4:
1185                                 op, size = ppc64.AMOVW, 4
1186                         case rem >= 2:
1187                                 op, size = ppc64.AMOVH, 2
1188                         }
1189                         p := s.Prog(op)
1190                         p.From.Type = obj.TYPE_REG
1191                         p.From.Reg = ppc64.REG_R0
1192                         p.To.Type = obj.TYPE_MEM
1193                         p.To.Reg = v.Args[0].Reg()
1194                         p.To.Offset = offset
1195                         rem -= size
1196                         offset += size
1197                 }
1198
1199         case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1200
1201                 bytesPerLoop := int64(32)
1202                 // This will be used when moving more
1203                 // than 8 bytes.  Moves start with
1204                 // as many 8 byte moves as possible, then
1205                 // 4, 2, or 1 byte(s) as remaining.  This will
1206                 // work and be efficient for power8 or later.
1207                 // If there are 64 or more bytes, then a
1208                 // loop is generated to move 32 bytes and
1209                 // update the src and dst addresses on each
1210                 // iteration. When < 64 bytes, the appropriate
1211                 // number of moves are generated based on the
1212                 // size.
1213                 // When moving >= 64 bytes a loop is used
1214                 //      MOVD len/32,REG_TMP
1215                 //      MOVD REG_TMP,CTR
1216                 //      MOVD $16,REG_TMP
1217                 // top:
1218                 //      LXVD2X (R0)(R21),VS32
1219                 //      LXVD2X (R31)(R21),VS33
1220                 //      ADD $32,R21
1221                 //      STXVD2X VS32,(R0)(R20)
1222                 //      STXVD2X VS33,(R31)(R20)
1223                 //      ADD $32,R20
1224                 //      BC 16,0,top
1225                 // Bytes not moved by this loop are moved
1226                 // with a combination of the following instructions,
1227                 // starting with the largest sizes and generating as
1228                 // many as needed, using the appropriate offset value.
1229                 //      MOVD  n(R21),R31
1230                 //      MOVD  R31,n(R20)
1231                 //      MOVW  n1(R21),R31
1232                 //      MOVW  R31,n1(R20)
1233                 //      MOVH  n2(R21),R31
1234                 //      MOVH  R31,n2(R20)
1235                 //      MOVB  n3(R21),R31
1236                 //      MOVB  R31,n3(R20)
1237
1238                 // Each loop iteration moves 32 bytes
1239                 ctr := v.AuxInt / bytesPerLoop
1240
1241                 // Remainder after the loop
1242                 rem := v.AuxInt % bytesPerLoop
1243
1244                 dstReg := v.Args[0].Reg()
1245                 srcReg := v.Args[1].Reg()
1246
1247                 // The set of registers used here, must match the clobbered reg list
1248                 // in PPC64Ops.go.
1249                 offset := int64(0)
1250
1251                 // top of the loop
1252                 var top *obj.Prog
1253                 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1254                 if ctr > 1 {
1255                         // Set up the CTR
1256                         p := s.Prog(ppc64.AMOVD)
1257                         p.From.Type = obj.TYPE_CONST
1258                         p.From.Offset = ctr
1259                         p.To.Type = obj.TYPE_REG
1260                         p.To.Reg = ppc64.REGTMP
1261
1262                         p = s.Prog(ppc64.AMOVD)
1263                         p.From.Type = obj.TYPE_REG
1264                         p.From.Reg = ppc64.REGTMP
1265                         p.To.Type = obj.TYPE_REG
1266                         p.To.Reg = ppc64.REG_CTR
1267
1268                         // Use REGTMP as index reg
1269                         p = s.Prog(ppc64.AMOVD)
1270                         p.From.Type = obj.TYPE_CONST
1271                         p.From.Offset = 16
1272                         p.To.Type = obj.TYPE_REG
1273                         p.To.Reg = ppc64.REGTMP
1274
1275                         // Don't add padding for
1276                         // alignment with small iteration
1277                         // counts.
1278                         if ctr > 3 {
1279                                 p = s.Prog(obj.APCALIGN)
1280                                 p.From.Type = obj.TYPE_CONST
1281                                 p.From.Offset = 16
1282                         }
1283
1284                         // Generate 16 byte loads and stores.
1285                         // Use temp register for index (16)
1286                         // on the second one.
1287
1288                         p = s.Prog(ppc64.ALXVD2X)
1289                         p.From.Type = obj.TYPE_MEM
1290                         p.From.Reg = srcReg
1291                         p.From.Index = ppc64.REGZERO
1292                         p.To.Type = obj.TYPE_REG
1293                         p.To.Reg = ppc64.REG_VS32
1294                         if top == nil {
1295                                 top = p
1296                         }
1297                         p = s.Prog(ppc64.ALXVD2X)
1298                         p.From.Type = obj.TYPE_MEM
1299                         p.From.Reg = srcReg
1300                         p.From.Index = ppc64.REGTMP
1301                         p.To.Type = obj.TYPE_REG
1302                         p.To.Reg = ppc64.REG_VS33
1303
1304                         // increment the src reg for next iteration
1305                         p = s.Prog(ppc64.AADD)
1306                         p.Reg = srcReg
1307                         p.From.Type = obj.TYPE_CONST
1308                         p.From.Offset = bytesPerLoop
1309                         p.To.Type = obj.TYPE_REG
1310                         p.To.Reg = srcReg
1311
1312                         // generate 16 byte stores
1313                         p = s.Prog(ppc64.ASTXVD2X)
1314                         p.From.Type = obj.TYPE_REG
1315                         p.From.Reg = ppc64.REG_VS32
1316                         p.To.Type = obj.TYPE_MEM
1317                         p.To.Reg = dstReg
1318                         p.To.Index = ppc64.REGZERO
1319
1320                         p = s.Prog(ppc64.ASTXVD2X)
1321                         p.From.Type = obj.TYPE_REG
1322                         p.From.Reg = ppc64.REG_VS33
1323                         p.To.Type = obj.TYPE_MEM
1324                         p.To.Reg = dstReg
1325                         p.To.Index = ppc64.REGTMP
1326
1327                         // increment the dst reg for next iteration
1328                         p = s.Prog(ppc64.AADD)
1329                         p.Reg = dstReg
1330                         p.From.Type = obj.TYPE_CONST
1331                         p.From.Offset = bytesPerLoop
1332                         p.To.Type = obj.TYPE_REG
1333                         p.To.Reg = dstReg
1334
1335                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1336                         // to loop top.
1337                         p = s.Prog(ppc64.ABC)
1338                         p.From.Type = obj.TYPE_CONST
1339                         p.From.Offset = ppc64.BO_BCTR
1340                         p.Reg = ppc64.REG_R0
1341                         p.To.Type = obj.TYPE_BRANCH
1342                         gc.Patch(p, top)
1343
1344                         // srcReg and dstReg were incremented in the loop, so
1345                         // later instructions start with offset 0.
1346                         offset = int64(0)
1347                 }
1348
1349                 // No loop was generated for one iteration, so
1350                 // add 32 bytes to the remainder to move those bytes.
1351                 if ctr == 1 {
1352                         rem += bytesPerLoop
1353                 }
1354
1355                 if rem >= 16 {
1356                         // Generate 16 byte loads and stores.
1357                         // Use temp register for index (value 16)
1358                         // on the second one.
1359                         p := s.Prog(ppc64.ALXVD2X)
1360                         p.From.Type = obj.TYPE_MEM
1361                         p.From.Reg = srcReg
1362                         p.From.Index = ppc64.REGZERO
1363                         p.To.Type = obj.TYPE_REG
1364                         p.To.Reg = ppc64.REG_VS32
1365
1366                         p = s.Prog(ppc64.ASTXVD2X)
1367                         p.From.Type = obj.TYPE_REG
1368                         p.From.Reg = ppc64.REG_VS32
1369                         p.To.Type = obj.TYPE_MEM
1370                         p.To.Reg = dstReg
1371                         p.To.Index = ppc64.REGZERO
1372
1373                         offset = 16
1374                         rem -= 16
1375
1376                         if rem >= 16 {
1377                                 // Use REGTMP as index reg
1378                                 p := s.Prog(ppc64.AMOVD)
1379                                 p.From.Type = obj.TYPE_CONST
1380                                 p.From.Offset = 16
1381                                 p.To.Type = obj.TYPE_REG
1382                                 p.To.Reg = ppc64.REGTMP
1383
1384                                 p = s.Prog(ppc64.ALXVD2X)
1385                                 p.From.Type = obj.TYPE_MEM
1386                                 p.From.Reg = srcReg
1387                                 p.From.Index = ppc64.REGTMP
1388                                 p.To.Type = obj.TYPE_REG
1389                                 p.To.Reg = ppc64.REG_VS32
1390
1391                                 p = s.Prog(ppc64.ASTXVD2X)
1392                                 p.From.Type = obj.TYPE_REG
1393                                 p.From.Reg = ppc64.REG_VS32
1394                                 p.To.Type = obj.TYPE_MEM
1395                                 p.To.Reg = dstReg
1396                                 p.To.Index = ppc64.REGTMP
1397
1398                                 offset = 32
1399                                 rem -= 16
1400                         }
1401                 }
1402
1403                 // Generate all the remaining load and store pairs, starting with
1404                 // as many 8 byte moves as possible, then 4, 2, 1.
1405                 for rem > 0 {
1406                         op, size := ppc64.AMOVB, int64(1)
1407                         switch {
1408                         case rem >= 8:
1409                                 op, size = ppc64.AMOVD, 8
1410                         case rem >= 4:
1411                                 op, size = ppc64.AMOVW, 4
1412                         case rem >= 2:
1413                                 op, size = ppc64.AMOVH, 2
1414                         }
1415                         // Load
1416                         p := s.Prog(op)
1417                         p.To.Type = obj.TYPE_REG
1418                         p.To.Reg = ppc64.REGTMP
1419                         p.From.Type = obj.TYPE_MEM
1420                         p.From.Reg = srcReg
1421                         p.From.Offset = offset
1422
1423                         // Store
1424                         p = s.Prog(op)
1425                         p.From.Type = obj.TYPE_REG
1426                         p.From.Reg = ppc64.REGTMP
1427                         p.To.Type = obj.TYPE_MEM
1428                         p.To.Reg = dstReg
1429                         p.To.Offset = offset
1430                         rem -= size
1431                         offset += size
1432                 }
1433
1434         case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1435                 bytesPerLoop := int64(64)
1436                 // This is used when moving more
1437                 // than 8 bytes on power9.  Moves start with
1438                 // as many 8 byte moves as possible, then
1439                 // 4, 2, or 1 byte(s) as remaining.  This will
1440                 // work and be efficient for power8 or later.
1441                 // If there are 128 or more bytes, then a
1442                 // loop is generated to move 64 bytes and
1443                 // update the src and dst addresses on each
1444                 // iteration. When < 128 bytes, the appropriate
1445                 // number of moves are generated based on the
1446                 // size.
1447                 // When moving >= 128 bytes a loop is used
1448                 //      MOVD len/64,REG_TMP
1449                 //      MOVD REG_TMP,CTR
1450                 // top:
1451                 //      LXV 0(R21),VS32;   LXV 16(R21),VS33
1452                 //      STXV VS32,0(R20);  STXV VS33,16(R20)
1453                 //      LXV 32(R21),VS32;  LXV 48(R21),VS33
1454                 //      STXV VS32,32(R20); STXV VS33,48(R20)
1455                 //      ADD $64,R21
1456                 //      ADD $64,R20
1457                 //      BC 16,0,top
1458                 // Bytes not moved by this loop are moved
1459                 // with a combination of the following instructions,
1460                 // starting with the largest sizes and generating as
1461                 // many as needed, using the appropriate offset value.
1462                 //      MOVD  n(R21),R31
1463                 //      MOVD  R31,n(R20)
1464                 //      MOVW  n1(R21),R31
1465                 //      MOVW  R31,n1(R20)
1466                 //      MOVH  n2(R21),R31
1467                 //      MOVH  R31,n2(R20)
1468                 //      MOVB  n3(R21),R31
1469                 //      MOVB  R31,n3(R20)
1470
1471                 // Each loop iteration moves 64 bytes
1472                 ctr := v.AuxInt / bytesPerLoop
1473
1474                 // Remainder after the loop
1475                 rem := v.AuxInt % bytesPerLoop
1476
1477                 dstReg := v.Args[0].Reg()
1478                 srcReg := v.Args[1].Reg()
1479
1480                 offset := int64(0)
1481
1482                 // top of the loop
1483                 var top *obj.Prog
1484
1485                 // Only generate looping code when loop counter is > 1, i.e. for >= 128 bytes
1486                 if ctr > 1 {
1487                         // Set up the CTR
1488                         p := s.Prog(ppc64.AMOVD)
1489                         p.From.Type = obj.TYPE_CONST
1490                         p.From.Offset = ctr
1491                         p.To.Type = obj.TYPE_REG
1492                         p.To.Reg = ppc64.REGTMP
1493
1494                         p = s.Prog(ppc64.AMOVD)
1495                         p.From.Type = obj.TYPE_REG
1496                         p.From.Reg = ppc64.REGTMP
1497                         p.To.Type = obj.TYPE_REG
1498                         p.To.Reg = ppc64.REG_CTR
1499
1500                         p = s.Prog(obj.APCALIGN)
1501                         p.From.Type = obj.TYPE_CONST
1502                         p.From.Offset = 16
1503
1504                         // Generate 16 byte loads and stores.
1505                         p = s.Prog(ppc64.ALXV)
1506                         p.From.Type = obj.TYPE_MEM
1507                         p.From.Reg = srcReg
1508                         p.From.Offset = offset
1509                         p.To.Type = obj.TYPE_REG
1510                         p.To.Reg = ppc64.REG_VS32
1511                         if top == nil {
1512                                 top = p
1513                         }
1514                         p = s.Prog(ppc64.ALXV)
1515                         p.From.Type = obj.TYPE_MEM
1516                         p.From.Reg = srcReg
1517                         p.From.Offset = offset + 16
1518                         p.To.Type = obj.TYPE_REG
1519                         p.To.Reg = ppc64.REG_VS33
1520
1521                         // generate 16 byte stores
1522                         p = s.Prog(ppc64.ASTXV)
1523                         p.From.Type = obj.TYPE_REG
1524                         p.From.Reg = ppc64.REG_VS32
1525                         p.To.Type = obj.TYPE_MEM
1526                         p.To.Reg = dstReg
1527                         p.To.Offset = offset
1528
1529                         p = s.Prog(ppc64.ASTXV)
1530                         p.From.Type = obj.TYPE_REG
1531                         p.From.Reg = ppc64.REG_VS33
1532                         p.To.Type = obj.TYPE_MEM
1533                         p.To.Reg = dstReg
1534                         p.To.Offset = offset + 16
1535
1536                         // Generate 16 byte loads and stores.
1537                         p = s.Prog(ppc64.ALXV)
1538                         p.From.Type = obj.TYPE_MEM
1539                         p.From.Reg = srcReg
1540                         p.From.Offset = offset + 32
1541                         p.To.Type = obj.TYPE_REG
1542                         p.To.Reg = ppc64.REG_VS32
1543
1544                         p = s.Prog(ppc64.ALXV)
1545                         p.From.Type = obj.TYPE_MEM
1546                         p.From.Reg = srcReg
1547                         p.From.Offset = offset + 48
1548                         p.To.Type = obj.TYPE_REG
1549                         p.To.Reg = ppc64.REG_VS33
1550
1551                         // generate 16 byte stores
1552                         p = s.Prog(ppc64.ASTXV)
1553                         p.From.Type = obj.TYPE_REG
1554                         p.From.Reg = ppc64.REG_VS32
1555                         p.To.Type = obj.TYPE_MEM
1556                         p.To.Reg = dstReg
1557                         p.To.Offset = offset + 32
1558
1559                         p = s.Prog(ppc64.ASTXV)
1560                         p.From.Type = obj.TYPE_REG
1561                         p.From.Reg = ppc64.REG_VS33
1562                         p.To.Type = obj.TYPE_MEM
1563                         p.To.Reg = dstReg
1564                         p.To.Offset = offset + 48
1565
1566                         // increment the src reg for next iteration
1567                         p = s.Prog(ppc64.AADD)
1568                         p.Reg = srcReg
1569                         p.From.Type = obj.TYPE_CONST
1570                         p.From.Offset = bytesPerLoop
1571                         p.To.Type = obj.TYPE_REG
1572                         p.To.Reg = srcReg
1573
1574                         // increment the dst reg for next iteration
1575                         p = s.Prog(ppc64.AADD)
1576                         p.Reg = dstReg
1577                         p.From.Type = obj.TYPE_CONST
1578                         p.From.Offset = bytesPerLoop
1579                         p.To.Type = obj.TYPE_REG
1580                         p.To.Reg = dstReg
1581
1582                         // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1583                         // to loop top.
1584                         p = s.Prog(ppc64.ABC)
1585                         p.From.Type = obj.TYPE_CONST
1586                         p.From.Offset = ppc64.BO_BCTR
1587                         p.Reg = ppc64.REG_R0
1588                         p.To.Type = obj.TYPE_BRANCH
1589                         gc.Patch(p, top)
1590
1591                         // srcReg and dstReg were incremented in the loop, so
1592                         // later instructions start with offset 0.
1593                         offset = int64(0)
1594                 }
1595
1596                 // No loop was generated for one iteration, so
1597                 // add 64 bytes to the remainder to move those bytes.
1598                 if ctr == 1 {
1599                         rem += bytesPerLoop
1600                 }
1601                 if rem >= 32 {
1602                         p := s.Prog(ppc64.ALXV)
1603                         p.From.Type = obj.TYPE_MEM
1604                         p.From.Reg = srcReg
1605                         p.To.Type = obj.TYPE_REG
1606                         p.To.Reg = ppc64.REG_VS32
1607
1608                         p = s.Prog(ppc64.ALXV)
1609                         p.From.Type = obj.TYPE_MEM
1610                         p.From.Reg = srcReg
1611                         p.From.Offset = 16
1612                         p.To.Type = obj.TYPE_REG
1613                         p.To.Reg = ppc64.REG_VS33
1614
1615                         p = s.Prog(ppc64.ASTXV)
1616                         p.From.Type = obj.TYPE_REG
1617                         p.From.Reg = ppc64.REG_VS32
1618                         p.To.Type = obj.TYPE_MEM
1619                         p.To.Reg = dstReg
1620
1621                         p = s.Prog(ppc64.ASTXV)
1622                         p.From.Type = obj.TYPE_REG
1623                         p.From.Reg = ppc64.REG_VS33
1624                         p.To.Type = obj.TYPE_MEM
1625                         p.To.Reg = dstReg
1626                         p.To.Offset = 16
1627
1628                         offset = 32
1629                         rem -= 32
1630                 }
1631
1632                 if rem >= 16 {
1633                         // Generate 16 byte loads and stores.
1634                         p := s.Prog(ppc64.ALXV)
1635                         p.From.Type = obj.TYPE_MEM
1636                         p.From.Reg = srcReg
1637                         p.From.Offset = offset
1638                         p.To.Type = obj.TYPE_REG
1639                         p.To.Reg = ppc64.REG_VS32
1640
1641                         p = s.Prog(ppc64.ASTXV)
1642                         p.From.Type = obj.TYPE_REG
1643                         p.From.Reg = ppc64.REG_VS32
1644                         p.To.Type = obj.TYPE_MEM
1645                         p.To.Reg = dstReg
1646                         p.To.Offset = offset
1647
1648                         offset += 16
1649                         rem -= 16
1650
1651                         if rem >= 16 {
1652                                 p := s.Prog(ppc64.ALXV)
1653                                 p.From.Type = obj.TYPE_MEM
1654                                 p.From.Reg = srcReg
1655                                 p.From.Offset = offset
1656                                 p.To.Type = obj.TYPE_REG
1657                                 p.To.Reg = ppc64.REG_VS32
1658
1659                                 p = s.Prog(ppc64.ASTXV)
1660                                 p.From.Type = obj.TYPE_REG
1661                                 p.From.Reg = ppc64.REG_VS32
1662                                 p.To.Type = obj.TYPE_MEM
1663                                 p.To.Reg = dstReg
1664                                 p.To.Offset = offset
1665
1666                                 offset += 16
1667                                 rem -= 16
1668                         }
1669                 }
1670                 // Generate all the remaining load and store pairs, starting with
1671                 // as many 8 byte moves as possible, then 4, 2, 1.
1672                 for rem > 0 {
1673                         op, size := ppc64.AMOVB, int64(1)
1674                         switch {
1675                         case rem >= 8:
1676                                 op, size = ppc64.AMOVD, 8
1677                         case rem >= 4:
1678                                 op, size = ppc64.AMOVW, 4
1679                         case rem >= 2:
1680                                 op, size = ppc64.AMOVH, 2
1681                         }
1682                         // Load
1683                         p := s.Prog(op)
1684                         p.To.Type = obj.TYPE_REG
1685                         p.To.Reg = ppc64.REGTMP
1686                         p.From.Type = obj.TYPE_MEM
1687                         p.From.Reg = srcReg
1688                         p.From.Offset = offset
1689
1690                         // Store
1691                         p = s.Prog(op)
1692                         p.From.Type = obj.TYPE_REG
1693                         p.From.Reg = ppc64.REGTMP
1694                         p.To.Type = obj.TYPE_MEM
1695                         p.To.Reg = dstReg
1696                         p.To.Offset = offset
1697                         rem -= size
1698                         offset += size
1699                 }
1700
1701         case ssa.OpPPC64CALLstatic:
1702                 s.Call(v)
1703
1704         case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1705                 p := s.Prog(ppc64.AMOVD)
1706                 p.From.Type = obj.TYPE_REG
1707                 p.From.Reg = v.Args[0].Reg()
1708                 p.To.Type = obj.TYPE_REG
1709                 p.To.Reg = ppc64.REG_LR
1710
1711                 if v.Args[0].Reg() != ppc64.REG_R12 {
1712                         v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1713                 }
1714
1715                 pp := s.Call(v)
1716                 pp.To.Reg = ppc64.REG_LR
1717
1718                 if gc.Ctxt.Flag_shared {
1719                         // When compiling Go into PIC, the function we just
1720                         // called via pointer might have been implemented in
1721                         // a separate module and so overwritten the TOC
1722                         // pointer in R2; reload it.
1723                         q := s.Prog(ppc64.AMOVD)
1724                         q.From.Type = obj.TYPE_MEM
1725                         q.From.Offset = 24
1726                         q.From.Reg = ppc64.REGSP
1727                         q.To.Type = obj.TYPE_REG
1728                         q.To.Reg = ppc64.REG_R2
1729                 }
1730
1731         case ssa.OpPPC64LoweredWB:
1732                 p := s.Prog(obj.ACALL)
1733                 p.To.Type = obj.TYPE_MEM
1734                 p.To.Name = obj.NAME_EXTERN
1735                 p.To.Sym = v.Aux.(*obj.LSym)
1736
1737         case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1738                 p := s.Prog(obj.ACALL)
1739                 p.To.Type = obj.TYPE_MEM
1740                 p.To.Name = obj.NAME_EXTERN
1741                 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1742                 s.UseArgs(16) // space used in callee args area by assembly stubs
1743
1744         case ssa.OpPPC64LoweredNilCheck:
1745                 if objabi.GOOS == "aix" {
1746                         // CMP Rarg0, R0
1747                         // BNE 2(PC)
1748                         // STW R0, 0(R0)
1749                         // NOP (so the BNE has somewhere to land)
1750
1751                         // CMP Rarg0, R0
1752                         p := s.Prog(ppc64.ACMP)
1753                         p.From.Type = obj.TYPE_REG
1754                         p.From.Reg = v.Args[0].Reg()
1755                         p.To.Type = obj.TYPE_REG
1756                         p.To.Reg = ppc64.REG_R0
1757
1758                         // BNE 2(PC)
1759                         p2 := s.Prog(ppc64.ABNE)
1760                         p2.To.Type = obj.TYPE_BRANCH
1761
1762                         // STW R0, 0(R0)
1763                         // Write at 0 is forbidden and will trigger a SIGSEGV
1764                         p = s.Prog(ppc64.AMOVW)
1765                         p.From.Type = obj.TYPE_REG
1766                         p.From.Reg = ppc64.REG_R0
1767                         p.To.Type = obj.TYPE_MEM
1768                         p.To.Reg = ppc64.REG_R0
1769
1770                         // NOP (so the BNE has somewhere to land)
1771                         nop := s.Prog(obj.ANOP)
1772                         gc.Patch(p2, nop)
1773
1774                 } else {
1775                         // Issue a load which will fault if arg is nil.
1776                         p := s.Prog(ppc64.AMOVBZ)
1777                         p.From.Type = obj.TYPE_MEM
1778                         p.From.Reg = v.Args[0].Reg()
1779                         gc.AddAux(&p.From, v)
1780                         p.To.Type = obj.TYPE_REG
1781                         p.To.Reg = ppc64.REGTMP
1782                 }
1783                 if logopt.Enabled() {
1784                         logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1785                 }
1786                 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1787                         gc.Warnl(v.Pos, "generated nil check")
1788                 }
1789
1790         // These should be resolved by rules and not make it here.
1791         case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1792                 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1793                 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1794                 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1795         case ssa.OpPPC64InvertFlags:
1796                 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1797         case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT, ssa.OpPPC64FlagCarrySet, ssa.OpPPC64FlagCarryClear:
1798                 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1799         case ssa.OpClobber:
1800                 // TODO: implement for clobberdead experiment. Nop is ok for now.
1801         default:
1802                 v.Fatalf("genValue not implemented: %s", v.LongString())
1803         }
1804 }
1805
1806 var blockJump = [...]struct {
1807         asm, invasm     obj.As
1808         asmeq, invasmun bool
1809 }{
1810         ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1811         ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1812
1813         ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1814         ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1815         ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1816         ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1817
1818         // TODO: need to work FP comparisons into block jumps
1819         ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1820         ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1821         ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1822         ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1823 }
1824
1825 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1826         switch b.Kind {
1827         case ssa.BlockDefer:
1828                 // defer returns in R3:
1829                 // 0 if we should continue executing
1830                 // 1 if we should jump to deferreturn call
1831                 p := s.Prog(ppc64.ACMP)
1832                 p.From.Type = obj.TYPE_REG
1833                 p.From.Reg = ppc64.REG_R3
1834                 p.To.Type = obj.TYPE_REG
1835                 p.To.Reg = ppc64.REG_R0
1836
1837                 p = s.Prog(ppc64.ABNE)
1838                 p.To.Type = obj.TYPE_BRANCH
1839                 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1840                 if b.Succs[0].Block() != next {
1841                         p := s.Prog(obj.AJMP)
1842                         p.To.Type = obj.TYPE_BRANCH
1843                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1844                 }
1845
1846         case ssa.BlockPlain:
1847                 if b.Succs[0].Block() != next {
1848                         p := s.Prog(obj.AJMP)
1849                         p.To.Type = obj.TYPE_BRANCH
1850                         s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1851                 }
1852         case ssa.BlockExit:
1853         case ssa.BlockRet:
1854                 s.Prog(obj.ARET)
1855         case ssa.BlockRetJmp:
1856                 p := s.Prog(obj.AJMP)
1857                 p.To.Type = obj.TYPE_MEM
1858                 p.To.Name = obj.NAME_EXTERN
1859                 p.To.Sym = b.Aux.(*obj.LSym)
1860
1861         case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1862                 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1863                 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1864                 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1865                 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1866                 jmp := blockJump[b.Kind]
1867                 switch next {
1868                 case b.Succs[0].Block():
1869                         s.Br(jmp.invasm, b.Succs[1].Block())
1870                         if jmp.invasmun {
1871                                 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1872                                 s.Br(ppc64.ABVS, b.Succs[1].Block())
1873                         }
1874                 case b.Succs[1].Block():
1875                         s.Br(jmp.asm, b.Succs[0].Block())
1876                         if jmp.asmeq {
1877                                 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1878                         }
1879                 default:
1880                         if b.Likely != ssa.BranchUnlikely {
1881                                 s.Br(jmp.asm, b.Succs[0].Block())
1882                                 if jmp.asmeq {
1883                                         s.Br(ppc64.ABEQ, b.Succs[0].Block())
1884                                 }
1885                                 s.Br(obj.AJMP, b.Succs[1].Block())
1886                         } else {
1887                                 s.Br(jmp.invasm, b.Succs[1].Block())
1888                                 if jmp.invasmun {
1889                                         // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1890                                         s.Br(ppc64.ABVS, b.Succs[1].Block())
1891                                 }
1892                                 s.Br(obj.AJMP, b.Succs[0].Block())
1893                         }
1894                 }
1895         default:
1896                 b.Fatalf("branch not implemented: %s", b.LongString())
1897         }
1898 }