1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/gc"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/ssa"
11 "cmd/compile/internal/types"
13 "cmd/internal/obj/ppc64"
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21 // flive := b.FlagsLiveAtEnd
22 // if b.Control != nil && b.Control.Type.IsFlags() {
25 // for i := len(b.Values) - 1; i >= 0; i-- {
27 // if flive && (v.Op == ssa.OpPPC64MOVDconst) {
28 // // The "mark" is any non-nil Aux value.
31 // if v.Type.IsFlags() {
34 // for _, a := range v.Args {
35 // if a.Type.IsFlags() {
42 // loadByType returns the load instruction of the given type.
43 func loadByType(t *types.Type) obj.As {
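// A sketch of the selection (assuming the usual PPC64 mnemonics): float
// types map to FMOVS/FMOVD by size; integer types map to MOVB/MOVH/MOVW
// (signed), MOVBZ/MOVHZ/MOVWZ (unsigned), or MOVD for 8 bytes. Anything
// else falls through to the panic below.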
75 panic("bad load type")
78 // storeByType returns the store instruction of the given type.
79 func storeByType(t *types.Type) obj.As {
99 panic("bad store type")
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
125 case ssa.OpPPC64LoweredMuluhilo:
126 // MULHDU Rarg1, Rarg0, Reg0
127 // MULLD Rarg1, Rarg0, Reg1
128 r0 := v.Args[0].Reg()
129 r1 := v.Args[1].Reg()
130 p := s.Prog(ppc64.AMULHDU)
131 p.From.Type = obj.TYPE_REG
134 p.To.Type = obj.TYPE_REG
136 p1 := s.Prog(ppc64.AMULLD)
137 p1.From.Type = obj.TYPE_REG
140 p1.To.Type = obj.TYPE_REG
143 case ssa.OpPPC64LoweredAdd64Carry:
144 // ADDC Rarg2, -1, Rtmp
145 // ADDE Rarg1, Rarg0, Reg0
147 r0 := v.Args[0].Reg()
148 r1 := v.Args[1].Reg()
149 r2 := v.Args[2].Reg()
150 p := s.Prog(ppc64.AADDC)
151 p.From.Type = obj.TYPE_CONST
154 p.To.Type = obj.TYPE_REG
155 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(ppc64.AADDE)
157 p1.From.Type = obj.TYPE_REG
160 p1.To.Type = obj.TYPE_REG
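// ADDZE adds the carry from the ADDE above to REGZERO, leaving the
// carry-out (0 or 1) as the second result.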
162 p2 := s.Prog(ppc64.AADDZE)
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGZERO
165 p2.To.Type = obj.TYPE_REG
168 case ssa.OpPPC64LoweredAtomicAnd8,
169 ssa.OpPPC64LoweredAtomicOr8:
171 // LBAR (Rarg0), Rtmp
172 // AND/OR Rarg1, Rtmp
173 // STBCCC Rtmp, (Rarg0)
175 r0 := v.Args[0].Reg()
176 r1 := v.Args[1].Reg()
177 // LWSYNC - Assuming shared data not write-through-required nor
178 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179 plwsync := s.Prog(ppc64.ALWSYNC)
180 plwsync.To.Type = obj.TYPE_NONE
181 p := s.Prog(ppc64.ALBAR)
182 p.From.Type = obj.TYPE_MEM
184 p.To.Type = obj.TYPE_REG
185 p.To.Reg = ppc64.REGTMP
186 p1 := s.Prog(v.Op.Asm())
187 p1.From.Type = obj.TYPE_REG
189 p1.To.Type = obj.TYPE_REG
190 p1.To.Reg = ppc64.REGTMP
191 p2 := s.Prog(ppc64.ASTBCCC)
192 p2.From.Type = obj.TYPE_REG
193 p2.From.Reg = ppc64.REGTMP
194 p2.To.Type = obj.TYPE_MEM
196 p2.RegTo2 = ppc64.REGTMP
197 p3 := s.Prog(ppc64.ABNE)
198 p3.To.Type = obj.TYPE_BRANCH
201 case ssa.OpPPC64LoweredAtomicAdd32,
202 ssa.OpPPC64LoweredAtomicAdd64:
204 // LDAR/LWAR (Rarg0), Rout
206 // STDCCC/STWCCC Rout, (Rarg0)
208 // MOVW Rout,Rout (if Add32)
211 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
215 r0 := v.Args[0].Reg()
216 r1 := v.Args[1].Reg()
218 // LWSYNC - Assuming shared data not write-through-required nor
219 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220 plwsync := s.Prog(ppc64.ALWSYNC)
221 plwsync.To.Type = obj.TYPE_NONE
224 p.From.Type = obj.TYPE_MEM
226 p.To.Type = obj.TYPE_REG
229 p1 := s.Prog(ppc64.AADD)
230 p1.From.Type = obj.TYPE_REG
233 p1.To.Type = obj.TYPE_REG
236 p3.From.Type = obj.TYPE_REG
238 p3.To.Type = obj.TYPE_MEM
241 p4 := s.Prog(ppc64.ABNE)
242 p4.To.Type = obj.TYPE_BRANCH
245 // Ensure a 32 bit result
246 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247 p5 := s.Prog(ppc64.AMOVWZ)
248 p5.To.Type = obj.TYPE_REG
250 p5.From.Type = obj.TYPE_REG
254 case ssa.OpPPC64LoweredAtomicExchange32,
255 ssa.OpPPC64LoweredAtomicExchange64:
257 // LDAR/LWAR (Rarg0), Rout
258 // STDCCC/STWCCC Rout, (Rarg0)
263 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
267 r0 := v.Args[0].Reg()
268 r1 := v.Args[1].Reg()
270 // LWSYNC - Assuming shared data not write-through-required nor
271 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272 plwsync := s.Prog(ppc64.ALWSYNC)
273 plwsync.To.Type = obj.TYPE_NONE
276 p.From.Type = obj.TYPE_MEM
278 p.To.Type = obj.TYPE_REG
282 p1.From.Type = obj.TYPE_REG
284 p1.To.Type = obj.TYPE_MEM
287 p2 := s.Prog(ppc64.ABNE)
288 p2.To.Type = obj.TYPE_BRANCH
291 pisync := s.Prog(ppc64.AISYNC)
292 pisync.To.Type = obj.TYPE_NONE
294 case ssa.OpPPC64LoweredAtomicLoad8,
295 ssa.OpPPC64LoweredAtomicLoad32,
296 ssa.OpPPC64LoweredAtomicLoad64,
297 ssa.OpPPC64LoweredAtomicLoadPtr:
299 // MOVB/MOVD/MOVW (Rarg0), Rout
306 case ssa.OpPPC64LoweredAtomicLoad8:
308 case ssa.OpPPC64LoweredAtomicLoad32:
312 arg0 := v.Args[0].Reg()
314 // SYNC when AuxInt == 1; otherwise, load-acquire
316 psync := s.Prog(ppc64.ASYNC)
317 psync.To.Type = obj.TYPE_NONE
321 p.From.Type = obj.TYPE_MEM
323 p.To.Type = obj.TYPE_REG
327 p1.From.Type = obj.TYPE_REG
329 p1.To.Type = obj.TYPE_REG
332 p2 := s.Prog(ppc64.ABNE)
333 p2.To.Type = obj.TYPE_BRANCH
335 pisync := s.Prog(ppc64.AISYNC)
336 pisync.To.Type = obj.TYPE_NONE
339 case ssa.OpPPC64LoweredAtomicStore8,
340 ssa.OpPPC64LoweredAtomicStore32,
341 ssa.OpPPC64LoweredAtomicStore64:
343 // MOVB/MOVW/MOVD arg1,(arg0)
346 case ssa.OpPPC64LoweredAtomicStore8:
348 case ssa.OpPPC64LoweredAtomicStore32:
351 arg0 := v.Args[0].Reg()
352 arg1 := v.Args[1].Reg()
353 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
355 syncOp := ppc64.ASYNC
357 syncOp = ppc64.ALWSYNC
359 psync := s.Prog(syncOp)
360 psync.To.Type = obj.TYPE_NONE
363 p.To.Type = obj.TYPE_MEM
365 p.From.Type = obj.TYPE_REG
368 case ssa.OpPPC64LoweredAtomicCas64,
369 ssa.OpPPC64LoweredAtomicCas32:
372 // LDAR (Rarg0), MutexHint, Rtmp
375 // STDCCC Rarg2, (Rarg0)
377 // LWSYNC // Only for sequential consistency; not required in CasRel.
386 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
391 r0 := v.Args[0].Reg()
392 r1 := v.Args[1].Reg()
393 r2 := v.Args[2].Reg()
395 // LWSYNC - Assuming shared data not write-through-required nor
396 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397 plwsync1 := s.Prog(ppc64.ALWSYNC)
398 plwsync1.To.Type = obj.TYPE_NONE
401 p.From.Type = obj.TYPE_MEM
403 p.To.Type = obj.TYPE_REG
404 p.To.Reg = ppc64.REGTMP
405 // If it is a Compare-and-Swap-Release operation, set the EH field with the release hint.
408 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
412 p1.From.Type = obj.TYPE_REG
414 p1.To.Reg = ppc64.REGTMP
415 p1.To.Type = obj.TYPE_REG
417 p2 := s.Prog(ppc64.ABNE)
418 p2.To.Type = obj.TYPE_BRANCH
421 p3.From.Type = obj.TYPE_REG
423 p3.To.Type = obj.TYPE_MEM
426 p4 := s.Prog(ppc64.ABNE)
427 p4.To.Type = obj.TYPE_BRANCH
429 // LWSYNC - Assuming shared data not write-through-required nor
430 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431 // If the operation is a CAS-Release, then synchronization is not necessary.
433 plwsync2 := s.Prog(ppc64.ALWSYNC)
434 plwsync2.To.Type = obj.TYPE_NONE
437 p5 := s.Prog(ppc64.AMOVD)
438 p5.From.Type = obj.TYPE_CONST
440 p5.To.Type = obj.TYPE_REG
443 p6 := s.Prog(obj.AJMP)
444 p6.To.Type = obj.TYPE_BRANCH
446 p7 := s.Prog(ppc64.AMOVD)
447 p7.From.Type = obj.TYPE_CONST
449 p7.To.Type = obj.TYPE_REG
453 p8 := s.Prog(obj.ANOP)
456 case ssa.OpPPC64LoweredGetClosurePtr:
457 // Closure pointer is R11 (already)
458 gc.CheckLoweredGetClosurePtr(v)
460 case ssa.OpPPC64LoweredGetCallerSP:
461 // caller's SP is FixedFrameSize below the address of the first arg
462 p := s.Prog(ppc64.AMOVD)
463 p.From.Type = obj.TYPE_ADDR
464 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465 p.From.Name = obj.NAME_PARAM
466 p.To.Type = obj.TYPE_REG
469 case ssa.OpPPC64LoweredGetCallerPC:
470 p := s.Prog(obj.AGETCALLERPC)
471 p.To.Type = obj.TYPE_REG
474 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475 // input is already rounded
478 loadOp := loadByType(v.Type)
480 gc.AddrAuto(&p.From, v.Args[0])
481 p.To.Type = obj.TYPE_REG
485 storeOp := storeByType(v.Type)
487 p.From.Type = obj.TYPE_REG
488 p.From.Reg = v.Args[0].Reg()
489 gc.AddrAuto(&p.To, v)
491 case ssa.OpPPC64DIVD:
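// Signed division special-cases a divisor of -1 so that MinInt64 / -1
// yields the wrapped result Go requires (-x). The emitted sequence is
// roughly:
//	CMP   arg1, $-1
//	BEQ   ahead
//	DIVD  arg1, arg0, out
//	JMP   over
// ahead:
//	NEG   arg0, out
// over: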
501 r0 := v.Args[0].Reg()
502 r1 := v.Args[1].Reg()
504 p := s.Prog(ppc64.ACMP)
505 p.From.Type = obj.TYPE_REG
507 p.To.Type = obj.TYPE_CONST
510 pbahead := s.Prog(ppc64.ABEQ)
511 pbahead.To.Type = obj.TYPE_BRANCH
513 p = s.Prog(v.Op.Asm())
514 p.From.Type = obj.TYPE_REG
517 p.To.Type = obj.TYPE_REG
520 pbover := s.Prog(obj.AJMP)
521 pbover.To.Type = obj.TYPE_BRANCH
523 p = s.Prog(ppc64.ANEG)
524 p.To.Type = obj.TYPE_REG
526 p.From.Type = obj.TYPE_REG
533 case ssa.OpPPC64DIVW:
534 // word-width version of above
536 r0 := v.Args[0].Reg()
537 r1 := v.Args[1].Reg()
539 p := s.Prog(ppc64.ACMPW)
540 p.From.Type = obj.TYPE_REG
542 p.To.Type = obj.TYPE_CONST
545 pbahead := s.Prog(ppc64.ABEQ)
546 pbahead.To.Type = obj.TYPE_BRANCH
548 p = s.Prog(v.Op.Asm())
549 p.From.Type = obj.TYPE_REG
552 p.To.Type = obj.TYPE_REG
555 pbover := s.Prog(obj.AJMP)
556 pbover.To.Type = obj.TYPE_BRANCH
558 p = s.Prog(ppc64.ANEG)
559 p.To.Type = obj.TYPE_REG
561 p.From.Type = obj.TYPE_REG
568 case ssa.OpPPC64CLRLSLWI:
570 r1 := v.Args[0].Reg()
572 p := s.Prog(v.Op.Asm())
573 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
574 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
575 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
577 p.To.Type = obj.TYPE_REG
580 case ssa.OpPPC64CLRLSLDI:
582 r1 := v.Args[0].Reg()
584 p := s.Prog(v.Op.Asm())
585 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
586 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
587 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)})
589 p.To.Type = obj.TYPE_REG
592 // Mask has been set as sh
593 case ssa.OpPPC64RLDICL:
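// rldicl ra,rs,sh,mb: rotate the doubleword left by sh and clear the
// high-order bits through mb. From carries the shift amount and the
// third operand carries the mask begin value.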
595 r1 := v.Args[0].Reg()
597 p := s.Prog(v.Op.Asm())
598 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}
599 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)})
601 p.To.Type = obj.TYPE_REG
604 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
605 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
606 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
607 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
608 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
609 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
610 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
611 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
613 r1 := v.Args[0].Reg()
614 r2 := v.Args[1].Reg()
615 p := s.Prog(v.Op.Asm())
616 p.From.Type = obj.TYPE_REG
619 p.To.Type = obj.TYPE_REG
622 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
623 r1 := v.Args[0].Reg()
624 r2 := v.Args[1].Reg()
625 p := s.Prog(v.Op.Asm())
626 p.From.Type = obj.TYPE_REG
629 p.To.Type = obj.TYPE_REG
630 p.To.Reg = ppc64.REGTMP // result is not needed
632 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
633 p := s.Prog(v.Op.Asm())
634 p.From.Type = obj.TYPE_CONST
635 p.From.Offset = v.AuxInt
636 p.Reg = v.Args[0].Reg()
637 p.To.Type = obj.TYPE_REG
640 case ssa.OpPPC64MADDLD:
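// maddld (ISA 3.0): out = low 64 bits of arg0*arg1 + arg2.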
642 r1 := v.Args[0].Reg()
643 r2 := v.Args[1].Reg()
644 r3 := v.Args[2].Reg()
646 p := s.Prog(v.Op.Asm())
647 p.From.Type = obj.TYPE_REG
650 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
651 p.To.Type = obj.TYPE_REG
654 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
656 r1 := v.Args[0].Reg()
657 r2 := v.Args[1].Reg()
658 r3 := v.Args[2].Reg()
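// Fused multiply-add/subtract: out = arg0*arg1 ± arg2.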
660 p := s.Prog(v.Op.Asm())
661 p.From.Type = obj.TYPE_REG
664 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
665 p.To.Type = obj.TYPE_REG
668 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
669 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
670 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
671 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
673 p := s.Prog(v.Op.Asm())
674 p.To.Type = obj.TYPE_REG
676 p.From.Type = obj.TYPE_REG
677 p.From.Reg = v.Args[0].Reg()
679 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
680 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
681 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
682 p := s.Prog(v.Op.Asm())
683 p.Reg = v.Args[0].Reg()
684 p.From.Type = obj.TYPE_CONST
685 p.From.Offset = v.AuxInt
686 p.To.Type = obj.TYPE_REG
689 case ssa.OpPPC64SUBFCconst:
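// Subtract-from immediate: out = AuxInt - arg0.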
690 p := s.Prog(v.Op.Asm())
691 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
692 p.From.Type = obj.TYPE_REG
693 p.From.Reg = v.Args[0].Reg()
694 p.To.Type = obj.TYPE_REG
697 case ssa.OpPPC64ANDCCconst:
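// AND with an immediate, setting the condition register; the integer
// result is written to REGTMP below and discarded.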
698 p := s.Prog(v.Op.Asm())
699 p.Reg = v.Args[0].Reg()
700 p.From.Type = obj.TYPE_CONST
701 p.From.Offset = v.AuxInt
702 p.To.Type = obj.TYPE_REG
703 p.To.Reg = ppc64.REGTMP // discard result
705 case ssa.OpPPC64MOVDaddr:
706 switch v.Aux.(type) {
708 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
710 // If aux offset and aux int are both 0, and the same
711 // input and output regs are used, no instruction
712 // needs to be generated, since it would just be addi rx, rx, 0.
714 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
715 p := s.Prog(ppc64.AMOVD)
716 p.From.Type = obj.TYPE_ADDR
717 p.From.Reg = v.Args[0].Reg()
718 p.From.Offset = v.AuxInt
719 p.To.Type = obj.TYPE_REG
723 case *obj.LSym, *gc.Node:
724 p := s.Prog(ppc64.AMOVD)
725 p.From.Type = obj.TYPE_ADDR
726 p.From.Reg = v.Args[0].Reg()
727 p.To.Type = obj.TYPE_REG
729 gc.AddAux(&p.From, v)
733 case ssa.OpPPC64MOVDconst:
734 p := s.Prog(v.Op.Asm())
735 p.From.Type = obj.TYPE_CONST
736 p.From.Offset = v.AuxInt
737 p.To.Type = obj.TYPE_REG
740 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
741 p := s.Prog(v.Op.Asm())
742 p.From.Type = obj.TYPE_FCONST
743 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
744 p.To.Type = obj.TYPE_REG
747 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
748 p := s.Prog(v.Op.Asm())
749 p.From.Type = obj.TYPE_REG
750 p.From.Reg = v.Args[0].Reg()
751 p.To.Type = obj.TYPE_REG
752 p.To.Reg = v.Args[1].Reg()
754 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
755 p := s.Prog(v.Op.Asm())
756 p.From.Type = obj.TYPE_REG
757 p.From.Reg = v.Args[0].Reg()
758 p.To.Type = obj.TYPE_CONST
759 p.To.Offset = v.AuxInt
761 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
762 // Shift in register to required size
763 p := s.Prog(v.Op.Asm())
764 p.From.Type = obj.TYPE_REG
765 p.From.Reg = v.Args[0].Reg()
767 p.To.Type = obj.TYPE_REG
769 case ssa.OpPPC64MOVDload:
771 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
772 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
773 // the offset is not known until link time. If the load of a go.string uses relocation for the
774 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
775 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
776 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
777 // go.string types because other types will have proper alignment.
780 switch n := v.Aux.(type) {
782 gostring = strings.HasPrefix(n.Name, "go.string.")
785 // Generate full addr of the go.string const
787 p := s.Prog(ppc64.AMOVD)
788 p.From.Type = obj.TYPE_ADDR
789 p.From.Reg = v.Args[0].Reg()
790 gc.AddAux(&p.From, v)
791 p.To.Type = obj.TYPE_REG
793 // Load go.string using 0 offset
794 p = s.Prog(v.Op.Asm())
795 p.From.Type = obj.TYPE_MEM
797 p.To.Type = obj.TYPE_REG
801 // Not a go.string, generate a normal load
804 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
805 p := s.Prog(v.Op.Asm())
806 p.From.Type = obj.TYPE_MEM
807 p.From.Reg = v.Args[0].Reg()
808 gc.AddAux(&p.From, v)
809 p.To.Type = obj.TYPE_REG
812 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
813 p := s.Prog(v.Op.Asm())
814 p.From.Type = obj.TYPE_MEM
815 p.From.Reg = v.Args[0].Reg()
816 p.To.Type = obj.TYPE_REG
819 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
820 p := s.Prog(v.Op.Asm())
821 p.To.Type = obj.TYPE_MEM
822 p.To.Reg = v.Args[0].Reg()
823 p.From.Type = obj.TYPE_REG
824 p.From.Reg = v.Args[1].Reg()
826 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
827 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
828 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
829 p := s.Prog(v.Op.Asm())
830 p.From.Type = obj.TYPE_MEM
831 p.From.Reg = v.Args[0].Reg()
832 p.From.Index = v.Args[1].Reg()
833 p.To.Type = obj.TYPE_REG
836 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
837 p := s.Prog(v.Op.Asm())
838 p.From.Type = obj.TYPE_REG
839 p.From.Reg = ppc64.REGZERO
840 p.To.Type = obj.TYPE_MEM
841 p.To.Reg = v.Args[0].Reg()
844 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
845 p := s.Prog(v.Op.Asm())
846 p.From.Type = obj.TYPE_REG
847 p.From.Reg = v.Args[1].Reg()
848 p.To.Type = obj.TYPE_MEM
849 p.To.Reg = v.Args[0].Reg()
852 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
853 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
854 ssa.OpPPC64MOVHBRstoreidx:
855 p := s.Prog(v.Op.Asm())
856 p.From.Type = obj.TYPE_REG
857 p.From.Reg = v.Args[2].Reg()
858 p.To.Index = v.Args[1].Reg()
859 p.To.Type = obj.TYPE_MEM
860 p.To.Reg = v.Args[0].Reg()
862 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
864 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
865 // ISEL only accepts 0, 1, 2 condition values but the others can be
866 // achieved by swapping operand order.
867 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
868 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
869 // ISELB is used when a boolean result is needed, returning 0 or 1
870 p := s.Prog(ppc64.AISEL)
871 p.To.Type = obj.TYPE_REG
873 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
874 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
875 if v.Op == ssa.OpPPC64ISEL {
876 r.Reg = v.Args[1].Reg()
878 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
881 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
883 p.Reg = v.Args[0].Reg()
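// The low two bits of AuxInt select which CR bit ISEL tests (LT, GT, or EQ).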
886 p.From.Type = obj.TYPE_CONST
887 p.From.Offset = v.AuxInt & 3
889 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
890 // The LoweredQuad code generation
891 // generates STXV instructions on
892 // power9. The Short variation is used
893 // if no loop is generated.
895 // sizes >= 64 generate a loop as follows:
897 // Set up loop counter in CTR, used by BC
898 // XXLXOR clears VS32
899 // XXLXOR VS32,VS32,VS32
900 // MOVD len/64,REG_TMP
910 // Bytes per iteration
916 // Only generate a loop if there is more than one iteration.
919 // Set up VS32 (V0) to hold 0s
920 p := s.Prog(ppc64.AXXLXOR)
921 p.From.Type = obj.TYPE_REG
922 p.From.Reg = ppc64.REG_VS32
923 p.To.Type = obj.TYPE_REG
924 p.To.Reg = ppc64.REG_VS32
925 p.Reg = ppc64.REG_VS32
927 // Set up CTR loop counter
928 p = s.Prog(ppc64.AMOVD)
929 p.From.Type = obj.TYPE_CONST
931 p.To.Type = obj.TYPE_REG
932 p.To.Reg = ppc64.REGTMP
934 p = s.Prog(ppc64.AMOVD)
935 p.From.Type = obj.TYPE_REG
936 p.From.Reg = ppc64.REGTMP
937 p.To.Type = obj.TYPE_REG
938 p.To.Reg = ppc64.REG_CTR
940 // Don't generate padding for
941 // loops with few iterations.
943 p = s.Prog(obj.APCALIGN)
944 p.From.Type = obj.TYPE_CONST
948 // generate 4 STXVs to zero 64 bytes
951 p = s.Prog(ppc64.ASTXV)
952 p.From.Type = obj.TYPE_REG
953 p.From.Reg = ppc64.REG_VS32
954 p.To.Type = obj.TYPE_MEM
955 p.To.Reg = v.Args[0].Reg()
957 // Save the top of loop
961 p = s.Prog(ppc64.ASTXV)
962 p.From.Type = obj.TYPE_REG
963 p.From.Reg = ppc64.REG_VS32
964 p.To.Type = obj.TYPE_MEM
965 p.To.Reg = v.Args[0].Reg()
968 p = s.Prog(ppc64.ASTXV)
969 p.From.Type = obj.TYPE_REG
970 p.From.Reg = ppc64.REG_VS32
971 p.To.Type = obj.TYPE_MEM
972 p.To.Reg = v.Args[0].Reg()
975 p = s.Prog(ppc64.ASTXV)
976 p.From.Type = obj.TYPE_REG
977 p.From.Reg = ppc64.REG_VS32
978 p.To.Type = obj.TYPE_MEM
979 p.To.Reg = v.Args[0].Reg()
982 // Increment address for the
983 // 64 bytes just zeroed.
984 p = s.Prog(ppc64.AADD)
985 p.Reg = v.Args[0].Reg()
986 p.From.Type = obj.TYPE_CONST
988 p.To.Type = obj.TYPE_REG
989 p.To.Reg = v.Args[0].Reg()
991 // Branch back to top of loop
993 // BC with BO_BCTR generates bdnz
994 p = s.Prog(ppc64.ABC)
995 p.From.Type = obj.TYPE_CONST
996 p.From.Offset = ppc64.BO_BCTR
998 p.To.Type = obj.TYPE_BRANCH
1001 // When ctr == 1 the loop was not generated but
1002 // there are at least 64 bytes to clear, so add
1003 // that to the remainder to generate the code
1004 // to clear those doublewords
1009 // Clear the remainder starting at offset zero
1012 if rem >= 16 && ctr <= 1 {
1013 // If the XXLXOR hasn't already been
1014 // generated, do it here to initialize VS32 (V0) to 0.
1016 p := s.Prog(ppc64.AXXLXOR)
1017 p.From.Type = obj.TYPE_REG
1018 p.From.Reg = ppc64.REG_VS32
1019 p.To.Type = obj.TYPE_REG
1020 p.To.Reg = ppc64.REG_VS32
1021 p.Reg = ppc64.REG_VS32
1023 // Generate STXV for 32 or 64 bytes.
1026 p := s.Prog(ppc64.ASTXV)
1027 p.From.Type = obj.TYPE_REG
1028 p.From.Reg = ppc64.REG_VS32
1029 p.To.Type = obj.TYPE_MEM
1030 p.To.Reg = v.Args[0].Reg()
1031 p.To.Offset = offset
1033 p = s.Prog(ppc64.ASTXV)
1034 p.From.Type = obj.TYPE_REG
1035 p.From.Reg = ppc64.REG_VS32
1036 p.To.Type = obj.TYPE_MEM
1037 p.To.Reg = v.Args[0].Reg()
1038 p.To.Offset = offset + 16
1042 // Generate 16 bytes
1044 p := s.Prog(ppc64.ASTXV)
1045 p.From.Type = obj.TYPE_REG
1046 p.From.Reg = ppc64.REG_VS32
1047 p.To.Type = obj.TYPE_MEM
1048 p.To.Reg = v.Args[0].Reg()
1049 p.To.Offset = offset
1054 // first clear as many doublewords as possible
1055 // then clear remaining sizes as available
1057 op, size := ppc64.AMOVB, int64(1)
1060 op, size = ppc64.AMOVD, 8
1062 op, size = ppc64.AMOVW, 4
1064 op, size = ppc64.AMOVH, 2
1067 p.From.Type = obj.TYPE_REG
1068 p.From.Reg = ppc64.REG_R0
1069 p.To.Type = obj.TYPE_MEM
1070 p.To.Reg = v.Args[0].Reg()
1071 p.To.Offset = offset
1076 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
1078 // Unaligned data doesn't hurt performance
1079 // for these instructions on power8.
1081 // For sizes >= 64 generate a loop as follows:
1083 // Set up loop counter in CTR, used by BC
1084 // XXLXOR VS32,VS32,VS32
1085 // MOVD len/32,REG_TMP
1089 // STXVD2X VS32,(R0)(R20)
1090 // STXVD2X VS32,(R31)(R20)
1094 // any remainder is done as described below
1096 // for sizes < 64 bytes, first clear as many doublewords as possible,
1097 // then handle the remainder
1102 // the remainder bytes are cleared using one or more
1103 // of the following instructions with the appropriate
1104 // offsets, depending on which instructions are needed.
1106 // MOVW R0,n1(R20) 4 bytes
1107 // MOVH R0,n2(R20) 2 bytes
1108 // MOVB R0,n3(R20) 1 byte
1110 // 7 bytes: MOVW, MOVH, MOVB
1111 // 6 bytes: MOVW, MOVH
1112 // 5 bytes: MOVW, MOVB
1113 // 3 bytes: MOVH, MOVB
1115 // each loop iteration does 32 bytes
1116 ctr := v.AuxInt / 32
1119 rem := v.AuxInt % 32
1121 // only generate a loop if there is more
1122 // than 1 iteration.
1124 // Set up VS32 (V0) to hold 0s
1125 p := s.Prog(ppc64.AXXLXOR)
1126 p.From.Type = obj.TYPE_REG
1127 p.From.Reg = ppc64.REG_VS32
1128 p.To.Type = obj.TYPE_REG
1129 p.To.Reg = ppc64.REG_VS32
1130 p.Reg = ppc64.REG_VS32
1132 // Set up CTR loop counter
1133 p = s.Prog(ppc64.AMOVD)
1134 p.From.Type = obj.TYPE_CONST
1136 p.To.Type = obj.TYPE_REG
1137 p.To.Reg = ppc64.REGTMP
1139 p = s.Prog(ppc64.AMOVD)
1140 p.From.Type = obj.TYPE_REG
1141 p.From.Reg = ppc64.REGTMP
1142 p.To.Type = obj.TYPE_REG
1143 p.To.Reg = ppc64.REG_CTR
1145 // Set up R31 to hold index value 16
1146 p = s.Prog(ppc64.AMOVD)
1147 p.From.Type = obj.TYPE_CONST
1149 p.To.Type = obj.TYPE_REG
1150 p.To.Reg = ppc64.REGTMP
1152 // Don't add padding for alignment
1153 // with few loop iterations.
1155 p = s.Prog(obj.APCALIGN)
1156 p.From.Type = obj.TYPE_CONST
1160 // generate 2 STXVD2Xs to store 16 bytes
1161 // when this is a loop, the top must be saved
1163 // This is the top of loop
1165 p = s.Prog(ppc64.ASTXVD2X)
1166 p.From.Type = obj.TYPE_REG
1167 p.From.Reg = ppc64.REG_VS32
1168 p.To.Type = obj.TYPE_MEM
1169 p.To.Reg = v.Args[0].Reg()
1170 p.To.Index = ppc64.REGZERO
1171 // Save the top of loop
1175 p = s.Prog(ppc64.ASTXVD2X)
1176 p.From.Type = obj.TYPE_REG
1177 p.From.Reg = ppc64.REG_VS32
1178 p.To.Type = obj.TYPE_MEM
1179 p.To.Reg = v.Args[0].Reg()
1180 p.To.Index = ppc64.REGTMP
1182 // Increment address for the
1183 // 4 doublewords just zeroed.
1184 p = s.Prog(ppc64.AADD)
1185 p.Reg = v.Args[0].Reg()
1186 p.From.Type = obj.TYPE_CONST
1188 p.To.Type = obj.TYPE_REG
1189 p.To.Reg = v.Args[0].Reg()
1191 // Branch back to top of loop
1193 // BC with BO_BCTR generates bdnz
1194 p = s.Prog(ppc64.ABC)
1195 p.From.Type = obj.TYPE_CONST
1196 p.From.Offset = ppc64.BO_BCTR
1197 p.Reg = ppc64.REG_R0
1198 p.To.Type = obj.TYPE_BRANCH
1202 // when ctr == 1 the loop was not generated but
1203 // there are at least 32 bytes to clear, so add
1204 // that to the remainder to generate the code
1205 // to clear those doublewords
1210 // clear the remainder starting at offset zero
1213 // first clear as many doublewords as possible
1214 // then clear remaining sizes as available
1216 op, size := ppc64.AMOVB, int64(1)
1219 op, size = ppc64.AMOVD, 8
1221 op, size = ppc64.AMOVW, 4
1223 op, size = ppc64.AMOVH, 2
1226 p.From.Type = obj.TYPE_REG
1227 p.From.Reg = ppc64.REG_R0
1228 p.To.Type = obj.TYPE_MEM
1229 p.To.Reg = v.Args[0].Reg()
1230 p.To.Offset = offset
1235 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
1237 bytesPerLoop := int64(32)
1238 // This will be used when moving more
1239 // than 8 bytes. Moves start with
1240 // as many 8 byte moves as possible, then
1241 // 4, 2, or 1 byte(s) as remaining. This will
1242 // work and be efficient for power8 or later.
1243 // If there are 64 or more bytes, then a
1244 // loop is generated to move 32 bytes and
1245 // update the src and dst addresses on each
1246 // iteration. When < 64 bytes, the appropriate
1247 // number of moves is generated based on the size.
1249 // When moving >= 64 bytes a loop is used
1250 // MOVD len/32,REG_TMP
1254 // LXVD2X (R0)(R21),VS32
1255 // LXVD2X (R31)(R21),VS33
1257 // STXVD2X VS32,(R0)(R20)
1258 // STXVD2X VS33,(R31)(R20)
1261 // Bytes not moved by this loop are moved
1262 // with a combination of the following instructions,
1263 // starting with the largest sizes and generating as
1264 // many as needed, using the appropriate offset value.
1274 // Each loop iteration moves 32 bytes
1275 ctr := v.AuxInt / bytesPerLoop
1277 // Remainder after the loop
1278 rem := v.AuxInt % bytesPerLoop
1280 dstReg := v.Args[0].Reg()
1281 srcReg := v.Args[1].Reg()
1283 // The set of registers used here must match the clobbered reg list
1289 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1292 p := s.Prog(ppc64.AMOVD)
1293 p.From.Type = obj.TYPE_CONST
1295 p.To.Type = obj.TYPE_REG
1296 p.To.Reg = ppc64.REGTMP
1298 p = s.Prog(ppc64.AMOVD)
1299 p.From.Type = obj.TYPE_REG
1300 p.From.Reg = ppc64.REGTMP
1301 p.To.Type = obj.TYPE_REG
1302 p.To.Reg = ppc64.REG_CTR
1304 // Use REGTMP as index reg
1305 p = s.Prog(ppc64.AMOVD)
1306 p.From.Type = obj.TYPE_CONST
1308 p.To.Type = obj.TYPE_REG
1309 p.To.Reg = ppc64.REGTMP
1311 // Don't add padding for
1312 // alignment with small iteration counts.
1315 p = s.Prog(obj.APCALIGN)
1316 p.From.Type = obj.TYPE_CONST
1320 // Generate 16 byte loads and stores.
1321 // Use temp register for index (16)
1322 // on the second one.
1324 p = s.Prog(ppc64.ALXVD2X)
1325 p.From.Type = obj.TYPE_MEM
1327 p.From.Index = ppc64.REGZERO
1328 p.To.Type = obj.TYPE_REG
1329 p.To.Reg = ppc64.REG_VS32
1333 p = s.Prog(ppc64.ALXVD2X)
1334 p.From.Type = obj.TYPE_MEM
1336 p.From.Index = ppc64.REGTMP
1337 p.To.Type = obj.TYPE_REG
1338 p.To.Reg = ppc64.REG_VS33
1340 // increment the src reg for next iteration
1341 p = s.Prog(ppc64.AADD)
1343 p.From.Type = obj.TYPE_CONST
1344 p.From.Offset = bytesPerLoop
1345 p.To.Type = obj.TYPE_REG
1348 // generate 16 byte stores
1349 p = s.Prog(ppc64.ASTXVD2X)
1350 p.From.Type = obj.TYPE_REG
1351 p.From.Reg = ppc64.REG_VS32
1352 p.To.Type = obj.TYPE_MEM
1354 p.To.Index = ppc64.REGZERO
1356 p = s.Prog(ppc64.ASTXVD2X)
1357 p.From.Type = obj.TYPE_REG
1358 p.From.Reg = ppc64.REG_VS33
1359 p.To.Type = obj.TYPE_MEM
1361 p.To.Index = ppc64.REGTMP
1363 // increment the dst reg for next iteration
1364 p = s.Prog(ppc64.AADD)
1366 p.From.Type = obj.TYPE_CONST
1367 p.From.Offset = bytesPerLoop
1368 p.To.Type = obj.TYPE_REG
1371 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1373 p = s.Prog(ppc64.ABC)
1374 p.From.Type = obj.TYPE_CONST
1375 p.From.Offset = ppc64.BO_BCTR
1376 p.Reg = ppc64.REG_R0
1377 p.To.Type = obj.TYPE_BRANCH
1380 // srcReg and dstReg were incremented in the loop, so
1381 // later instructions start with offset 0.
1385 // No loop was generated for one iteration, so
1386 // add 32 bytes to the remainder to move those bytes.
1392 // Generate 16 byte loads and stores.
1393 // Use temp register for index (value 16)
1394 // on the second one.
1395 p := s.Prog(ppc64.ALXVD2X)
1396 p.From.Type = obj.TYPE_MEM
1398 p.From.Index = ppc64.REGZERO
1399 p.To.Type = obj.TYPE_REG
1400 p.To.Reg = ppc64.REG_VS32
1402 p = s.Prog(ppc64.ASTXVD2X)
1403 p.From.Type = obj.TYPE_REG
1404 p.From.Reg = ppc64.REG_VS32
1405 p.To.Type = obj.TYPE_MEM
1407 p.To.Index = ppc64.REGZERO
1413 // Use REGTMP as index reg
1414 p := s.Prog(ppc64.AMOVD)
1415 p.From.Type = obj.TYPE_CONST
1417 p.To.Type = obj.TYPE_REG
1418 p.To.Reg = ppc64.REGTMP
1420 p = s.Prog(ppc64.ALXVD2X)
1421 p.From.Type = obj.TYPE_MEM
1423 p.From.Index = ppc64.REGTMP
1424 p.To.Type = obj.TYPE_REG
1425 p.To.Reg = ppc64.REG_VS32
1427 p = s.Prog(ppc64.ASTXVD2X)
1428 p.From.Type = obj.TYPE_REG
1429 p.From.Reg = ppc64.REG_VS32
1430 p.To.Type = obj.TYPE_MEM
1432 p.To.Index = ppc64.REGTMP
1439 // Generate all the remaining load and store pairs, starting with
1440 // as many 8 byte moves as possible, then 4, 2, 1.
1442 op, size := ppc64.AMOVB, int64(1)
1445 op, size = ppc64.AMOVD, 8
1447 op, size = ppc64.AMOVW, 4
1449 op, size = ppc64.AMOVH, 2
1453 p.To.Type = obj.TYPE_REG
1454 p.To.Reg = ppc64.REGTMP
1455 p.From.Type = obj.TYPE_MEM
1457 p.From.Offset = offset
1461 p.From.Type = obj.TYPE_REG
1462 p.From.Reg = ppc64.REGTMP
1463 p.To.Type = obj.TYPE_MEM
1465 p.To.Offset = offset
1470 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
1471 bytesPerLoop := int64(64)
1472 // This is used when moving more
1473 // than 8 bytes on power9. Moves start with
1474 // as many 8 byte moves as possible, then
1475 // 4, 2, or 1 byte(s) as remaining. This will
1476 // work and be efficient for power8 or later.
1477 // If there are 64 or more bytes, then a
1478 // loop is generated to move 64 bytes and
1479 // update the src and dst addresses on each
1480 // iteration. When < 64 bytes, the appropriate
1481 // number of moves is generated based on the size.
1483 // When moving >= 64 bytes a loop is used
1484 // MOVD len/64,REG_TMP
1491 // STXV VS33,16(R20)
1494 // Bytes not moved by this loop are moved
1495 // with a combination of the following instructions,
1496 // starting with the largest sizes and generating as
1497 // many as needed, using the appropriate offset value.
1507 // Each loop iteration moves 64 bytes
1508 ctr := v.AuxInt / bytesPerLoop
1510 // Remainder after the loop
1511 rem := v.AuxInt % bytesPerLoop
1513 dstReg := v.Args[0].Reg()
1514 srcReg := v.Args[1].Reg()
1521 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1524 p := s.Prog(ppc64.AMOVD)
1525 p.From.Type = obj.TYPE_CONST
1527 p.To.Type = obj.TYPE_REG
1528 p.To.Reg = ppc64.REGTMP
1530 p = s.Prog(ppc64.AMOVD)
1531 p.From.Type = obj.TYPE_REG
1532 p.From.Reg = ppc64.REGTMP
1533 p.To.Type = obj.TYPE_REG
1534 p.To.Reg = ppc64.REG_CTR
1536 p = s.Prog(obj.APCALIGN)
1537 p.From.Type = obj.TYPE_CONST
1540 // Generate 16 byte loads and stores.
1541 p = s.Prog(ppc64.ALXV)
1542 p.From.Type = obj.TYPE_MEM
1544 p.From.Offset = offset
1545 p.To.Type = obj.TYPE_REG
1546 p.To.Reg = ppc64.REG_VS32
1550 p = s.Prog(ppc64.ALXV)
1551 p.From.Type = obj.TYPE_MEM
1553 p.From.Offset = offset + 16
1554 p.To.Type = obj.TYPE_REG
1555 p.To.Reg = ppc64.REG_VS33
1557 // generate 16 byte stores
1558 p = s.Prog(ppc64.ASTXV)
1559 p.From.Type = obj.TYPE_REG
1560 p.From.Reg = ppc64.REG_VS32
1561 p.To.Type = obj.TYPE_MEM
1563 p.To.Offset = offset
1565 p = s.Prog(ppc64.ASTXV)
1566 p.From.Type = obj.TYPE_REG
1567 p.From.Reg = ppc64.REG_VS33
1568 p.To.Type = obj.TYPE_MEM
1570 p.To.Offset = offset + 16
1572 // Generate 16 byte loads and stores.
1573 p = s.Prog(ppc64.ALXV)
1574 p.From.Type = obj.TYPE_MEM
1576 p.From.Offset = offset + 32
1577 p.To.Type = obj.TYPE_REG
1578 p.To.Reg = ppc64.REG_VS32
1580 p = s.Prog(ppc64.ALXV)
1581 p.From.Type = obj.TYPE_MEM
1583 p.From.Offset = offset + 48
1584 p.To.Type = obj.TYPE_REG
1585 p.To.Reg = ppc64.REG_VS33
1587 // generate 16 byte stores
1588 p = s.Prog(ppc64.ASTXV)
1589 p.From.Type = obj.TYPE_REG
1590 p.From.Reg = ppc64.REG_VS32
1591 p.To.Type = obj.TYPE_MEM
1593 p.To.Offset = offset + 32
1595 p = s.Prog(ppc64.ASTXV)
1596 p.From.Type = obj.TYPE_REG
1597 p.From.Reg = ppc64.REG_VS33
1598 p.To.Type = obj.TYPE_MEM
1600 p.To.Offset = offset + 48
1602 // increment the src reg for next iteration
1603 p = s.Prog(ppc64.AADD)
1605 p.From.Type = obj.TYPE_CONST
1606 p.From.Offset = bytesPerLoop
1607 p.To.Type = obj.TYPE_REG
1610 // increment the dst reg for next iteration
1611 p = s.Prog(ppc64.AADD)
1613 p.From.Type = obj.TYPE_CONST
1614 p.From.Offset = bytesPerLoop
1615 p.To.Type = obj.TYPE_REG
1618 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1620 p = s.Prog(ppc64.ABC)
1621 p.From.Type = obj.TYPE_CONST
1622 p.From.Offset = ppc64.BO_BCTR
1623 p.Reg = ppc64.REG_R0
1624 p.To.Type = obj.TYPE_BRANCH
1627 // srcReg and dstReg were incremented in the loop, so
1628 // later instructions start with offset 0.
1632 // No loop was generated for one iteration, so
1633 // add 64 bytes to the remainder to move those bytes.
1638 p := s.Prog(ppc64.ALXV)
1639 p.From.Type = obj.TYPE_MEM
1641 p.To.Type = obj.TYPE_REG
1642 p.To.Reg = ppc64.REG_VS32
1644 p = s.Prog(ppc64.ALXV)
1645 p.From.Type = obj.TYPE_MEM
1648 p.To.Type = obj.TYPE_REG
1649 p.To.Reg = ppc64.REG_VS33
1651 p = s.Prog(ppc64.ASTXV)
1652 p.From.Type = obj.TYPE_REG
1653 p.From.Reg = ppc64.REG_VS32
1654 p.To.Type = obj.TYPE_MEM
1657 p = s.Prog(ppc64.ASTXV)
1658 p.From.Type = obj.TYPE_REG
1659 p.From.Reg = ppc64.REG_VS33
1660 p.To.Type = obj.TYPE_MEM
1669 // Generate 16 byte loads and stores.
1670 p := s.Prog(ppc64.ALXV)
1671 p.From.Type = obj.TYPE_MEM
1673 p.From.Offset = offset
1674 p.To.Type = obj.TYPE_REG
1675 p.To.Reg = ppc64.REG_VS32
1677 p = s.Prog(ppc64.ASTXV)
1678 p.From.Type = obj.TYPE_REG
1679 p.From.Reg = ppc64.REG_VS32
1680 p.To.Type = obj.TYPE_MEM
1682 p.To.Offset = offset
1688 p := s.Prog(ppc64.ALXV)
1689 p.From.Type = obj.TYPE_MEM
1691 p.From.Offset = offset
1692 p.To.Type = obj.TYPE_REG
1693 p.To.Reg = ppc64.REG_VS32
1695 p = s.Prog(ppc64.ASTXV)
1696 p.From.Type = obj.TYPE_REG
1697 p.From.Reg = ppc64.REG_VS32
1698 p.To.Type = obj.TYPE_MEM
1700 p.To.Offset = offset
1706 // Generate all the remaining load and store pairs, starting with
1707 // as many 8 byte moves as possible, then 4, 2, 1.
1709 op, size := ppc64.AMOVB, int64(1)
1712 op, size = ppc64.AMOVD, 8
1714 op, size = ppc64.AMOVW, 4
1716 op, size = ppc64.AMOVH, 2
1720 p.To.Type = obj.TYPE_REG
1721 p.To.Reg = ppc64.REGTMP
1722 p.From.Type = obj.TYPE_MEM
1724 p.From.Offset = offset
1728 p.From.Type = obj.TYPE_REG
1729 p.From.Reg = ppc64.REGTMP
1730 p.To.Type = obj.TYPE_MEM
1732 p.To.Offset = offset
1737 case ssa.OpPPC64CALLstatic:
1740 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
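// Indirect call: the function address is expected in R12 (checked below);
// it is copied to LR and the call is made through LR.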
1741 p := s.Prog(ppc64.AMOVD)
1742 p.From.Type = obj.TYPE_REG
1743 p.From.Reg = v.Args[0].Reg()
1744 p.To.Type = obj.TYPE_REG
1745 p.To.Reg = ppc64.REG_LR
1747 if v.Args[0].Reg() != ppc64.REG_R12 {
1748 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1752 pp.To.Reg = ppc64.REG_LR
1754 if gc.Ctxt.Flag_shared {
1755 // When compiling Go into PIC, the function we just
1756 // called via pointer might have been implemented in
1757 // a separate module and so overwritten the TOC
1758 // pointer in R2; reload it.
1759 q := s.Prog(ppc64.AMOVD)
1760 q.From.Type = obj.TYPE_MEM
1762 q.From.Reg = ppc64.REGSP
1763 q.To.Type = obj.TYPE_REG
1764 q.To.Reg = ppc64.REG_R2
1767 case ssa.OpPPC64LoweredWB:
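// Emit a call to the write barrier helper; the target symbol is carried in v.Aux.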
1768 p := s.Prog(obj.ACALL)
1769 p.To.Type = obj.TYPE_MEM
1770 p.To.Name = obj.NAME_EXTERN
1771 p.To.Sym = v.Aux.(*obj.LSym)
1773 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1774 p := s.Prog(obj.ACALL)
1775 p.To.Type = obj.TYPE_MEM
1776 p.To.Name = obj.NAME_EXTERN
1777 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1778 s.UseArgs(16) // space used in callee args area by assembly stubs
1780 case ssa.OpPPC64LoweredNilCheck:
1781 if objabi.GOOS == "aix" {
1785 // NOP (so the BNE has somewhere to land)
1788 p := s.Prog(ppc64.ACMP)
1789 p.From.Type = obj.TYPE_REG
1790 p.From.Reg = v.Args[0].Reg()
1791 p.To.Type = obj.TYPE_REG
1792 p.To.Reg = ppc64.REG_R0
1795 p2 := s.Prog(ppc64.ABNE)
1796 p2.To.Type = obj.TYPE_BRANCH
1799 // Write at 0 is forbidden and will trigger a SIGSEGV
1800 p = s.Prog(ppc64.AMOVW)
1801 p.From.Type = obj.TYPE_REG
1802 p.From.Reg = ppc64.REG_R0
1803 p.To.Type = obj.TYPE_MEM
1804 p.To.Reg = ppc64.REG_R0
1806 // NOP (so the BNE has somewhere to land)
1807 nop := s.Prog(obj.ANOP)
1811 // Issue a load which will fault if arg is nil.
1812 p := s.Prog(ppc64.AMOVBZ)
1813 p.From.Type = obj.TYPE_MEM
1814 p.From.Reg = v.Args[0].Reg()
1815 gc.AddAux(&p.From, v)
1816 p.To.Type = obj.TYPE_REG
1817 p.To.Reg = ppc64.REGTMP
1819 if logopt.Enabled() {
1820 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1822 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1823 gc.Warnl(v.Pos, "generated nil check")
1826 // These should be resolved by rules and not make it here.
1827 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1828 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1829 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1830 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1831 case ssa.OpPPC64InvertFlags:
1832 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1833 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1834 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1836 // TODO: implement for clobberdead experiment. Nop is ok for now.
1838 v.Fatalf("genValue not implemented: %s", v.LongString())
1842 var blockJump = [...]struct {
1844 asmeq, invasmun bool
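// asmeq: emit an extra BEQ after the primary branch (FP GE/LE also branch on
// equal). invasmun: emit an extra BVS after the inverted branch to handle the
// FP unordered case.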
1846 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1847 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1849 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1850 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1851 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1852 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1854 // TODO: need to work FP comparisons into block jumps
1855 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1856 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1857 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1858 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
1861 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1863 case ssa.BlockDefer:
1864 // defer returns in R3:
1865 // 0 if we should continue executing
1866 // 1 if we should jump to deferreturn call
1867 p := s.Prog(ppc64.ACMP)
1868 p.From.Type = obj.TYPE_REG
1869 p.From.Reg = ppc64.REG_R3
1870 p.To.Type = obj.TYPE_REG
1871 p.To.Reg = ppc64.REG_R0
1873 p = s.Prog(ppc64.ABNE)
1874 p.To.Type = obj.TYPE_BRANCH
1875 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1876 if b.Succs[0].Block() != next {
1877 p := s.Prog(obj.AJMP)
1878 p.To.Type = obj.TYPE_BRANCH
1879 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1882 case ssa.BlockPlain:
1883 if b.Succs[0].Block() != next {
1884 p := s.Prog(obj.AJMP)
1885 p.To.Type = obj.TYPE_BRANCH
1886 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1891 case ssa.BlockRetJmp:
1892 p := s.Prog(obj.AJMP)
1893 p.To.Type = obj.TYPE_MEM
1894 p.To.Name = obj.NAME_EXTERN
1895 p.To.Sym = b.Aux.(*obj.LSym)
1897 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1898 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1899 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1900 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1901 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1902 jmp := blockJump[b.Kind]
1904 case b.Succs[0].Block():
1905 s.Br(jmp.invasm, b.Succs[1].Block())
1907 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1908 s.Br(ppc64.ABVS, b.Succs[1].Block())
1910 case b.Succs[1].Block():
1911 s.Br(jmp.asm, b.Succs[0].Block())
1913 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1916 if b.Likely != ssa.BranchUnlikely {
1917 s.Br(jmp.asm, b.Succs[0].Block())
1919 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1921 s.Br(obj.AJMP, b.Succs[1].Block())
1923 s.Br(jmp.invasm, b.Succs[1].Block())
1925 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1926 s.Br(ppc64.ABVS, b.Succs[1].Block())
1928 s.Br(obj.AJMP, b.Succs[0].Block())
1932 b.Fatalf("branch not implemented: %s", b.LongString())