1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/gc"
9 "cmd/compile/internal/logopt"
10 "cmd/compile/internal/ssa"
11 "cmd/compile/internal/types"
13 "cmd/internal/obj/ppc64"
19 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
// NOTE(review): the entire body below is commented out, so on ppc64 this
// function is currently a no-op. The disabled code sketches a backward scan
// of the block tracking flag liveness so MOVDconst values that would clobber
// flags can be marked (via a non-nil Aux) — confirm intent before enabling.
20 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
21 // flive := b.FlagsLiveAtEnd
22 // if b.Control != nil && b.Control.Type.IsFlags() {
25 // for i := len(b.Values) - 1; i >= 0; i-- {
// NOTE(review): the next disabled line contains an apparent typo —
// `v.Op == v.Op == ssa.OpPPC64MOVDconst` — presumably it should read
// `v.Op == ssa.OpPPC64MOVDconst`; fix if this code is ever re-enabled.
27 // if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
28 // // The "mark" is any non-nil Aux value.
31 // if v.Type.IsFlags() {
34 // for _, a := range v.Args {
35 // if a.Type.IsFlags() {
42 // loadByType returns the load instruction of the given type.
// NOTE(review): only the final fallthrough is visible in this view; the
// type-dispatch body (presumably switching on size/signedness/float-ness
// to pick MOVB/MOVH/MOVW/MOVD/FMOVS/FMOVD variants) is elided here.
43 func loadByType(t *types.Type) obj.As {
// Reaching this point means no case above matched t — a compiler bug.
75 panic("bad load type")
78 // storeByType returns the store instruction of the given type.
// NOTE(review): as with loadByType, the dispatch body is elided from this
// view; only the unmatched-type fallthrough remains visible.
79 func storeByType(t *types.Type) obj.As {
// Reaching this point means no case above matched t — a compiler bug.
99 panic("bad store type")
// ssaGenValue emits the machine instructions (obj.Prog entries, via s.Prog)
// that implement a single SSA value v for the ppc64 backend. It is one large
// dispatch on v.Op; each case builds one or more Progs and fills in their
// From/To/Reg operand fields.
//
// NOTE(review): this listing is elided (the embedded source line numbers are
// non-contiguous), so many operand-assignment statements between the visible
// lines are not shown here. Comments below only describe what the visible
// code demonstrates.
102 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
125 case ssa.OpPPC64LoweredMuluhilo:
126 // MULHDU Rarg1, Rarg0, Reg0
127 // MULLD Rarg1, Rarg0, Reg1
128 r0 := v.Args[0].Reg()
129 r1 := v.Args[1].Reg()
130 p := s.Prog(ppc64.AMULHDU)
131 p.From.Type = obj.TYPE_REG
134 p.To.Type = obj.TYPE_REG
136 p1 := s.Prog(ppc64.AMULLD)
137 p1.From.Type = obj.TYPE_REG
140 p1.To.Type = obj.TYPE_REG
143 case ssa.OpPPC64LoweredAdd64Carry:
144 // ADDC Rarg2, -1, Rtmp
145 // ADDE Rarg1, Rarg0, Reg0
147 r0 := v.Args[0].Reg()
148 r1 := v.Args[1].Reg()
149 r2 := v.Args[2].Reg()
150 p := s.Prog(ppc64.AADDC)
151 p.From.Type = obj.TYPE_CONST
154 p.To.Type = obj.TYPE_REG
155 p.To.Reg = ppc64.REGTMP
156 p1 := s.Prog(ppc64.AADDE)
157 p1.From.Type = obj.TYPE_REG
160 p1.To.Type = obj.TYPE_REG
162 p2 := s.Prog(ppc64.AADDZE)
163 p2.From.Type = obj.TYPE_REG
164 p2.From.Reg = ppc64.REGZERO
165 p2.To.Type = obj.TYPE_REG
// Atomic read-modify-write byte ops: LWSYNC barrier, then an
// LBAR/op/STBCCC retry loop guarded by BNE on the store-conditional.
168 case ssa.OpPPC64LoweredAtomicAnd8,
169 ssa.OpPPC64LoweredAtomicOr8:
171 // LBAR (Rarg0), Rtmp
172 // AND/OR Rarg1, Rtmp
173 // STBCCC Rtmp, (Rarg0)
175 r0 := v.Args[0].Reg()
176 r1 := v.Args[1].Reg()
177 // LWSYNC - Assuming shared data not write-through-required nor
178 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
179 plwsync := s.Prog(ppc64.ALWSYNC)
180 plwsync.To.Type = obj.TYPE_NONE
181 p := s.Prog(ppc64.ALBAR)
182 p.From.Type = obj.TYPE_MEM
184 p.To.Type = obj.TYPE_REG
185 p.To.Reg = ppc64.REGTMP
186 p1 := s.Prog(v.Op.Asm())
187 p1.From.Type = obj.TYPE_REG
189 p1.To.Type = obj.TYPE_REG
190 p1.To.Reg = ppc64.REGTMP
191 p2 := s.Prog(ppc64.ASTBCCC)
192 p2.From.Type = obj.TYPE_REG
193 p2.From.Reg = ppc64.REGTMP
194 p2.To.Type = obj.TYPE_MEM
196 p2.RegTo2 = ppc64.REGTMP
197 p3 := s.Prog(ppc64.ABNE)
198 p3.To.Type = obj.TYPE_BRANCH
201 case ssa.OpPPC64LoweredAtomicAdd32,
202 ssa.OpPPC64LoweredAtomicAdd64:
204 // LDAR/LWAR (Rarg0), Rout
206 // STDCCC/STWCCC Rout, (Rarg0)
208 // MOVW Rout,Rout (if Add32)
211 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
215 r0 := v.Args[0].Reg()
216 r1 := v.Args[1].Reg()
218 // LWSYNC - Assuming shared data not write-through-required nor
219 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
220 plwsync := s.Prog(ppc64.ALWSYNC)
221 plwsync.To.Type = obj.TYPE_NONE
224 p.From.Type = obj.TYPE_MEM
226 p.To.Type = obj.TYPE_REG
229 p1 := s.Prog(ppc64.AADD)
230 p1.From.Type = obj.TYPE_REG
233 p1.To.Type = obj.TYPE_REG
236 p3.From.Type = obj.TYPE_REG
238 p3.To.Type = obj.TYPE_MEM
241 p4 := s.Prog(ppc64.ABNE)
242 p4.To.Type = obj.TYPE_BRANCH
245 // Ensure a 32 bit result
246 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
247 p5 := s.Prog(ppc64.AMOVWZ)
248 p5.To.Type = obj.TYPE_REG
250 p5.From.Type = obj.TYPE_REG
254 case ssa.OpPPC64LoweredAtomicExchange32,
255 ssa.OpPPC64LoweredAtomicExchange64:
257 // LDAR/LWAR (Rarg0), Rout
258 // STDCCC/STWCCC Rout, (Rarg0)
263 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
267 r0 := v.Args[0].Reg()
268 r1 := v.Args[1].Reg()
270 // LWSYNC - Assuming shared data not write-through-required nor
271 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
272 plwsync := s.Prog(ppc64.ALWSYNC)
273 plwsync.To.Type = obj.TYPE_NONE
276 p.From.Type = obj.TYPE_MEM
278 p.To.Type = obj.TYPE_REG
282 p1.From.Type = obj.TYPE_REG
284 p1.To.Type = obj.TYPE_MEM
287 p2 := s.Prog(ppc64.ABNE)
288 p2.To.Type = obj.TYPE_BRANCH
// ISYNC serves as the acquire barrier after the successful exchange.
291 pisync := s.Prog(ppc64.AISYNC)
292 pisync.To.Type = obj.TYPE_NONE
294 case ssa.OpPPC64LoweredAtomicLoad8,
295 ssa.OpPPC64LoweredAtomicLoad32,
296 ssa.OpPPC64LoweredAtomicLoad64,
297 ssa.OpPPC64LoweredAtomicLoadPtr:
299 // MOVB/MOVD/MOVW (Rarg0), Rout
306 case ssa.OpPPC64LoweredAtomicLoad8:
308 case ssa.OpPPC64LoweredAtomicLoad32:
312 arg0 := v.Args[0].Reg()
314 // SYNC when AuxInt == 1; otherwise, load-acquire
316 psync := s.Prog(ppc64.ASYNC)
317 psync.To.Type = obj.TYPE_NONE
321 p.From.Type = obj.TYPE_MEM
323 p.To.Type = obj.TYPE_REG
327 p1.From.Type = obj.TYPE_REG
329 p1.To.Type = obj.TYPE_REG
332 p2 := s.Prog(ppc64.ABNE)
333 p2.To.Type = obj.TYPE_BRANCH
335 pisync := s.Prog(ppc64.AISYNC)
336 pisync.To.Type = obj.TYPE_NONE
339 case ssa.OpPPC64LoweredAtomicStore8,
340 ssa.OpPPC64LoweredAtomicStore32,
341 ssa.OpPPC64LoweredAtomicStore64:
343 // MOVB/MOVW/MOVD arg1,(arg0)
346 case ssa.OpPPC64LoweredAtomicStore8:
348 case ssa.OpPPC64LoweredAtomicStore32:
351 arg0 := v.Args[0].Reg()
352 arg1 := v.Args[1].Reg()
353 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
355 syncOp := ppc64.ASYNC
357 syncOp = ppc64.ALWSYNC
359 psync := s.Prog(syncOp)
360 psync.To.Type = obj.TYPE_NONE
363 p.To.Type = obj.TYPE_MEM
365 p.From.Type = obj.TYPE_REG
// Compare-and-swap: larx/compare/stcx retry loop; p5/p7 materialize the
// boolean result (success/failure) and p6/p8 are the join-point jump/nop.
368 case ssa.OpPPC64LoweredAtomicCas64,
369 ssa.OpPPC64LoweredAtomicCas32:
372 // LDAR (Rarg0), MutexHint, Rtmp
375 // STDCCC Rarg2, (Rarg0)
377 // LWSYNC // Only for sequential consistency; not required in CasRel.
386 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
391 r0 := v.Args[0].Reg()
392 r1 := v.Args[1].Reg()
393 r2 := v.Args[2].Reg()
395 // LWSYNC - Assuming shared data not write-through-required nor
396 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
397 plwsync1 := s.Prog(ppc64.ALWSYNC)
398 plwsync1.To.Type = obj.TYPE_NONE
401 p.From.Type = obj.TYPE_MEM
403 p.To.Type = obj.TYPE_REG
404 p.To.Reg = ppc64.REGTMP
405 // If it is a Compare-and-Swap-Release operation, set the EH field with
408 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
412 p1.From.Type = obj.TYPE_REG
414 p1.To.Reg = ppc64.REGTMP
415 p1.To.Type = obj.TYPE_REG
417 p2 := s.Prog(ppc64.ABNE)
418 p2.To.Type = obj.TYPE_BRANCH
421 p3.From.Type = obj.TYPE_REG
423 p3.To.Type = obj.TYPE_MEM
426 p4 := s.Prog(ppc64.ABNE)
427 p4.To.Type = obj.TYPE_BRANCH
429 // LWSYNC - Assuming shared data not write-through-required nor
430 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
431 // If the operation is a CAS-Release, then synchronization is not necessary.
433 plwsync2 := s.Prog(ppc64.ALWSYNC)
434 plwsync2.To.Type = obj.TYPE_NONE
437 p5 := s.Prog(ppc64.AMOVD)
438 p5.From.Type = obj.TYPE_CONST
440 p5.To.Type = obj.TYPE_REG
443 p6 := s.Prog(obj.AJMP)
444 p6.To.Type = obj.TYPE_BRANCH
446 p7 := s.Prog(ppc64.AMOVD)
447 p7.From.Type = obj.TYPE_CONST
449 p7.To.Type = obj.TYPE_REG
453 p8 := s.Prog(obj.ANOP)
456 case ssa.OpPPC64LoweredGetClosurePtr:
457 // Closure pointer is R11 (already)
458 gc.CheckLoweredGetClosurePtr(v)
460 case ssa.OpPPC64LoweredGetCallerSP:
461 // caller's SP is FixedFrameSize below the address of the first arg
462 p := s.Prog(ppc64.AMOVD)
463 p.From.Type = obj.TYPE_ADDR
464 p.From.Offset = -gc.Ctxt.FixedFrameSize()
465 p.From.Name = obj.NAME_PARAM
466 p.To.Type = obj.TYPE_REG
469 case ssa.OpPPC64LoweredGetCallerPC:
470 p := s.Prog(obj.AGETCALLERPC)
471 p.To.Type = obj.TYPE_REG
474 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
475 // input is already rounded
478 loadOp := loadByType(v.Type)
480 gc.AddrAuto(&p.From, v.Args[0])
481 p.To.Type = obj.TYPE_REG
485 storeOp := storeByType(v.Type)
487 p.From.Type = obj.TYPE_REG
488 p.From.Reg = v.Args[0].Reg()
489 gc.AddAux(&p.To, v)
// Signed divides: the visible CMP/BEQ guards a special case before the
// divide, with NEG on the alternate path (elided details in between).
491 case ssa.OpPPC64DIVD:
501 r0 := v.Args[0].Reg()
502 r1 := v.Args[1].Reg()
504 p := s.Prog(ppc64.ACMP)
505 p.From.Type = obj.TYPE_REG
507 p.To.Type = obj.TYPE_CONST
510 pbahead := s.Prog(ppc64.ABEQ)
511 pbahead.To.Type = obj.TYPE_BRANCH
513 p = s.Prog(v.Op.Asm())
514 p.From.Type = obj.TYPE_REG
517 p.To.Type = obj.TYPE_REG
520 pbover := s.Prog(obj.AJMP)
521 pbover.To.Type = obj.TYPE_BRANCH
523 p = s.Prog(ppc64.ANEG)
524 p.To.Type = obj.TYPE_REG
526 p.From.Type = obj.TYPE_REG
533 case ssa.OpPPC64DIVW:
534 // word-width version of above
536 r0 := v.Args[0].Reg()
537 r1 := v.Args[1].Reg()
539 p := s.Prog(ppc64.ACMPW)
540 p.From.Type = obj.TYPE_REG
542 p.To.Type = obj.TYPE_CONST
545 pbahead := s.Prog(ppc64.ABEQ)
546 pbahead.To.Type = obj.TYPE_BRANCH
548 p = s.Prog(v.Op.Asm())
549 p.From.Type = obj.TYPE_REG
552 p.To.Type = obj.TYPE_REG
555 pbover := s.Prog(obj.AJMP)
556 pbover.To.Type = obj.TYPE_BRANCH
558 p = s.Prog(ppc64.ANEG)
559 p.To.Type = obj.TYPE_REG
561 p.From.Type = obj.TYPE_REG
// Generic two-register ops: one instruction, opcode taken from v.Op.Asm().
568 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
569 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
570 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
571 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
572 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
573 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
574 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
576 r1 := v.Args[0].Reg()
577 r2 := v.Args[1].Reg()
578 p := s.Prog(v.Op.Asm())
579 p.From.Type = obj.TYPE_REG
582 p.To.Type = obj.TYPE_REG
585 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
586 r1 := v.Args[0].Reg()
587 r2 := v.Args[1].Reg()
588 p := s.Prog(v.Op.Asm())
589 p.From.Type = obj.TYPE_REG
592 p.To.Type = obj.TYPE_REG
593 p.To.Reg = ppc64.REGTMP // result is not needed
595 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
596 p := s.Prog(v.Op.Asm())
597 p.From.Type = obj.TYPE_CONST
598 p.From.Offset = v.AuxInt
599 p.Reg = v.Args[0].Reg()
600 p.To.Type = obj.TYPE_REG
603 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
605 r1 := v.Args[0].Reg()
606 r2 := v.Args[1].Reg()
607 r3 := v.Args[2].Reg()
609 p := s.Prog(v.Op.Asm())
610 p.From.Type = obj.TYPE_REG
613 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
614 p.To.Type = obj.TYPE_REG
617 case ssa.OpPPC64MaskIfNotCarry:
619 p := s.Prog(v.Op.Asm())
620 p.From.Type = obj.TYPE_REG
621 p.From.Reg = ppc64.REGZERO
622 p.To.Type = obj.TYPE_REG
625 case ssa.OpPPC64ADDconstForCarry:
626 r1 := v.Args[0].Reg()
627 p := s.Prog(v.Op.Asm())
629 p.From.Type = obj.TYPE_CONST
630 p.From.Offset = v.AuxInt
631 p.To.Type = obj.TYPE_REG
632 p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
// Generic one-operand ops (unary arith, conversions, count ops).
634 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
635 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
636 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
637 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
639 p := s.Prog(v.Op.Asm())
640 p.To.Type = obj.TYPE_REG
642 p.From.Type = obj.TYPE_REG
643 p.From.Reg = v.Args[0].Reg()
645 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
646 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
647 p := s.Prog(v.Op.Asm())
648 p.Reg = v.Args[0].Reg()
649 p.From.Type = obj.TYPE_CONST
650 p.From.Offset = v.AuxInt
651 p.To.Type = obj.TYPE_REG
654 case ssa.OpPPC64ANDCCconst:
655 p := s.Prog(v.Op.Asm())
656 p.Reg = v.Args[0].Reg()
659 p.From.Type = obj.TYPE_CONST
660 p.From.Offset = gc.AuxOffset(v)
662 p.From.Type = obj.TYPE_CONST
663 p.From.Offset = v.AuxInt
666 p.To.Type = obj.TYPE_REG
667 p.To.Reg = ppc64.REGTMP // discard result
669 case ssa.OpPPC64MOVDaddr:
670 switch v.Aux.(type) {
672 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
674 // If aux offset and aux int are both 0, and the same
675 // input and output regs are used, no instruction
676 // needs to be generated, since it would just be
678 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
679 p := s.Prog(ppc64.AMOVD)
680 p.From.Type = obj.TYPE_ADDR
681 p.From.Reg = v.Args[0].Reg()
682 p.From.Offset = v.AuxInt
683 p.To.Type = obj.TYPE_REG
687 case *obj.LSym, *gc.Node:
688 p := s.Prog(ppc64.AMOVD)
689 p.From.Type = obj.TYPE_ADDR
690 p.From.Reg = v.Args[0].Reg()
691 p.To.Type = obj.TYPE_REG
693 gc.AddAux(&p.From, v)
697 case ssa.OpPPC64MOVDconst:
698 p := s.Prog(v.Op.Asm())
699 p.From.Type = obj.TYPE_CONST
700 p.From.Offset = v.AuxInt
701 p.To.Type = obj.TYPE_REG
704 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
705 p := s.Prog(v.Op.Asm())
706 p.From.Type = obj.TYPE_FCONST
707 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
708 p.To.Type = obj.TYPE_REG
711 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
712 p := s.Prog(v.Op.Asm())
713 p.From.Type = obj.TYPE_REG
714 p.From.Reg = v.Args[0].Reg()
715 p.To.Type = obj.TYPE_REG
716 p.To.Reg = v.Args[1].Reg()
718 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
719 p := s.Prog(v.Op.Asm())
720 p.From.Type = obj.TYPE_REG
721 p.From.Reg = v.Args[0].Reg()
722 p.To.Type = obj.TYPE_CONST
723 p.To.Offset = v.AuxInt
725 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
726 // Shift in register to required size
727 p := s.Prog(v.Op.Asm())
728 p.From.Type = obj.TYPE_REG
729 p.From.Reg = v.Args[0].Reg()
731 p.To.Type = obj.TYPE_REG
733 case ssa.OpPPC64MOVDload:
735 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
736 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
737 // the offset is not known until link time. If the load of a go.string uses relocation for the
738 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
739 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
740 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
741 // go.string types because other types will have proper alignment.
744 switch n := v.Aux.(type) {
746 gostring = strings.HasPrefix(n.Name, "go.string.")
749 // Generate full addr of the go.string const
751 p := s.Prog(ppc64.AMOVD)
752 p.From.Type = obj.TYPE_ADDR
753 p.From.Reg = v.Args[0].Reg()
754 gc.AddAux(&p.From, v)
755 p.To.Type = obj.TYPE_REG
757 // Load go.string using 0 offset
758 p = s.Prog(v.Op.Asm())
759 p.From.Type = obj.TYPE_MEM
761 p.To.Type = obj.TYPE_REG
765 // Not a go.string, generate a normal load
768 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
769 p := s.Prog(v.Op.Asm())
770 p.From.Type = obj.TYPE_MEM
771 p.From.Reg = v.Args[0].Reg()
772 gc.AddAux(&p.From, v)
773 p.To.Type = obj.TYPE_REG
776 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
777 p := s.Prog(v.Op.Asm())
778 p.From.Type = obj.TYPE_MEM
779 p.From.Reg = v.Args[0].Reg()
780 p.To.Type = obj.TYPE_REG
783 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
784 p := s.Prog(v.Op.Asm())
785 p.To.Type = obj.TYPE_MEM
786 p.To.Reg = v.Args[0].Reg()
787 p.From.Type = obj.TYPE_REG
788 p.From.Reg = v.Args[1].Reg()
790 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
791 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
792 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
793 p := s.Prog(v.Op.Asm())
794 p.From.Type = obj.TYPE_MEM
795 p.From.Reg = v.Args[0].Reg()
796 p.From.Index = v.Args[1].Reg()
797 p.To.Type = obj.TYPE_REG
800 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
801 p := s.Prog(v.Op.Asm())
802 p.From.Type = obj.TYPE_REG
803 p.From.Reg = ppc64.REGZERO
804 p.To.Type = obj.TYPE_MEM
805 p.To.Reg = v.Args[0].Reg()
808 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
809 p := s.Prog(v.Op.Asm())
810 p.From.Type = obj.TYPE_REG
811 p.From.Reg = v.Args[1].Reg()
812 p.To.Type = obj.TYPE_MEM
813 p.To.Reg = v.Args[0].Reg()
816 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
817 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
818 ssa.OpPPC64MOVHBRstoreidx:
819 p := s.Prog(v.Op.Asm())
820 p.From.Type = obj.TYPE_REG
821 p.From.Reg = v.Args[2].Reg()
822 p.To.Index = v.Args[1].Reg()
823 p.To.Type = obj.TYPE_MEM
824 p.To.Reg = v.Args[0].Reg()
826 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
828 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
829 // ISEL only accepts 0, 1, 2 condition values but the others can be
830 // achieved by swapping operand order.
831 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
832 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
833 // ISELB is used when a boolean result is needed, returning 0 or 1
834 p := s.Prog(ppc64.AISEL)
835 p.To.Type = obj.TYPE_REG
837 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
838 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
839 if v.Op == ssa.OpPPC64ISEL {
840 r.Reg = v.Args[1].Reg()
842 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
845 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
847 p.Reg = v.Args[0].Reg()
850 p.From.Type = obj.TYPE_CONST
851 p.From.Offset = v.AuxInt & 3
853 case ssa.OpPPC64LoweredZero:
855 // unaligned data doesn't hurt performance
856 // for these instructions on power8 or later
858 // for sizes >= 64 generate a loop as follows:
860 // set up loop counter in CTR, used by BC
861 // XXLXOR VS32,VS32,VS32
862 // MOVD len/32,REG_TMP
866 // STXVD2X VS32,(R0)(R3)
867 // STXVD2X VS32,(R31)(R3)
871 // any remainder is done as described below
873 // for sizes < 64 bytes, first clear as many doublewords as possible,
874 // then handle the remainder
879 // the remainder bytes are cleared using one or more
880 // of the following instructions with the appropriate
881 // offsets depending which instructions are needed
883 // MOVW R0,n1(R3) 4 bytes
884 // MOVH R0,n2(R3) 2 bytes
885 // MOVB R0,n3(R3) 1 byte
887 // 7 bytes: MOVW, MOVH, MOVB
888 // 6 bytes: MOVW, MOVH
889 // 5 bytes: MOVW, MOVB
890 // 3 bytes: MOVH, MOVB
892 // each loop iteration does 32 bytes
898 // only generate a loop if there is more
901 // Set up VS32 (V0) to hold 0s
902 p := s.Prog(ppc64.AXXLXOR)
903 p.From.Type = obj.TYPE_REG
904 p.From.Reg = ppc64.REG_VS32
905 p.To.Type = obj.TYPE_REG
906 p.To.Reg = ppc64.REG_VS32
907 p.Reg = ppc64.REG_VS32
909 // Set up CTR loop counter
910 p = s.Prog(ppc64.AMOVD)
911 p.From.Type = obj.TYPE_CONST
913 p.To.Type = obj.TYPE_REG
914 p.To.Reg = ppc64.REGTMP
916 p = s.Prog(ppc64.AMOVD)
917 p.From.Type = obj.TYPE_REG
918 p.From.Reg = ppc64.REGTMP
919 p.To.Type = obj.TYPE_REG
920 p.To.Reg = ppc64.REG_CTR
922 // Set up R31 to hold index value 16
923 p = s.Prog(ppc64.AMOVD)
924 p.From.Type = obj.TYPE_CONST
926 p.To.Type = obj.TYPE_REG
927 p.To.Reg = ppc64.REGTMP
929 // generate 2 STXVD2Xs to store 16 bytes
930 // when this is a loop then the top must be saved
932 // This is the top of loop
933 p = s.Prog(ppc64.ASTXVD2X)
934 p.From.Type = obj.TYPE_REG
935 p.From.Reg = ppc64.REG_VS32
936 p.To.Type = obj.TYPE_MEM
937 p.To.Reg = v.Args[0].Reg()
938 p.To.Index = ppc64.REGZERO
939 // Save the top of loop
944 p = s.Prog(ppc64.ASTXVD2X)
945 p.From.Type = obj.TYPE_REG
946 p.From.Reg = ppc64.REG_VS32
947 p.To.Type = obj.TYPE_MEM
948 p.To.Reg = v.Args[0].Reg()
949 p.To.Index = ppc64.REGTMP
951 // Increment address for the
952 // 4 doublewords just zeroed.
953 p = s.Prog(ppc64.AADD)
954 p.Reg = v.Args[0].Reg()
955 p.From.Type = obj.TYPE_CONST
957 p.To.Type = obj.TYPE_REG
958 p.To.Reg = v.Args[0].Reg()
960 // Branch back to top of loop
962 // BC with BO_BCTR generates bdnz
963 p = s.Prog(ppc64.ABC)
964 p.From.Type = obj.TYPE_CONST
965 p.From.Offset = ppc64.BO_BCTR
967 p.To.Type = obj.TYPE_BRANCH
971 // when ctr == 1 the loop was not generated but
972 // there are at least 32 bytes to clear, so add
973 // that to the remainder to generate the code
974 // to clear those doublewords
979 // clear the remainder starting at offset zero
982 // first clear as many doublewords as possible
983 // then clear remaining sizes as available
985 op, size := ppc64.AMOVB, int64(1)
988 op, size = ppc64.AMOVD, 8
990 op, size = ppc64.AMOVW, 4
992 op, size = ppc64.AMOVH, 2
995 p.From.Type = obj.TYPE_REG
996 p.From.Reg = ppc64.REG_R0
997 p.To.Type = obj.TYPE_MEM
998 p.To.Reg = v.Args[0].Reg()
1004 case ssa.OpPPC64LoweredMove:
1006 // This will be used when moving more
1007 // than 8 bytes. Moves start with
1008 // as many 8 byte moves as possible, then
1009 // 4, 2, or 1 byte(s) as remaining. This will
1010 // work and be efficient for power8 or later.
1011 // If there are 64 or more bytes, then a
1012 // loop is generated to move 32 bytes and
1013 // update the src and dst addresses on each
1014 // iteration. When < 64 bytes, the appropriate
1015 // number of moves are generated based on the
1017 // When moving >= 64 bytes a loop is used
1018 // MOVD len/32,REG_TMP
1022 // LXVD2X (R0)(R4),VS32
1023 // LXVD2X (R31)(R4),VS33
1025 // STXVD2X VS32,(R0)(R3)
1026 // STXVD2X VS33,(R31)(R4)
1029 // Bytes not moved by this loop are moved
1030 // with a combination of the following instructions,
1031 // starting with the largest sizes and generating as
1032 // many as needed, using the appropriate offset value.
1042 // Each loop iteration moves 32 bytes
1043 ctr := v.AuxInt / 32
1045 // Remainder after the loop
1046 rem := v.AuxInt % 32
1048 dst_reg := v.Args[0].Reg()
1049 src_reg := v.Args[1].Reg()
1051 // The set of registers used here, must match the clobbered reg list
1057 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1060 p := s.Prog(ppc64.AMOVD)
1061 p.From.Type = obj.TYPE_CONST
1063 p.To.Type = obj.TYPE_REG
1064 p.To.Reg = ppc64.REGTMP
1066 p = s.Prog(ppc64.AMOVD)
1067 p.From.Type = obj.TYPE_REG
1068 p.From.Reg = ppc64.REGTMP
1069 p.To.Type = obj.TYPE_REG
1070 p.To.Reg = ppc64.REG_CTR
1072 // Use REGTMP as index reg
1073 p = s.Prog(ppc64.AMOVD)
1074 p.From.Type = obj.TYPE_CONST
1076 p.To.Type = obj.TYPE_REG
1077 p.To.Reg = ppc64.REGTMP
1079 // Generate 16 byte loads and stores.
1080 // Use temp register for index (16)
1081 // on the second one.
1082 p = s.Prog(ppc64.ALXVD2X)
1083 p.From.Type = obj.TYPE_MEM
1084 p.From.Reg = src_reg
1085 p.From.Index = ppc64.REGZERO
1086 p.To.Type = obj.TYPE_REG
1087 p.To.Reg = ppc64.REG_VS32
1093 p = s.Prog(ppc64.ALXVD2X)
1094 p.From.Type = obj.TYPE_MEM
1095 p.From.Reg = src_reg
1096 p.From.Index = ppc64.REGTMP
1097 p.To.Type = obj.TYPE_REG
1098 p.To.Reg = ppc64.REG_VS33
1100 // increment the src reg for next iteration
1101 p = s.Prog(ppc64.AADD)
1103 p.From.Type = obj.TYPE_CONST
1105 p.To.Type = obj.TYPE_REG
1108 // generate 16 byte stores
1109 p = s.Prog(ppc64.ASTXVD2X)
1110 p.From.Type = obj.TYPE_REG
1111 p.From.Reg = ppc64.REG_VS32
1112 p.To.Type = obj.TYPE_MEM
1114 p.To.Index = ppc64.REGZERO
1116 p = s.Prog(ppc64.ASTXVD2X)
1117 p.From.Type = obj.TYPE_REG
1118 p.From.Reg = ppc64.REG_VS33
1119 p.To.Type = obj.TYPE_MEM
1121 p.To.Index = ppc64.REGTMP
1123 // increment the dst reg for next iteration
1124 p = s.Prog(ppc64.AADD)
1126 p.From.Type = obj.TYPE_CONST
1128 p.To.Type = obj.TYPE_REG
1131 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1133 p = s.Prog(ppc64.ABC)
1134 p.From.Type = obj.TYPE_CONST
1135 p.From.Offset = ppc64.BO_BCTR
1136 p.Reg = ppc64.REG_R0
1137 p.To.Type = obj.TYPE_BRANCH
1140 // src_reg and dst_reg were incremented in the loop, so
1141 // later instructions start with offset 0.
1145 // No loop was generated for one iteration, so
1146 // add 32 bytes to the remainder to move those bytes.
1152 // Generate 16 byte loads and stores.
1153 // Use temp register for index (value 16)
1154 // on the second one.
1155 p := s.Prog(ppc64.ALXVD2X)
1156 p.From.Type = obj.TYPE_MEM
1157 p.From.Reg = src_reg
1158 p.From.Index = ppc64.REGZERO
1159 p.To.Type = obj.TYPE_REG
1160 p.To.Reg = ppc64.REG_VS32
1162 p = s.Prog(ppc64.ASTXVD2X)
1163 p.From.Type = obj.TYPE_REG
1164 p.From.Reg = ppc64.REG_VS32
1165 p.To.Type = obj.TYPE_MEM
1167 p.To.Index = ppc64.REGZERO
1173 // Use REGTMP as index reg
1174 p = s.Prog(ppc64.AMOVD)
1175 p.From.Type = obj.TYPE_CONST
1177 p.To.Type = obj.TYPE_REG
1178 p.To.Reg = ppc64.REGTMP
1180 // Generate 16 byte loads and stores.
1181 // Use temp register for index (16)
1182 // on the second one.
1183 p = s.Prog(ppc64.ALXVD2X)
1184 p.From.Type = obj.TYPE_MEM
1185 p.From.Reg = src_reg
1186 p.From.Index = ppc64.REGTMP
1187 p.To.Type = obj.TYPE_REG
1188 p.To.Reg = ppc64.REG_VS32
1190 p = s.Prog(ppc64.ASTXVD2X)
1191 p.From.Type = obj.TYPE_REG
1192 p.From.Reg = ppc64.REG_VS32
1193 p.To.Type = obj.TYPE_MEM
1195 p.To.Index = ppc64.REGTMP
1202 // Generate all the remaining load and store pairs, starting with
1203 // as many 8 byte moves as possible, then 4, 2, 1.
1205 op, size := ppc64.AMOVB, int64(1)
1208 op, size = ppc64.AMOVD, 8
1210 op, size = ppc64.AMOVW, 4
1212 op, size = ppc64.AMOVH, 2
// R14 is used as the scratch register to stage each load/store pair.
1216 p.To.Type = obj.TYPE_REG
1217 p.To.Reg = ppc64.REG_R14
1218 p.From.Type = obj.TYPE_MEM
1219 p.From.Reg = src_reg
1220 p.From.Offset = offset
1224 p.From.Type = obj.TYPE_REG
1225 p.From.Reg = ppc64.REG_R14
1226 p.To.Type = obj.TYPE_MEM
1228 p.To.Offset = offset
1233 case ssa.OpPPC64CALLstatic:
1236 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
// Indirect calls: move the target into LR; the ABI requires the
// function address in R12 (checked below).
1237 p := s.Prog(ppc64.AMOVD)
1238 p.From.Type = obj.TYPE_REG
1239 p.From.Reg = v.Args[0].Reg()
1240 p.To.Type = obj.TYPE_REG
1241 p.To.Reg = ppc64.REG_LR
1243 if v.Args[0].Reg() != ppc64.REG_R12 {
1244 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1248 pp.To.Reg = ppc64.REG_LR
1250 if gc.Ctxt.Flag_shared {
1251 // When compiling Go into PIC, the function we just
1252 // called via pointer might have been implemented in
1253 // a separate module and so overwritten the TOC
1254 // pointer in R2; reload it.
1255 q := s.Prog(ppc64.AMOVD)
1256 q.From.Type = obj.TYPE_MEM
1258 q.From.Reg = ppc64.REGSP
1259 q.To.Type = obj.TYPE_REG
1260 q.To.Reg = ppc64.REG_R2
1263 case ssa.OpPPC64LoweredWB:
1264 p := s.Prog(obj.ACALL)
1265 p.To.Type = obj.TYPE_MEM
1266 p.To.Name = obj.NAME_EXTERN
1267 p.To.Sym = v.Aux.(*obj.LSym)
1269 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1270 p := s.Prog(obj.ACALL)
1271 p.To.Type = obj.TYPE_MEM
1272 p.To.Name = obj.NAME_EXTERN
1273 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1274 s.UseArgs(16) // space used in callee args area by assembly stubs
1276 case ssa.OpPPC64LoweredNilCheck:
// AIX takes a different path: compare against 0 and, on non-nil,
// branch over a store to address 0 that is guaranteed to fault.
1277 if objabi.GOOS == "aix" {
1281 // NOP (so the BNE has somewhere to land)
1284 p := s.Prog(ppc64.ACMP)
1285 p.From.Type = obj.TYPE_REG
1286 p.From.Reg = v.Args[0].Reg()
1287 p.To.Type = obj.TYPE_REG
1288 p.To.Reg = ppc64.REG_R0
1291 p2 := s.Prog(ppc64.ABNE)
1292 p2.To.Type = obj.TYPE_BRANCH
1295 // Write at 0 is forbidden and will trigger a SIGSEGV
1296 p = s.Prog(ppc64.AMOVW)
1297 p.From.Type = obj.TYPE_REG
1298 p.From.Reg = ppc64.REG_R0
1299 p.To.Type = obj.TYPE_MEM
1300 p.To.Reg = ppc64.REG_R0
1302 // NOP (so the BNE has somewhere to land)
1303 nop := s.Prog(obj.ANOP)
1307 // Issue a load which will fault if arg is nil.
1308 p := s.Prog(ppc64.AMOVBZ)
1309 p.From.Type = obj.TYPE_MEM
1310 p.From.Reg = v.Args[0].Reg()
1311 gc.AddAux(&p.From, v)
1312 p.To.Type = obj.TYPE_REG
1313 p.To.Reg = ppc64.REGTMP
1315 if logopt.Enabled() {
1316 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1318 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1319 gc.Warnl(v.Pos, "generated nil check")
1322 // These should be resolved by rules and not make it here.
1323 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1324 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1325 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1326 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1327 case ssa.OpPPC64InvertFlags:
1328 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1329 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT, ssa.OpPPC64FlagCarrySet, ssa.OpPPC64FlagCarryClear:
1330 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1332 // TODO: implement for clobberdead experiment. Nop is ok for now.
1334 v.Fatalf("genValue not implemented: %s", v.LongString())
// blockJump maps a conditional SSA block kind to the branch opcode taken
// when the condition holds (asm) and the inverted opcode (invasm).
// For floating-point kinds, asmeq/invasmun indicate that an extra branch
// is needed to cover the EQ / unordered (NaN) cases — see the FGE/FLE
// entries below and their use in ssaGenBlock.
1338 var blockJump = [...]struct {
1340 asmeq, invasmun bool
1342 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1343 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1345 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1346 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1347 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1348 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1350 // TODO: need to work FP comparisons into block jumps
1351 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1352 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1353 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1354 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
// ssaGenBlock emits the control-flow instructions ending SSA block b.
// next is the block that will be laid out immediately after b, so a jump
// to it can be elided. Conditional kinds look up their opcodes in blockJump
// and choose direct vs. inverted branches depending on which successor
// follows; extra ABVS/ABEQ branches cover the FP unordered/equal cases.
//
// NOTE(review): this listing is elided (non-contiguous embedded line
// numbers); some statements between the visible lines are not shown.
1357 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1359 case ssa.BlockDefer:
1360 // defer returns in R3:
1361 // 0 if we should continue executing
1362 // 1 if we should jump to deferreturn call
1363 p := s.Prog(ppc64.ACMP)
1364 p.From.Type = obj.TYPE_REG
1365 p.From.Reg = ppc64.REG_R3
1366 p.To.Type = obj.TYPE_REG
1367 p.To.Reg = ppc64.REG_R0
1369 p = s.Prog(ppc64.ABNE)
1370 p.To.Type = obj.TYPE_BRANCH
1371 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1372 if b.Succs[0].Block() != next {
1373 p := s.Prog(obj.AJMP)
1374 p.To.Type = obj.TYPE_BRANCH
1375 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1378 case ssa.BlockPlain:
// Unconditional fallthrough; emit a jump only if the successor is
// not the next block in layout order.
1379 if b.Succs[0].Block() != next {
1380 p := s.Prog(obj.AJMP)
1381 p.To.Type = obj.TYPE_BRANCH
1382 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1387 case ssa.BlockRetJmp:
1388 p := s.Prog(obj.AJMP)
1389 p.To.Type = obj.TYPE_MEM
1390 p.To.Name = obj.NAME_EXTERN
1391 p.To.Sym = b.Aux.(*obj.LSym)
1393 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1394 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1395 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1396 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1397 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1398 jmp := blockJump[b.Kind]
1400 case b.Succs[0].Block():
// Taken successor is laid out next: branch on the inverted
// condition to the other successor (plus ABVS for FP unordered).
1401 s.Br(jmp.invasm, b.Succs[1].Block())
1403 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1404 s.Br(ppc64.ABVS, b.Succs[1].Block())
1406 case b.Succs[1].Block():
// Untaken successor is next: branch on the straight condition.
1407 s.Br(jmp.asm, b.Succs[0].Block())
1409 s.Br(ppc64.ABEQ, b.Succs[0].Block())
// Neither successor is next: pick branch direction from the
// likeliness hint, then jump to the remaining successor.
1412 if b.Likely != ssa.BranchUnlikely {
1413 s.Br(jmp.asm, b.Succs[0].Block())
1415 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1417 s.Br(obj.AJMP, b.Succs[1].Block())
1419 s.Br(jmp.invasm, b.Succs[1].Block())
1421 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1422 s.Br(ppc64.ABVS, b.Succs[1].Block())
1424 s.Br(obj.AJMP, b.Succs[0].Block())
1428 b.Fatalf("branch not implemented: %s", b.LongString())