1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/gc"
9 "cmd/compile/internal/ssa"
10 "cmd/compile/internal/types"
12 "cmd/internal/obj/ppc64"
18 // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
// NOTE(review): fragment — every line of this file carries a fused original
// line number and interior lines are missing; the body below survives only as
// commented-out code (a flag-liveness backward scan over b.Values), and the
// closing brace is outside this excerpt.
19 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
20 // flive := b.FlagsLiveAtEnd
21 // if b.Control != nil && b.Control.Type.IsFlags() {
24 // for i := len(b.Values) - 1; i >= 0; i-- {
// NOTE(review): the commented-out condition below contains a typo —
// "v.Op == v.Op ==" — it presumably should read
// "v.Op == ssa.OpPPC64MOVDconst"; confirm against the amd64 ssaMarkMoves
// before ever un-commenting this code.
26 // if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
27 // // The "mark" is any non-nil Aux value.
30 // if v.Type.IsFlags() {
33 // for _, a := range v.Args {
34 // if a.Type.IsFlags() {
41 // loadByType returns the load instruction of the given type.
42 func loadByType(t *types.Type) obj.As {
// NOTE(review): fragment — the switch that selects the width/sign-appropriate
// load opcode for t is missing from this excerpt (original lines 43-73);
// only the fall-through panic for an unhandled type is visible.
74 panic("bad load type")
77 // storeByType returns the store instruction of the given type.
78 func storeByType(t *types.Type) obj.As {
// NOTE(review): fragment — the switch selecting the store opcode for t is
// missing (original lines 79-97); only the unhandled-type panic is visible.
98 panic("bad store type")
// ssaGenValue emits the machine instructions (obj.Progs via s.Prog) for a
// single SSA value v, dispatching on v.Op.
// NOTE(review): fragment — the embedded original line numbers jump throughout
// (e.g. 101->124, 130->133), so many statements of each case are missing here;
// comments below describe only what the visible lines establish.
101 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
124 case ssa.OpPPC64LoweredMuluhilo:
125 // MULHDU Rarg1, Rarg0, Reg0
126 // MULLD Rarg1, Rarg0, Reg1
127 r0 := v.Args[0].Reg()
128 r1 := v.Args[1].Reg()
129 p := s.Prog(ppc64.AMULHDU)
130 p.From.Type = obj.TYPE_REG
133 p.To.Type = obj.TYPE_REG
135 p1 := s.Prog(ppc64.AMULLD)
136 p1.From.Type = obj.TYPE_REG
139 p1.To.Type = obj.TYPE_REG
142 case ssa.OpPPC64LoweredAdd64Carry:
143 // ADDC Rarg2, -1, Rtmp
144 // ADDE Rarg1, Rarg0, Reg0
146 r0 := v.Args[0].Reg()
147 r1 := v.Args[1].Reg()
148 r2 := v.Args[2].Reg()
149 p := s.Prog(ppc64.AADDC)
150 p.From.Type = obj.TYPE_CONST
153 p.To.Type = obj.TYPE_REG
154 p.To.Reg = ppc64.REGTMP
155 p1 := s.Prog(ppc64.AADDE)
156 p1.From.Type = obj.TYPE_REG
159 p1.To.Type = obj.TYPE_REG
161 p2 := s.Prog(ppc64.AADDZE)
162 p2.From.Type = obj.TYPE_REG
163 p2.From.Reg = ppc64.REGZERO
164 p2.To.Type = obj.TYPE_REG
// The atomic RMW ops below all follow the larx/stcx. retry-loop shape:
// LWSYNC barrier, load-reserve, operate, store-conditional, BNE back.
167 case ssa.OpPPC64LoweredAtomicAnd8,
168 ssa.OpPPC64LoweredAtomicOr8:
170 // LBAR (Rarg0), Rtmp
171 // AND/OR Rarg1, Rtmp
172 // STBCCC Rtmp, (Rarg0)
174 r0 := v.Args[0].Reg()
175 r1 := v.Args[1].Reg()
176 // LWSYNC - Assuming shared data not write-through-required nor
177 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
178 plwsync := s.Prog(ppc64.ALWSYNC)
179 plwsync.To.Type = obj.TYPE_NONE
180 p := s.Prog(ppc64.ALBAR)
181 p.From.Type = obj.TYPE_MEM
183 p.To.Type = obj.TYPE_REG
184 p.To.Reg = ppc64.REGTMP
185 p1 := s.Prog(v.Op.Asm())
186 p1.From.Type = obj.TYPE_REG
188 p1.To.Type = obj.TYPE_REG
189 p1.To.Reg = ppc64.REGTMP
190 p2 := s.Prog(ppc64.ASTBCCC)
191 p2.From.Type = obj.TYPE_REG
192 p2.From.Reg = ppc64.REGTMP
193 p2.To.Type = obj.TYPE_MEM
195 p2.RegTo2 = ppc64.REGTMP
196 p3 := s.Prog(ppc64.ABNE)
197 p3.To.Type = obj.TYPE_BRANCH
200 case ssa.OpPPC64LoweredAtomicAdd32,
201 ssa.OpPPC64LoweredAtomicAdd64:
203 // LDAR/LWAR (Rarg0), Rout
205 // STDCCC/STWCCC Rout, (Rarg0)
207 // MOVW Rout,Rout (if Add32)
210 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
214 r0 := v.Args[0].Reg()
215 r1 := v.Args[1].Reg()
217 // LWSYNC - Assuming shared data not write-through-required nor
218 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
219 plwsync := s.Prog(ppc64.ALWSYNC)
220 plwsync.To.Type = obj.TYPE_NONE
223 p.From.Type = obj.TYPE_MEM
225 p.To.Type = obj.TYPE_REG
228 p1 := s.Prog(ppc64.AADD)
229 p1.From.Type = obj.TYPE_REG
232 p1.To.Type = obj.TYPE_REG
235 p3.From.Type = obj.TYPE_REG
237 p3.To.Type = obj.TYPE_MEM
240 p4 := s.Prog(ppc64.ABNE)
241 p4.To.Type = obj.TYPE_BRANCH
244 // Ensure a 32 bit result
245 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
246 p5 := s.Prog(ppc64.AMOVWZ)
247 p5.To.Type = obj.TYPE_REG
249 p5.From.Type = obj.TYPE_REG
253 case ssa.OpPPC64LoweredAtomicExchange32,
254 ssa.OpPPC64LoweredAtomicExchange64:
256 // LDAR/LWAR (Rarg0), Rout
257 // STDCCC/STWCCC Rout, (Rarg0)
262 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
266 r0 := v.Args[0].Reg()
267 r1 := v.Args[1].Reg()
269 // LWSYNC - Assuming shared data not write-through-required nor
270 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
271 plwsync := s.Prog(ppc64.ALWSYNC)
272 plwsync.To.Type = obj.TYPE_NONE
275 p.From.Type = obj.TYPE_MEM
277 p.To.Type = obj.TYPE_REG
281 p1.From.Type = obj.TYPE_REG
283 p1.To.Type = obj.TYPE_MEM
286 p2 := s.Prog(ppc64.ABNE)
287 p2.To.Type = obj.TYPE_BRANCH
290 pisync := s.Prog(ppc64.AISYNC)
291 pisync.To.Type = obj.TYPE_NONE
293 case ssa.OpPPC64LoweredAtomicLoad8,
294 ssa.OpPPC64LoweredAtomicLoad32,
295 ssa.OpPPC64LoweredAtomicLoad64,
296 ssa.OpPPC64LoweredAtomicLoadPtr:
298 // MOVB/MOVD/MOVW (Rarg0), Rout
305 case ssa.OpPPC64LoweredAtomicLoad8:
307 case ssa.OpPPC64LoweredAtomicLoad32:
311 arg0 := v.Args[0].Reg()
313 // SYNC when AuxInt == 1; otherwise, load-acquire
315 psync := s.Prog(ppc64.ASYNC)
316 psync.To.Type = obj.TYPE_NONE
320 p.From.Type = obj.TYPE_MEM
322 p.To.Type = obj.TYPE_REG
326 p1.From.Type = obj.TYPE_REG
328 p1.To.Type = obj.TYPE_REG
331 p2 := s.Prog(ppc64.ABNE)
332 p2.To.Type = obj.TYPE_BRANCH
334 pisync := s.Prog(ppc64.AISYNC)
335 pisync.To.Type = obj.TYPE_NONE
338 case ssa.OpPPC64LoweredAtomicStore8,
339 ssa.OpPPC64LoweredAtomicStore32,
340 ssa.OpPPC64LoweredAtomicStore64:
342 // MOVB/MOVW/MOVD arg1,(arg0)
345 case ssa.OpPPC64LoweredAtomicStore8:
347 case ssa.OpPPC64LoweredAtomicStore32:
350 arg0 := v.Args[0].Reg()
351 arg1 := v.Args[1].Reg()
352 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
354 syncOp := ppc64.ASYNC
356 syncOp = ppc64.ALWSYNC
358 psync := s.Prog(syncOp)
359 psync.To.Type = obj.TYPE_NONE
362 p.To.Type = obj.TYPE_MEM
364 p.From.Type = obj.TYPE_REG
367 case ssa.OpPPC64LoweredAtomicCas64,
368 ssa.OpPPC64LoweredAtomicCas32:
371 // LDAR (Rarg0), MutexHint, Rtmp
374 // STDCCC Rarg2, (Rarg0)
376 // LWSYNC // Only for sequential consistency; not required in CasRel.
385 if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
390 r0 := v.Args[0].Reg()
391 r1 := v.Args[1].Reg()
392 r2 := v.Args[2].Reg()
394 // LWSYNC - Assuming shared data not write-through-required nor
395 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
396 plwsync1 := s.Prog(ppc64.ALWSYNC)
397 plwsync1.To.Type = obj.TYPE_NONE
400 p.From.Type = obj.TYPE_MEM
402 p.To.Type = obj.TYPE_REG
403 p.To.Reg = ppc64.REGTMP
404 // If it is a Compare-and-Swap-Release operation, set the EH field with
407 p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
411 p1.From.Type = obj.TYPE_REG
413 p1.To.Reg = ppc64.REGTMP
414 p1.To.Type = obj.TYPE_REG
416 p2 := s.Prog(ppc64.ABNE)
417 p2.To.Type = obj.TYPE_BRANCH
420 p3.From.Type = obj.TYPE_REG
422 p3.To.Type = obj.TYPE_MEM
425 p4 := s.Prog(ppc64.ABNE)
426 p4.To.Type = obj.TYPE_BRANCH
428 // LWSYNC - Assuming shared data not write-through-required nor
429 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
430 // If the operation is a CAS-Release, then synchronization is not necessary.
432 plwsync2 := s.Prog(ppc64.ALWSYNC)
433 plwsync2.To.Type = obj.TYPE_NONE
436 p5 := s.Prog(ppc64.AMOVD)
437 p5.From.Type = obj.TYPE_CONST
439 p5.To.Type = obj.TYPE_REG
442 p6 := s.Prog(obj.AJMP)
443 p6.To.Type = obj.TYPE_BRANCH
445 p7 := s.Prog(ppc64.AMOVD)
446 p7.From.Type = obj.TYPE_CONST
448 p7.To.Type = obj.TYPE_REG
452 p8 := s.Prog(obj.ANOP)
455 case ssa.OpPPC64LoweredGetClosurePtr:
456 // Closure pointer is R11 (already)
457 gc.CheckLoweredGetClosurePtr(v)
459 case ssa.OpPPC64LoweredGetCallerSP:
460 // caller's SP is FixedFrameSize below the address of the first arg
461 p := s.Prog(ppc64.AMOVD)
462 p.From.Type = obj.TYPE_ADDR
463 p.From.Offset = -gc.Ctxt.FixedFrameSize()
464 p.From.Name = obj.NAME_PARAM
465 p.To.Type = obj.TYPE_REG
468 case ssa.OpPPC64LoweredGetCallerPC:
469 p := s.Prog(obj.AGETCALLERPC)
470 p.To.Type = obj.TYPE_REG
473 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
474 // input is already rounded
477 loadOp := loadByType(v.Type)
479 gc.AddrAuto(&p.From, v.Args[0])
480 p.To.Type = obj.TYPE_REG
484 storeOp := storeByType(v.Type)
486 p.From.Type = obj.TYPE_REG
487 p.From.Reg = v.Args[0].Reg()
488 gc.AddrAuto(&p.To, v)
// Signed division: the visible code compares one operand against a constant,
// branches (BEQ) around the divide, and has a separate NEG path —
// presumably the MinInt64 / -1 overflow special case; confirm against the
// full source, since the compared constant's line is missing here.
490 case ssa.OpPPC64DIVD:
500 r0 := v.Args[0].Reg()
501 r1 := v.Args[1].Reg()
503 p := s.Prog(ppc64.ACMP)
504 p.From.Type = obj.TYPE_REG
506 p.To.Type = obj.TYPE_CONST
509 pbahead := s.Prog(ppc64.ABEQ)
510 pbahead.To.Type = obj.TYPE_BRANCH
512 p = s.Prog(v.Op.Asm())
513 p.From.Type = obj.TYPE_REG
516 p.To.Type = obj.TYPE_REG
519 pbover := s.Prog(obj.AJMP)
520 pbover.To.Type = obj.TYPE_BRANCH
522 p = s.Prog(ppc64.ANEG)
523 p.To.Type = obj.TYPE_REG
525 p.From.Type = obj.TYPE_REG
532 case ssa.OpPPC64DIVW:
533 // word-width version of above
535 r0 := v.Args[0].Reg()
536 r1 := v.Args[1].Reg()
538 p := s.Prog(ppc64.ACMPW)
539 p.From.Type = obj.TYPE_REG
541 p.To.Type = obj.TYPE_CONST
544 pbahead := s.Prog(ppc64.ABEQ)
545 pbahead.To.Type = obj.TYPE_BRANCH
547 p = s.Prog(v.Op.Asm())
548 p.From.Type = obj.TYPE_REG
551 p.To.Type = obj.TYPE_REG
554 pbover := s.Prog(obj.AJMP)
555 pbover.To.Type = obj.TYPE_BRANCH
557 p = s.Prog(ppc64.ANEG)
558 p.To.Type = obj.TYPE_REG
560 p.From.Type = obj.TYPE_REG
// Generic two-register ALU/FPU ops: the opcode comes from v.Op.Asm().
567 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
568 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
569 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
570 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
571 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
572 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
573 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
575 r1 := v.Args[0].Reg()
576 r2 := v.Args[1].Reg()
577 p := s.Prog(v.Op.Asm())
578 p.From.Type = obj.TYPE_REG
581 p.To.Type = obj.TYPE_REG
584 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
585 r1 := v.Args[0].Reg()
586 r2 := v.Args[1].Reg()
587 p := s.Prog(v.Op.Asm())
588 p.From.Type = obj.TYPE_REG
591 p.To.Type = obj.TYPE_REG
592 p.To.Reg = ppc64.REGTMP // result is not needed
594 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
595 p := s.Prog(v.Op.Asm())
596 p.From.Type = obj.TYPE_CONST
597 p.From.Offset = v.AuxInt
598 p.Reg = v.Args[0].Reg()
599 p.To.Type = obj.TYPE_REG
602 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
604 r1 := v.Args[0].Reg()
605 r2 := v.Args[1].Reg()
606 r3 := v.Args[2].Reg()
608 p := s.Prog(v.Op.Asm())
609 p.From.Type = obj.TYPE_REG
612 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
613 p.To.Type = obj.TYPE_REG
616 case ssa.OpPPC64MaskIfNotCarry:
618 p := s.Prog(v.Op.Asm())
619 p.From.Type = obj.TYPE_REG
620 p.From.Reg = ppc64.REGZERO
621 p.To.Type = obj.TYPE_REG
624 case ssa.OpPPC64ADDconstForCarry:
625 r1 := v.Args[0].Reg()
626 p := s.Prog(v.Op.Asm())
628 p.From.Type = obj.TYPE_CONST
629 p.From.Offset = v.AuxInt
630 p.To.Type = obj.TYPE_REG
631 p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
633 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
634 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
635 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
636 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
638 p := s.Prog(v.Op.Asm())
639 p.To.Type = obj.TYPE_REG
641 p.From.Type = obj.TYPE_REG
642 p.From.Reg = v.Args[0].Reg()
644 case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
645 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
646 p := s.Prog(v.Op.Asm())
647 p.Reg = v.Args[0].Reg()
648 p.From.Type = obj.TYPE_CONST
649 p.From.Offset = v.AuxInt
650 p.To.Type = obj.TYPE_REG
653 case ssa.OpPPC64ANDCCconst:
654 p := s.Prog(v.Op.Asm())
655 p.Reg = v.Args[0].Reg()
658 p.From.Type = obj.TYPE_CONST
659 p.From.Offset = gc.AuxOffset(v)
661 p.From.Type = obj.TYPE_CONST
662 p.From.Offset = v.AuxInt
665 p.To.Type = obj.TYPE_REG
666 p.To.Reg = ppc64.REGTMP // discard result
668 case ssa.OpPPC64MOVDaddr:
669 switch v.Aux.(type) {
671 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
673 // If aux offset and aux int are both 0, and the same
674 // input and output regs are used, no instruction
675 // needs to be generated, since it would just be
677 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
678 p := s.Prog(ppc64.AMOVD)
679 p.From.Type = obj.TYPE_ADDR
680 p.From.Reg = v.Args[0].Reg()
681 p.From.Offset = v.AuxInt
682 p.To.Type = obj.TYPE_REG
686 case *obj.LSym, *gc.Node:
687 p := s.Prog(ppc64.AMOVD)
688 p.From.Type = obj.TYPE_ADDR
689 p.From.Reg = v.Args[0].Reg()
690 p.To.Type = obj.TYPE_REG
692 gc.AddAux(&p.From, v)
696 case ssa.OpPPC64MOVDconst:
697 p := s.Prog(v.Op.Asm())
698 p.From.Type = obj.TYPE_CONST
699 p.From.Offset = v.AuxInt
700 p.To.Type = obj.TYPE_REG
703 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
704 p := s.Prog(v.Op.Asm())
705 p.From.Type = obj.TYPE_FCONST
706 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
707 p.To.Type = obj.TYPE_REG
710 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
711 p := s.Prog(v.Op.Asm())
712 p.From.Type = obj.TYPE_REG
713 p.From.Reg = v.Args[0].Reg()
714 p.To.Type = obj.TYPE_REG
715 p.To.Reg = v.Args[1].Reg()
717 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
718 p := s.Prog(v.Op.Asm())
719 p.From.Type = obj.TYPE_REG
720 p.From.Reg = v.Args[0].Reg()
721 p.To.Type = obj.TYPE_CONST
722 p.To.Offset = v.AuxInt
724 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
725 // Shift in register to required size
726 p := s.Prog(v.Op.Asm())
727 p.From.Type = obj.TYPE_REG
728 p.From.Reg = v.Args[0].Reg()
730 p.To.Type = obj.TYPE_REG
732 case ssa.OpPPC64MOVDload:
734 // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
735 // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
736 // the offset is not known until link time. If the load of a go.string uses relocation for the
737 // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
738 // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
739 // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
740 // go.string types because other types will have proper alignment.
743 switch n := v.Aux.(type) {
745 gostring = strings.HasPrefix(n.Name, "go.string.")
748 // Generate full addr of the go.string const
750 p := s.Prog(ppc64.AMOVD)
751 p.From.Type = obj.TYPE_ADDR
752 p.From.Reg = v.Args[0].Reg()
753 gc.AddAux(&p.From, v)
754 p.To.Type = obj.TYPE_REG
756 // Load go.string using 0 offset
757 p = s.Prog(v.Op.Asm())
758 p.From.Type = obj.TYPE_MEM
760 p.To.Type = obj.TYPE_REG
764 // Not a go.string, generate a normal load
767 case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
768 p := s.Prog(v.Op.Asm())
769 p.From.Type = obj.TYPE_MEM
770 p.From.Reg = v.Args[0].Reg()
771 gc.AddAux(&p.From, v)
772 p.To.Type = obj.TYPE_REG
775 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
776 p := s.Prog(v.Op.Asm())
777 p.From.Type = obj.TYPE_MEM
778 p.From.Reg = v.Args[0].Reg()
779 p.To.Type = obj.TYPE_REG
782 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
783 p := s.Prog(v.Op.Asm())
784 p.To.Type = obj.TYPE_MEM
785 p.To.Reg = v.Args[0].Reg()
786 p.From.Type = obj.TYPE_REG
787 p.From.Reg = v.Args[1].Reg()
789 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
790 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
791 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
792 p := s.Prog(v.Op.Asm())
793 p.From.Type = obj.TYPE_MEM
794 p.From.Reg = v.Args[0].Reg()
795 p.From.Index = v.Args[1].Reg()
796 gc.AddAux(&p.From, v)
797 p.To.Type = obj.TYPE_REG
800 case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
801 p := s.Prog(v.Op.Asm())
802 p.From.Type = obj.TYPE_REG
803 p.From.Reg = ppc64.REGZERO
804 p.To.Type = obj.TYPE_MEM
805 p.To.Reg = v.Args[0].Reg()
808 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
809 p := s.Prog(v.Op.Asm())
810 p.From.Type = obj.TYPE_REG
811 p.From.Reg = v.Args[1].Reg()
812 p.To.Type = obj.TYPE_MEM
813 p.To.Reg = v.Args[0].Reg()
816 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
817 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
818 ssa.OpPPC64MOVHBRstoreidx:
819 p := s.Prog(v.Op.Asm())
820 p.From.Type = obj.TYPE_REG
821 p.From.Reg = v.Args[2].Reg()
822 p.To.Index = v.Args[1].Reg()
823 p.To.Type = obj.TYPE_MEM
824 p.To.Reg = v.Args[0].Reg()
827 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
829 // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
830 // ISEL only accepts 0, 1, 2 condition values but the others can be
831 // achieved by swapping operand order.
832 // arg0 ? arg1 : arg2 with conditions LT, GT, EQ
833 // arg0 ? arg2 : arg1 for conditions GE, LE, NE
834 // ISELB is used when a boolean result is needed, returning 0 or 1
835 p := s.Prog(ppc64.AISEL)
836 p.To.Type = obj.TYPE_REG
838 // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
839 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
840 if v.Op == ssa.OpPPC64ISEL {
841 r.Reg = v.Args[1].Reg()
843 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
846 p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
848 p.Reg = v.Args[0].Reg()
851 p.From.Type = obj.TYPE_CONST
852 p.From.Offset = v.AuxInt & 3
854 case ssa.OpPPC64LoweredZero:
856 // unaligned data doesn't hurt performance
857 // for these instructions on power8 or later
859 // for sizes >= 64 generate a loop as follows:
861 // set up loop counter in CTR, used by BC
862 // XXLXOR VS32,VS32,VS32
863 // MOVD len/32,REG_TMP
867 // STXVD2X VS32,(R0)(R3)
868 // STXVD2X VS32,(R31)(R3)
872 // any remainder is done as described below
874 // for sizes < 64 bytes, first clear as many doublewords as possible,
875 // then handle the remainder
880 // the remainder bytes are cleared using one or more
881 // of the following instructions with the appropriate
882 // offsets depending which instructions are needed
884 // MOVW R0,n1(R3) 4 bytes
885 // MOVH R0,n2(R3) 2 bytes
886 // MOVB R0,n3(R3) 1 byte
888 // 7 bytes: MOVW, MOVH, MOVB
889 // 6 bytes: MOVW, MOVH
890 // 5 bytes: MOVW, MOVB
891 // 3 bytes: MOVH, MOVB
893 // each loop iteration does 32 bytes
899 // only generate a loop if there is more
902 // Set up VS32 (V0) to hold 0s
903 p := s.Prog(ppc64.AXXLXOR)
904 p.From.Type = obj.TYPE_REG
905 p.From.Reg = ppc64.REG_VS32
906 p.To.Type = obj.TYPE_REG
907 p.To.Reg = ppc64.REG_VS32
908 p.Reg = ppc64.REG_VS32
910 // Set up CTR loop counter
911 p = s.Prog(ppc64.AMOVD)
912 p.From.Type = obj.TYPE_CONST
914 p.To.Type = obj.TYPE_REG
915 p.To.Reg = ppc64.REGTMP
917 p = s.Prog(ppc64.AMOVD)
918 p.From.Type = obj.TYPE_REG
919 p.From.Reg = ppc64.REGTMP
920 p.To.Type = obj.TYPE_REG
921 p.To.Reg = ppc64.REG_CTR
923 // Set up R31 to hold index value 16
924 p = s.Prog(ppc64.AMOVD)
925 p.From.Type = obj.TYPE_CONST
927 p.To.Type = obj.TYPE_REG
928 p.To.Reg = ppc64.REGTMP
930 // generate 2 STXVD2Xs to store 16 bytes
931 // when this is a loop then the top must be saved
933 // This is the top of loop
934 p = s.Prog(ppc64.ASTXVD2X)
935 p.From.Type = obj.TYPE_REG
936 p.From.Reg = ppc64.REG_VS32
937 p.To.Type = obj.TYPE_MEM
938 p.To.Reg = v.Args[0].Reg()
939 p.To.Index = ppc64.REGZERO
940 // Save the top of loop
945 p = s.Prog(ppc64.ASTXVD2X)
946 p.From.Type = obj.TYPE_REG
947 p.From.Reg = ppc64.REG_VS32
948 p.To.Type = obj.TYPE_MEM
949 p.To.Reg = v.Args[0].Reg()
950 p.To.Index = ppc64.REGTMP
952 // Increment address for the
953 // 4 doublewords just zeroed.
954 p = s.Prog(ppc64.AADD)
955 p.Reg = v.Args[0].Reg()
956 p.From.Type = obj.TYPE_CONST
958 p.To.Type = obj.TYPE_REG
959 p.To.Reg = v.Args[0].Reg()
961 // Branch back to top of loop
963 // BC with BO_BCTR generates bdnz
964 p = s.Prog(ppc64.ABC)
965 p.From.Type = obj.TYPE_CONST
966 p.From.Offset = ppc64.BO_BCTR
968 p.To.Type = obj.TYPE_BRANCH
972 // when ctr == 1 the loop was not generated but
973 // there are at least 32 bytes to clear, so add
974 // that to the remainder to generate the code
975 // to clear those doublewords
980 // clear the remainder starting at offset zero
983 // first clear as many doublewords as possible
984 // then clear remaining sizes as available
986 op, size := ppc64.AMOVB, int64(1)
989 op, size = ppc64.AMOVD, 8
991 op, size = ppc64.AMOVW, 4
993 op, size = ppc64.AMOVH, 2
996 p.From.Type = obj.TYPE_REG
997 p.From.Reg = ppc64.REG_R0
998 p.To.Type = obj.TYPE_MEM
999 p.To.Reg = v.Args[0].Reg()
1000 p.To.Offset = offset
1005 case ssa.OpPPC64LoweredMove:
1007 // This will be used when moving more
1008 // than 8 bytes. Moves start with
1009 // as many 8 byte moves as possible, then
1010 // 4, 2, or 1 byte(s) as remaining. This will
1011 // work and be efficient for power8 or later.
1012 // If there are 64 or more bytes, then a
1013 // loop is generated to move 32 bytes and
1014 // update the src and dst addresses on each
1015 // iteration. When < 64 bytes, the appropriate
1016 // number of moves are generated based on the
1018 // When moving >= 64 bytes a loop is used
1019 // MOVD len/32,REG_TMP
1023 // LXVD2X (R0)(R4),VS32
1024 // LXVD2X (R31)(R4),VS33
1026 // STXVD2X VS32,(R0)(R3)
1027 // STXVD2X VS33,(R31)(R4)
1030 // Bytes not moved by this loop are moved
1031 // with a combination of the following instructions,
1032 // starting with the largest sizes and generating as
1033 // many as needed, using the appropriate offset value.
1043 // Each loop iteration moves 32 bytes
1044 ctr := v.AuxInt / 32
1046 // Remainder after the loop
1047 rem := v.AuxInt % 32
1049 dst_reg := v.Args[0].Reg()
1050 src_reg := v.Args[1].Reg()
1052 // The set of registers used here, must match the clobbered reg list
1058 // Only generate looping code when loop counter is > 1 for >= 64 bytes
1061 p := s.Prog(ppc64.AMOVD)
1062 p.From.Type = obj.TYPE_CONST
1064 p.To.Type = obj.TYPE_REG
1065 p.To.Reg = ppc64.REGTMP
1067 p = s.Prog(ppc64.AMOVD)
1068 p.From.Type = obj.TYPE_REG
1069 p.From.Reg = ppc64.REGTMP
1070 p.To.Type = obj.TYPE_REG
1071 p.To.Reg = ppc64.REG_CTR
1073 // Use REGTMP as index reg
1074 p = s.Prog(ppc64.AMOVD)
1075 p.From.Type = obj.TYPE_CONST
1077 p.To.Type = obj.TYPE_REG
1078 p.To.Reg = ppc64.REGTMP
1080 // Generate 16 byte loads and stores.
1081 // Use temp register for index (16)
1082 // on the second one.
1083 p = s.Prog(ppc64.ALXVD2X)
1084 p.From.Type = obj.TYPE_MEM
1085 p.From.Reg = src_reg
1086 p.From.Index = ppc64.REGZERO
1087 p.To.Type = obj.TYPE_REG
1088 p.To.Reg = ppc64.REG_VS32
1094 p = s.Prog(ppc64.ALXVD2X)
1095 p.From.Type = obj.TYPE_MEM
1096 p.From.Reg = src_reg
1097 p.From.Index = ppc64.REGTMP
1098 p.To.Type = obj.TYPE_REG
1099 p.To.Reg = ppc64.REG_VS33
1101 // increment the src reg for next iteration
1102 p = s.Prog(ppc64.AADD)
1104 p.From.Type = obj.TYPE_CONST
1106 p.To.Type = obj.TYPE_REG
1109 // generate 16 byte stores
1110 p = s.Prog(ppc64.ASTXVD2X)
1111 p.From.Type = obj.TYPE_REG
1112 p.From.Reg = ppc64.REG_VS32
1113 p.To.Type = obj.TYPE_MEM
1115 p.To.Index = ppc64.REGZERO
1117 p = s.Prog(ppc64.ASTXVD2X)
1118 p.From.Type = obj.TYPE_REG
1119 p.From.Reg = ppc64.REG_VS33
1120 p.To.Type = obj.TYPE_MEM
1122 p.To.Index = ppc64.REGTMP
1124 // increment the dst reg for next iteration
1125 p = s.Prog(ppc64.AADD)
1127 p.From.Type = obj.TYPE_CONST
1129 p.To.Type = obj.TYPE_REG
1132 // BC with BO_BCTR generates bdnz to branch on nonzero CTR
1134 p = s.Prog(ppc64.ABC)
1135 p.From.Type = obj.TYPE_CONST
1136 p.From.Offset = ppc64.BO_BCTR
1137 p.Reg = ppc64.REG_R0
1138 p.To.Type = obj.TYPE_BRANCH
1141 // src_reg and dst_reg were incremented in the loop, so
1142 // later instructions start with offset 0.
1146 // No loop was generated for one iteration, so
1147 // add 32 bytes to the remainder to move those bytes.
1153 // Generate 16 byte loads and stores.
1154 // Use temp register for index (value 16)
1155 // on the second one.
1156 p := s.Prog(ppc64.ALXVD2X)
1157 p.From.Type = obj.TYPE_MEM
1158 p.From.Reg = src_reg
1159 p.From.Index = ppc64.REGZERO
1160 p.To.Type = obj.TYPE_REG
1161 p.To.Reg = ppc64.REG_VS32
1163 p = s.Prog(ppc64.ASTXVD2X)
1164 p.From.Type = obj.TYPE_REG
1165 p.From.Reg = ppc64.REG_VS32
1166 p.To.Type = obj.TYPE_MEM
1168 p.To.Index = ppc64.REGZERO
1174 // Use REGTMP as index reg
1175 p = s.Prog(ppc64.AMOVD)
1176 p.From.Type = obj.TYPE_CONST
1178 p.To.Type = obj.TYPE_REG
1179 p.To.Reg = ppc64.REGTMP
1181 // Generate 16 byte loads and stores.
1182 // Use temp register for index (16)
1183 // on the second one.
1184 p = s.Prog(ppc64.ALXVD2X)
1185 p.From.Type = obj.TYPE_MEM
1186 p.From.Reg = src_reg
1187 p.From.Index = ppc64.REGTMP
1188 p.To.Type = obj.TYPE_REG
1189 p.To.Reg = ppc64.REG_VS32
1191 p = s.Prog(ppc64.ASTXVD2X)
1192 p.From.Type = obj.TYPE_REG
1193 p.From.Reg = ppc64.REG_VS32
1194 p.To.Type = obj.TYPE_MEM
1196 p.To.Index = ppc64.REGTMP
1203 // Generate all the remaining load and store pairs, starting with
1204 // as many 8 byte moves as possible, then 4, 2, 1.
1206 op, size := ppc64.AMOVB, int64(1)
1209 op, size = ppc64.AMOVD, 8
1211 op, size = ppc64.AMOVW, 4
1213 op, size = ppc64.AMOVH, 2
1217 p.To.Type = obj.TYPE_REG
1218 p.To.Reg = ppc64.REG_R14
1219 p.From.Type = obj.TYPE_MEM
1220 p.From.Reg = src_reg
1221 p.From.Offset = offset
1225 p.From.Type = obj.TYPE_REG
1226 p.From.Reg = ppc64.REG_R14
1227 p.To.Type = obj.TYPE_MEM
1229 p.To.Offset = offset
// Calls: for closure/interface calls the target address (required to be in
// R12 per the Fatalf below) is first copied into LR.
1234 case ssa.OpPPC64CALLstatic:
1237 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
1238 p := s.Prog(ppc64.AMOVD)
1239 p.From.Type = obj.TYPE_REG
1240 p.From.Reg = v.Args[0].Reg()
1241 p.To.Type = obj.TYPE_REG
1242 p.To.Reg = ppc64.REG_LR
1244 if v.Args[0].Reg() != ppc64.REG_R12 {
1245 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
1249 pp.To.Reg = ppc64.REG_LR
1251 if gc.Ctxt.Flag_shared {
1252 // When compiling Go into PIC, the function we just
1253 // called via pointer might have been implemented in
1254 // a separate module and so overwritten the TOC
1255 // pointer in R2; reload it.
1256 q := s.Prog(ppc64.AMOVD)
1257 q.From.Type = obj.TYPE_MEM
1259 q.From.Reg = ppc64.REGSP
1260 q.To.Type = obj.TYPE_REG
1261 q.To.Reg = ppc64.REG_R2
1264 case ssa.OpPPC64LoweredWB:
1265 p := s.Prog(obj.ACALL)
1266 p.To.Type = obj.TYPE_MEM
1267 p.To.Name = obj.NAME_EXTERN
1268 p.To.Sym = v.Aux.(*obj.LSym)
1270 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
1271 p := s.Prog(obj.ACALL)
1272 p.To.Type = obj.TYPE_MEM
1273 p.To.Name = obj.NAME_EXTERN
1274 p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
1275 s.UseArgs(16) // space used in callee args area by assembly stubs
1277 case ssa.OpPPC64LoweredNilCheck:
1278 if objabi.GOOS == "aix" {
1282 // NOP (so the BNE has somewhere to land)
1285 p := s.Prog(ppc64.ACMP)
1286 p.From.Type = obj.TYPE_REG
1287 p.From.Reg = v.Args[0].Reg()
1288 p.To.Type = obj.TYPE_REG
1289 p.To.Reg = ppc64.REG_R0
1292 p2 := s.Prog(ppc64.ABNE)
1293 p2.To.Type = obj.TYPE_BRANCH
1296 // Write at 0 is forbidden and will trigger a SIGSEGV
1297 p = s.Prog(ppc64.AMOVW)
1298 p.From.Type = obj.TYPE_REG
1299 p.From.Reg = ppc64.REG_R0
1300 p.To.Type = obj.TYPE_MEM
1301 p.To.Reg = ppc64.REG_R0
1303 // NOP (so the BNE has somewhere to land)
1304 nop := s.Prog(obj.ANOP)
1308 // Issue a load which will fault if arg is nil.
1309 p := s.Prog(ppc64.AMOVBZ)
1310 p.From.Type = obj.TYPE_MEM
1311 p.From.Reg = v.Args[0].Reg()
1312 gc.AddAux(&p.From, v)
1313 p.To.Type = obj.TYPE_REG
1314 p.To.Reg = ppc64.REGTMP
1316 if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
1317 gc.Warnl(v.Pos, "generated nil check")
1320 // These should be resolved by rules and not make it here.
1321 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
1322 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
1323 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
1324 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
1325 case ssa.OpPPC64InvertFlags:
1326 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1327 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
1328 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1330 // TODO: implement for clobberdead experiment. Nop is ok for now.
1332 v.Fatalf("genValue not implemented: %s", v.LongString())
// blockJump maps a PPC64 SSA block kind to its direct branch opcode (asm)
// and the inverted-condition opcode (invasm); the boolean flags handle the
// floating-point ordered/unordered cases per the inline comments below.
// NOTE(review): fragment — the struct's opcode field declarations and the
// closing brace are missing from this excerpt.
1336 var blockJump = [...]struct {
1338 asmeq, invasmun bool
1340 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
1341 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
1343 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
1344 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
1345 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
1346 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
1348 // TODO: need to work FP comparisons into block jumps
1349 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
1350 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
1351 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
1352 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
// ssaGenBlock emits the control-flow instructions that end block b,
// using next (the block laid out immediately after) to elide fall-through
// jumps. Conditional kinds look up their branch opcodes in blockJump.
// NOTE(review): fragment — interior lines and the closing braces are outside
// this excerpt (e.g. the switch statement's own header line is missing).
1355 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
1357 case ssa.BlockDefer:
1358 // defer returns in R3:
1359 // 0 if we should continue executing
1360 // 1 if we should jump to deferreturn call
1361 p := s.Prog(ppc64.ACMP)
1362 p.From.Type = obj.TYPE_REG
1363 p.From.Reg = ppc64.REG_R3
1364 p.To.Type = obj.TYPE_REG
1365 p.To.Reg = ppc64.REG_R0
1367 p = s.Prog(ppc64.ABNE)
1368 p.To.Type = obj.TYPE_BRANCH
1369 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
1370 if b.Succs[0].Block() != next {
1371 p := s.Prog(obj.AJMP)
1372 p.To.Type = obj.TYPE_BRANCH
1373 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1376 case ssa.BlockPlain:
1377 if b.Succs[0].Block() != next {
1378 p := s.Prog(obj.AJMP)
1379 p.To.Type = obj.TYPE_BRANCH
1380 s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
1385 case ssa.BlockRetJmp:
1386 p := s.Prog(obj.AJMP)
1387 p.To.Type = obj.TYPE_MEM
1388 p.To.Name = obj.NAME_EXTERN
1389 p.To.Sym = b.Aux.(*obj.LSym)
1391 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
1392 ssa.BlockPPC64LT, ssa.BlockPPC64GE,
1393 ssa.BlockPPC64LE, ssa.BlockPPC64GT,
1394 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
1395 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
1396 jmp := blockJump[b.Kind]
// The three arms below pick direct, inverted, or both branches depending
// on which successor (if either) is the fall-through block; the extra BVS/BEQ
// branches handle the FP unordered/ordered cases flagged in blockJump.
1398 case b.Succs[0].Block():
1399 s.Br(jmp.invasm, b.Succs[1].Block())
1401 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1402 s.Br(ppc64.ABVS, b.Succs[1].Block())
1404 case b.Succs[1].Block():
1405 s.Br(jmp.asm, b.Succs[0].Block())
1407 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1410 if b.Likely != ssa.BranchUnlikely {
1411 s.Br(jmp.asm, b.Succs[0].Block())
1413 s.Br(ppc64.ABEQ, b.Succs[0].Block())
1415 s.Br(obj.AJMP, b.Succs[1].Block())
1417 s.Br(jmp.invasm, b.Succs[1].Block())
1419 // TODO: The second branch is probably predict-not-taken since it is for FP unordered
1420 s.Br(ppc64.ABVS, b.Succs[1].Block())
1422 s.Br(obj.AJMP, b.Succs[0].Block())
1426 b.Fatalf("branch not implemented: %s", b.LongString())