1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/inline"
10 "cmd/compile/internal/ir"
11 "cmd/compile/internal/logopt"
12 "cmd/compile/internal/pgo"
13 "cmd/compile/internal/typecheck"
14 "cmd/compile/internal/types"
23 // CallStat summarizes a single call site.
25 // This is used only for debug logging (collected when base.Debug.PGODebug >= 3).
26 type CallStat struct {
27 Pkg string // Package path, from base.Ctxt.Pkgpath.
28 Pos string // file:line:col of call.
30 Caller string // Linker symbol name of calling function.
32 // Direct or indirect call.
35 // For indirect calls, interface call or other indirect function call.
38 // Total edge weight from this call site.
41 // Hottest callee from this call site, regardless of type
46 // Devirtualized callee if != "".
48 // Note that this may be different from Hottest because we apply
49 // type-check restrictions, which helps distinguish multiple calls on
52 DevirtualizedWeight int64
55 // ProfileGuided performs call devirtualization of indirect calls based on
56 // profile information.
58 // Specifically, it performs conditional devirtualization of interface calls or
59 // function value calls for the hottest callee.
61 // That is, for interface calls it performs a transformation like:
63 // type Iface interface {
67 // type Concrete struct{}
69 // func (Concrete) Foo() {}
71 // func foo(i Iface) {
77 // func foo(i Iface) {
78 // if c, ok := i.(Concrete); ok {
85 // For function value calls it performs a transformation like:
89 // func foo(fn func()) {
95 // func foo(fn func()) {
96 // if internal/abi.FuncPCABIInternal(fn) == internal/abi.FuncPCABIInternal(Concrete) {
103 // The primary benefit of this transformation is enabling inlining of the
105 func ProfileGuided(fn *ir.Func, p *pgo.Profile) {
108 name := ir.LinkFuncName(fn)
110 // Can't devirtualize go/defer calls (see comment in Static), so remember which calls are wrapped by go/defer statements.
111 goDeferCall := make(map[*ir.CallExpr]bool)
113 var jsonW *json.Encoder // Set only when base.Debug.PGODebug >= 3; encodes to os.Stdout.
114 if base.Debug.PGODebug >= 3 {
115 jsonW = json.NewEncoder(os.Stdout)
118 var edit func(n ir.Node) ir.Node
119 edit = func(n ir.Node) ir.Node {
124 if gds, ok := n.(*ir.GoDeferStmt); ok {
125 if call, ok := gds.Call.(*ir.CallExpr); ok {
126 goDeferCall[call] = true
130 ir.EditChildren(n, edit)
132 call, ok := n.(*ir.CallExpr)
138 if base.Debug.PGODebug >= 3 {
139 // Statistics about every single call. Handy for external data analysis.
141 // TODO(prattmic): Log via logopt?
142 stat = constructCallStat(p, fn, name, call)
151 if op != ir.OCALLFUNC && op != ir.OCALLINTER {
155 if base.Debug.PGODebug >= 2 {
156 fmt.Printf("%v: PGO devirtualize considering call %v\n", ir.Line(call), call)
159 if goDeferCall[call] {
160 if base.Debug.PGODebug >= 2 {
161 fmt.Printf("%v: can't PGO devirtualize go/defer call %v\n", ir.Line(call), call)
171 newNode, callee, weight = maybeDevirtualizeFunctionCall(p, fn, call)
173 newNode, callee, weight = maybeDevirtualizeInterfaceCall(p, fn, call)
183 stat.Devirtualized = ir.LinkFuncName(callee)
184 stat.DevirtualizedWeight = weight
190 ir.EditChildren(fn, edit)
193 // maybeDevirtualizeInterfaceCall devirtualizes an interface call if possible
194 // and eligible. Returns the new ir.Node if call was devirtualized, and if so
195 // also the callee and weight of the devirtualized edge.
196 func maybeDevirtualizeInterfaceCall(p *pgo.Profile, fn *ir.Func, call *ir.CallExpr) (ir.Node, *ir.Func, int64) {
197 // Bail if we do not have a hot callee.
198 callee, weight := findHotConcreteInterfaceCallee(p, fn, call)
202 // Bail if we do not have a Type node for the hot callee.
203 ctyp := methodRecvType(callee)
207 // Bail if we know for sure it won't inline.
208 if !shouldPGODevirt(callee) {
211 // Bail if de-selected by PGO Hash.
212 if !base.PGOHash.MatchPosWithInfo(call.Pos(), "devirt", nil) {
216 return rewriteInterfaceCall(call, fn, callee, ctyp), callee, weight
219 // maybeDevirtualizeFunctionCall devirtualizes an indirect function call if
220 // possible and eligible. Returns the new ir.Node if call was devirtualized,
221 // and if so also the callee and weight of the devirtualized edge.
222 func maybeDevirtualizeFunctionCall(p *pgo.Profile, fn *ir.Func, call *ir.CallExpr) (ir.Node, *ir.Func, int64) {
223 // Bail if this is a direct call; no devirtualization necessary.
224 callee := pgo.DirectCallee(call.Fun)
229 // Bail if we do not have a hot callee.
230 callee, weight := findHotConcreteFunctionCallee(p, fn, call)
235 // TODO(go.dev/issue/61577): Closures need the closure context passed
236 // via the context register. That requires extra plumbing that we
238 if callee.OClosure != nil {
239 if base.Debug.PGODebug >= 3 {
240 fmt.Printf("callee %s is a closure, skipping\n", ir.FuncName(callee))
244 // TODO(prattmic): We don't properly handle methods as callees in two
245 // different dimensions:
247 // 1. Method expressions. e.g.,
249 // var fn func(*os.File, []byte) (int, error) = (*os.File).Read
251 // In this case, typ will report *os.File as the receiver while
252 // ctyp reports it as the first argument. types.Identical ignores
253 // receiver parameters, so it treats these as different, even though
254 // they are still call compatible.
256 // 2. Method values. e.g.,
259 // var fn func([]byte) (int, error) = f.Read
261 // types.Identical will treat these as compatible (since receiver
262 // parameters are ignored). However, in this case, we do not call
263 // (*os.File).Read directly. Instead, f is stored in closure context
264 // and we call the wrapper (*os.File).Read-fm. However, runtime/pprof
265 // hides wrappers from profiles, making it appear that there is a call
266 // directly to the method. We could recognize this pattern and return the
267 // wrapper rather than the method.
269 // N.B. perf profiles will report wrapper symbols directly, so
270 // ideally we should support direct wrapper references as well.
271 if callee.Type().Recv() != nil {
272 if base.Debug.PGODebug >= 3 {
273 fmt.Printf("callee %s is a method, skipping\n", ir.FuncName(callee))
278 // Bail if we know for sure it won't inline.
279 if !shouldPGODevirt(callee) {
282 // Bail if de-selected by PGO Hash.
283 if !base.PGOHash.MatchPosWithInfo(call.Pos(), "devirt", nil) {
287 return rewriteFunctionCall(call, fn, callee), callee, weight
290 // shouldPGODevirt checks if we should perform PGO devirtualization to the
293 // PGO devirtualization is most valuable when the callee is inlined, so if it
294 // won't inline we can skip devirtualizing. Currently only inline.InlineImpossible is consulted.
295 func shouldPGODevirt(fn *ir.Func) bool {
297 if base.Flag.LowerM > 1 || logopt.Enabled() {
300 if base.Flag.LowerM > 1 {
301 fmt.Printf("%v: should not PGO devirtualize %v: %s\n", ir.Line(fn), ir.FuncName(fn), reason)
303 if logopt.Enabled() {
304 logopt.LogOpt(fn.Pos(), ": should not PGO devirtualize function", "pgo-devirtualize", ir.FuncName(fn), reason)
310 reason = inline.InlineImpossible(fn)
315 // TODO(prattmic): checking only InlineImpossible is very conservative,
316 // primarily excluding only functions with pragmas. We probably want to
317 // move in either direction. Either:
319 // 1. Don't even bother to check InlineImpossible, as it affects so few
322 // 2. Or consider the function body (notably cost) to better determine
323 // if the function will actually inline.
328 // constructCallStat builds an initial CallStat describing this call, for
329 // logging. If the call is devirtualized, the devirtualization fields should be
331 func constructCallStat(p *pgo.Profile, fn *ir.Func, name string, call *ir.CallExpr) *CallStat {
333 case ir.OCALLFUNC, ir.OCALLINTER, ir.OCALLMETH:
335 // We don't care about logging builtin functions.
340 Pkg: base.Ctxt.Pkgpath,
345 offset := pgo.NodeLineOffset(call, fn)
347 hotter := func(e *pgo.IREdge) bool {
348 if stat.Hottest == "" {
351 if e.Weight != stat.HottestWeight {
352 return e.Weight > stat.HottestWeight
354 // If weight is the same, arbitrarily sort lexicographically, as
355 // findHotConcreteCallee does.
356 return e.Dst.Name() < stat.Hottest
359 // Sum of all edges from this callsite, regardless of callee.
360 // For direct calls, this should be the same as the single edge
361 // weight (except for multiple calls on one line, which we
362 // can't distinguish).
363 callerNode := p.WeightedCG.IRNodes[name]
364 for _, edge := range callerNode.OutEdges {
365 if edge.CallSiteOffset != offset {
368 stat.Weight += edge.Weight
370 stat.HottestWeight = edge.Weight
371 stat.Hottest = edge.Dst.Name()
377 stat.Interface = false
379 callee := pgo.DirectCallee(call.Fun)
382 if stat.Hottest == "" {
383 stat.Hottest = ir.LinkFuncName(callee)
390 stat.Interface = true
392 base.FatalfAt(call.Pos(), "OCALLMETH missed by typecheck")
398 // copyInputs copies the inputs to a call: the receiver (for interface calls)
399 // or function value (for function value calls) and the arguments. These
400 // expressions are evaluated once and assigned to temporaries.
402 // The assignment statement is added to init and the copied receiver/fn
403 // expression and copied argument expressions are returned.
404 func copyInputs(curfn *ir.Func, pos src.XPos, recvOrFn ir.Node, args []ir.Node, init *ir.Nodes) (ir.Node, []ir.Node) {
405 // Evaluate receiver/fn and argument expressions. The receiver/fn is
406 // used twice but we don't want to cause side effects twice. The
407 // arguments are used in two different calls and we can't trivially
410 // recvOrFn must be first in the assignment list as its side effects
411 // must be ordered before argument side effects.
412 var lhs, rhs []ir.Node
413 newRecvOrFn := typecheck.TempAt(pos, curfn, recvOrFn.Type())
414 lhs = append(lhs, newRecvOrFn)
415 rhs = append(rhs, recvOrFn)
417 for _, arg := range args {
418 argvar := typecheck.TempAt(pos, curfn, arg.Type())
420 lhs = append(lhs, argvar)
421 rhs = append(rhs, arg)
424 asList := ir.NewAssignListStmt(pos, ir.OAS2, lhs, rhs)
425 init.Append(typecheck.Stmt(asList))
427 return newRecvOrFn, lhs[1:] // lhs[0] is the receiver/fn temporary; the rest are the argument temporaries.
430 // retTemps returns a slice of temporaries, one per result in the signature of call.Fun, to be used for storing result values from call.
431 func retTemps(curfn *ir.Func, pos src.XPos, call *ir.CallExpr) []ir.Node {
432 sig := call.Fun.Type()
433 var retvars []ir.Node
434 for _, ret := range sig.Results() {
435 retvars = append(retvars, typecheck.TempAt(pos, curfn, ret.Type))
440 // condCall returns an ir.InlinedCallExpr that performs a call to thenCall if
441 // cond is true and elseCall if cond is false. The return variables of the
442 // InlinedCallExpr evaluate to the return values from the call.
443 func condCall(curfn *ir.Func, pos src.XPos, cond ir.Node, thenCall, elseCall *ir.CallExpr, init ir.Nodes) *ir.InlinedCallExpr {
444 // It doesn't matter whether we use thenCall or elseCall; they must have
445 // the same return types.
446 retvars := retTemps(curfn, pos, thenCall)
448 var thenBlock, elseBlock ir.Nodes
449 if len(retvars) == 0 {
450 thenBlock.Append(thenCall)
451 elseBlock.Append(elseCall)
453 // Copy slice so edits in one location don't affect another.
454 thenRet := append([]ir.Node(nil), retvars...)
455 thenAsList := ir.NewAssignListStmt(pos, ir.OAS2, thenRet, []ir.Node{thenCall})
456 thenBlock.Append(typecheck.Stmt(thenAsList))
458 elseRet := append([]ir.Node(nil), retvars...)
459 elseAsList := ir.NewAssignListStmt(pos, ir.OAS2, elseRet, []ir.Node{elseCall})
460 elseBlock.Append(typecheck.Stmt(elseAsList))
463 nif := ir.NewIfStmt(pos, cond, thenBlock, elseBlock)
467 body := []ir.Node{typecheck.Stmt(nif)}
469 // This isn't really an inlined call of course, but InlinedCallExpr
470 // makes handling reassignment of return values easier.
471 res := ir.NewInlinedCallExpr(pos, body, retvars) // Both branches assign to the same retvars.
472 res.SetType(thenCall.Type())
477 // rewriteInterfaceCall devirtualizes the given interface call using a direct
478 // method call to concretetyp.
479 func rewriteInterfaceCall(call *ir.CallExpr, curfn, callee *ir.Func, concretetyp *types.Type) ir.Node {
480 if base.Flag.LowerM != 0 {
481 fmt.Printf("%v: PGO devirtualizing interface call %v to %v\n", ir.Line(call), call.Fun, callee)
484 // We generate an OINCALL of:
494 // recv, arg1, argN = recv expr, arg1 expr, argN expr
496 // t, ok := recv.(Concrete)
498 // ret1, retN = t.Method(arg1, ... argN)
500 // ret1, retN = recv.Method(arg1, ... argN)
503 // OINCALL retvars: ret1, ... retN
505 // This isn't really an inlined call of course, but InlinedCallExpr
506 // makes handling reassignment of return values easier.
508 // TODO(prattmic): This increases the size of the AST in the caller,
509 // making it less likely to inline. We may want to compensate for this
512 sel := call.Fun.(*ir.SelectorExpr)
515 init := ir.TakeInit(call)
517 recv, args := copyInputs(curfn, pos, sel.X, call.Args.Take(), &init)
519 // Copy slice so edits in one location don't affect another.
520 argvars := append([]ir.Node(nil), args...)
523 tmpnode := typecheck.TempAt(base.Pos, curfn, concretetyp)
524 tmpok := typecheck.TempAt(base.Pos, curfn, types.Types[types.TBOOL])
526 assert := ir.NewTypeAssertExpr(pos, recv, concretetyp)
528 assertAsList := ir.NewAssignListStmt(pos, ir.OAS2, []ir.Node{tmpnode, tmpok}, []ir.Node{typecheck.Expr(assert)})
529 init.Append(typecheck.Stmt(assertAsList))
531 concreteCallee := typecheck.XDotMethod(pos, tmpnode, method, true)
532 // Copy slice so edits in one location don't affect another.
533 argvars = append([]ir.Node(nil), argvars...)
534 concreteCall := typecheck.Call(pos, concreteCallee, argvars, call.IsDDD).(*ir.CallExpr)
536 res := condCall(curfn, pos, tmpok, concreteCall, call, init)
538 if base.Debug.PGODebug >= 3 {
539 fmt.Printf("PGO devirtualizing interface call to %+v. After: %+v\n", concretetyp, res)
545 // rewriteFunctionCall devirtualizes the given OCALLFUNC using a direct
546 // function call to callee.
547 func rewriteFunctionCall(call *ir.CallExpr, curfn, callee *ir.Func) ir.Node {
548 if base.Flag.LowerM != 0 {
549 fmt.Printf("%v: PGO devirtualizing function call %v to %v\n", ir.Line(call), call.Fun, callee)
552 // We generate an OINCALL of:
562 // fn, arg1, argN = fn expr, arg1 expr, argN expr
564 // fnPC := internal/abi.FuncPCABIInternal(fn)
565 // concretePC := internal/abi.FuncPCABIInternal(concrete)
567 // if fnPC == concretePC {
568 // ret1, retN = concrete(arg1, ... argN) // Same closure context passed (TODO)
570 // ret1, retN = fn(arg1, ... argN)
573 // OINCALL retvars: ret1, ... retN
575 // This isn't really an inlined call of course, but InlinedCallExpr
576 // makes handling reassignment of return values easier.
579 init := ir.TakeInit(call)
581 fn, args := copyInputs(curfn, pos, call.Fun, call.Args.Take(), &init)
583 // Copy slice so edits in one location don't affect another.
584 argvars := append([]ir.Node(nil), args...)
587 // FuncPCABIInternal takes an interface{}, emulate that. This is needed
588 // to ensure we get the MAKEFACE we need for SSA.
589 fnIface := typecheck.Expr(ir.NewConvExpr(pos, ir.OCONV, types.Types[types.TINTER], fn))
590 calleeIface := typecheck.Expr(ir.NewConvExpr(pos, ir.OCONV, types.Types[types.TINTER], callee.Nname))
592 fnPC := ir.FuncPC(pos, fnIface, obj.ABIInternal)
593 concretePC := ir.FuncPC(pos, calleeIface, obj.ABIInternal)
595 pcEq := typecheck.Expr(ir.NewBinaryExpr(base.Pos, ir.OEQ, fnPC, concretePC))
597 // TODO(go.dev/issue/61577): Handle callees that are closures and need a
598 // copy of the closure context from call. For now, we skip callees that
599 // are closures in maybeDevirtualizeFunctionCall.
600 if callee.OClosure != nil {
601 base.Fatalf("Callee is a closure: %+v", callee)
604 // Copy slice so edits in one location don't affect another.
605 argvars = append([]ir.Node(nil), argvars...)
606 concreteCall := typecheck.Call(pos, callee.Nname, argvars, call.IsDDD).(*ir.CallExpr)
608 res := condCall(curfn, pos, pcEq, concreteCall, call, init)
610 if base.Debug.PGODebug >= 3 {
611 fmt.Printf("PGO devirtualizing function call to %+v. After: %+v\n", ir.FuncName(callee), res)
617 // methodRecvType returns the receiver type of method fn (the type containing it). Returns nil if fn
619 func methodRecvType(fn *ir.Func) *types.Type {
620 recv := fn.Nname.Type().Recv()
627 // interfaceCallRecvTypeAndMethod returns the type of the receiver expression and
628 // the symbol of the method selected in an interface call. call must be an OCALLINTER.
629 func interfaceCallRecvTypeAndMethod(call *ir.CallExpr) (*types.Type, *types.Sym) {
630 if call.Op() != ir.OCALLINTER {
631 base.Fatalf("Call isn't OCALLINTER: %+v", call)
634 sel, ok := call.Fun.(*ir.SelectorExpr)
636 base.Fatalf("OCALLINTER doesn't contain SelectorExpr: %+v", call)
639 return sel.X.Type(), sel.Sel
642 // findHotConcreteCallee returns the *ir.Func of the hottest callee of a call,
643 // if available, and its edge weight. extraFn can perform additional
644 // applicability checks on each candidate edge. If extraFn returns false,
645 // the candidate will not be considered a valid callee.
646 func findHotConcreteCallee(p *pgo.Profile, caller *ir.Func, call *ir.CallExpr, extraFn func(callerName string, callOffset int, candidate *pgo.IREdge) bool) (*ir.Func, int64) {
647 callerName := ir.LinkFuncName(caller)
648 callerNode := p.WeightedCG.IRNodes[callerName]
649 callOffset := pgo.NodeLineOffset(call, caller)
651 var hottest *pgo.IREdge
653 // Returns true if e is hotter than hottest.
655 // Naively this is just e.Weight > hottest.Weight, but because OutEdges
656 // has arbitrary iteration order, we need to apply additional sort
657 // criteria when e.Weight == hottest.Weight to ensure we have stable
659 hotter := func(e *pgo.IREdge) bool {
663 if e.Weight != hottest.Weight {
664 return e.Weight > hottest.Weight
667 // Now e.Weight == hottest.Weight, we must select on other
670 // If only one edge has IR, prefer that one.
671 if (hottest.Dst.AST == nil) != (e.Dst.AST == nil) {
672 if e.Dst.AST != nil {
678 // Arbitrary, but the callee names will always differ. Select
679 // the lexicographically first callee.
680 return e.Dst.Name() < hottest.Dst.Name()
683 for _, e := range callerNode.OutEdges {
684 if e.CallSiteOffset != callOffset {
689 // TODO(prattmic): consider total caller weight? i.e.,
690 // if the hottest callee is only 10% of the weight,
691 // maybe don't devirtualize? Similarly, if this call
692 // is globally very cold, there is not much value in
694 if base.Debug.PGODebug >= 2 {
695 fmt.Printf("%v: edge %s:%d -> %s (weight %d): too cold (hottest %d)\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight, hottest.Weight)
700 if e.Dst.AST == nil {
701 // Destination isn't visible from this package
704 // We must assume it implements the interface.
706 // We still record this as the hottest callee so far
707 // because we only want to return the #1 hottest
708 // callee. If we skip this then we'd return the #2
710 if base.Debug.PGODebug >= 2 {
711 fmt.Printf("%v: edge %s:%d -> %s (weight %d) (missing IR): hottest so far\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight)
717 if extraFn != nil && !extraFn(callerName, callOffset, e) {
721 if base.Debug.PGODebug >= 2 {
722 fmt.Printf("%v: edge %s:%d -> %s (weight %d): hottest so far\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight)
728 if base.Debug.PGODebug >= 2 {
729 fmt.Printf("%v: call %s:%d: no hot callee\n", ir.Line(call), callerName, callOffset)
734 if base.Debug.PGODebug >= 2 {
735 fmt.Printf("%v call %s:%d: hottest callee %s (weight %d)\n", ir.Line(call), callerName, callOffset, hottest.Dst.Name(), hottest.Weight)
737 return hottest.Dst.AST, hottest.Weight
740 // findHotConcreteInterfaceCallee returns the *ir.Func of the hottest callee of an
741 // interface call, if available, and its edge weight. Candidates are filtered by receiver type (must implement the interface) and by method name.
742 func findHotConcreteInterfaceCallee(p *pgo.Profile, caller *ir.Func, call *ir.CallExpr) (*ir.Func, int64) {
743 inter, method := interfaceCallRecvTypeAndMethod(call)
745 return findHotConcreteCallee(p, caller, call, func(callerName string, callOffset int, e *pgo.IREdge) bool {
746 ctyp := methodRecvType(e.Dst.AST)
749 // TODO(prattmic): Support non-interface indirect calls.
750 if base.Debug.PGODebug >= 2 {
751 fmt.Printf("%v: edge %s:%d -> %s (weight %d): callee not a method\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight)
756 // If ctyp doesn't implement inter it is most likely from a
757 // different call on the same line.
758 if !typecheck.Implements(ctyp, inter) {
759 // TODO(prattmic): this is overly strict. Consider if
760 // ctyp is a partial implementation of an interface
761 // that gets embedded in types that complete the
762 // interface. It would still be OK to devirtualize a
763 // call to this method.
765 // What we'd need to do is check that the function
766 // pointer in the itab matches the method we want,
767 // rather than doing a full type assertion.
768 if base.Debug.PGODebug >= 2 {
769 why := typecheck.ImplementsExplain(ctyp, inter)
770 fmt.Printf("%v: edge %s:%d -> %s (weight %d): %v doesn't implement %v (%s)\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight, ctyp, inter, why)
775 // If the method name is different it is most likely from a
776 // different call on the same line.
777 if !strings.HasSuffix(e.Dst.Name(), "."+method.Name) {
778 if base.Debug.PGODebug >= 2 {
779 fmt.Printf("%v: edge %s:%d -> %s (weight %d): callee is a different method\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight)
788 // findHotConcreteFunctionCallee returns the *ir.Func of the hottest callee of an
789 // indirect function call, if available, and its edge weight.
790 func findHotConcreteFunctionCallee(p *pgo.Profile, caller *ir.Func, call *ir.CallExpr) (*ir.Func, int64) {
791 typ := call.Fun.Type().Underlying()
793 return findHotConcreteCallee(p, caller, call, func(callerName string, callOffset int, e *pgo.IREdge) bool {
794 ctyp := e.Dst.AST.Type().Underlying()
796 // If ctyp doesn't match typ it is most likely from a different
797 // call on the same line.
799 // Note that we are comparing underlying types, as different
800 // defined types are OK. e.g., a call to a value of type
801 // net/http.HandlerFunc can be devirtualized to a function with
802 // the same underlying type.
803 if !types.Identical(typ, ctyp) {
804 if base.Debug.PGODebug >= 2 {
805 fmt.Printf("%v: edge %s:%d -> %s (weight %d): %v doesn't match %v\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight, ctyp, typ)