1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/pgo"
11 "cmd/compile/internal/typecheck"
12 "cmd/compile/internal/types"
18 // These constants enumerate the set of possible ways/scenarios
19 // in which we'll adjust the score of a given callsite.
20 type scoreAdjustTyp uint
22 // These constants capture the various ways in which the inliner's
23 // scoring phase can adjust a callsite score based on heuristics. They
24 // fall broadly into three categories:
26 // 1) adjustments based solely on the callsite context (ex: call
27 // appears on panic path)
29 // 2) adjustments that take into account specific interesting values
30 // passed at a call site (ex: passing a constant that could result in
31 // cprop/deadcode in the caller)
33 // 3) adjustments that take into account values returned from the call
34 // at a callsite (ex: call always returns the same inlinable function,
35 // and return value flows unmodified into an indirect call)
37 // For categories 2 and 3 above, each adjustment can have either a
38 // "must" version and a "may" version (but not both). Here the idea is
39 // that in the "must" version the value flow is unconditional: if the
40 // callsite executes, then the condition we're interested in (ex:
41 // param feeding call) is guaranteed to happen. For the "may" version,
42 // there may be control flow that could cause the benefit to be
45 // Category 1 adjustments (see above)
46 panicPathAdj scoreAdjustTyp = (1 << iota)
50 // Category 2 adjustments (see above).
52 passConstToNestedIfAdj
53 passConcreteToItfCallAdj
54 passConcreteToNestedItfCallAdj
56 passFuncToNestedIndCallAdj
57 passInlinableFuncToIndCallAdj
58 passInlinableFuncToNestedIndCallAdj
60 // Category 3 adjustments.
61 returnFeedsConstToIfAdj
62 returnFeedsFuncToIndCallAdj
63 returnFeedsInlinableFuncToIndCallAdj
64 returnFeedsConcreteToInterfaceCallAdj
67 // This table records the specific values we use to adjust call
68 // site scores in a given scenario.
69 // NOTE: these numbers are chosen very arbitrarily; ideally
70 // we will go through some sort of turning process to decide
71 // what value for each one produces the best performance.
73 var adjValues = map[scoreAdjustTyp]int{
77 passConstToIfAdj: -20,
78 passConstToNestedIfAdj: -15,
79 passConcreteToItfCallAdj: -30,
80 passConcreteToNestedItfCallAdj: -25,
81 passFuncToIndCallAdj: -25,
82 passFuncToNestedIndCallAdj: -20,
83 passInlinableFuncToIndCallAdj: -45,
84 passInlinableFuncToNestedIndCallAdj: -40,
85 returnFeedsConstToIfAdj: -15,
86 returnFeedsFuncToIndCallAdj: -25,
87 returnFeedsInlinableFuncToIndCallAdj: -40,
88 returnFeedsConcreteToInterfaceCallAdj: -25,
91 func adjValue(x scoreAdjustTyp) int {
92 if val, ok := adjValues[x]; ok {
95 panic("internal error unregistered adjustment type")
99 var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{
100 {may: passConstToNestedIfAdj, must: passConstToIfAdj},
101 {may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj},
102 {may: passFuncToNestedIndCallAdj, must: passFuncToNestedIndCallAdj},
103 {may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj},
106 func isMay(x scoreAdjustTyp) bool {
107 return mayToMust(x) != 0
110 func isMust(x scoreAdjustTyp) bool {
111 return mustToMay(x) != 0
114 func mayToMust(x scoreAdjustTyp) scoreAdjustTyp {
115 for _, v := range mayMustAdj {
123 func mustToMay(x scoreAdjustTyp) scoreAdjustTyp {
124 for _, v := range mayMustAdj {
132 // computeCallSiteScore takes a given call site whose ir node is 'call' and
133 // callee function is 'callee' and with previously computed call site
134 // properties 'csflags', then computes a score for the callsite that
135 // combines the size cost of the callee with heuristics based on
136 // previously parameter and function properties.
137 func computeCallSiteScore(callee *ir.Func, calleeProps *FuncProps, call ir.Node, csflags CSPropBits) (int, scoreAdjustTyp) {
138 // Start with the size-based score for the callee.
139 score := int(callee.Inl.Cost)
140 var tmask scoreAdjustTyp
142 if debugTrace&debugTraceScoring != 0 {
143 fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n",
144 callee.Sym().Name, fmtFullPos(call.Pos()), score)
147 // First some score adjustments to discourage inlining in selected cases.
148 if csflags&CallSiteOnPanicPath != 0 {
149 score, tmask = adjustScore(panicPathAdj, score, tmask)
151 if csflags&CallSiteInInitFunc != 0 {
152 score, tmask = adjustScore(initFuncAdj, score, tmask)
155 // Then adjustments to encourage inlining in selected cases.
156 if csflags&CallSiteInLoop != 0 {
157 score, tmask = adjustScore(inLoopAdj, score, tmask)
160 if calleeProps == nil {
164 // Walk through the actual expressions being passed at the call.
165 calleeRecvrParms := callee.Type().RecvParams()
166 ce := call.(*ir.CallExpr)
167 for idx := range ce.Args {
169 if calleeRecvrParms[idx].Sym == nil ||
170 calleeRecvrParms[idx].Sym.IsBlank() {
174 pflag := calleeProps.ParamFlags[idx]
175 if debugTrace&debugTraceScoring != 0 {
176 fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n",
177 idx, len(ce.Args), arg, pflag.String())
179 _, islit := isLiteral(arg)
180 iscci := isConcreteConvIface(arg)
181 fname, isfunc, _ := isFuncName(arg)
182 if debugTrace&debugTraceScoring != 0 {
183 fmt.Fprintf(os.Stderr, "=-= isLit=%v iscci=%v isfunc=%v for arg %v\n", islit, iscci, isfunc, arg)
187 if pflag&ParamMayFeedIfOrSwitch != 0 {
188 score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask)
190 if pflag&ParamFeedsIfOrSwitch != 0 {
191 score, tmask = adjustScore(passConstToIfAdj, score, tmask)
196 // FIXME: ideally here it would be nice to make a
197 // distinction between the inlinable case and the
198 // non-inlinable case, but this is hard to do. Example:
200 // type I interface { Tiny() int; Giant() }
201 // type Conc struct { x int }
202 // func (c *Conc) Tiny() int { return 42 }
203 // func (c *Conc) Giant() { <huge amounts of code> }
205 // func passConcToItf(c *Conc) {
206 // makesItfMethodCall(c)
209 // In the code above, function properties will only tell
210 // us that 'makesItfMethodCall' invokes a method on its
211 // interface parameter, but we don't know whether it calls
212 // "Tiny" or "Giant". If we knew if called "Tiny", then in
213 // theory in addition to converting the interface call to
214 // a direct call, we could also inline (in which case
215 // we'd want to decrease the score even more).
217 // One thing we could do (not yet implemented) is iterate
218 // through all of the methods of "*Conc" that allow it to
219 // satisfy I, and if all are inlinable, then exploit that.
220 if pflag&ParamMayFeedInterfaceMethodCall != 0 {
221 score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask)
223 if pflag&ParamFeedsInterfaceMethodCall != 0 {
224 score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask)
229 mayadj := passFuncToNestedIndCallAdj
230 mustadj := passFuncToIndCallAdj
231 if fn := fname.Func; fn != nil && typecheck.HaveInlineBody(fn) {
232 mayadj = passInlinableFuncToNestedIndCallAdj
233 mustadj = passInlinableFuncToIndCallAdj
235 if pflag&ParamMayFeedIndirectCall != 0 {
236 score, tmask = adjustScore(mayadj, score, tmask)
238 if pflag&ParamFeedsIndirectCall != 0 {
239 score, tmask = adjustScore(mustadj, score, tmask)
247 func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) {
253 may := mustToMay(typ)
255 // promote may to must, so undo may
256 score -= adjValue(may)
259 } else if isMay(typ) {
260 must := mayToMust(typ)
261 if mask&(must|typ) != 0 {
266 if debugTrace&debugTraceScoring != 0 {
267 fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n",
268 adjValue(typ), typ.String())
270 score += adjValue(typ)
276 var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp
277 var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp
279 func setupFlagToAdjMaps() {
280 resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{
281 ResultIsAllocatedMem: returnFeedsConcreteToInterfaceCallAdj,
282 ResultAlwaysSameFunc: returnFeedsFuncToIndCallAdj,
283 ResultAlwaysSameConstant: returnFeedsConstToIfAdj,
285 paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{
286 ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj,
287 ParamFeedsInterfaceMethodCall: passConcreteToItfCallAdj,
288 ParamMayFeedIndirectCall: passInlinableFuncToNestedIndCallAdj,
289 ParamFeedsIndirectCall: passInlinableFuncToIndCallAdj,
293 // largestScoreAdjustment tries to estimate the largest possible
294 // negative score adjustment that could be applied to a call of the
295 // function with the specified props. Example:
297 // func foo() { func bar(x int, p *int) int {
298 // ... if x < 0 { *p = x }
302 // Function 'foo' above on the left has no interesting properties,
303 // thus as a result the most we'll adjust any call to is the value for
304 // "call in loop". If the calculated cost of the function is 150, and
305 // the in-loop adjustment is 5 (for example), then there is not much
306 // point treating it as inlinable. On the other hand "bar" has a param
307 // property (parameter "x" feeds unmodified to an "if" statement") and
308 // a return property (always returns same constant) meaning that a
309 // given call _could_ be rescored down as much as -35 points-- thus if
310 // the size of "bar" is 100 (for example) then there is at least a
311 // chance that scoring will enable inlining.
312 func largestScoreAdjustment(fn *ir.Func, props *FuncProps) int {
313 if resultFlagToPositiveAdj == nil {
316 var tmask scoreAdjustTyp
317 score := adjValues[inLoopAdj] // any call can be in a loop
318 for _, pf := range props.ParamFlags {
319 if adj, ok := paramFlagToPositiveAdj[pf]; ok {
320 score, tmask = adjustScore(adj, score, tmask)
323 for _, rf := range props.ResultFlags {
324 if adj, ok := resultFlagToPositiveAdj[rf]; ok {
325 score, tmask = adjustScore(adj, score, tmask)
329 if debugTrace&debugTraceScoring != 0 {
330 fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n",
337 // callSiteTab contains entries for each call in the function
338 // currently being processed by InlineCalls; this variable will either
339 // be set to 'cstabCache' below (for non-inlinable routines) or to the
340 // local 'cstab' entry in the fnInlHeur object for inlinable routines.
342 // NOTE: this assumes that inlining operations are happening in a serial,
343 // single-threaded fashion,f which is true today but probably won't hold
344 // in the future (for example, we might want to score the callsites
345 // in multiple functions in parallel); if the inliner evolves in this
346 // direction we'll need to come up with a different approach here.
347 var callSiteTab CallSiteTab
349 // scoreCallsCache caches a call site table and call site list between
350 // invocations of ScoreCalls so that we can reuse previously allocated
352 var scoreCallsCache scoreCallsCacheType
354 type scoreCallsCacheType struct {
359 // ScoreCalls assigns numeric scores to each of the callsites in
360 // function 'fn'; the lower the score, the more helpful we think it
361 // will be to inline.
363 // Unlike a lot of the other inline heuristics machinery, callsite
364 // scoring can't be done as part of the CanInline call for a function,
365 // due to fact that we may be working on a non-trivial SCC. So for
366 // example with this SCC:
368 // func foo(x int) { func bar(x int, f func()) {
370 // bar(x, func(){}) foo(x-1)
374 // We don't want to perform scoring for the 'foo' call in "bar" until
375 // after foo has been analyzed, but it's conceivable that CanInline
376 // might visit bar before foo for this SCC.
377 func ScoreCalls(fn *ir.Func) {
378 enableDebugTraceIfEnv()
380 if debugTrace&debugTraceScoring != 0 {
381 fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn))
384 // If this is an inlinable function, use the precomputed
385 // call site table for it. If the function wasn't an inline
386 // candidate, collect a callsite table for it now.
387 var cstab CallSiteTab
388 if funcInlHeur, ok := fpmap[fn]; ok {
389 cstab = funcInlHeur.cstab
391 if len(scoreCallsCache.tab) != 0 {
392 panic("missing call to ScoreCallsCleanup")
394 if scoreCallsCache.tab == nil {
395 scoreCallsCache.tab = make(CallSiteTab)
397 if debugTrace&debugTraceScoring != 0 {
398 fmt.Fprintf(os.Stderr, "=-= building cstab for non-inl func %s\n",
401 cstab = computeCallSiteTable(fn, fn.Body, scoreCallsCache.tab, nil, 0)
404 const doCallResults = true
405 scoreCallsRegion(fn, fn.Body, cstab, doCallResults, nil)
408 // scoreCallsRegion assigns numeric scores to each of the callsites in
409 // region 'region' within function 'fn'. This can be called on
410 // an entire function, or with 'region' set to a chunk of
411 // code corresponding to an inlined call.
412 func scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallResults bool, ic *ir.InlinedCallExpr) {
413 if debugTrace&debugTraceScoring != 0 {
414 fmt.Fprintf(os.Stderr, "=-= scoreCallsRegion(%v, %s)\n",
415 ir.FuncName(fn), region[0].Op().String())
418 // Sort callsites to avoid any surprises with non deterministic
419 // map iteration order (this is probably not needed, but here just
421 csl := scoreCallsCache.csl[:0]
422 for _, cs := range cstab {
423 csl = append(csl, cs)
425 scoreCallsCache.csl = csl[:0]
426 sort.Slice(csl, func(i, j int) bool {
427 return csl[i].ID < csl[j].ID
430 // Score each call site.
431 var resultNameTab map[*ir.Name]resultPropAndCS
432 for _, cs := range csl {
433 var cprops *FuncProps
436 if funcInlHeur, ok := fpmap[cs.Callee]; ok {
437 cprops = funcInlHeur.props
439 } else if cs.Callee.Inl != nil {
440 cprops = DeserializeFromString(cs.Callee.Inl.Properties)
443 if base.Debug.DumpInlFuncProps != "" {
444 fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos()))
445 panic("should never happen")
450 cs.Score, cs.ScoreMask = computeCallSiteScore(cs.Callee, cprops, cs.Call, cs.Flags)
453 if debugTrace&debugTraceScoring != 0 {
454 fmt.Fprintf(os.Stderr, "=-= examineCallResults at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
456 resultNameTab = examineCallResults(cs, resultNameTab)
459 if debugTrace&debugTraceScoring != 0 {
460 fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
464 if resultNameTab != nil {
465 rescoreBasedOnCallResultUses(fn, resultNameTab, cstab)
470 if ic != nil && callSiteTab != nil {
471 // Integrate the calls from this cstab into the table for the caller.
472 if err := callSiteTab.merge(cstab); err != nil {
473 base.FatalfAt(ic.Pos(), "%v", err)
480 // ScoreCallsCleanup resets the state of the callsite cache
481 // once ScoreCalls is done with a function.
482 func ScoreCallsCleanup() {
483 if base.Debug.DumpInlCallSiteScores != 0 {
484 if allCallSites == nil {
485 allCallSites = make(CallSiteTab)
487 for call, cs := range callSiteTab {
488 allCallSites[call] = cs
491 for k := range scoreCallsCache.tab {
492 delete(scoreCallsCache.tab, k)
496 // GetCallSiteScore returns the previously calculated score for call
498 func GetCallSiteScore(fn *ir.Func, call *ir.CallExpr) (int, bool) {
499 if funcInlHeur, ok := fpmap[fn]; ok {
500 if cs, ok := funcInlHeur.cstab[call]; ok {
501 return cs.Score, true
504 if cs, ok := callSiteTab[call]; ok {
505 return cs.Score, true
510 var allCallSites CallSiteTab
512 // DumpInlCallSiteScores is invoked by the inliner if the debug flag
513 // "-d=dumpinlcallsitescores" is set; it dumps out a human-readable
514 // summary of all (potentially) inlinable callsites in the package,
515 // along with info on call site scoring and the adjustments made to a
516 // given score. Here profile is the PGO profile in use (may be
517 // nil), budgetCallback is a callback that can be invoked to find out
518 // the original pre-adjustment hairyness limit for the function, and
519 // inlineHotMaxBudget is the constant of the same name used in the
520 // inliner. Sample output lines:
522 // Score Adjustment Status Callee CallerPos ScoreFlags
523 // 115 40 DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset expand_calls.go:1679:14|6 panicPathAdj
524 // 76 -5n PROMOTED runtime.persistentalloc mcheckmark.go:48:45|3 inLoopAdj
525 // 201 0 --- PGO unicode.DecodeRuneInString utf8.go:312:30|1
526 // 7 -5 --- PGO internal/abi.Name.DataChecked type.go:625:22|0 inLoopAdj
528 // In the dump above, "Score" is the final score calculated for the
529 // callsite, "Adjustment" is the amount added to or subtracted from
530 // the original hairyness estimate to form the score. "Status" shows
531 // whether anything changed with the site -- did the adjustment bump
532 // it down just below the threshold ("PROMOTED") or instead bump it
533 // above the threshold ("DEMOTED"); this will be blank ("---") if no
534 // threshold was crossed as a result of the heuristics. Note that
535 // "Status" also shows whether PGO was involved. "Callee" is the name
536 // of the function called, "CallerPos" is the position of the
537 // callsite, and "ScoreFlags" is a digest of the specific properties
538 // we used to make adjustments to callsite score via heuristics.
539 func DumpInlCallSiteScores(profile *pgo.Profile, budgetCallback func(fn *ir.Func, profile *pgo.Profile) (int32, bool)) {
541 var indirectlyDueToPromotion func(cs *CallSite) bool
542 indirectlyDueToPromotion = func(cs *CallSite) bool {
543 bud, _ := budgetCallback(cs.Callee, profile)
544 hairyval := cs.Callee.Inl.Cost
545 score := int32(cs.Score)
546 if hairyval > bud && score <= bud {
549 if cs.parent != nil {
550 return indirectlyDueToPromotion(cs.parent)
555 genstatus := func(cs *CallSite) string {
556 hairyval := cs.Callee.Inl.Cost
557 bud, isPGO := budgetCallback(cs.Callee, profile)
558 score := int32(cs.Score)
562 case hairyval <= bud && score <= bud:
563 // "Normal" inlined case: hairy val sufficiently low that
564 // it would have been inlined anyway without heuristics.
566 case hairyval > bud && score > bud:
567 // "Normal" not inlined case: hairy val sufficiently high
568 // and scoring didn't lower it.
569 case hairyval > bud && score <= bud:
570 // Promoted: we would not have inlined it before, but
571 // after score adjustment we decided to inline.
574 case hairyval <= bud && score > bud:
575 // Demoted: we would have inlined it before, but after
576 // score adjustment we decided not to inline.
579 inlined := cs.aux&csAuxInlined != 0
581 if cs.parent != nil {
582 indprom = indirectlyDueToPromotion(cs.parent)
584 if inlined && indprom {
587 if inlined && !expinl {
589 } else if !inlined && expinl {
598 if base.Debug.DumpInlCallSiteScores != 0 {
600 for _, cs := range allCallSites {
603 sort.Slice(sl, func(i, j int) bool {
604 if sl[i].Score != sl[j].Score {
605 return sl[i].Score < sl[j].Score
607 fni := ir.PkgFuncName(sl[i].Callee)
608 fnj := ir.PkgFuncName(sl[j].Callee)
612 ecsi := EncodeCallSiteKey(sl[i])
613 ecsj := EncodeCallSiteKey(sl[j])
617 mkname := func(fn *ir.Func) string {
619 if fn == nil || fn.Nname == nil {
622 if fn.Sym().Pkg == types.LocalPkg {
623 n = "ยท" + fn.Sym().Name
625 n = ir.PkgFuncName(fn)
627 // don't try to print super-long names
631 return n[:32] + "..." + n[len(n)-32:len(n)]
635 fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path)
636 fmt.Fprintf(os.Stdout, "# Score Adjustment Status Callee CallerPos Flags ScoreFlags\n")
638 for _, cs := range sl {
639 hairyval := cs.Callee.Inl.Cost
640 adj := int32(cs.Score) - hairyval
641 nm := mkname(cs.Callee)
642 ecc := EncodeCallSiteKey(cs)
643 fmt.Fprintf(os.Stdout, "%d %d\t%s\t%s\t%s\t%s\n",
644 cs.Score, adj, genstatus(cs),
646 cs.ScoreMask.String())