1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/pgo"
11 "cmd/compile/internal/typecheck"
12 "cmd/compile/internal/types"
18 // These constants enumerate the set of possible ways/scenarios
19 // in which we'll adjust the score of a given callsite.
20 type scoreAdjustTyp uint
22 // These constants capture the various ways in which the inliner's
23 // scoring phase can adjust a callsite score based on heuristics. They
24 // fall broadly into three categories:
26 // 1) adjustments based solely on the callsite context (ex: call
27 // appears on panic path)
29 // 2) adjustments that take into account specific interesting values
30 // passed at a call site (ex: passing a constant that could result in
31 // cprop/deadcode in the caller)
33 // 3) adjustments that take into account values returned from the call
34 // at a callsite (ex: call always returns the same inlinable function,
35 // and return value flows unmodified into an indirect call)
37 // For categories 2 and 3 above, each adjustment can have either a
38 // "must" version and a "may" version (but not both). Here the idea is
39 // that in the "must" version the value flow is unconditional: if the
40 // callsite executes, then the condition we're interested in (ex:
41 // param feeding call) is guaranteed to happen. For the "may" version,
42 // there may be control flow that could cause the benefit to be
45 // Category 1 adjustments (see above)
46 panicPathAdj scoreAdjustTyp = (1 << iota)
50 // Category 2 adjustments (see above).
52 passConstToNestedIfAdj
53 passConcreteToItfCallAdj
54 passConcreteToNestedItfCallAdj
56 passFuncToNestedIndCallAdj
57 passInlinableFuncToIndCallAdj
58 passInlinableFuncToNestedIndCallAdj
60 // Category 3 adjustments.
61 returnFeedsConstToIfAdj
62 returnFeedsFuncToIndCallAdj
63 returnFeedsInlinableFuncToIndCallAdj
64 returnFeedsConcreteToInterfaceCallAdj
67 // This table records the specific values we use to adjust call
68 // site scores in a given scenario.
69 // NOTE: these numbers are chosen very arbitrarily; ideally
70 // we will go through some sort of turning process to decide
71 // what value for each one produces the best performance.
73 var adjValues = map[scoreAdjustTyp]int{
77 passConstToIfAdj: -20,
78 passConstToNestedIfAdj: -15,
79 passConcreteToItfCallAdj: -30,
80 passConcreteToNestedItfCallAdj: -25,
81 passFuncToIndCallAdj: -25,
82 passFuncToNestedIndCallAdj: -20,
83 passInlinableFuncToIndCallAdj: -45,
84 passInlinableFuncToNestedIndCallAdj: -40,
85 returnFeedsConstToIfAdj: -15,
86 returnFeedsFuncToIndCallAdj: -25,
87 returnFeedsInlinableFuncToIndCallAdj: -40,
88 returnFeedsConcreteToInterfaceCallAdj: -25,
91 func adjValue(x scoreAdjustTyp) int {
92 if val, ok := adjValues[x]; ok {
95 panic("internal error unregistered adjustment type")
99 var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{
100 {may: passConstToNestedIfAdj, must: passConstToIfAdj},
101 {may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj},
102 {may: passFuncToNestedIndCallAdj, must: passFuncToNestedIndCallAdj},
103 {may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj},
106 func isMay(x scoreAdjustTyp) bool {
107 return mayToMust(x) != 0
110 func isMust(x scoreAdjustTyp) bool {
111 return mustToMay(x) != 0
114 func mayToMust(x scoreAdjustTyp) scoreAdjustTyp {
115 for _, v := range mayMustAdj {
123 func mustToMay(x scoreAdjustTyp) scoreAdjustTyp {
124 for _, v := range mayMustAdj {
132 // computeCallSiteScore takes a given call site whose ir node is 'call' and
133 // callee function is 'callee' and with previously computed call site
134 // properties 'csflags', then computes a score for the callsite that
135 // combines the size cost of the callee with heuristics based on
136 // previously parameter and function properties.
137 func computeCallSiteScore(callee *ir.Func, calleeProps *FuncProps, call ir.Node, csflags CSPropBits) (int, scoreAdjustTyp) {
138 // Start with the size-based score for the callee.
139 score := int(callee.Inl.Cost)
140 var tmask scoreAdjustTyp
142 if debugTrace&debugTraceScoring != 0 {
143 fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n",
144 callee.Sym().Name, fmtFullPos(call.Pos()), score)
147 // First some score adjustments to discourage inlining in selected cases.
148 if csflags&CallSiteOnPanicPath != 0 {
149 score, tmask = adjustScore(panicPathAdj, score, tmask)
151 if csflags&CallSiteInInitFunc != 0 {
152 score, tmask = adjustScore(initFuncAdj, score, tmask)
155 // Then adjustments to encourage inlining in selected cases.
156 if csflags&CallSiteInLoop != 0 {
157 score, tmask = adjustScore(inLoopAdj, score, tmask)
160 // Walk through the actual expressions being passed at the call.
161 calleeRecvrParms := callee.Type().RecvParams()
162 ce := call.(*ir.CallExpr)
163 for idx := range ce.Args {
165 if calleeRecvrParms[idx].Sym == nil ||
166 calleeRecvrParms[idx].Sym.IsBlank() {
170 pflag := calleeProps.ParamFlags[idx]
171 if debugTrace&debugTraceScoring != 0 {
172 fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n",
173 idx, len(ce.Args), arg, pflag.String())
175 _, islit := isLiteral(arg)
176 iscci := isConcreteConvIface(arg)
177 fname, isfunc, _ := isFuncName(arg)
178 if debugTrace&debugTraceScoring != 0 {
179 fmt.Fprintf(os.Stderr, "=-= isLit=%v iscci=%v isfunc=%v for arg %v\n", islit, iscci, isfunc, arg)
183 if pflag&ParamMayFeedIfOrSwitch != 0 {
184 score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask)
186 if pflag&ParamFeedsIfOrSwitch != 0 {
187 score, tmask = adjustScore(passConstToIfAdj, score, tmask)
192 // FIXME: ideally here it would be nice to make a
193 // distinction between the inlinable case and the
194 // non-inlinable case, but this is hard to do. Example:
196 // type I interface { Tiny() int; Giant() }
197 // type Conc struct { x int }
198 // func (c *Conc) Tiny() int { return 42 }
199 // func (c *Conc) Giant() { <huge amounts of code> }
201 // func passConcToItf(c *Conc) {
202 // makesItfMethodCall(c)
205 // In the code above, function properties will only tell
206 // us that 'makesItfMethodCall' invokes a method on its
207 // interface parameter, but we don't know whether it calls
208 // "Tiny" or "Giant". If we knew if called "Tiny", then in
209 // theory in addition to converting the interface call to
210 // a direct call, we could also inline (in which case
211 // we'd want to decrease the score even more).
213 // One thing we could do (not yet implemented) is iterate
214 // through all of the methods of "*Conc" that allow it to
215 // satisfy I, and if all are inlinable, then exploit that.
216 if pflag&ParamMayFeedInterfaceMethodCall != 0 {
217 score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask)
219 if pflag&ParamFeedsInterfaceMethodCall != 0 {
220 score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask)
225 mayadj := passFuncToNestedIndCallAdj
226 mustadj := passFuncToIndCallAdj
227 if fn := fname.Func; fn != nil && typecheck.HaveInlineBody(fn) {
228 mayadj = passInlinableFuncToNestedIndCallAdj
229 mustadj = passInlinableFuncToIndCallAdj
231 if pflag&ParamMayFeedIndirectCall != 0 {
232 score, tmask = adjustScore(mayadj, score, tmask)
234 if pflag&ParamFeedsIndirectCall != 0 {
235 score, tmask = adjustScore(mustadj, score, tmask)
243 func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) {
249 may := mustToMay(typ)
251 // promote may to must, so undo may
252 score -= adjValue(may)
255 } else if isMay(typ) {
256 must := mayToMust(typ)
257 if mask&(must|typ) != 0 {
262 if debugTrace&debugTraceScoring != 0 {
263 fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n",
264 adjValue(typ), typ.String())
266 score += adjValue(typ)
272 var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp
273 var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp
275 func setupFlagToAdjMaps() {
276 resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{
277 ResultIsAllocatedMem: returnFeedsConcreteToInterfaceCallAdj,
278 ResultAlwaysSameFunc: returnFeedsFuncToIndCallAdj,
279 ResultAlwaysSameConstant: returnFeedsConstToIfAdj,
281 paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{
282 ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj,
283 ParamFeedsInterfaceMethodCall: passConcreteToItfCallAdj,
284 ParamMayFeedIndirectCall: passInlinableFuncToNestedIndCallAdj,
285 ParamFeedsIndirectCall: passInlinableFuncToIndCallAdj,
289 // largestScoreAdjustment tries to estimate the largest possible
290 // negative score adjustment that could be applied to a call of the
291 // function with the specified props. Example:
293 // func foo() { func bar(x int, p *int) int {
294 // ... if x < 0 { *p = x }
298 // Function 'foo' above on the left has no interesting properties,
299 // thus as a result the most we'll adjust any call to is the value for
300 // "call in loop". If the calculated cost of the function is 150, and
301 // the in-loop adjustment is 5 (for example), then there is not much
302 // point treating it as inlinable. On the other hand "bar" has a param
303 // property (parameter "x" feeds unmodified to an "if" statement") and
304 // a return property (always returns same constant) meaning that a
305 // given call _could_ be rescored down as much as -35 points-- thus if
306 // the size of "bar" is 100 (for example) then there is at least a
307 // chance that scoring will enable inlining.
308 func largestScoreAdjustment(fn *ir.Func, props *FuncProps) int {
309 if resultFlagToPositiveAdj == nil {
312 var tmask scoreAdjustTyp
313 score := adjValues[inLoopAdj] // any call can be in a loop
314 for _, pf := range props.ParamFlags {
315 if adj, ok := paramFlagToPositiveAdj[pf]; ok {
316 score, tmask = adjustScore(adj, score, tmask)
319 for _, rf := range props.ResultFlags {
320 if adj, ok := resultFlagToPositiveAdj[rf]; ok {
321 score, tmask = adjustScore(adj, score, tmask)
325 if debugTrace&debugTraceScoring != 0 {
326 fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n",
333 // DumpInlCallSiteScores is invoked by the inliner if the debug flag
334 // "-d=dumpinlcallsitescores" is set; it dumps out a human-readable
335 // summary of all (potentially) inlinable callsites in the package,
336 // along with info on call site scoring and the adjustments made to a
337 // given score. Here profile is the PGO profile in use (may be
338 // nil), budgetCallback is a callback that can be invoked to find out
339 // the original pre-adjustment hairyness limit for the function, and
340 // inlineHotMaxBudget is the constant of the same name used in the
341 // inliner. Sample output lines:
343 // Score Adjustment Status Callee CallerPos ScoreFlags
344 // 115 40 DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset expand_calls.go:1679:14|6 panicPathAdj
345 // 76 -5n PROMOTED runtime.persistentalloc mcheckmark.go:48:45|3 inLoopAdj
346 // 201 0 --- PGO unicode.DecodeRuneInString utf8.go:312:30|1
347 // 7 -5 --- PGO internal/abi.Name.DataChecked type.go:625:22|0 inLoopAdj
349 // In the dump above, "Score" is the final score calculated for the
350 // callsite, "Adjustment" is the amount added to or subtracted from
351 // the original hairyness estimate to form the score. "Status" shows
352 // whether anything changed with the site -- did the adjustment bump
353 // it down just below the threshold ("PROMOTED") or instead bump it
354 // above the threshold ("DEMOTED"); this will be blank ("---") if no
355 // threshold was crossed as a result of the heuristics. Note that
356 // "Status" also shows whether PGO was involved. "Callee" is the name
357 // of the function called, "CallerPos" is the position of the
358 // callsite, and "ScoreFlags" is a digest of the specific properties
359 // we used to make adjustments to callsite score via heuristics.
360 func DumpInlCallSiteScores(profile *pgo.Profile, budgetCallback func(fn *ir.Func, profile *pgo.Profile) (int32, bool)) {
362 fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path)
364 genstatus := func(cs *CallSite, prof *pgo.Profile) string {
365 hairyval := cs.Callee.Inl.Cost
366 bud, isPGO := budgetCallback(cs.Callee, prof)
367 score := int32(cs.Score)
371 case hairyval <= bud && score <= bud:
372 // "Normal" inlined case: hairy val sufficiently low that
373 // it would have been inlined anyway without heuristics.
374 case hairyval > bud && score > bud:
375 // "Normal" not inlined case: hairy val sufficiently high
376 // and scoring didn't lower it.
377 case hairyval > bud && score <= bud:
378 // Promoted: we would not have inlined it before, but
379 // after score adjustment we decided to inline.
381 case hairyval <= bud && score > bud:
382 // Demoted: we would have inlined it before, but after
383 // score adjustment we decided not to inline.
392 if base.Debug.DumpInlCallSiteScores != 0 {
394 for _, funcInlHeur := range fpmap {
395 for _, cs := range funcInlHeur.cstab {
399 sort.Slice(sl, func(i, j int) bool {
400 if sl[i].Score != sl[j].Score {
401 return sl[i].Score < sl[j].Score
403 fni := ir.PkgFuncName(sl[i].Callee)
404 fnj := ir.PkgFuncName(sl[j].Callee)
408 ecsi := EncodeCallSiteKey(sl[i])
409 ecsj := EncodeCallSiteKey(sl[j])
413 mkname := func(fn *ir.Func) string {
415 if fn == nil || fn.Nname == nil {
418 if fn.Sym().Pkg == types.LocalPkg {
419 n = "ยท" + fn.Sym().Name
421 n = ir.PkgFuncName(fn)
423 // don't try to print super-long names
427 return n[:32] + "..." + n[len(n)-32:len(n)]
431 fmt.Fprintf(os.Stdout, "Score Adjustment Status Callee CallerPos Flags ScoreFlags\n")
433 for _, cs := range sl {
434 hairyval := cs.Callee.Inl.Cost
435 adj := int32(cs.Score) - hairyval
436 fmt.Fprintf(os.Stdout, "%d %d\t%s\t%s\t%s\t%s\n",
437 cs.Score, adj, genstatus(cs, profile),
439 EncodeCallSiteKey(cs),
440 cs.ScoreMask.String())