1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
23 // Compile is the main entry point for this package.
24 // Compile modifies f so that on return:
25 // · all Values in f map to 0 or 1 assembly instructions of the target architecture
26 // · the order of f.Blocks is the order to emit the Blocks
27 // · the order of b.Values is the order to emit the Values in each Block
28 // · f has a non-nil regAlloc field
29 func Compile(f *Func) {
30 // TODO: debugging - set flags to control verbosity of compiler,
31 // which phases to dump IR before/after, etc.
33 f.Logf("compiling %s\n", f.Name)
38 seed := int64(crc32.ChecksumIEEE(([]byte)(f.Name))) ^ int64(checkRandSeed)
39 rnd = rand.New(rand.NewSource(seed))
42 // hook to print function & phase if panic happens
47 stack := make([]byte, 16384)
48 n := runtime.Stack(stack, false)
50 if f.HTMLWriter != nil {
51 f.HTMLWriter.flushPhases()
53 f.Fatalf("panic during %s while compiling %s:\n\n%v\n\n%s\n", phaseName, f.Name, err, stack)
61 f.HTMLWriter.WritePhase("start", "start")
62 if BuildDump != "" && BuildDump == f.Name {
68 const logMemStats = false
69 for _, p := range passes {
70 if !f.Config.optimize && !p.required || p.disabled {
76 f.Logf(" pass %s begin\n", p.name)
78 // TODO: capture logging during this pass, add it to the HTML
79 var mStart runtime.MemStats
80 if logMemStats || p.mem {
81 runtime.ReadMemStats(&mStart)
84 if checkEnabled && !f.scheduled {
85 // Test that we don't depend on the value order, by randomizing
86 // the order of values in each block. See issue 18169.
87 for _, b := range f.Blocks {
88 for i := 0; i < len(b.Values)-1; i++ {
89 j := i + rnd.Intn(len(b.Values)-i)
90 b.Values[i], b.Values[j] = b.Values[j], b.Values[i]
99 // Need something less crude than "Log the whole intermediate result".
100 if f.Log() || f.HTMLWriter != nil {
101 time := tEnd.Sub(tStart).Nanoseconds()
104 var mEnd runtime.MemStats
105 runtime.ReadMemStats(&mEnd)
106 nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
107 nAllocs := mEnd.Mallocs - mStart.Mallocs
108 stats = fmt.Sprintf("[%d ns %d allocs %d bytes]", time, nAllocs, nBytes)
110 stats = fmt.Sprintf("[%d ns]", time)
114 f.Logf(" pass %s end %s\n", p.name, stats)
117 f.HTMLWriter.WritePhase(phaseName, fmt.Sprintf("%s <span class=\"stats\">%s</span>", phaseName, stats))
120 // Surround timing information w/ enough context to allow comparisons.
121 time := tEnd.Sub(tStart).Nanoseconds()
123 f.LogStat("TIME(ns)", time)
126 var mEnd runtime.MemStats
127 runtime.ReadMemStats(&mEnd)
128 nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
129 nAllocs := mEnd.Mallocs - mStart.Mallocs
130 f.LogStat("TIME(ns):BYTES:ALLOCS", time, nBytes, nAllocs)
133 if p.dump != nil && p.dump[f.Name] {
134 // Dump function to appropriately named file
135 f.dumpFile(phaseName)
142 if f.HTMLWriter != nil {
143 // Ensure we write any pending phases to the html
144 f.HTMLWriter.flushPhases()
147 if f.ruleMatches != nil {
149 for key := range f.ruleMatches {
150 keys = append(keys, key)
153 buf := new(bytes.Buffer)
154 fmt.Fprintf(buf, "%s: ", f.Name)
155 for _, key := range keys {
156 fmt.Fprintf(buf, "%s=%d ", key, f.ruleMatches[key])
158 fmt.Fprint(buf, "\n")
159 fmt.Print(buf.String())
162 // Squash error printing defer
166 // dumpFile creates a file from the phase name and function name
167 // Dumping is done to files to avoid buffering huge strings before
169 func (f *Func) dumpFile(phaseName string) {
171 fname := fmt.Sprintf("%s_%02d__%s.dump", f.Name, int(f.dumpFileSeq), phaseName)
172 fname = strings.Replace(fname, " ", "_", -1)
173 fname = strings.Replace(fname, "/", "_", -1)
174 fname = strings.Replace(fname, ":", "_", -1)
176 fi, err := os.Create(fname)
178 f.Warnl(src.NoXPos, "Unable to create after-phase dump file %s", fname)
182 p := stringFuncPrinter{w: fi}
192 time bool // report time to run pass
193 mem bool // report mem stats to run pass
194 stats int // pass reports own "stats" (e.g., branches removed)
195 debug int // pass performs some debugging. =1 should be in error-testing-friendly Warnl format.
196 test int // pass-specific ad-hoc option, perhaps useful in development
197 dump map[string]bool // dump if function name matches
200 func (p *pass) addDump(s string) {
202 p.dump = make(map[string]bool)
207 func (p *pass) String() string {
214 // Run consistency checker between each phase
221 var IntrinsicsDebug int
222 var IntrinsicsDisable bool
227 var BuildDump string // name of function to dump after initial build of ssa
229 // PhaseOption sets the specified flag in the specified ssa phase,
230 // returning empty string if this was successful or a string explaining
231 // the error if it was not.
232 // A version of the phase name with "_" replaced by " " is also checked for a match.
233 // If the phase name begins a '~' then the rest of the underscores-replaced-with-blanks
234 // version is used as a regular expression to match the phase name(s).
236 // Special cases that have turned out to be useful:
237 // ssa/check/on enables checking after each phase
238 // ssa/all/time enables time reporting for all phases
240 // See gc/lex.go for dissection of the option string.
243 // GO_GCFLAGS=-d=ssa/generic_cse/time,ssa/generic_cse/stats,ssa/generic_cse/debug=3 ./make.bash
245 // BOOT_GO_GCFLAGS=-d='ssa/~^.*scc$/off' GO_GCFLAGS='-d=ssa/~^.*scc$/off' ./make.bash
247 func PhaseOption(phase, flag string, val int, valString string) string {
251 phasenames := " check, all, build, intrinsics"
252 for _, p := range passes {
253 pn := strings.Replace(p.name, " ", "_", -1)
254 if len(pn)+len(phasenames)-lastcr > 70 {
256 lastcr = len(phasenames)
259 phasenames += ", " + pn
262 return `PhaseOptions usage:
264 go tool compile -d=ssa/<phase>/<flag>[=<value>|<function_name>]
272 on, off, debug, mem, time, test, stats, dump, seed
274 - <value> defaults to 1
276 - <function_name> is required for the "dump" flag, and specifies the
277 name of function to dump after <phase>
279 Phase "all" supports flags "time", "mem", and "dump".
280 Phase "intrinsics" supports flags "on", "off", and "debug".
282 If the "dump" flag is specified, the output is written on a file named
283 <phase>__<function_name>_<seq>.dump; otherwise it is directed to stdout.
288 enables checking after each phase
290 -d=ssa/check/seed=1234
291 enables checking after each phase, using 1234 to seed the PRNG
292 used for value order randomization
295 enables time reporting for all phases
298 sets debugging level to 2 in the prove pass
300 Multiple flags can be passed at once, by separating them with
303 -d=ssa/check/on,ssa/all/time
307 if phase == "check" {
310 checkEnabled = val != 0
311 debugPoset = checkEnabled // also turn on advanced self-checking in prove's datastructure
314 checkEnabled = val == 0
315 debugPoset = checkEnabled
320 debugPoset = checkEnabled
337 BuildDump = valString
340 return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
344 if phase == "intrinsics" {
347 IntrinsicsDisable = val == 0
349 IntrinsicsDisable = val != 0
351 IntrinsicsDebug = val
353 return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
357 if phase == "build" {
366 BuildDump = valString
368 return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
373 underphase := strings.Replace(phase, "_", " ", -1)
374 var re *regexp.Regexp
376 r, ok := regexp.Compile(underphase[1:])
378 return fmt.Sprintf("Error %s in regexp for phase %s, flag %s", ok.Error(), phase, flag)
383 for i, p := range passes {
392 } else if p.name == phase || p.name == underphase || re != nil && re.MatchString(p.name) {
395 p.disabled = val == 0
397 p.disabled = val != 0
411 return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
413 if p.disabled && p.required {
414 return fmt.Sprintf("Cannot disable required SSA phase %s using -d=ssa/%s debug option", phase, phase)
423 return fmt.Sprintf("Did not find a phase matching %s in -d=ssa/... debug option", phase)
426 // list of passes for the compiler
427 var passes = [...]pass{
428 // TODO: combine phielim and copyelim into a single pass?
429 {name: "number lines", fn: numberLines, required: true},
430 {name: "early phielim", fn: phielim},
431 {name: "early copyelim", fn: copyelim},
432 {name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt
433 {name: "short circuit", fn: shortcircuit},
434 {name: "decompose user", fn: decomposeUser, required: true},
435 {name: "pre-opt deadcode", fn: deadcode},
436 {name: "opt", fn: opt, required: true}, // NB: some generic rules know the name of the opt pass. TODO: split required rules and optimizing rules
437 {name: "zero arg cse", fn: zcse, required: true}, // required to merge OpSB values
438 {name: "opt deadcode", fn: deadcode, required: true}, // remove any blocks orphaned during opt
439 {name: "generic cse", fn: cse},
440 {name: "phiopt", fn: phiopt},
441 {name: "gcse deadcode", fn: deadcode, required: true}, // clean out after cse and phiopt
442 {name: "nilcheckelim", fn: nilcheckelim},
443 {name: "prove", fn: prove},
444 {name: "early fuse", fn: fuseEarly},
445 {name: "decompose builtin", fn: decomposeBuiltIn, required: true},
446 {name: "expand calls", fn: expandCalls, required: true},
447 {name: "softfloat", fn: softfloat, required: true},
448 {name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
449 {name: "dead auto elim", fn: elimDeadAutosGeneric},
450 {name: "generic deadcode", fn: deadcode, required: true}, // remove dead stores, which otherwise mess up store chain
451 {name: "check bce", fn: checkbce},
452 {name: "branchelim", fn: branchelim},
453 {name: "late fuse", fn: fuseLate},
454 {name: "dse", fn: dse},
455 {name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
456 {name: "insert resched checks", fn: insertLoopReschedChecks,
457 disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
458 {name: "lower", fn: lower, required: true},
459 {name: "addressing modes", fn: addressingModes, required: false},
460 {name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
461 {name: "lowered cse", fn: cse},
462 {name: "elim unread autos", fn: elimUnreadAutos},
463 {name: "tighten tuple selectors", fn: tightenTupleSelectors, required: true},
464 {name: "lowered deadcode", fn: deadcode, required: true},
465 {name: "checkLower", fn: checkLower, required: true},
466 {name: "late phielim", fn: phielim},
467 {name: "late copyelim", fn: copyelim},
468 {name: "tighten", fn: tighten}, // move values closer to their uses
469 {name: "late deadcode", fn: deadcode},
470 {name: "critical", fn: critical, required: true}, // remove critical edges
471 {name: "phi tighten", fn: phiTighten}, // place rematerializable phi args near uses to reduce value lifetimes
472 {name: "likelyadjust", fn: likelyadjust},
473 {name: "layout", fn: layout, required: true}, // schedule blocks
474 {name: "schedule", fn: schedule, required: true}, // schedule values
475 {name: "late nilcheck", fn: nilcheckelim2},
476 {name: "flagalloc", fn: flagalloc, required: true}, // allocate flags register
477 {name: "regalloc", fn: regalloc, required: true}, // allocate int & float registers + stack slots
478 {name: "loop rotate", fn: loopRotate},
479 {name: "stackframe", fn: stackframe, required: true},
480 {name: "trim", fn: trim}, // remove empty blocks
483 // Double-check phase ordering constraints.
484 // This code is intended to document the ordering requirements
485 // between different phases. It does not override the passes
487 type constraint struct {
488 a, b string // a must come before b
491 var passOrder = [...]constraint{
492 // "insert resched checks" uses mem, better to clean out stores first.
493 {"dse", "insert resched checks"},
494 // insert resched checks adds new blocks containing generic instructions
495 {"insert resched checks", "lower"},
496 {"insert resched checks", "tighten"},
498 // prove relies on common-subexpression elimination for maximum benefits.
499 {"generic cse", "prove"},
500 // deadcode after prove to eliminate all new dead blocks.
501 {"prove", "generic deadcode"},
502 // common-subexpression before dead-store elim, so that we recognize
503 // when two address expressions are the same.
504 {"generic cse", "dse"},
505 // cse substantially improves nilcheckelim efficacy
506 {"generic cse", "nilcheckelim"},
507 // allow deadcode to clean up after nilcheckelim
508 {"nilcheckelim", "generic deadcode"},
509 // nilcheckelim generates sequences of plain basic blocks
510 {"nilcheckelim", "late fuse"},
511 // nilcheckelim relies on opt to rewrite user nil checks
512 {"opt", "nilcheckelim"},
513 // tighten will be most effective when as many values have been removed as possible
514 {"generic deadcode", "tighten"},
515 {"generic cse", "tighten"},
516 // checkbce needs the values removed
517 {"generic deadcode", "check bce"},
518 // don't run optimization pass until we've decomposed builtin objects
519 {"decompose builtin", "late opt"},
520 // decompose builtin is the last pass that may introduce new float ops, so run softfloat after it
521 {"decompose builtin", "softfloat"},
522 // tuple selectors must be tightened to generators and de-duplicated before scheduling
523 {"tighten tuple selectors", "schedule"},
524 // remove critical edges before phi tighten, so that phi args get better placement
525 {"critical", "phi tighten"},
526 // don't layout blocks until critical edges have been removed
527 {"critical", "layout"},
528 // regalloc requires the removal of all critical edges
529 {"critical", "regalloc"},
530 // regalloc requires all the values in a block to be scheduled
531 {"schedule", "regalloc"},
532 // checkLower must run after lowering & subsequent dead code elim
533 {"lower", "checkLower"},
534 {"lowered deadcode", "checkLower"},
535 // late nilcheck needs instructions to be scheduled.
536 {"schedule", "late nilcheck"},
537 // flagalloc needs instructions to be scheduled.
538 {"schedule", "flagalloc"},
539 // regalloc needs flags to be allocated first.
540 {"flagalloc", "regalloc"},
541 // loopRotate will confuse regalloc.
542 {"regalloc", "loop rotate"},
543 // stackframe needs to know about spilled registers.
544 {"regalloc", "stackframe"},
545 // trim needs regalloc to be done first.
546 {"regalloc", "trim"},
550 for _, c := range passOrder {
554 for k, p := range passes {
563 log.Panicf("pass %s not found", a)
566 log.Panicf("pass %s not found", b)
569 log.Panicf("passes %s and %s out of order", a, b)