1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
24 // Compile is the main entry point for this package.
25 // Compile modifies f so that on return:
26 // - all Values in f map to 0 or 1 assembly instructions of the target architecture
27 // - the order of f.Blocks is the order to emit the Blocks
28 // - the order of b.Values is the order to emit the Values in each Block
29 // - f has a non-nil regAlloc field
30 func Compile(f *Func) {
31 // TODO: debugging - set flags to control verbosity of compiler,
32 // which phases to dump IR before/after, etc.
34 f.Logf("compiling %s\n", f.Name)
39 seed := int64(crc32.ChecksumIEEE(([]byte)(f.Name))) ^ int64(checkRandSeed)
40 rnd = rand.New(rand.NewSource(seed))
43 // hook to print function & phase if panic happens
48 stack := make([]byte, 16384)
49 n := runtime.Stack(stack, false)
51 if f.HTMLWriter != nil {
52 f.HTMLWriter.flushPhases()
54 f.Fatalf("panic during %s while compiling %s:\n\n%v\n\n%s\n", phaseName, f.Name, err, stack)
62 f.HTMLWriter.WritePhase("start", "start")
63 if BuildDump[f.Name] {
69 const logMemStats = false
70 for _, p := range passes {
71 if !f.Config.optimize && !p.required || p.disabled {
77 f.Logf(" pass %s begin\n", p.name)
79 // TODO: capture logging during this pass, add it to the HTML
80 var mStart runtime.MemStats
81 if logMemStats || p.mem {
82 runtime.ReadMemStats(&mStart)
85 if checkEnabled && !f.scheduled {
86 // Test that we don't depend on the value order, by randomizing
87 // the order of values in each block. See issue 18169.
88 for _, b := range f.Blocks {
89 for i := 0; i < len(b.Values)-1; i++ {
90 j := i + rnd.Intn(len(b.Values)-i)
91 b.Values[i], b.Values[j] = b.Values[j], b.Values[i]
100 // Need something less crude than "Log the whole intermediate result".
101 if f.Log() || f.HTMLWriter != nil {
102 time := tEnd.Sub(tStart).Nanoseconds()
105 var mEnd runtime.MemStats
106 runtime.ReadMemStats(&mEnd)
107 nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
108 nAllocs := mEnd.Mallocs - mStart.Mallocs
109 stats = fmt.Sprintf("[%d ns %d allocs %d bytes]", time, nAllocs, nBytes)
111 stats = fmt.Sprintf("[%d ns]", time)
115 f.Logf(" pass %s end %s\n", p.name, stats)
118 f.HTMLWriter.WritePhase(phaseName, fmt.Sprintf("%s <span class=\"stats\">%s</span>", phaseName, stats))
121 // Surround timing information w/ enough context to allow comparisons.
122 time := tEnd.Sub(tStart).Nanoseconds()
124 f.LogStat("TIME(ns)", time)
127 var mEnd runtime.MemStats
128 runtime.ReadMemStats(&mEnd)
129 nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
130 nAllocs := mEnd.Mallocs - mStart.Mallocs
131 f.LogStat("TIME(ns):BYTES:ALLOCS", time, nBytes, nAllocs)
134 if p.dump != nil && p.dump[f.Name] {
135 // Dump function to appropriately named file
136 f.dumpFile(phaseName)
143 if f.HTMLWriter != nil {
144 // Ensure we write any pending phases to the html
145 f.HTMLWriter.flushPhases()
148 if f.ruleMatches != nil {
150 for key := range f.ruleMatches {
151 keys = append(keys, key)
154 buf := new(strings.Builder)
155 fmt.Fprintf(buf, "%s: ", f.Name)
156 for _, key := range keys {
157 fmt.Fprintf(buf, "%s=%d ", key, f.ruleMatches[key])
159 fmt.Fprint(buf, "\n")
160 fmt.Print(buf.String())
163 // Squash error printing defer
167 // DumpFileForPhase creates a file from the function name and phase name,
168 // warning and returning nil if this is not possible.
169 func (f *Func) DumpFileForPhase(phaseName string) io.WriteCloser {
171 fname := fmt.Sprintf("%s_%02d__%s.dump", f.Name, int(f.dumpFileSeq), phaseName)
172 fname = strings.Replace(fname, " ", "_", -1)
173 fname = strings.Replace(fname, "/", "_", -1)
174 fname = strings.Replace(fname, ":", "_", -1)
176 if ssaDir := os.Getenv("GOSSADIR"); ssaDir != "" {
177 fname = filepath.Join(ssaDir, fname)
180 fi, err := os.Create(fname)
182 f.Warnl(src.NoXPos, "Unable to create after-phase dump file %s", fname)
188 // dumpFile creates a file from the phase name and function name
189 // Dumping is done to files to avoid buffering huge strings before
191 func (f *Func) dumpFile(phaseName string) {
192 fi := f.DumpFileForPhase(phaseName)
194 p := stringFuncPrinter{w: fi}
205 time bool // report time to run pass
206 mem bool // report mem stats to run pass
207 stats int // pass reports own "stats" (e.g., branches removed)
208 debug int // pass performs some debugging. =1 should be in error-testing-friendly Warnl format.
209 test int // pass-specific ad-hoc option, perhaps useful in development
210 dump map[string]bool // dump if function name matches
213 func (p *pass) addDump(s string) {
215 p.dump = make(map[string]bool)
220 func (p *pass) String() string {
227 // Run consistency checker between each phase
234 var IntrinsicsDebug int
235 var IntrinsicsDisable bool
240 var BuildDump map[string]bool = make(map[string]bool) // names of functions to dump after initial build of ssa
242 var GenssaDump map[string]bool = make(map[string]bool) // names of functions to dump after ssa has been converted to asm
244 // PhaseOption sets the specified flag in the specified ssa phase,
245 // returning empty string if this was successful or a string explaining
246 // the error if it was not.
247 // A version of the phase name with "_" replaced by " " is also checked for a match.
248 // If the phase name begins a '~' then the rest of the underscores-replaced-with-blanks
249 // version is used as a regular expression to match the phase name(s).
251 // Special cases that have turned out to be useful:
252 // - ssa/check/on enables checking after each phase
253 // - ssa/all/time enables time reporting for all phases
255 // See gc/lex.go for dissection of the option string.
258 // GO_GCFLAGS=-d=ssa/generic_cse/time,ssa/generic_cse/stats,ssa/generic_cse/debug=3 ./make.bash
260 // BOOT_GO_GCFLAGS=-d='ssa/~^.*scc$/off' GO_GCFLAGS='-d=ssa/~^.*scc$/off' ./make.bash
261 func PhaseOption(phase, flag string, val int, valString string) string {
265 phasenames := " check, all, build, intrinsics, genssa"
266 for _, p := range passes {
267 pn := strings.Replace(p.name, " ", "_", -1)
268 if len(pn)+len(phasenames)-lastcr > 70 {
270 lastcr = len(phasenames)
273 phasenames += ", " + pn
276 return `PhaseOptions usage:
278 go tool compile -d=ssa/<phase>/<flag>[=<value>|<function_name>]
286 on, off, debug, mem, time, test, stats, dump, seed
288 - <value> defaults to 1
290 - <function_name> is required for the "dump" flag, and specifies the
291 name of function to dump after <phase>
293 Phase "all" supports flags "time", "mem", and "dump".
294 Phase "intrinsics" supports flags "on", "off", and "debug".
295 Phase "genssa" (assembly generation) supports the flag "dump".
297 If the "dump" flag is specified, the output is written on a file named
298 <phase>__<function_name>_<seq>.dump; otherwise it is directed to stdout.
303 enables checking after each phase
305 -d=ssa/check/seed=1234
306 enables checking after each phase, using 1234 to seed the PRNG
307 used for value order randomization
310 enables time reporting for all phases
313 sets debugging level to 2 in the prove pass
315 Be aware that when "/debug=X" is applied to a pass, some passes
316 will emit debug output for all functions, and other passes will
317 only emit debug output for functions that match the current
320 Multiple flags can be passed at once, by separating them with
323 -d=ssa/check/on,ssa/all/time
327 if phase == "check" {
330 checkEnabled = val != 0
331 debugPoset = checkEnabled // also turn on advanced self-checking in prove's data structure
334 checkEnabled = val == 0
335 debugPoset = checkEnabled
340 debugPoset = checkEnabled
357 BuildDump[valString] = true
358 GenssaDump[valString] = true
361 return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option (expected ssa/all/{time,mem,dump=function_name})", flag, phase)
365 if phase == "intrinsics" {
368 IntrinsicsDisable = val == 0
370 IntrinsicsDisable = val != 0
372 IntrinsicsDebug = val
374 return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option (expected ssa/intrinsics/{on,off,debug})", flag, phase)
378 if phase == "build" {
387 BuildDump[valString] = true
389 return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option (expected ssa/build/{debug,test,stats,dump=function_name})", flag, phase)
393 if phase == "genssa" {
396 GenssaDump[valString] = true
398 return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option (expected ssa/genssa/dump=function_name)", flag, phase)
403 underphase := strings.Replace(phase, "_", " ", -1)
404 var re *regexp.Regexp
406 r, ok := regexp.Compile(underphase[1:])
408 return fmt.Sprintf("Error %s in regexp for phase %s, flag %s", ok.Error(), phase, flag)
413 for i, p := range passes {
422 } else if p.name == phase || p.name == underphase || re != nil && re.MatchString(p.name) {
425 p.disabled = val == 0
427 p.disabled = val != 0
441 return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
443 if p.disabled && p.required {
444 return fmt.Sprintf("Cannot disable required SSA phase %s using -d=ssa/%s debug option", phase, phase)
453 return fmt.Sprintf("Did not find a phase matching %s in -d=ssa/... debug option", phase)
456 // list of passes for the compiler
457 var passes = [...]pass{
458 // TODO: combine phielim and copyelim into a single pass?
459 {name: "number lines", fn: numberLines, required: true},
460 {name: "early phielim", fn: phielim},
461 {name: "early copyelim", fn: copyelim},
462 {name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt
463 {name: "short circuit", fn: shortcircuit},
464 {name: "decompose user", fn: decomposeUser, required: true},
465 {name: "pre-opt deadcode", fn: deadcode},
466 {name: "opt", fn: opt, required: true}, // NB: some generic rules know the name of the opt pass. TODO: split required rules and optimizing rules
467 {name: "zero arg cse", fn: zcse, required: true}, // required to merge OpSB values
468 {name: "opt deadcode", fn: deadcode, required: true}, // remove any blocks orphaned during opt
469 {name: "generic cse", fn: cse},
470 {name: "phiopt", fn: phiopt},
471 {name: "gcse deadcode", fn: deadcode, required: true}, // clean out after cse and phiopt
472 {name: "nilcheckelim", fn: nilcheckelim},
473 {name: "prove", fn: prove},
474 {name: "early fuse", fn: fuseEarly},
475 {name: "expand calls", fn: expandCalls, required: true},
476 {name: "decompose builtin", fn: postExpandCallsDecompose, required: true},
477 {name: "softfloat", fn: softfloat, required: true},
478 {name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
479 {name: "dead auto elim", fn: elimDeadAutosGeneric},
480 {name: "sccp", fn: sccp},
481 {name: "generic deadcode", fn: deadcode, required: true}, // remove dead stores, which otherwise mess up store chain
482 {name: "check bce", fn: checkbce},
483 {name: "branchelim", fn: branchelim},
484 {name: "late fuse", fn: fuseLate},
485 {name: "dse", fn: dse},
486 {name: "memcombine", fn: memcombine},
487 {name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
488 {name: "insert resched checks", fn: insertLoopReschedChecks,
489 disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
490 {name: "lower", fn: lower, required: true},
491 {name: "addressing modes", fn: addressingModes, required: false},
492 {name: "late lower", fn: lateLower, required: true},
493 {name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
494 {name: "lowered cse", fn: cse},
495 {name: "elim unread autos", fn: elimUnreadAutos},
496 {name: "tighten tuple selectors", fn: tightenTupleSelectors, required: true},
497 {name: "lowered deadcode", fn: deadcode, required: true},
498 {name: "checkLower", fn: checkLower, required: true},
499 {name: "late phielim", fn: phielim},
500 {name: "late copyelim", fn: copyelim},
501 {name: "tighten", fn: tighten, required: true}, // move values closer to their uses
502 {name: "late deadcode", fn: deadcode},
503 {name: "critical", fn: critical, required: true}, // remove critical edges
504 {name: "phi tighten", fn: phiTighten}, // place rematerializable phi args near uses to reduce value lifetimes
505 {name: "likelyadjust", fn: likelyadjust},
506 {name: "layout", fn: layout, required: true}, // schedule blocks
507 {name: "schedule", fn: schedule, required: true}, // schedule values
508 {name: "late nilcheck", fn: nilcheckelim2},
509 {name: "flagalloc", fn: flagalloc, required: true}, // allocate flags register
510 {name: "regalloc", fn: regalloc, required: true}, // allocate int & float registers + stack slots
511 {name: "loop rotate", fn: loopRotate},
512 {name: "trim", fn: trim}, // remove empty blocks
515 // Double-check phase ordering constraints.
516 // This code is intended to document the ordering requirements
517 // between different phases. It does not override the passes
519 type constraint struct {
520 a, b string // a must come before b
523 var passOrder = [...]constraint{
524 // "insert resched checks" uses mem, better to clean out stores first.
525 {"dse", "insert resched checks"},
526 // insert resched checks adds new blocks containing generic instructions
527 {"insert resched checks", "lower"},
528 {"insert resched checks", "tighten"},
530 // prove relies on common-subexpression elimination for maximum benefits.
531 {"generic cse", "prove"},
532 // deadcode after prove to eliminate all new dead blocks.
533 {"prove", "generic deadcode"},
534 // common-subexpression before dead-store elim, so that we recognize
535 // when two address expressions are the same.
536 {"generic cse", "dse"},
537 // cse substantially improves nilcheckelim efficacy
538 {"generic cse", "nilcheckelim"},
539 // allow deadcode to clean up after nilcheckelim
540 {"nilcheckelim", "generic deadcode"},
541 // nilcheckelim generates sequences of plain basic blocks
542 {"nilcheckelim", "late fuse"},
543 // nilcheckelim relies on opt to rewrite user nil checks
544 {"opt", "nilcheckelim"},
545 // tighten will be most effective when as many values have been removed as possible
546 {"generic deadcode", "tighten"},
547 {"generic cse", "tighten"},
548 // checkbce needs the values removed
549 {"generic deadcode", "check bce"},
550 // decompose builtin now also cleans up after expand calls
551 {"expand calls", "decompose builtin"},
552 // don't run optimization pass until we've decomposed builtin objects
553 {"decompose builtin", "late opt"},
554 // decompose builtin is the last pass that may introduce new float ops, so run softfloat after it
555 {"decompose builtin", "softfloat"},
556 // tuple selectors must be tightened to generators and de-duplicated before scheduling
557 {"tighten tuple selectors", "schedule"},
558 // remove critical edges before phi tighten, so that phi args get better placement
559 {"critical", "phi tighten"},
560 // don't layout blocks until critical edges have been removed
561 {"critical", "layout"},
562 // regalloc requires the removal of all critical edges
563 {"critical", "regalloc"},
564 // regalloc requires all the values in a block to be scheduled
565 {"schedule", "regalloc"},
566 // the rules in late lower run after the general rules.
567 {"lower", "late lower"},
568 // late lower may generate some values that need to be CSEed.
569 {"late lower", "lowered cse"},
570 // checkLower must run after lowering & subsequent dead code elim
571 {"lower", "checkLower"},
572 {"lowered deadcode", "checkLower"},
573 {"late lower", "checkLower"},
574 // late nilcheck needs instructions to be scheduled.
575 {"schedule", "late nilcheck"},
576 // flagalloc needs instructions to be scheduled.
577 {"schedule", "flagalloc"},
578 // regalloc needs flags to be allocated first.
579 {"flagalloc", "regalloc"},
580 // loopRotate will confuse regalloc.
581 {"regalloc", "loop rotate"},
582 // trim needs regalloc to be done first.
583 {"regalloc", "trim"},
584 // memcombine works better if fuse happens first, to help merge stores.
585 {"late fuse", "memcombine"},
586 // memcombine is a arch-independent pass.
587 {"memcombine", "lower"},
591 for _, c := range passOrder {
595 for k, p := range passes {
604 log.Panicf("pass %s not found", a)
607 log.Panicf("pass %s not found", b)
610 log.Panicf("passes %s and %s out of order", a, b)