// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
8 "cmd/compile/internal/reflectdata"
9 "cmd/compile/internal/types"
// A ZeroRegion records parts of an object which are known to be zero.
// A ZeroRegion only applies to a single memory state.
// Each bit in mask is set if the corresponding pointer-sized word of
// the base object is known to be zero.
// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize)
// is known to be zero.
type ZeroRegion struct {
	base *Value
	mask uint64
}
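
// For example (illustrative): for p := new([4]*int) on a 64-bit target,
// computeZeroMap records, at the memory state of the allocation,
//
//	ZeroRegion{base: p, mask: 0b1111}
//
// where p is the SSA value holding the result; a later store to word 2
// clears bit 2, leaving mask == 0b1011.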
// mightBeHeapPointer reports whether v might point to the heap.
// v must have pointer type.
func mightBeHeapPointer(v *Value) bool {
	if IsGlobalAddr(v) {
		return false
	}
	return true
}
// mightContainHeapPointer reports whether the data currently at addresses
// [ptr,ptr+size) might contain heap pointers. "currently" means at memory state mem.
// zeroes contains ZeroRegion data to help make that decision (see computeZeroMap).
func mightContainHeapPointer(ptr *Value, size int64, mem *Value, zeroes map[ID]ZeroRegion) bool {
	if IsReadOnlyGlobalAddr(ptr) {
		// The read-only globals section cannot contain any heap pointers.
		return false
	}
	// See if we can prove that the queried memory is all zero.

	// Find base pointer and offset. Hopefully, the base is the result of a new(T).
	off := int64(0)
	for ptr.Op == OpOffPtr {
		off += ptr.AuxInt
		ptr = ptr.Args[0]
	}
	ptrSize := ptr.Block.Func.Config.PtrSize
	if off%ptrSize != 0 || size%ptrSize != 0 {
		ptr.Fatalf("unaligned pointer write")
	}
	if off < 0 || off+size > 64*ptrSize {
		// memory range goes off end of tracked offsets
		return true
	}
	z := zeroes[mem.ID]
	if ptr != z.base {
		// This isn't the object we know about at this memory state.
		return true
	}
	// Mask of bits we're asking about
	m := (uint64(1)<<(size/ptrSize) - 1) << (off / ptrSize)
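	// For example, with ptrSize == 8, off == 16, and size == 24, the query
	// covers words 2, 3, and 4, so m == 0b111<<2 == 0b11100.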

	if z.mask&m == m {
		// All locations are known to be zero, so no heap pointers.
		return false
	}
	return true
}
// needwb reports whether we need a write barrier for store op v.
// v must be Store/Move/Zero.
// zeroes provides known zero information (keyed by ID of memory-type values).
func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
	t, ok := v.Aux.(*types.Type)
	if !ok {
		v.Fatalf("store aux is not a type: %s", v.LongString())
	}
	if !t.HasPointers() {
		return false
	}
	dst := v.Args[0]
	if IsStackAddr(dst) {
		return false // writes into the stack don't need a write barrier
	}
	// If we're writing to a place that might have heap pointers, we need
	// to use the write barrier.
	if mightContainHeapPointer(dst, t.Size(), v.MemoryArg(), zeroes) {
		return true
	}
	// Lastly, check if the values we're writing might be heap pointers.
	// If they aren't, we don't need a write barrier.
	switch v.Op {
	case OpStore:
		if !mightBeHeapPointer(v.Args[1]) {
			return false
		}
	case OpZero:
		return false // nil is not a heap pointer
	case OpMove:
		if !mightContainHeapPointer(v.Args[1], t.Size(), v.Args[2], zeroes) {
			return false
		}
	default:
		v.Fatalf("store op unknown: %s", v.LongString())
	}
	return true
}
// needWBsrc reports whether GC needs to see v when it is the source of a store.
func needWBsrc(v *Value) bool {
	return !IsGlobalAddr(v)
}
// needWBdst reports whether GC needs to see what used to be in *ptr when ptr is
// the target of a pointer store.
func needWBdst(ptr, mem *Value, zeroes map[ID]ZeroRegion) bool {
	// Detect storing to zeroed memory.
	var off int64
	for ptr.Op == OpOffPtr {
		off += ptr.AuxInt
		ptr = ptr.Args[0]
	}
	ptrSize := ptr.Block.Func.Config.PtrSize
	if off%ptrSize != 0 {
		ptr.Fatalf("unaligned pointer write")
	}
	if off < 0 || off >= 64*ptrSize {
		// write goes off end of tracked offsets
		return true
	}
	z := zeroes[mem.ID]
	if ptr != z.base {
		return true
	}
	// If destination is known to be zeroed, we don't need the write barrier
	// to record the old value in *ptr.
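	// For example, if z.mask == 0b1011 and off/ptrSize == 2, bit 2 is clear,
	// so *ptr is not known to be zero and its old value must be recorded.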
	return z.mask>>uint(off/ptrSize)&1 == 0
}
// writebarrier pass inserts write barriers for store ops (Store, Move, Zero)
// when necessary (the condition above). It rewrites store ops to branches
// and runtime calls, like
//
//	if writeBarrier.enabled {
//		buf := gcWriteBarrier2()	// Not a regular Go call
//		buf[0] = val
//		buf[1] = *ptr
//	}
//	*ptr = val
//
// A sequence of WB stores for many pointer fields of a single type will
// be emitted together, with a single branch.
func writebarrier(f *Func) {
	if !f.fe.UseWriteBarrier() {
		return
	}
	// Number of write buffer entries we can request at once.
	// Must match runtime/mwbbuf.go:wbMaxEntriesPerCall.
	// It must also match the number of instances of runtime.gcWriteBarrier{X}.
	const maxEntries = 8
	var sb, sp, wbaddr, const0 *Value
	var cgoCheckPtrWrite, cgoCheckMemmove *obj.LSym
	var wbZero, wbMove *obj.LSym
	var stores, after []*Value
	var sset, sset2 *sparseSet
	var storeNumber []int32
	// Compute map from a value to the SelectN [1] value that uses it.
	select1 := f.Cache.allocValueSlice(f.NumValues())
	defer func() { f.Cache.freeValueSlice(select1) }()
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpSelectN {
				continue
			}
			if v.AuxInt != 1 {
				continue
			}
			select1[v.Args[0].ID] = v
		}
	}
	zeroes := f.computeZeroMap(select1)
	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
		// first, identify all the stores that need a write barrier.
		// mark them with WB ops temporarily. record presence of WB ops.
		nWBops := 0 // count of temporarily created WB ops remaining to be rewritten in the current block
		for _, v := range b.Values {
			switch v.Op {
			case OpStore, OpMove, OpZero:
				if needwb(v, zeroes) {
		if wbaddr == nil {
			// lazily initialize global values for write barrier test and calls
			// find SB and SP values in entry block
			initpos := f.Entry.Pos
			sp, sb = f.spSb()
			wbsym := f.fe.Syslook("writeBarrier")
			wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.Types.UInt32Ptr, wbsym, sb)
			wbZero = f.fe.Syslook("wbZero")
			wbMove = f.fe.Syslook("wbMove")
			if buildcfg.Experiment.CgoCheck2 {
				cgoCheckPtrWrite = f.fe.Syslook("cgoCheckPtrWrite")
				cgoCheckMemmove = f.fe.Syslook("cgoCheckMemmove")
			}
			const0 = f.ConstInt32(f.Config.Types.UInt32, 0)

			// allocate auxiliary data structures for computing store order
			sset = f.newSparseSet(f.NumValues())
			defer f.retSparseSet(sset)
			sset2 = f.newSparseSet(f.NumValues())
			defer f.retSparseSet(sset2)
			storeNumber = f.Cache.allocInt32Slice(f.NumValues())
			defer f.Cache.freeInt32Slice(storeNumber)
		}
		// order values in store order
		b.Values = storeOrder(b.Values, sset, storeNumber)
		// find the start and end of the last contiguous WB store sequence.
		// a branch will be inserted there. values after it will be moved
		// to a new block.
		var last *Value
		var start, end int
		values := b.Values
	FindSeq:
		for i := len(values) - 1; i >= 0; i-- {
			w := values[i]
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				start = i
				if last == nil {
					last = w
					end = i + 1
				}
			case OpVarDef, OpVarLive:
				continue
			default:
				if last == nil {
					continue
				}
				break FindSeq
			}
		}
		stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
		after = append(after[:0], b.Values[end:]...)
		b.Values = b.Values[:start]

		// find the memory before the WB stores
		mem := stores[0].MemoryArg()
		pos := stores[0].Pos
		// If the source of a MoveWB is volatile (will be clobbered by a
		// function call), we need to copy it to a temporary location, as
		// marshaling the args of wbMove might clobber the value we're
		// trying to move.
		// Look for volatile source, copy it to temporary before we check
		// the write barrier flag.
		// It is unlikely to have more than one of them. Just do a linear
		// search instead of using a map.
		type volatileCopy struct {
			src *Value // address of original volatile value
			tmp *Value // address of temporary we've copied the volatile value into
		}
		var volatiles []volatileCopy
		if !(f.ABIDefault == f.ABI1 && len(f.Config.intParamRegs) >= 3) {
			// We don't need to do this if the calls we're going to do take
			// all their arguments in registers.
			// 3 is the magic number because it covers wbZero, wbMove, cgoCheckMemmove.
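			// For example, on amd64 with the register-based ABI, the three
			// arguments of wbMove (&typ, dst, src) all travel in registers,
			// so marshaling them cannot clobber a volatile source.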
		copyLoop:
			for _, w := range stores {
				if w.Op == OpMoveWB {
					val := w.Args[1]
					if isVolatile(val) {
						for _, c := range volatiles {
							if val == c.src {
								continue copyLoop // already copied
							}
						}

						t := val.Type.Elem()
						tmp := f.fe.Auto(w.Pos, t)
						mem = b.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, mem)
						tmpaddr := b.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, mem)
						siz := t.Size()
						mem = b.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, mem)
						mem.Aux = t
						volatiles = append(volatiles, volatileCopy{val, tmpaddr})
					}
				}
			}
		}
		// Build branch point.
		bThen := f.NewBlock(BlockPlain)
		bEnd := f.NewBlock(b.Kind)
		bThen.Pos = pos
		bEnd.Pos = b.Pos
		// Set up control flow for end block.
		bEnd.CopyControls(b)
		bEnd.Likely = b.Likely
		for _, e := range b.Succs {
			bEnd.Succs = append(bEnd.Succs, e)
			e.b.Preds[e.i].b = bEnd
		}
		// set up control flow for write barrier test
		// load word, test word, avoiding partial register write from load byte.
		cfgtypes := &f.Config.Types
		flag := b.NewValue2(pos, OpLoad, cfgtypes.UInt32, wbaddr, mem)
		flag = b.NewValue2(pos, OpNeq32, cfgtypes.Bool, flag, const0)
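		// In effect, the generated test is (sketch):
		//
		//	if *writeBarrier != 0 { goto bThen } else { goto bEnd }
		//
		// with the branch to bThen marked unlikely.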
		b.Kind = BlockIf
		b.SetControl(flag)
		b.Likely = BranchUnlikely
		b.Succs = b.Succs[:0]
		b.AddEdgeTo(bThen)
		b.AddEdgeTo(bEnd)
		bThen.AddEdgeTo(bEnd)
		// For each write barrier store, append write barrier code to bThen.
		memThen := mem
		var curCall *Value
		var curPtr *Value
		addEntry := func(v *Value) {
			if curCall == nil || curCall.AuxInt == maxEntries {
				t := types.NewTuple(types.Types[types.TUINTPTR].PtrTo(), types.TypeMem)
				curCall = bThen.NewValue1(pos, OpWB, t, memThen)
				curPtr = bThen.NewValue1(pos, OpSelect0, types.Types[types.TUINTPTR].PtrTo(), curCall)
				memThen = bThen.NewValue1(pos, OpSelect1, types.TypeMem, curCall)
			}
			// Store value in write buffer
			num := curCall.AuxInt
			curCall.AuxInt = num + 1
			wbuf := bThen.NewValue1I(pos, OpOffPtr, types.Types[types.TUINTPTR].PtrTo(), num*f.Config.PtrSize, curPtr)
			memThen = bThen.NewValue3A(pos, OpStore, types.TypeMem, types.Types[types.TUINTPTR], wbuf, v, memThen)
		}
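		// Entries occupy consecutive uintptr slots of the buffer returned by
		// the OpWB call: for example, the entry with num == 1 is stored at
		// curPtr+8 on a 64-bit target.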
		// Note: we can issue the write barrier code in any order. In particular,
		// it doesn't matter if they are in a different order *even if* they end
		// up referring to overlapping memory regions, for instance if an OpStore
		// stores to a location that is later read by an OpMove. In all cases
		// any pointers we must get into the write barrier buffer still make it,
		// possibly in a different order and possibly a different (but definitely
		// more than 0) number of times.
		// In light of that, we process all the OpStoreWBs first. This minimizes
		// the amount of spill/restore code we need around the Zero/Move calls.
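		// For example, given the sequence StoreWB p; MoveWB dst, src;
		// StoreWB q, the buffer entries for p and q are issued first and
		// the wbMove runtime call after them.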
		// srcs contains the value IDs of pointer values we've put in the write barrier buffer.
		srcs := sset
		srcs.clear()
		// dsts contains the value IDs of locations which we've read a pointer out of
		// and put the result in the write barrier buffer.
		dsts := sset2
		dsts.clear()

		for _, w := range stores {
			if w.Op != OpStoreWB {
				continue
			}
			pos := w.Pos
			ptr := w.Args[0]
			val := w.Args[1]
			if !srcs.contains(val.ID) && needWBsrc(val) {
				srcs.add(val.ID)
				addEntry(val)
			}
			if !dsts.contains(ptr.ID) && needWBdst(ptr, w.Args[2], zeroes) {
				dsts.add(ptr.ID)
				// Load old value from store target.
				// Note: This turns bad pointer writes into bad
				// pointer reads, which could be confusing. We could avoid
				// reading from obviously bad pointers, which would
				// take care of the vast majority of these. We could
				// patch this up in the signal handler, or use XCHG to
				// combine the read and the write.
				oldVal := bThen.NewValue2(pos, OpLoad, types.Types[types.TUINTPTR], ptr, memThen)
				// Save old value to write buffer.
				addEntry(oldVal)
			}
			f.fe.Func().SetWBPos(pos)
			nWBops--
		}
		for _, w := range stores {
			pos := w.Pos
			switch w.Op {
			case OpZeroWB:
				dst := w.Args[0]
				typ := reflectdata.TypeLinksym(w.Aux.(*types.Type))
				// zeroWB(&typ, dst)
				taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
				memThen = wbcall(pos, bThen, wbZero, sp, memThen, taddr, dst)
				f.fe.Func().SetWBPos(pos)
				nWBops--
			case OpMoveWB:
				dst := w.Args[0]
				src := w.Args[1]
				if isVolatile(src) {
					for _, c := range volatiles {
						if src == c.src {
							src = c.tmp
							break
						}
					}
				}
				typ := reflectdata.TypeLinksym(w.Aux.(*types.Type))
				// moveWB(&typ, dst, src)
				taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
				memThen = wbcall(pos, bThen, wbMove, sp, memThen, taddr, dst, src)
				f.fe.Func().SetWBPos(pos)
				nWBops--
			}
		}
		// merge memory
		mem = bEnd.NewValue2(pos, OpPhi, types.TypeMem, mem, memThen)
		// Do raw stores after merge point.
		for _, w := range stores {
			pos := w.Pos
			switch w.Op {
			case OpStoreWB:
				ptr := w.Args[0]
				val := w.Args[1]
				if buildcfg.Experiment.CgoCheck2 {
					// Issue cgo checking code.
					mem = wbcall(pos, bEnd, cgoCheckPtrWrite, sp, mem, ptr, val)
				}
				mem = bEnd.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, mem)
			case OpZeroWB:
				dst := w.Args[0]
				mem = bEnd.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, dst, mem)
				mem.Aux = w.Aux
			case OpMoveWB:
				dst := w.Args[0]
				src := w.Args[1]
				if isVolatile(src) {
					for _, c := range volatiles {
						if src == c.src {
							src = c.tmp
							break
						}
					}
				}
				if buildcfg.Experiment.CgoCheck2 {
					// Issue cgo checking code.
					typ := reflectdata.TypeLinksym(w.Aux.(*types.Type))
					taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
					mem = wbcall(pos, bEnd, cgoCheckMemmove, sp, mem, taddr, dst, src)
				}
				mem = bEnd.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, dst, src, mem)
				mem.Aux = w.Aux
			case OpVarDef, OpVarLive:
				mem = bEnd.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, mem)
			}
		}
		// The last store becomes the WBend marker. This marker is used by the liveness
		// pass to determine what parts of the code are preemption-unsafe.
		// All subsequent memory operations use this memory, so we have to sacrifice the
		// previous last memory op to become this new value.
		bEnd.Values = append(bEnd.Values, last)
		last.Block = bEnd
		last.reset(OpWBend)
		last.Pos = last.Pos.WithNotStmt()
		last.Type = types.TypeMem
		last.AddArg(mem)
		// Free all the old stores, except last which became the WBend marker.
		for _, w := range stores {
			if w != last {
				w.resetArgs()
			}
		}
		for _, w := range stores {
			if w != last {
				f.freeValue(w)
			}
		}
		// put values after the store sequence into the end block
		bEnd.Values = append(bEnd.Values, after...)
		for _, w := range after {
			w.Block = bEnd
		}
		// if we have more stores in this block, do this block again
// computeZeroMap returns a map from an ID of a memory value to
// a set of locations that are known to be zeroed at that memory value.
func (f *Func) computeZeroMap(select1 []*Value) map[ID]ZeroRegion {
	ptrSize := f.Config.PtrSize
	// Keep track of which parts of memory are known to be zero.
	// This helps with removing write barriers for various initialization patterns.
	// This analysis is conservative. We only keep track, for each memory state, of
	// which of the first 64 words of a single object are known to be zero.
	zeroes := map[ID]ZeroRegion{}
	// Find new objects.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if mem, ok := IsNewObject(v, select1); ok {
				// While compiling package runtime itself, we might see user
				// calls to newobject, which will have result type
				// unsafe.Pointer instead. We can't easily infer how large the
				// allocated memory is, so just skip it.
				if types.LocalPkg.Path == "runtime" && v.Type.IsUnsafePtr() {
					continue
				}

				nptr := v.Type.Elem().Size() / ptrSize
				if nptr > 64 {
					nptr = 64
				}
				zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1}
			}
		}
	}
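	// At this point, for example, a new([3]*T) on a 64-bit target is
	// recorded with nptr == 3 and mask 0b111: all three pointer words of
	// the fresh object are known zero.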
	// Find stores to those new objects.
	for {
		changed := false
		for _, b := range f.Blocks {
			// Note: iterating forwards helps convergence, as values are
			// typically (but not always!) in store order.
			for _, v := range b.Values {
				if v.Op != OpStore {
					continue
				}
				z, ok := zeroes[v.MemoryArg().ID]
				if !ok {
					continue
				}
				ptr := v.Args[0]
				var off int64
				size := v.Aux.(*types.Type).Size()
				for ptr.Op == OpOffPtr {
					off += ptr.AuxInt
					ptr = ptr.Args[0]
				}
				if ptr != z.base {
					// Different base object - we don't know anything.
					// We could even be writing to the base object we know
					// about, but through an aliased but offset pointer.
					// So we have to throw all the zero information we have away.
					continue
				}
				// Round to cover any partially written pointer slots.
				// Pointer writes should never be unaligned like this, but non-pointer
				// writes to pointer-containing types will do this.
				if d := off % ptrSize; d != 0 {
					off -= d
					size += d
				}
				if d := size % ptrSize; d != 0 {
					size += ptrSize - d
				}
				// Clip to the 64 words that we track.
				min := off
				max := off + size
				if min < 0 {
					min = 0
				}
				if max > 64*ptrSize {
					max = 64 * ptrSize
				}
				// Clear bits for parts that we are writing (and hence
				// will no longer necessarily be zero).
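				// For example, an 8-byte store at offset 16 with ptrSize == 8
				// clears bit 2: word 2 is no longer known to be zero.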
				for i := min; i < max; i += ptrSize {
					bit := i / ptrSize
					z.mask &^= 1 << uint(bit)
				}
				if z.mask == 0 {
					// No more known zeros - don't bother keeping.
					continue
				}
				// Save updated known zero contents for new store.
				if zeroes[v.ID] != z {
					zeroes[v.ID] = z
					changed = true
				}
			}
		}
		if !changed {
			break
		}
	}
	if f.pass.debug > 0 {
		fmt.Printf("func %s\n", f.Name)
		for mem, z := range zeroes {
			fmt.Printf(" memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask)
		}
	}
	return zeroes
}
// wbcall emits a write barrier runtime call in b and returns the resulting memory.
func wbcall(pos src.XPos, b *Block, fn *obj.LSym, sp, mem *Value, args ...*Value) *Value {
	config := b.Func.Config
	typ := config.Types.Uintptr // type of all argument values
	nargs := len(args)
	// TODO (register args) this is a bit of a hack.
	inRegs := b.Func.ABIDefault == b.Func.ABI1 && len(config.intParamRegs) >= 3

	if !inRegs {
		// Store arguments to the appropriate stack slot.
		off := config.ctxt.Arch.FixedFrameSize
		for _, arg := range args {
			stkaddr := b.NewValue1I(pos, OpOffPtr, typ.PtrTo(), off, sp)
			mem = b.NewValue3A(pos, OpStore, types.TypeMem, typ, stkaddr, arg, mem)
			off += typ.Size()
		}
		args = args[:0]
	}

	args = append(args, mem)
	argTypes := make([]*types.Type, nargs, 3) // at most 3 args; allows stack allocation
	for i := 0; i < nargs; i++ {
		argTypes[i] = typ
	}
	call := b.NewValue0A(pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(fn, b.Func.ABIDefault.ABIAnalyzeTypes(nil, argTypes, nil)))
	call.AddArgs(args...)
	call.AuxInt = int64(nargs) * typ.Size()
	return b.NewValue1I(pos, OpSelectN, types.TypeMem, 0, call)
}
// round rounds o up to a multiple of r; r must be a power of 2.
func round(o int64, r int64) int64 {
	return (o + r - 1) &^ (r - 1)
}
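
// For example, round(13, 8) == 16 and round(16, 8) == 16: adding r-1 and
// masking with &^ (r-1) rounds up by clearing the low bits.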
// IsStackAddr reports whether v is known to be an address of a stack slot.
func IsStackAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr, OpGetCallerSP:
		return true
	}
	return false
}
// IsGlobalAddr reports whether v is known to be an address of a global (or nil).
func IsGlobalAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	if v.Op == OpAddr && v.Args[0].Op == OpSB {
		return true // address of a global
	}
	if v.Op == OpConstNil {
		return true
	}
	if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) {
		return true // loading from a read-only global - the resulting address can't be a heap address.
	}
	return false
}
// IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
func IsReadOnlyGlobalAddr(v *Value) bool {
	if v.Op == OpConstNil {
		// Nil pointers are read only. See issue 33438.
		return true
	}
	if v.Op == OpAddr && v.Aux != nil && v.Aux.(*obj.LSym).Type == objabi.SRODATA {
		return true
	}
	return false
}
// IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object;
// if so, it also returns the memory state mem at which v is zero.
func IsNewObject(v *Value, select1 []*Value) (mem *Value, ok bool) {
	f := v.Block.Func
	c := f.Config
	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Op != OpSelectN || v.AuxInt != 0 {
			return nil, false
		}
		mem = select1[v.Args[0].ID]
		if mem == nil {
			return nil, false
		}
	} else {
		if v.Op != OpLoad {
			return nil, false
		}
		mem = v.MemoryArg()
		if mem.Op != OpSelectN {
			return nil, false
		}
		if mem.Type != types.TypeMem {
			return nil, false
		} // assume it is the right selection if true
	}
	call := mem.Args[0]
	if call.Op != OpStaticCall {
		return nil, false
	}
	if !isSameCall(call.Aux, "runtime.newobject") {
		return nil, false
	}
	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Args[0] == call {
			return mem, true
		}
		return nil, false
	}
	if v.Args[0].Op != OpOffPtr {
		return nil, false
	}
	if v.Args[0].Args[0].Op != OpSP {
		return nil, false
	}
	if v.Args[0].AuxInt != c.ctxt.Arch.FixedFrameSize+c.RegSize { // offset of return value
		return nil, false
	}
	return mem, true
}
// IsSanitizerSafeAddr reports whether v is known to be an address
// that doesn't need instrumentation.
func IsSanitizerSafeAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr:
		// Stack addresses are always safe.
		return true
	case OpITab, OpStringPtr, OpGetClosurePtr:
		// Itabs, string data, and closure fields are
		// read-only once initialized.
		return true
	case OpAddr:
		vt := v.Aux.(*obj.LSym).Type
		return vt == objabi.SRODATA || vt == objabi.SLIBFUZZER_8BIT_COUNTER || vt == objabi.SCOVERAGE_COUNTER || vt == objabi.SCOVERAGE_AUXVAR
	}
	return false
}
// isVolatile reports whether v is a pointer to the argument region on the stack,
// which will be clobbered by a function call.
func isVolatile(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy || v.Op == OpSelectNAddr {
		v = v.Args[0]
	}
	return v.Op == OpSP
}