// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/reflectdata"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
)
// A ZeroRegion records parts of an object which are known to be zero.
// A ZeroRegion only applies to a single memory state.
// Each bit in mask is set if the corresponding pointer-sized word of
// the base object is known to be zero.
// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize)
// is known to be zero.
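// For example, with ptrSize == 8, mask == 0b0110 means that
// [base+8, base+24) is known to be zero.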
type ZeroRegion struct {
	base *Value
	mask uint64
}
// mightBeHeapPointer reports whether v might point to the heap.
// v must have pointer type.
func mightBeHeapPointer(v *Value) bool {
	if IsGlobalAddr(v) {
		return false
	}
	return true
}
// mightContainHeapPointer reports whether the data currently at addresses
// [ptr,ptr+size) might contain heap pointers. "currently" means at memory state mem.
// zeroes contains ZeroRegion data to help make that decision (see computeZeroMap).
func mightContainHeapPointer(ptr *Value, size int64, mem *Value, zeroes map[ID]ZeroRegion) bool {
	if IsReadOnlyGlobalAddr(ptr) {
		// The read-only globals section cannot contain any heap pointers.
		return false
	}

	// See if we can prove that the queried memory is all zero.

	// Find base pointer and offset. Hopefully, the base is the result of a new(T).
	var off int64
	for ptr.Op == OpOffPtr {
		off += ptr.AuxInt
		ptr = ptr.Args[0]
	}

	ptrSize := ptr.Block.Func.Config.PtrSize
	if off%ptrSize != 0 || size%ptrSize != 0 {
		ptr.Fatalf("unaligned pointer write")
	}
	if off < 0 || off+size > 64*ptrSize {
		// memory range goes off end of tracked offsets
		return true
	}
	z := zeroes[mem.ID]
	if ptr != z.base {
		// This isn't the object we know about at this memory state.
		return true
	}
	// Mask of bits we're asking about
	m := (uint64(1)<<(size/ptrSize) - 1) << (off / ptrSize)
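	// For example, with ptrSize == 8, off == 16, and size == 16,
	// m == 0b1100: we are asking about words 2 and 3 of the base object.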
	if z.mask&m == m {
		// All locations are known to be zero, so no heap pointers.
		return false
	}
	return true
}
// needwb reports whether we need a write barrier for store op v.
// v must be Store/Move/Zero.
// zeroes provides known zero information (keyed by ID of memory-type values).
func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
	t, ok := v.Aux.(*types.Type)
	if !ok {
		v.Fatalf("store aux is not a type: %s", v.LongString())
	}
	if !t.HasPointers() {
		return false
	}
	dst := v.Args[0]
	if IsStackAddr(dst) {
		return false // writes into the stack don't need write barrier
	}
	// If we're writing to a place that might have heap pointers, we need
	// a write barrier.
	if mightContainHeapPointer(dst, t.Size(), v.MemoryArg(), zeroes) {
		return true
	}
	// Lastly, check if the values we're writing might be heap pointers.
	// If they aren't, we don't need a write barrier.
	switch v.Op {
	case OpStore:
		if !mightBeHeapPointer(v.Args[1]) {
			return false
		}
	case OpZero:
		return false // nil is not a heap pointer
	case OpMove:
		if !mightContainHeapPointer(v.Args[1], t.Size(), v.Args[2], zeroes) {
			return false
		}
	default:
		v.Fatalf("store op unknown: %s", v.LongString())
	}
	return true
}
// writebarrier pass inserts write barriers for store ops (Store, Move, Zero)
// when necessary (the condition above). It rewrites store ops to branches
// and runtime calls, like
//
//	if writeBarrier.enabled {
//		gcWriteBarrier(ptr, val) // Not a regular Go call
//	} else {
//		*ptr = val
//	}
//
// A sequence of WB stores for many pointer fields of a single type will
// be emitted together, with a single branch.
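// In CFG form, each block b containing WB stores is split so that
//
//	b (test writeBarrier.enabled) -> bThen (barrier calls) -> bEnd
//	                              -> bElse (plain stores)  -> bEnd
//
// where bEnd takes over b's original successors.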
func writebarrier(f *Func) {
	if !f.fe.UseWriteBarrier() {
		return
	}

	var sb, sp, wbaddr, const0 *Value
	var typedmemmove, typedmemclr, gcWriteBarrier *obj.LSym
	var stores, after []*Value
	var sset *sparseSet
	var storeNumber []int32
	// Compute map from a value to the SelectN [1] value that uses it.
	select1 := f.Cache.allocValueSlice(f.NumValues())
	defer func() { f.Cache.freeValueSlice(select1) }()
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpSelectN {
				continue
			}
			if v.AuxInt != 1 {
				continue
			}
			select1[v.Args[0].ID] = v
		}
	}

	zeroes := f.computeZeroMap(select1)
	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
		// first, identify all the stores that need to insert a write barrier.
		// mark them with WB ops temporarily. record presence of WB ops.
		nWBops := 0 // count of temporarily created WB ops remaining to be rewritten in the current block
		for _, v := range b.Values {
			switch v.Op {
			case OpStore, OpMove, OpZero:
				if needwb(v, zeroes) {
					switch v.Op {
					case OpStore:
						v.Op = OpStoreWB
					case OpMove:
						v.Op = OpMoveWB
					case OpZero:
						v.Op = OpZeroWB
					}
					nWBops++
				}
			}
		}
		if nWBops == 0 {
			continue
		}
		if wbaddr == nil {
			// lazily initialize global values for write barrier test and calls
			// find SB and SP values in entry block
			initpos := f.Entry.Pos
			sp, sb = f.spSb()
			wbsym := f.fe.Syslook("writeBarrier")
			wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.Types.UInt32Ptr, wbsym, sb)
			gcWriteBarrier = f.fe.Syslook("gcWriteBarrier")
			typedmemmove = f.fe.Syslook("typedmemmove")
			typedmemclr = f.fe.Syslook("typedmemclr")
			const0 = f.ConstInt32(f.Config.Types.UInt32, 0)

			// allocate auxiliary data structures for computing store order
			sset = f.newSparseSet(f.NumValues())
			defer f.retSparseSet(sset)
			storeNumber = f.Cache.allocInt32Slice(f.NumValues())
			defer f.Cache.freeInt32Slice(storeNumber)
		}
		// order values in store order
		b.Values = storeOrder(b.Values, sset, storeNumber)
	again:
		// find the start and end of the last contiguous WB store sequence.
		// a branch will be inserted there. values after it will be moved
		// to a new block.
		var last *Value
		var start, end int
		values := b.Values
	FindSeq:
		for i := len(values) - 1; i >= 0; i-- {
			w := values[i]
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				start = i
				if last == nil {
					last = w
					end = i + 1
				}
			case OpVarDef, OpVarLive:
				continue
			default:
				if last == nil {
					continue
				}
				break FindSeq
			}
		}
		stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
		after = append(after[:0], b.Values[end:]...)
		b.Values = b.Values[:start]
		// find the memory before the WB stores
		mem := stores[0].MemoryArg()
		pos := stores[0].Pos
		bThen := f.NewBlock(BlockPlain)
		bElse := f.NewBlock(BlockPlain)
		bEnd := f.NewBlock(b.Kind)
		bThen.Pos = pos
		bElse.Pos = pos
		bEnd.Pos = b.Pos
		b.Pos = pos
		// set up control flow for end block
		bEnd.CopyControls(b)
		bEnd.Likely = b.Likely
		for _, e := range b.Succs {
			bEnd.Succs = append(bEnd.Succs, e)
			e.b.Preds[e.i].b = bEnd
		}
		// set up control flow for write barrier test
		// load word, test word, avoiding partial register write from load byte.
		cfgtypes := &f.Config.Types
		flag := b.NewValue2(pos, OpLoad, cfgtypes.UInt32, wbaddr, mem)
		flag = b.NewValue2(pos, OpNeq32, cfgtypes.Bool, flag, const0)
		b.Kind = BlockIf
		b.SetControl(flag)
		b.Likely = BranchUnlikely
		b.Succs = b.Succs[:0]
		b.AddEdgeTo(bThen)
		b.AddEdgeTo(bElse)
		// TODO: For OpStoreWB and the buffered write barrier,
		// we could move the write out of the write barrier,
		// which would lead to fewer branches. We could do
		// something similar to OpZeroWB, since the runtime
		// could provide just the barrier half and then we
		// could unconditionally do an OpZero (which could
		// also generate better zeroing code). OpMoveWB is
		// trickier and would require changing how
		// cgoCheckMemmove works.
		bThen.AddEdgeTo(bEnd)
		bElse.AddEdgeTo(bEnd)
		// for each write barrier store, append write barrier version to bThen
		// and simple store version to bElse
		memThen := mem
		memElse := mem

		// If the source of a MoveWB is volatile (will be clobbered by a
		// function call), we need to copy it to a temporary location, as
		// marshaling the args of typedmemmove might clobber the value we're
		// trying to move.
		// Look for volatile source, copy it to temporary before we emit any
		// call.
		// It is unlikely to have more than one of them. Just do a linear
		// search instead of using a map.
		type volatileCopy struct {
			src *Value // address of original volatile value
			tmp *Value // address of temporary we've copied the volatile value into
		}
		var volatiles []volatileCopy
	copyLoop:
		for _, w := range stores {
			if w.Op == OpMoveWB {
				val := w.Args[1]
				if isVolatile(val) {
					for _, c := range volatiles {
						if val == c.src {
							continue copyLoop // already copied
						}
					}

					t := val.Type.Elem()
					tmp := f.fe.Auto(w.Pos, t)
					memThen = bThen.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, memThen)
					tmpaddr := bThen.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, memThen)
					siz := t.Size()
					memThen = bThen.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, memThen)
					memThen.Aux = t
					volatiles = append(volatiles, volatileCopy{val, tmpaddr})
				}
			}
		}
		for _, w := range stores {
			ptr := w.Args[0]
			pos := w.Pos

			var fn *obj.LSym
			var typ *obj.LSym
			var val *Value
			switch w.Op {
			case OpStoreWB:
				val = w.Args[1]
				nWBops--
			case OpMoveWB:
				fn = typedmemmove
				val = w.Args[1]
				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
				nWBops--
			case OpZeroWB:
				fn = typedmemclr
				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
				nWBops--
			case OpVarDef, OpVarLive:
			}

			// then block: emit write barrier call
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				if w.Op == OpStoreWB {
					memThen = bThen.NewValue3A(pos, OpWB, types.TypeMem, gcWriteBarrier, ptr, val, memThen)
				} else {
					srcval := val
					if w.Op == OpMoveWB && isVolatile(srcval) {
						for _, c := range volatiles {
							if srcval == c.src {
								srcval = c.tmp
								break
							}
						}
					}
					memThen = wbcall(pos, bThen, fn, typ, ptr, srcval, memThen, sp, sb)
				}
				// Note that we set up a writebarrier function call.
				f.fe.SetWBPos(pos)
			case OpVarDef, OpVarLive:
				memThen = bThen.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memThen)
			}
			// else block: normal store
			switch w.Op {
			case OpStoreWB:
				memElse = bElse.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, memElse)
			case OpMoveWB:
				memElse = bElse.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, ptr, val, memElse)
				memElse.Aux = w.Aux
			case OpZeroWB:
				memElse = bElse.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, ptr, memElse)
				memElse.Aux = w.Aux
			case OpVarDef, OpVarLive:
				memElse = bElse.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memElse)
			}
		}
		// merge memory
		mem = bEnd.NewValue2(pos, OpPhi, types.TypeMem, memThen, memElse)

		// The last store becomes the WBend marker. This marker is used by the liveness
		// pass to determine what parts of the code are preemption-unsafe.
		// All subsequent memory operations use this memory, so we have to sacrifice the
		// previous last memory op to become this new value.
		bEnd.Values = append(bEnd.Values, last)
		last.Block = bEnd
		last.reset(OpWBend)
		last.Pos = last.Pos.WithNotStmt()
		last.Type = types.TypeMem
		last.AddArg(mem)
		// Free all the old stores, except last which became the WBend marker.
		for _, w := range stores {
			if w != last {
				w.resetArgs()
			}
		}
		for _, w := range stores {
			if w != last {
				f.freeValue(w)
			}
		}

		// put values after the store sequence into the end block
		bEnd.Values = append(bEnd.Values, after...)
		for _, w := range after {
			w.Block = bEnd
		}
		// if we have more stores in this block, do this block again
		if nWBops > 0 {
			goto again
		}
	}
}
// computeZeroMap returns a map from an ID of a memory value to
// a set of locations that are known to be zeroed at that memory value.
func (f *Func) computeZeroMap(select1 []*Value) map[ID]ZeroRegion {

	ptrSize := f.Config.PtrSize
	// Keep track of which parts of memory are known to be zero.
	// This helps with removing write barriers for various initialization patterns.
	// This analysis is conservative. We only keep track, for each memory state, of
	// which of the first 64 words of a single object are known to be zero.
	zeroes := map[ID]ZeroRegion{}
	// Find new objects.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if mem, ok := IsNewObject(v, select1); ok {
				// While compiling package runtime itself, we might see user
				// calls to newobject, which will have result type
				// unsafe.Pointer instead. We can't easily infer how large the
				// allocated memory is, so just skip it.
				if types.LocalPkg.Path == "runtime" && v.Type.IsUnsafePtr() {
					continue
				}

				nptr := v.Type.Elem().Size() / ptrSize
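				// For example, on a 64-bit target, new([4]*int) allocates a
				// 32-byte object: nptr == 4, so the mask below is 0b1111.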
				if nptr > 64 {
					nptr = 64
				}
				zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1}
			}
		}
	}

	// Find stores to those new objects.
	for {
		changed := false

		for _, b := range f.Blocks {
			// Note: iterating forwards helps convergence, as values are
			// typically (but not always!) in store order.
			for _, v := range b.Values {
				if v.Op != OpStore {
					continue
				}
				z, ok := zeroes[v.MemoryArg().ID]
				if !ok {
					continue
				}
				ptr := v.Args[0]
				var off int64
				size := v.Aux.(*types.Type).Size()
				for ptr.Op == OpOffPtr {
					off += ptr.AuxInt
					ptr = ptr.Args[0]
				}
				if ptr != z.base {
					// Different base object - we don't know anything.
					// We could even be writing to the base object we know
					// about, but through an aliased but offset pointer.
					// So we have to throw all the zero information we have away.
					continue
				}
				// Round to cover any partially written pointer slots.
				// Pointer writes should never be unaligned like this, but non-pointer
				// writes to pointer-containing types will do this.
				if d := off % ptrSize; d != 0 {
					off -= d
					size += d
				}
				if d := size % ptrSize; d != 0 {
					size += ptrSize - d
				}
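				// For example, with ptrSize == 8, an 8-byte write at off == 4
				// rounds to off == 0, size == 16: both words it touches are
				// no longer known to be zero.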
				// Clip to the 64 words that we track.
				min := off
				max := off + size
				if min < 0 {
					min = 0
				}
				if max > 64*ptrSize {
					max = 64 * ptrSize
				}
				// Clear bits for parts that we are writing (and hence
				// will no longer necessarily be zero).
				for i := min; i < max; i += ptrSize {
					bit := i / ptrSize
					z.mask &^= 1 << uint(bit)
				}
				if z.mask == 0 {
					// No more known zeros - don't bother keeping.
					continue
				}
				// Save updated known zero contents for new store.
				if zeroes[v.ID] != z {
					zeroes[v.ID] = z
					changed = true
				}
			}
		}
		if !changed {
			break
		}
	}
	if f.pass.debug > 0 {
		fmt.Printf("func %s\n", f.Name)
		for mem, z := range zeroes {
			fmt.Printf("  memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask)
		}
	}
	return zeroes
}
// wbcall emits a write barrier runtime call in b and returns the memory.
func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value) *Value {
	config := b.Func.Config

	var wbargs []*Value
	// TODO (register args) this is a bit of a hack.
	inRegs := b.Func.ABIDefault == b.Func.ABI1 && len(config.intParamRegs) >= 3
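	// When inRegs is true (the register ABI with at least three integer
	// parameter registers), arguments are passed directly in wbargs below;
	// otherwise each argument is stored to its slot in the stack frame.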
	// put arguments on stack
	off := config.ctxt.Arch.FixedFrameSize

	var argTypes []*types.Type
	if typ != nil { // for typedmemmove
		taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
		argTypes = append(argTypes, b.Func.Config.Types.Uintptr)
		off = round(off, taddr.Type.Alignment())
		if inRegs {
			wbargs = append(wbargs, taddr)
		} else {
			arg := b.NewValue1I(pos, OpOffPtr, taddr.Type.PtrTo(), off, sp)
			mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, taddr, mem)
		}
		off += taddr.Type.Size()
	}

	argTypes = append(argTypes, ptr.Type)
	off = round(off, ptr.Type.Alignment())
	if inRegs {
		wbargs = append(wbargs, ptr)
	} else {
		arg := b.NewValue1I(pos, OpOffPtr, ptr.Type.PtrTo(), off, sp)
		mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, ptr, mem)
	}
	off += ptr.Type.Size()

	if val != nil {
		argTypes = append(argTypes, val.Type)
		off = round(off, val.Type.Alignment())
		if inRegs {
			wbargs = append(wbargs, val)
		} else {
			arg := b.NewValue1I(pos, OpOffPtr, val.Type.PtrTo(), off, sp)
			mem = b.NewValue3A(pos, OpStore, types.TypeMem, val.Type, arg, val, mem)
		}
		off += val.Type.Size()
	}
	off = round(off, config.PtrSize)
	wbargs = append(wbargs, mem)

	// issue call
	call := b.NewValue0A(pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(fn, b.Func.ABIDefault.ABIAnalyzeTypes(nil, argTypes, nil)))
	call.AddArgs(wbargs...)
	call.AuxInt = off - config.ctxt.Arch.FixedFrameSize
	return b.NewValue1I(pos, OpSelectN, types.TypeMem, 0, call)
}
// round rounds o up to a multiple of r. r must be a power of 2.
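// For example, round(13, 8) == 16 and round(16, 8) == 16.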
func round(o int64, r int64) int64 {
	return (o + r - 1) &^ (r - 1)
}
// IsStackAddr reports whether v is known to be an address of a stack slot.
func IsStackAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr, OpGetCallerSP:
		return true
	}
	return false
}
// IsGlobalAddr reports whether v is known to be an address of a global (or nil).
func IsGlobalAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	if v.Op == OpAddr && v.Args[0].Op == OpSB {
		return true // address of a global
	}
	if v.Op == OpConstNil {
		return true
	}
	if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) {
		return true // loading from a read-only global - the resulting address can't be a heap address.
	}
	return false
}
// IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
func IsReadOnlyGlobalAddr(v *Value) bool {
	if v.Op == OpConstNil {
		// Nil pointers are read only. See issue 33438.
		return true
	}
	if v.Op == OpAddr && v.Aux != nil && v.Aux.(*obj.LSym).Type == objabi.SRODATA {
		return true
	}
	return false
}
// IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object.
// If so, it also returns the memory state mem at which v is zero.
func IsNewObject(v *Value, select1 []*Value) (mem *Value, ok bool) {
	f := v.Block.Func
	c := f.Config

	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Op != OpSelectN || v.AuxInt != 0 {
			return nil, false
		}
		mem = select1[v.Args[0].ID]
		if mem == nil {
			return nil, false
		}
	} else {
		if v.Op != OpLoad {
			return nil, false
		}
		mem = v.MemoryArg()
		if mem.Op != OpSelectN {
			return nil, false
		}
		if mem.Type != types.TypeMem {
			return nil, false
		} // assume it is the right selection if true
	}
	call := mem.Args[0]
	if call.Op != OpStaticCall {
		return nil, false
	}
	if !isSameCall(call.Aux, "runtime.newobject") {
		return nil, false
	}
	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Args[0] == call {
			return mem, true
		}
		return nil, false
	}
	if v.Args[0].Op != OpOffPtr {
		return nil, false
	}
	if v.Args[0].Args[0].Op != OpSP {
		return nil, false
	}
	if v.Args[0].AuxInt != c.ctxt.Arch.FixedFrameSize+c.RegSize { // offset of return value
		return nil, false
	}
	return mem, true
}
// IsSanitizerSafeAddr reports whether v is known to be an address
// that doesn't need instrumentation.
func IsSanitizerSafeAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr:
		// Stack addresses are always safe.
		return true
	case OpITab, OpStringPtr, OpGetClosurePtr:
		// Itabs, string data, and closure fields are
		// read-only once initialized.
		return true
	case OpAddr:
		vt := v.Aux.(*obj.LSym).Type
		return vt == objabi.SRODATA || vt == objabi.SLIBFUZZER_8BIT_COUNTER || vt == objabi.SCOVERAGE_COUNTER || vt == objabi.SCOVERAGE_AUXVAR
	}
	return false
}
// isVolatile reports whether v is a pointer to an argument region on the stack that
// will be clobbered by a function call.
func isVolatile(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy || v.Op == OpSelectNAddr {
		v = v.Args[0]
	}
	return v.Op == OpSP
}