1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
12 // The following thunks allow calling the gcc-compiled race runtime directly
13 // from Go code without going all the way through cgo.
14 // First, it's much faster (up to 50% speedup for real Go programs).
15 // Second, it eliminates race-related special cases from cgocall and scheduler.
16 // Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
18 // A brief recap of the amd64 calling convention.
19 // Arguments are passed in DI, SI, DX, CX, R8, R9, the rest is on stack.
20 // Callee-saved registers are: BX, BP, R12-R15.
21 // SP must be 16-byte aligned.
23 // On Windows, arguments are passed in CX, DX, R8, R9, the rest is on stack.
24 // Callee-saved registers are: BX, BP, DI, SI, R12-R15.
25 // SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
26 // http://msdn.microsoft.com/en-us/library/ms235286.aspx
27 // We do not do this, because it seems to be intended for vararg/unprototyped functions.
28 // Gcc-compiled race runtime does not try to use that space.
42 // func runtime·raceread(addr uintptr)
43 // Called from instrumented code.
// Puts addr in RARG1, selects __tsan_read as the target in AX and
// tail-jumps to racecalladdr<>, which range-checks addr before calling out.
44 TEXT runtime·raceread(SB), NOSPLIT, $0-8
45 MOVQ addr+0(FP), RARG1 // address being read
47 // void __tsan_read(ThreadState *thr, void *addr, void *pc);
48 MOVQ $__tsan_read(SB), AX // AX = race runtime function to invoke
49 JMP racecalladdr<>(SB) // tail jump, no new frame
51 // func runtime·RaceRead(addr uintptr)
// Capitalized variant of raceread; it only forwards to raceread.
52 TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
53 // This needs to be a tail call, because raceread reads caller pc.
54 JMP runtime·raceread(SB) // JMP rather than CALL keeps the caller's return address on top of the stack
56 // void runtime·racereadpc(void *addr, void *callpc, void *pc)
// Variant of raceread that takes its program counters as explicit arguments.
// Selects __tsan_read_pc in AX and tail-jumps to racecalladdr<>.
57 TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
58 MOVQ addr+0(FP), RARG1 // address being read
59 MOVQ callpc+8(FP), RARG2 // caller-supplied call pc
61 // void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
62 MOVQ $__tsan_read_pc(SB), AX // AX = race runtime function to invoke
63 JMP racecalladdr<>(SB) // tail jump, no new frame
65 // func runtime·racewrite(addr uintptr)
66 // Called from instrumented code.
// Puts addr in RARG1, selects __tsan_write as the target in AX and
// tail-jumps to racecalladdr<>, which range-checks addr before calling out.
67 TEXT runtime·racewrite(SB), NOSPLIT, $0-8
68 MOVQ addr+0(FP), RARG1 // address being written
70 // void __tsan_write(ThreadState *thr, void *addr, void *pc);
71 MOVQ $__tsan_write(SB), AX // AX = race runtime function to invoke
72 JMP racecalladdr<>(SB) // tail jump, no new frame
74 // func runtime·RaceWrite(addr uintptr)
// Capitalized variant of racewrite; it only forwards to racewrite.
75 TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
76 // This needs to be a tail call, because racewrite reads caller pc.
77 JMP runtime·racewrite(SB) // JMP rather than CALL keeps the caller's return address on top of the stack
79 // void runtime·racewritepc(void *addr, void *callpc, void *pc)
// Variant of racewrite that takes its program counters as explicit arguments.
// Selects __tsan_write_pc in AX and tail-jumps to racecalladdr<>.
80 TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
81 MOVQ addr+0(FP), RARG1 // address being written
82 MOVQ callpc+8(FP), RARG2 // caller-supplied call pc
84 // void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
85 MOVQ $__tsan_write_pc(SB), AX // AX = race runtime function to invoke
86 JMP racecalladdr<>(SB) // tail jump, no new frame
88 // func runtime·racereadrange(addr, size uintptr)
89 // Called from instrumented code.
// Range form of raceread: passes the start address and byte count, selects
// __tsan_read_range in AX and tail-jumps to racecalladdr<>.
90 TEXT runtime·racereadrange(SB), NOSPLIT, $0-16
91 MOVQ addr+0(FP), RARG1 // start of the range
92 MOVQ size+8(FP), RARG2 // size of the range
94 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
95 MOVQ $__tsan_read_range(SB), AX // AX = race runtime function to invoke
96 JMP racecalladdr<>(SB) // tail jump, no new frame
98 // func runtime·RaceReadRange(addr, size uintptr)
// Capitalized variant of racereadrange; it only forwards to racereadrange.
99 TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
100 // This needs to be a tail call, because racereadrange reads caller pc.
101 JMP runtime·racereadrange(SB) // JMP rather than CALL keeps the caller's return address on top of the stack
103 // void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
// Range read with an explicitly supplied pc. Shares __tsan_read_range with
// racereadrange; here the pc comes from the third argument.
104 TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
105 MOVQ addr+0(FP), RARG1 // start of the range
106 MOVQ size+8(FP), RARG2 // size of the range (named sz in the prototype above)
107 MOVQ pc+16(FP), RARG3 // caller-supplied pc
108 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
109 MOVQ $__tsan_read_range(SB), AX // AX = race runtime function to invoke
110 JMP racecalladdr<>(SB) // tail jump, no new frame
112 // func runtime·racewriterange(addr, size uintptr)
113 // Called from instrumented code.
// Range form of racewrite: passes the start address and byte count, selects
// __tsan_write_range in AX and tail-jumps to racecalladdr<>.
114 TEXT runtime·racewriterange(SB), NOSPLIT, $0-16
115 MOVQ addr+0(FP), RARG1 // start of the range
116 MOVQ size+8(FP), RARG2 // size of the range
118 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
119 MOVQ $__tsan_write_range(SB), AX // AX = race runtime function to invoke
120 JMP racecalladdr<>(SB) // tail jump, no new frame
122 // func runtime·RaceWriteRange(addr, size uintptr)
// Capitalized variant of racewriterange; it only forwards to racewriterange.
123 TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
124 // This needs to be a tail call, because racewriterange reads caller pc.
125 JMP runtime·racewriterange(SB) // JMP rather than CALL keeps the caller's return address on top of the stack
127 // void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
// Range write with an explicitly supplied pc. Shares __tsan_write_range with
// racewriterange; here the pc comes from the third argument.
128 TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
129 MOVQ addr+0(FP), RARG1 // start of the range
130 MOVQ size+8(FP), RARG2 // size of the range (named sz in the prototype above)
131 MOVQ pc+16(FP), RARG3 // caller-supplied pc
132 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
133 MOVQ $__tsan_write_range(SB), AX // AX = race runtime function to invoke
134 JMP racecalladdr<>(SB) // tail jump, no new frame
136 // If addr (RARG1) is out of range, do nothing.
137 // Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
// Shared tail of the read/write thunks: they arrive here by JMP with the
// target function in AX and the address in RARG1.
138 TEXT racecalladdr<>(SB), NOSPLIT, $0-0
141 MOVQ g_racectx(R14), RARG0 // goroutine context
142 // Check that addr is within [arenastart, arenaend) or within [noptrdata, enoptrbss).
143 CMPQ RARG1, runtime·racearenastart(SB) // compare against the arena lower bound
145 CMPQ RARG1, runtime·racearenaend(SB) // compare against the arena upper bound (exclusive)
148 MOVQ $runtime·noptrdata(SB), R13 // R13 = start of the second accepted range
151 MOVQ $runtime·enoptrbss(SB), R13 // R13 = end of the second accepted range (exclusive)
155 MOVQ AX, AX // w/o this 6a miscompiles this function
160 // func runtime·racefuncenter(pc uintptr)
161 // Called from instrumented code.
// Reports a function entry to the race runtime via __tsan_func_enter.
// DX is parked in R15 for the duration and restored afterwards, so the
// instrumented function still sees its entry context.
162 TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8
163 MOVQ DX, R15 // save function entry context (for closures)
166 MOVQ g_racectx(R14), RARG0 // goroutine context
167 MOVQ callpc+0(FP), RARG1 // pc argument passed by the instrumented caller
168 // void __tsan_func_enter(ThreadState *thr, void *pc);
169 MOVQ $__tsan_func_enter(SB), AX // AX = race runtime function to invoke
171 MOVQ R15, DX // restore function entry context
174 // func runtime·racefuncexit()
175 // Called from instrumented code.
// Reports a function exit to the race runtime via __tsan_func_exit; the
// goroutine context is the only argument.
176 TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0
179 MOVQ g_racectx(R14), RARG0 // goroutine context
180 // void __tsan_func_exit(ThreadState *thr);
181 MOVQ $__tsan_func_exit(SB), AX // AX = race runtime function to invoke
184 // Atomic operations for the sync/atomic package.
// LoadInt32 selects the race runtime's 32-bit atomic load and dispatches
// through racecallatomic<>, which expects the target function in AX.
187 TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
188 MOVQ $__tsan_go_atomic32_load(SB), AX // AX = target function
189 CALL racecallatomic<>(SB)
// LoadInt64 selects the race runtime's 64-bit atomic load and dispatches
// through racecallatomic<>, which expects the target function in AX.
192 TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
193 MOVQ $__tsan_go_atomic64_load(SB), AX // AX = target function
194 CALL racecallatomic<>(SB)
// LoadUint32 is an alias: it tail-jumps to the Int32 implementation.
197 TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
198 JMP sync∕atomic·LoadInt32(SB)
// LoadUint64 is an alias: it tail-jumps to the Int64 implementation.
200 TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
201 JMP sync∕atomic·LoadInt64(SB)
// LoadUintptr is an alias: it tail-jumps to the Int64 implementation.
203 TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
204 JMP sync∕atomic·LoadInt64(SB)
// LoadPointer is an alias: it tail-jumps to the Int64 implementation.
206 TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
207 JMP sync∕atomic·LoadInt64(SB)
// StoreInt32 selects the race runtime's 32-bit atomic store and dispatches
// through racecallatomic<>, which expects the target function in AX.
210 TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
211 MOVQ $__tsan_go_atomic32_store(SB), AX // AX = target function
212 CALL racecallatomic<>(SB)
// StoreInt64 selects the race runtime's 64-bit atomic store and dispatches
// through racecallatomic<>, which expects the target function in AX.
215 TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
216 MOVQ $__tsan_go_atomic64_store(SB), AX // AX = target function
217 CALL racecallatomic<>(SB)
// StoreUint32 is an alias: it tail-jumps to the Int32 implementation.
220 TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
221 JMP sync∕atomic·StoreInt32(SB)
// StoreUint64 is an alias: it tail-jumps to the Int64 implementation.
223 TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
224 JMP sync∕atomic·StoreInt64(SB)
// StoreUintptr is an alias: it tail-jumps to the Int64 implementation.
226 TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
227 JMP sync∕atomic·StoreInt64(SB)
// StorePointer is an alias: it tail-jumps to the Int64 implementation.
229 TEXT sync∕atomic·StorePointer(SB), NOSPLIT, $0-0
230 JMP sync∕atomic·StoreInt64(SB)
// SwapInt32 selects the race runtime's 32-bit atomic exchange and dispatches
// through racecallatomic<>, which expects the target function in AX.
233 TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
234 MOVQ $__tsan_go_atomic32_exchange(SB), AX // AX = target function
235 CALL racecallatomic<>(SB)
// SwapInt64 selects the race runtime's 64-bit atomic exchange and dispatches
// through racecallatomic<>, which expects the target function in AX.
238 TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
239 MOVQ $__tsan_go_atomic64_exchange(SB), AX // AX = target function
240 CALL racecallatomic<>(SB)
// SwapUint32 is an alias: it tail-jumps to the Int32 implementation.
243 TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
244 JMP sync∕atomic·SwapInt32(SB)
// SwapUint64 is an alias: it tail-jumps to the Int64 implementation.
246 TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
247 JMP sync∕atomic·SwapInt64(SB)
// SwapUintptr is an alias: it tail-jumps to the Int64 implementation.
249 TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
250 JMP sync∕atomic·SwapInt64(SB)
// SwapPointer is an alias: it tail-jumps to the Int64 implementation.
252 TEXT sync∕atomic·SwapPointer(SB), NOSPLIT, $0-0
253 JMP sync∕atomic·SwapInt64(SB)
// AddInt32 dispatches to the race runtime's 32-bit fetch_add through
// racecallatomic<>. After the call the add operand is reloaded so the
// fetch_add result can be converted to add_fetch.
256 TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
257 MOVQ $__tsan_go_atomic32_fetch_add(SB), AX // AX = target function
258 CALL racecallatomic<>(SB)
259 MOVL add+8(FP), AX // convert fetch_add to add_fetch
// AddInt64 dispatches to the race runtime's 64-bit fetch_add through
// racecallatomic<>. After the call the add operand is reloaded so the
// fetch_add result can be converted to add_fetch.
263 TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
264 MOVQ $__tsan_go_atomic64_fetch_add(SB), AX // AX = target function
265 CALL racecallatomic<>(SB)
266 MOVQ add+8(FP), AX // convert fetch_add to add_fetch
// AddUint32 is an alias: it tail-jumps to the Int32 implementation.
270 TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
271 JMP sync∕atomic·AddInt32(SB)
// AddUint64 is an alias: it tail-jumps to the Int64 implementation.
273 TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
274 JMP sync∕atomic·AddInt64(SB)
// AddUintptr is an alias: it tail-jumps to the Int64 implementation.
276 TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
277 JMP sync∕atomic·AddInt64(SB)
// AddPointer is an alias: it tail-jumps to the Int64 implementation.
279 TEXT sync∕atomic·AddPointer(SB), NOSPLIT, $0-0
280 JMP sync∕atomic·AddInt64(SB)
// CompareAndSwapInt32 selects the race runtime's 32-bit compare_exchange and
// dispatches through racecallatomic<>, which expects the target function in AX.
283 TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
284 MOVQ $__tsan_go_atomic32_compare_exchange(SB), AX // AX = target function
285 CALL racecallatomic<>(SB)
// CompareAndSwapInt64 selects the race runtime's 64-bit compare_exchange and
// dispatches through racecallatomic<>, which expects the target function in AX.
288 TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
289 MOVQ $__tsan_go_atomic64_compare_exchange(SB), AX // AX = target function
290 CALL racecallatomic<>(SB)
// CompareAndSwapUint32 is an alias: it tail-jumps to the Int32 implementation.
293 TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
294 JMP sync∕atomic·CompareAndSwapInt32(SB)
// CompareAndSwapUint64 is an alias: it tail-jumps to the Int64 implementation.
296 TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
297 JMP sync∕atomic·CompareAndSwapInt64(SB)
// CompareAndSwapUintptr is an alias: it tail-jumps to the Int64 implementation.
299 TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
300 JMP sync∕atomic·CompareAndSwapInt64(SB)
// CompareAndSwapPointer is an alias: it tail-jumps to the Int64 implementation.
302 TEXT sync∕atomic·CompareAndSwapPointer(SB), NOSPLIT, $0-0
303 JMP sync∕atomic·CompareAndSwapInt64(SB)
305 // Generic atomic operation implementation.
306 // AX already contains target function.
// Reached by CALL from the sync/atomic thunks, so (SP) holds the return
// address into the thunk and 8(SP) the return address into the thunk's caller.
307 TEXT racecallatomic<>(SB), NOSPLIT, $0-0
308 // Trigger SIGSEGV early.
313 MOVQ g_racectx(R14), RARG0 // goroutine context
314 MOVQ 8(SP), RARG1 // caller pc
315 MOVQ (SP), RARG2 // pc
316 LEAQ 16(SP), RARG3 // arguments
319 // void runtime·racecall(void(*f)(...), ...)
320 // Calls C function f from race runtime and passes up to 4 arguments to it.
321 // The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
// The four values after f (frame offsets 8 through 32) are copied into
// RARG0-RARG3.
322 TEXT runtime·racecall(SB), NOSPLIT, $0-0
324 MOVQ arg0+8(FP), RARG0 // first argument for f
325 MOVQ arg1+16(FP), RARG1 // second argument for f
326 MOVQ arg2+24(FP), RARG2 // third argument for f
327 MOVQ arg3+32(FP), RARG3 // fourth argument for f
330 // Switches SP to g0 stack and calls (AX). Arguments already set.
// The incoming SP is kept in R12 while the stack is switched.
331 TEXT racecall<>(SB), NOSPLIT, $0-0
335 // Switch to g0 stack.
336 MOVQ SP, R12 // callee-saved, preserved across the CALL
339 JE call // already on g0
340 MOVQ (g_sched+gobuf_sp)(R10), SP // SP = saved sched.sp of the g in R10 (presumably g0 — confirm)
342 ANDQ $~15, SP // alignment for gcc ABI
347 // C->Go callback thunk that allows calling runtime·racesymbolize from C code.
348 // Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
349 // The overall effect of Go->C->Go call chain is similar to that of mcall.
350 TEXT runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
351 // Save callee-saved registers (Go code won't respect that).
352 // This is superset of darwin/linux/windows registers.
366 MOVQ R14, g(R12) // g = m->g0
367 MOVQ RARG0, 0(SP) // func arg
368 CALL runtime·racesymbolize(SB)
369 // All registers are smashed after Go code, reload.
373 MOVQ m_curg(R13), R14
374 MOVQ R14, g(R12) // g = m->curg
375 // Restore callee-saved registers.