src/runtime/race_amd64.s

   1 // Copyright 2013 The Go Authors.  All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // +build race
   6
   7 #include "go_asm.h"
   8 #include "go_tls.h"
   9 #include "funcdata.h"
  10 #include "textflag.h"
  11
  12 // The following thunks allow calling the gcc-compiled race runtime directly
  13 // from Go code without going all the way through cgo.
  14 // First, it's much faster (up to 50% speedup for real Go programs).
  15 // Second, it eliminates race-related special cases from cgocall and scheduler.
  16 // Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
  17
  18 // A brief recap of the amd64 calling convention.
  19 // Arguments are passed in DI, SI, DX, CX, R8, R9, the rest is on stack.
  20 // Callee-saved registers are: BX, BP, R12-R15.
  21 // SP must be 16-byte aligned.
  22 // On Windows:
  23 // Arguments are passed in CX, DX, R8, R9, the rest is on stack.
  24 // Callee-saved registers are: BX, BP, DI, SI, R12-R15.
  25 // SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
  26 // http://msdn.microsoft.com/en-us/library/ms235286.aspx
  27 // We do not do this, because it seems to be intended for vararg/unprototyped functions.
  28 // Gcc-compiled race runtime does not try to use that space.
  29
  30 #ifdef GOOS_windows
     // Windows: the first four integer arguments travel in CX, DX, R8, R9.
  31 #define RARG0 CX
  32 #define RARG1 DX
  33 #define RARG2 R8
  34 #define RARG3 R9
  35 #else
     // Everywhere else: the first four integer arguments travel in DI, SI, DX, CX.
  36 #define RARG0 DI
  37 #define RARG1 SI
  38 #define RARG2 DX
  39 #define RARG3 CX
  40 #endif
  41
  42 // func runtime·raceread(addr uintptr)
  43 // Called from instrumented code.
     // Puts addr in RARG1 and the word at (SP) in RARG2, selects __tsan_read
     // in AX and tail-jumps to racecalladdr<>, which supplies RARG0.
  44 TEXT    runtime·raceread(SB), NOSPLIT, $0-8
  45         MOVQ    addr+0(FP), RARG1
  46         MOVQ    (SP), RARG2     // frame size is 0, so (SP) is this function's return address (the caller pc)
  47         // void __tsan_read(ThreadState *thr, void *addr, void *pc);
  48         MOVQ    $__tsan_read(SB), AX
  49         JMP     racecalladdr<>(SB)      // tail jump: the eventual RET returns to our caller
  50
  51 // func runtime·RaceRead(addr uintptr)
     // Forwards to runtime·raceread without touching the stack or registers.
  52 TEXT    runtime·RaceRead(SB), NOSPLIT, $0-8
  53         // This needs to be a tail call, because raceread reads the caller pc
             // from (SP); a CALL would push one more return address on top of it.
  54         JMP     runtime·raceread(SB)
  55
  56 // void runtime·racereadpc(void *addr, void *callpc, void *pc)
     // Variant of raceread in which both program counters come from the
     // argument frame rather than from the return address on the stack.
     // Tail-jumps to racecalladdr<> with AX = __tsan_read_pc.
  57 TEXT    runtime·racereadpc(SB), NOSPLIT, $0-24
  58         MOVQ    addr+0(FP), RARG1
  59         MOVQ    callpc+8(FP), RARG2
  60         MOVQ    pc+16(FP), RARG3
  61         // void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
  62         MOVQ    $__tsan_read_pc(SB), AX
  63         JMP     racecalladdr<>(SB)      // RARG0 (thr) is loaded there
  64
  65 // func runtime·racewrite(addr uintptr)
  66 // Called from instrumented code.
     // Puts addr in RARG1 and the word at (SP) in RARG2, selects __tsan_write
     // in AX and tail-jumps to racecalladdr<>, which supplies RARG0.
  67 TEXT    runtime·racewrite(SB), NOSPLIT, $0-8
  68         MOVQ    addr+0(FP), RARG1
  69         MOVQ    (SP), RARG2     // frame size is 0, so (SP) is this function's return address (the caller pc)
  70         // void __tsan_write(ThreadState *thr, void *addr, void *pc);
  71         MOVQ    $__tsan_write(SB), AX
  72         JMP     racecalladdr<>(SB)      // tail jump: the eventual RET returns to our caller
  73
  74 // func runtime·RaceWrite(addr uintptr)
     // Forwards to runtime·racewrite without touching the stack or registers.
  75 TEXT    runtime·RaceWrite(SB), NOSPLIT, $0-8
  76         // This needs to be a tail call, because racewrite reads the caller pc
             // from (SP); a CALL would push one more return address on top of it.
  77         JMP     runtime·racewrite(SB)
  78
  79 // void runtime·racewritepc(void *addr, void *callpc, void *pc)
     // Variant of racewrite in which both program counters come from the
     // argument frame rather than from the return address on the stack.
     // Tail-jumps to racecalladdr<> with AX = __tsan_write_pc.
  80 TEXT    runtime·racewritepc(SB), NOSPLIT, $0-24
  81         MOVQ    addr+0(FP), RARG1
  82         MOVQ    callpc+8(FP), RARG2
  83         MOVQ    pc+16(FP), RARG3
  84         // void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
  85         MOVQ    $__tsan_write_pc(SB), AX
  86         JMP     racecalladdr<>(SB)      // RARG0 (thr) is loaded there
  87
  88 // func runtime·racereadrange(addr, size uintptr)
  89 // Called from instrumented code.
     // Puts addr in RARG1, size in RARG2 and the word at (SP) in RARG3,
     // selects __tsan_read_range in AX and tail-jumps to racecalladdr<>.
     // racecalladdr<> range-checks RARG1 (the start address) only.
  90 TEXT    runtime·racereadrange(SB), NOSPLIT, $0-16
  91         MOVQ    addr+0(FP), RARG1
  92         MOVQ    size+8(FP), RARG2
  93         MOVQ    (SP), RARG3     // frame size is 0, so (SP) is this function's return address (the caller pc)
  94         // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
  95         MOVQ    $__tsan_read_range(SB), AX
  96         JMP     racecalladdr<>(SB)
  97
  98 // func runtime·RaceReadRange(addr, size uintptr)
     // Forwards to runtime·racereadrange without touching the stack or registers.
  99 TEXT    runtime·RaceReadRange(SB), NOSPLIT, $0-16
 100         // This needs to be a tail call, because racereadrange reads the caller pc
             // from (SP); a CALL would push one more return address on top of it.
 101         JMP     runtime·racereadrange(SB)
 102
 103 // void runtime·racereadrangepc1(void *addr, uintptr size, void *pc)
     // Variant of racereadrange in which pc comes from the argument frame
     // rather than from the return address on the stack.
     // Tail-jumps to racecalladdr<> with AX = __tsan_read_range.
 104 TEXT    runtime·racereadrangepc1(SB), NOSPLIT, $0-24
 105         MOVQ    addr+0(FP), RARG1
 106         MOVQ    size+8(FP), RARG2
 107         MOVQ    pc+16(FP), RARG3
 108         // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
 109         MOVQ    $__tsan_read_range(SB), AX
 110         JMP     racecalladdr<>(SB)      // RARG0 (thr) is loaded there
 111
 112 // func runtime·racewriterange(addr, size uintptr)
 113 // Called from instrumented code.
     // Puts addr in RARG1, size in RARG2 and the word at (SP) in RARG3,
     // selects __tsan_write_range in AX and tail-jumps to racecalladdr<>.
     // racecalladdr<> range-checks RARG1 (the start address) only.
 114 TEXT    runtime·racewriterange(SB), NOSPLIT, $0-16
 115         MOVQ    addr+0(FP), RARG1
 116         MOVQ    size+8(FP), RARG2
 117         MOVQ    (SP), RARG3     // frame size is 0, so (SP) is this function's return address (the caller pc)
 118         // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
 119         MOVQ    $__tsan_write_range(SB), AX
 120         JMP     racecalladdr<>(SB)
 121
 122 // func runtime·RaceWriteRange(addr, size uintptr)
     // Forwards to runtime·racewriterange without touching the stack or registers.
 123 TEXT    runtime·RaceWriteRange(SB), NOSPLIT, $0-16
 124         // This needs to be a tail call, because racewriterange reads the caller pc
             // from (SP); a CALL would push one more return address on top of it.
 125         JMP     runtime·racewriterange(SB)
 126
 127 // void runtime·racewriterangepc1(void *addr, uintptr size, void *pc)
     // Variant of racewriterange in which pc comes from the argument frame
     // rather than from the return address on the stack.
     // Tail-jumps to racecalladdr<> with AX = __tsan_write_range.
 128 TEXT    runtime·racewriterangepc1(SB), NOSPLIT, $0-24
 129         MOVQ    addr+0(FP), RARG1
 130         MOVQ    size+8(FP), RARG2
 131         MOVQ    pc+16(FP), RARG3
 132         // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
 133         MOVQ    $__tsan_write_range(SB), AX
 134         JMP     racecalladdr<>(SB)      // RARG0 (thr) is loaded there
 135
 136 // If addr (RARG1) is out of range, do nothing.
 137 // Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
     // In:   AX = race runtime function to call, RARG1 = addr,
     //       RARG2/RARG3 = remaining arguments as set by the jumping thunk.
     // Uses: R12, R13, R14 as scratch; RARG0 is loaded from g.racectx.
     // Reached by JMP, so the RET at the end (here or in racecall<>) returns
     // to the caller of the thunk that jumped here.
 138 TEXT    racecalladdr<>(SB), NOSPLIT, $0-0
 139         get_tls(R12)
 140         MOVQ    g(R12), R14
 141         MOVQ    g_racectx(R14), RARG0   // goroutine context
 142         // Check that addr is within [arenastart, arenaend) or within [noptrdata, enoptrbss).
 143         CMPQ    RARG1, runtime·racearenastart(SB)
 144         JB      data            // addr < arenastart: try the data range instead
 145         CMPQ    RARG1, runtime·racearenaend(SB)
 146         JB      call            // arenastart <= addr < arenaend
             // addr >= arenaend falls through to the data-range check.
 147 data:
 148         MOVQ    $runtime·noptrdata(SB), R13
 149         CMPQ    RARG1, R13
 150         JB      ret             // addr < noptrdata: ignore
 151         MOVQ    $runtime·enoptrbss(SB), R13
 152         CMPQ    RARG1, R13
 153         JAE     ret             // addr >= enoptrbss: ignore
 154 call:
 155         MOVQ    AX, AX          // w/o this 6a miscompiles this function
 156         JMP     racecall<>(SB)
 157 ret:
 158         RET
 159
 160 // func runtime·racefuncenter(pc uintptr)
 161 // Called from instrumented code.
     // Calls __tsan_func_enter(thr, callpc) through racecall<> and returns
     // with DX holding the value it had on entry.
 162 TEXT    runtime·racefuncenter(SB), NOSPLIT, $0-8
 163         MOVQ    DX, R15         // save function entry context (for closures)
             // R15 is in the callee-saved set listed at the top of the file for
             // both ABIs, so the C call is expected to leave it alone; racecall<>
             // itself only writes R10, R12, R13 and R14.
 164         get_tls(R12)
 165         MOVQ    g(R12), R14
 166         MOVQ    g_racectx(R14), RARG0   // goroutine context
 167         MOVQ    callpc+0(FP), RARG1
 168         // void __tsan_func_enter(ThreadState *thr, void *pc);
 169         MOVQ    $__tsan_func_enter(SB), AX
 170         CALL    racecall<>(SB)  // CALL, not JMP: DX must be restored afterwards
 171         MOVQ    R15, DX // restore function entry context
 172         RET
 173
 174 // func runtime·racefuncexit()
 175 // Called from instrumented code.
     // Loads g.racectx into RARG0 and tail-jumps to racecall<> with
     // AX = __tsan_func_exit.
 176 TEXT    runtime·racefuncexit(SB), NOSPLIT, $0-0
 177         get_tls(R12)
 178         MOVQ    g(R12), R14
 179         MOVQ    g_racectx(R14), RARG0   // goroutine context
 180         // void __tsan_func_exit(ThreadState *thr);
 181         MOVQ    $__tsan_func_exit(SB), AX
 182         JMP     racecall<>(SB)  // tail jump: racecall<>'s RET returns to our caller
 183
 184 // Atomic operations for sync/atomic package.
 185
 186 // Load
     // The Int32/Int64 thunks put the matching __tsan_go_atomic*_load function
     // in AX and CALL racecallatomic<>, which locates its pc, the caller pc and
     // the argument block relative to SP. The remaining variants tail-jump to
     // one of those two; Uintptr and Pointer use the 64-bit one.
 187 TEXT    sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
 188         MOVQ    $__tsan_go_atomic32_load(SB), AX
 189         CALL    racecallatomic<>(SB)
 190         RET
 191
 192 TEXT    sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
 193         MOVQ    $__tsan_go_atomic64_load(SB), AX
 194         CALL    racecallatomic<>(SB)
 195         RET
 196
 197 TEXT    sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
 198         JMP     sync∕atomic·LoadInt32(SB)
 199
 200 TEXT    sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
 201         JMP     sync∕atomic·LoadInt64(SB)
 202
 203 TEXT    sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
 204         JMP     sync∕atomic·LoadInt64(SB)
 205
 206 TEXT    sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
 207         JMP     sync∕atomic·LoadInt64(SB)
 208
 209 // Store
     // The Int32/Int64 thunks put the matching __tsan_go_atomic*_store function
     // in AX and CALL racecallatomic<>, which locates its pc, the caller pc and
     // the argument block relative to SP. The remaining variants tail-jump to
     // one of those two; Uintptr and Pointer use the 64-bit one.
 210 TEXT    sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
 211         MOVQ    $__tsan_go_atomic32_store(SB), AX
 212         CALL    racecallatomic<>(SB)
 213         RET
 214
 215 TEXT    sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
 216         MOVQ    $__tsan_go_atomic64_store(SB), AX
 217         CALL    racecallatomic<>(SB)
 218         RET
 219
 220 TEXT    sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
 221         JMP     sync∕atomic·StoreInt32(SB)
 222
 223 TEXT    sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
 224         JMP     sync∕atomic·StoreInt64(SB)
 225
 226 TEXT    sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
 227         JMP     sync∕atomic·StoreInt64(SB)
 228
 229 TEXT    sync∕atomic·StorePointer(SB), NOSPLIT, $0-0
 230         JMP     sync∕atomic·StoreInt64(SB)
 231
 232 // Swap
     // The Int32/Int64 thunks put the matching __tsan_go_atomic*_exchange
     // function in AX and CALL racecallatomic<>, which locates its pc, the
     // caller pc and the argument block relative to SP. The remaining variants
     // tail-jump to one of those two; Uintptr and Pointer use the 64-bit one.
 233 TEXT    sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
 234         MOVQ    $__tsan_go_atomic32_exchange(SB), AX
 235         CALL    racecallatomic<>(SB)
 236         RET
 237
 238 TEXT    sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
 239         MOVQ    $__tsan_go_atomic64_exchange(SB), AX
 240         CALL    racecallatomic<>(SB)
 241         RET
 242
 243 TEXT    sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
 244         JMP     sync∕atomic·SwapInt32(SB)
 245
 246 TEXT    sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
 247         JMP     sync∕atomic·SwapInt64(SB)
 248
 249 TEXT    sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
 250         JMP     sync∕atomic·SwapInt64(SB)
 251
 252 TEXT    sync∕atomic·SwapPointer(SB), NOSPLIT, $0-0
 253         JMP     sync∕atomic·SwapInt64(SB)
 254
 255 // Add
     // The Int32/Int64 thunks put the matching __tsan_go_atomic*_fetch_add
     // function in AX and CALL racecallatomic<>. After the call they add the
     // add argument (frame offset 8) into the ret slot (frame offset 16).
     // NOTE(review): presumably the race runtime leaves the pre-add value in
     // ret, which this adjustment turns into the post-add value; confirm
     // against the race runtime.
     // The remaining variants tail-jump to one of those two; Uintptr and
     // Pointer use the 64-bit one.
 256 TEXT    sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
 257         MOVQ    $__tsan_go_atomic32_fetch_add(SB), AX
 258         CALL    racecallatomic<>(SB)
 259         MOVL    add+8(FP), AX   // convert fetch_add to add_fetch
 260         ADDL    AX, ret+16(FP)
 261         RET
 262
 263 TEXT    sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
 264         MOVQ    $__tsan_go_atomic64_fetch_add(SB), AX
 265         CALL    racecallatomic<>(SB)
 266         MOVQ    add+8(FP), AX   // convert fetch_add to add_fetch
 267         ADDQ    AX, ret+16(FP)
 268         RET
 269
 270 TEXT    sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
 271         JMP     sync∕atomic·AddInt32(SB)
 272
 273 TEXT    sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
 274         JMP     sync∕atomic·AddInt64(SB)
 275
 276 TEXT    sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
 277         JMP     sync∕atomic·AddInt64(SB)
 278
 279 TEXT    sync∕atomic·AddPointer(SB), NOSPLIT, $0-0
 280         JMP     sync∕atomic·AddInt64(SB)
 281
 282 // CompareAndSwap
     // The Int32/Int64 thunks put the matching
     // __tsan_go_atomic*_compare_exchange function in AX and CALL
     // racecallatomic<>, which locates its pc, the caller pc and the argument
     // block relative to SP. The remaining variants tail-jump to one of those
     // two; Uintptr and Pointer use the 64-bit one.
 283 TEXT    sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
 284         MOVQ    $__tsan_go_atomic32_compare_exchange(SB), AX
 285         CALL    racecallatomic<>(SB)
 286         RET
 287
 288 TEXT    sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
 289         MOVQ    $__tsan_go_atomic64_compare_exchange(SB), AX
 290         CALL    racecallatomic<>(SB)
 291         RET
 292
 293 TEXT    sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
 294         JMP     sync∕atomic·CompareAndSwapInt32(SB)
 295
 296 TEXT    sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
 297         JMP     sync∕atomic·CompareAndSwapInt64(SB)
 298
 299 TEXT    sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
 300         JMP     sync∕atomic·CompareAndSwapInt64(SB)
 301
 302 TEXT    sync∕atomic·CompareAndSwapPointer(SB), NOSPLIT, $0-0
 303         JMP     sync∕atomic·CompareAndSwapInt64(SB)
 304
 305 // Generic atomic operation implementation.
 306 // AX already contains target function.
     // Reached by CALL from a zero-frame sync/atomic thunk, so on entry:
     //   (SP)    return address into that thunk (passed as pc)
     //   8(SP)   the thunk's own return address (passed as caller pc)
     //   16(SP)  start of the sync/atomic call's argument block
     // Uses R12 and R14 as scratch, then tail-jumps to racecall<>.
 307 TEXT    racecallatomic<>(SB), NOSPLIT, $0-0
 308         // Trigger SIGSEGV early.
             // The first argument word is loaded and dereferenced; the value read
             // is discarded because get_tls overwrites R12 right after.
 309         MOVQ    16(SP), R12
 310         MOVL    (R12), R12
 311         get_tls(R12)
 312         MOVQ    g(R12), R14
 313         MOVQ    g_racectx(R14), RARG0   // goroutine context
 314         MOVQ    8(SP), RARG1    // caller pc
 315         MOVQ    (SP), RARG2     // pc
 316         LEAQ    16(SP), RARG3   // arguments
 317         JMP     racecall<>(SB)
 318
 319 // void runtime·racecall(void(*f)(...), ...)
 320 // Calls C function f from race runtime and passes up to 4 arguments to it.
 321 // The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
     // The TEXT line therefore declares a 0-byte argument frame even though
     // five words (fn plus arg0..arg3) are read from it. All four argument
     // slots are loaded unconditionally, then control tail-jumps to racecall<>.
 322 TEXT    runtime·racecall(SB), NOSPLIT, $0-0
 323         MOVQ    fn+0(FP), AX
 324         MOVQ    arg0+8(FP), RARG0
 325         MOVQ    arg1+16(FP), RARG1
 326         MOVQ    arg2+24(FP), RARG2
 327         MOVQ    arg3+32(FP), RARG3
 328         JMP     racecall<>(SB)
 329
 330 // Switches SP to g0 stack and calls (AX). Arguments already set.
     // In:   AX = C function to call; RARG0-RARG3 = its arguments.
     // Uses: R10, R12, R13, R14. The entry SP is parked in R12 across the
     //       call and put back before RET, so the switch to the g0 stack and
     //       the alignment adjustment are both undone on return.
 331 TEXT    racecall<>(SB), NOSPLIT, $0-0
 332         get_tls(R12)
 333         MOVQ    g(R12), R14
 334         MOVQ    g_m(R14), R13
 335         // Switch to g0 stack.
 336         MOVQ    SP, R12         // callee-saved, preserved across the CALL
 337         MOVQ    m_g0(R13), R10
 338         CMPQ    R10, R14
 339         JE      call    // already on g0
 340         MOVQ    (g_sched+gobuf_sp)(R10), SP     // continue on g0's saved stack pointer
 341 call:
 342         ANDQ    $~15, SP        // alignment for gcc ABI
 343         CALL    AX
 344         MOVQ    R12, SP         // back to the stack (and alignment) we entered with
 345         RET
 346
 347 // C->Go callback thunk that lets C code call runtime·racesymbolize.
 348 // Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
 349 // The overall effect of Go->C->Go call chain is similar to that of mcall.
     // In: RARG0 = the one argument handed on to runtime·racesymbolize.
     // On return g is set to m->curg and BX, BP, DI, SI, R12-R15 hold the
     // values they had on entry.
 350 TEXT    runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
 351         // Save callee-saved registers (Go code won't respect that).
 352         // This is a superset of the darwin/linux/windows callee-saved sets.
 353         PUSHQ   BX
 354         PUSHQ   BP
 355         PUSHQ   DI
 356         PUSHQ   SI
 357         PUSHQ   R12
 358         PUSHQ   R13
 359         PUSHQ   R14
 360         PUSHQ   R15
 361         // Set g = g0.
 362         get_tls(R12)
 363         MOVQ    g(R12), R13
 364         MOVQ    g_m(R13), R13
 365         MOVQ    m_g0(R13), R14
 366         MOVQ    R14, g(R12)     // g = m->g0
             // Pass the argument in a freshly pushed slot. After the eight PUSHQs
             // above, 0(SP) is the saved R15; storing the argument there would
             // make the final POPQ R15 return the argument value to the C caller
             // instead of its own R15.
 367         PUSHQ   RARG0           // func arg
 368         CALL    runtime·racesymbolize(SB)
             POPQ    R12             // drop the argument slot; R12 is reloaded just below
 369         // All registers are smashed after Go code, reload.
 370         get_tls(R12)
 371         MOVQ    g(R12), R13
 372         MOVQ    g_m(R13), R13
 373         MOVQ    m_curg(R13), R14
 374         MOVQ    R14, g(R12)     // g = m->curg
 375         // Restore callee-saved registers (reverse order of the pushes above).
 376         POPQ    R15
 377         POPQ    R14
 378         POPQ    R13
 379         POPQ    R12
 380         POPQ    SI
 381         POPQ    DI
 382         POPQ    BP
 383         POPQ    BX
 384         RET