1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
6 // System calls and other sys.stuff for 386, Linux
13 // Most Linux systems use glibc's dynamic linker, which puts the
14 // __kernel_vsyscall vdso helper at 0x10(GS) for easy access from position
15 // independent code, and setldt in runtime does the same in the statically
16 // linked case. However, systems that use an alternative libc, such as
17 // Android's bionic or musl, do not save the helper anywhere, so the only way
18 // to invoke a syscall from position independent code is boring old int $0x80
19 // (which is also what syscall wrappers in bionic/musl use).
21 // The benchmarks also showed that using int $0x80 is as fast as calling
22 // *%gs:0x10 except on AMD Opteron. See https://golang.org/cl/19833
23 // for the benchmark program and raw data.
24 //#define INVOKE_SYSCALL CALL 0x10(GS) // non-portable
25 #define INVOKE_SYSCALL INT $0x80
// System call numbers used by the routines in this file. Each routine loads
// the number it needs into AX (e.g. MOVL $SYS_exit_group, AX).
37 #define SYS_socketcall 102
// NOTE: "setittimer" is spelled with a doubled 't'. runtime·setitimer refers
// to the macro by this exact spelling, so rename both together or not at all.
38 #define SYS_setittimer 104
40 #define SYS_sched_yield 158
41 #define SYS_nanosleep 162
42 #define SYS_rt_sigreturn 173
43 #define SYS_rt_sigaction 174
44 #define SYS_rt_sigprocmask 175
45 #define SYS_sigaltstack 186
47 #define SYS_mincore 218
48 #define SYS_madvise 219
49 #define SYS_gettid 224
51 #define SYS_sched_getaffinity 242
52 #define SYS_set_thread_area 243
53 #define SYS_exit_group 252
54 #define SYS_timer_create 259
55 #define SYS_timer_settime 260
56 #define SYS_timer_delete 263
57 #define SYS_clock_gettime 265
58 #define SYS_tgkill 270
61 TEXT runtime·exit(SB),NOSPLIT,$0
62 MOVL $SYS_exit_group, AX
68 TEXT exit1<>(SB),NOSPLIT,$0
75 // func exitThread(wait *atomic.Uint32)
76 TEXT runtime·exitThread(SB),NOSPLIT,$0-4
78 // We're done using the stack.
80 MOVL $1, AX // exit (just this thread)
81 MOVL $0, BX // exit code
82 INT $0x80 // no stack; must not use CALL
83 // We may not even have a stack any more.
87 TEXT runtime·open(SB),NOSPLIT,$0
99 TEXT runtime·closefd(SB),NOSPLIT,$0
109 TEXT runtime·write1(SB),NOSPLIT,$0
118 TEXT runtime·read(SB),NOSPLIT,$0
127 // func pipe2(flags int32) (r, w int32, errno int32)
128 TEXT runtime·pipe2(SB),NOSPLIT,$0-16
133 MOVL AX, errno+12(FP)
136 TEXT runtime·usleep(SB),NOSPLIT,$8
142 MOVL $1000, AX // usec to nsec
147 MOVL $SYS_nanosleep, AX
153 TEXT runtime·gettid(SB),NOSPLIT,$0-4
159 TEXT runtime·raise(SB),NOSPLIT,$12
162 MOVL AX, BX // arg 1 pid
165 MOVL AX, CX // arg 2 tid
166 MOVL sig+0(FP), DX // arg 3 signal
171 TEXT runtime·raiseproc(SB),NOSPLIT,$12
174 MOVL AX, BX // arg 1 pid
175 MOVL sig+0(FP), CX // arg 2 signal
180 TEXT ·getpid(SB),NOSPLIT,$0-4
186 TEXT ·tgkill(SB),NOSPLIT,$0
194 TEXT runtime·setitimer(SB),NOSPLIT,$0-12
195 MOVL $SYS_setittimer, AX
202 TEXT runtime·timer_create(SB),NOSPLIT,$0-16
203 MOVL $SYS_timer_create, AX
204 MOVL clockid+0(FP), BX
206 MOVL timerid+8(FP), DX
211 TEXT runtime·timer_settime(SB),NOSPLIT,$0-20
212 MOVL $SYS_timer_settime, AX
213 MOVL timerid+0(FP), BX
221 TEXT runtime·timer_delete(SB),NOSPLIT,$0-8
222 MOVL $SYS_timer_delete, AX
223 MOVL timerid+0(FP), BX
228 TEXT runtime·mincore(SB),NOSPLIT,$0-16
229 MOVL $SYS_mincore, AX
237 // func walltime() (sec int64, nsec int32)
238 TEXT runtime·walltime(SB), NOSPLIT, $8-12
239 // We don't know how much stack space the VDSO code will need,
242 MOVL SP, BP // Save old SP; BP unchanged by C code.
246 MOVL g_m(AX), SI // SI unchanged by C code.
248 // Set vdsoPC and vdsoSP for SIGPROF traceback.
249 // Save the old values on stack and restore them on exit,
250 // so this function is reentrant.
251 MOVL m_vdsoPC(SI), CX
252 MOVL m_vdsoSP(SI), DX
258 MOVL CX, m_vdsoPC(SI)
259 MOVL DX, m_vdsoSP(SI)
261 CMPL AX, m_curg(SI) // Only switch if on curg.
265 MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
268 SUBL $16, SP // Space for results
269 ANDL $~15, SP // Align for C code
271 // Stack layout, depending on call path:
272 // x(SP) vDSO INVOKE_SYSCALL
273 // 12 ts.tv_nsec ts.tv_nsec
274 // 8 ts.tv_sec ts.tv_sec
278 MOVL runtime·vdsoClockgettimeSym(SB), AX
282 LEAL 8(SP), BX // &ts (struct timespec)
284 MOVL $0, 0(SP) // CLOCK_REALTIME
289 MOVL $SYS_clock_gettime, AX
290 MOVL $0, BX // CLOCK_REALTIME
295 MOVL 8(SP), AX // sec
296 MOVL 12(SP), BX // nsec
298 MOVL BP, SP // Restore real SP
299 // Restore vdsoPC, vdsoSP
300 // We don't worry about being signaled between the two stores.
301 // If we are not in a signal handler, we'll restore vdsoSP to 0,
302 // and no one will care about vdsoPC. If we are in a signal handler,
303 // we cannot receive another signal.
305 MOVL CX, m_vdsoSP(SI)
307 MOVL CX, m_vdsoPC(SI)
309 // sec is in AX, nsec in BX
310 MOVL AX, sec_lo+0(FP)
311 MOVL $0, sec_hi+4(FP)
315 // int64 nanotime(void) so really
316 // void nanotime(int64 *nsec)
317 TEXT runtime·nanotime1(SB), NOSPLIT, $8-8
318 // Switch to g0 stack. See comment above in runtime·walltime.
320 MOVL SP, BP // Save old SP; BP unchanged by C code.
324 MOVL g_m(AX), SI // SI unchanged by C code.
326 // Set vdsoPC and vdsoSP for SIGPROF traceback.
327 // Save the old values on stack and restore them on exit,
328 // so this function is reentrant.
329 MOVL m_vdsoPC(SI), CX
330 MOVL m_vdsoSP(SI), DX
336 MOVL CX, m_vdsoPC(SI)
337 MOVL DX, m_vdsoSP(SI)
339 CMPL AX, m_curg(SI) // Only switch if on curg.
343 MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
346 SUBL $16, SP // Space for results
347 ANDL $~15, SP // Align for C code
349 MOVL runtime·vdsoClockgettimeSym(SB), AX
353 LEAL 8(SP), BX // &ts (struct timespec)
355 MOVL $1, 0(SP) // CLOCK_MONOTONIC
360 MOVL $SYS_clock_gettime, AX
361 MOVL $1, BX // CLOCK_MONOTONIC
366 MOVL 8(SP), AX // sec
367 MOVL 12(SP), BX // nsec
369 MOVL BP, SP // Restore real SP
370 // Restore vdsoPC, vdsoSP
371 // We don't worry about being signaled between the two stores.
372 // If we are not in a signal handler, we'll restore vdsoSP to 0,
373 // and no one will care about vdsoPC. If we are in a signal handler,
374 // we cannot receive another signal.
376 MOVL CX, m_vdsoSP(SI)
378 MOVL CX, m_vdsoPC(SI)
380 // sec is in AX, nsec in BX
381 // convert to DX:AX nsec
387 MOVL AX, ret_lo+0(FP)
388 MOVL DX, ret_hi+4(FP)
391 TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0
392 MOVL $SYS_rt_sigprocmask, AX
403 TEXT runtime·rt_sigaction(SB),NOSPLIT,$0
404 MOVL $SYS_rt_sigaction, AX
// sigfwd runs with a 12-byte local frame and 16 bytes of arguments. The
// handler it deals with may be either a C function or a Go function (see the
// inline notes), so the stack is aligned for C and SI is preserved in the
// frame.
413 TEXT runtime·sigfwd(SB),NOSPLIT,$12-16
// Use a true 16-byte mask. The previous mask, $-15 (0xFFFFFFF1), left bit 0
// untouched and only yielded an aligned SP when the low bits of SP were
// already clear. $~15 matches the alignment idiom used by walltime and
// nanotime1 in this file.
420 ANDL $~15, SP // align stack to 16 bytes: handler might be a C function
424 MOVL SI, 12(SP) // save SI: handler might be a Go function
430 // Called using C ABI.
431 TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME,$28
432 // Save callee-saved C registers, since the caller may be a C signal handler.
437 // We don't save mxcsr or the x87 control word because sigtrampgo doesn't
446 CALL runtime·sigtrampgo(SB)
454 TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
455 JMP runtime·sigtramp(SB)
457 // For cgo unwinding to work, this function must look precisely like
458 // the one in glibc. The glibc source code is:
459 // https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/i386/libc_sigaction.c;h=0665b41bbcd0986f0b33bf19a7ecbcedf9961d0a#l59
460 // The code that cares about the precise instructions used is:
461 // https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libgcc/config/i386/linux-unwind.h;h=5486223d60272c73d5103b29ae592d2ee998e1cf#l136
463 // For gdb unwinding to work, this function must look precisely like the one in
464 // glibc and must be named "__restore_rt" or contain the string "sigaction" in
465 // the name. The gdb source code is:
466 // https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gdb/i386-linux-tdep.c;h=a6adeca1b97416f7194341151a8ce30723a786a3#l168
467 TEXT runtime·sigreturn__sigaction(SB),NOSPLIT,$0
468 MOVL $SYS_rt_sigreturn, AX
469 // Sigreturn expects same SP as signal handler,
470 // so cannot CALL 0x10(GS) here.
472 INT $3 // not reached
475 TEXT runtime·mmap(SB),NOSPLIT,$0
480 MOVL flags+12(FP), SI
497 TEXT runtime·munmap(SB),NOSPLIT,$0
507 TEXT runtime·madvise(SB),NOSPLIT,$0
508 MOVL $SYS_madvise, AX
516 // int32 futex(int32 *uaddr, int32 op, int32 val,
517 // struct timespec *timeout, int32 *uaddr2, int32 val2);
518 TEXT runtime·futex(SB),NOSPLIT,$0
524 MOVL addr2+16(FP), DI
530 // int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
531 TEXT runtime·clone(SB),NOSPLIT,$0
535 MOVL $0, DX // parent tid ptr
536 MOVL $0, DI // child tid ptr
538 // Copy mp, gp, fn off parent stack for use by child.
548 // cannot use CALL 0x10(GS) here, because the stack changes during the
549 // system call (after CALL 0x10(GS), the child is still using the
550 // parent's stack when executing its RET instruction).
553 // In parent, return.
559 // Paranoia: check that SP is as we expect.
560 NOP SP // tell vet SP changed - stop checking offsets
566 // Initialize AX to Linux tid
579 MOVL AX, m_procid(BX) // save tid as m->procid
581 // set up ldt 7+id to point at m->tls.
584 ADDL $7, DI // m0 is LDT#7. count up.
585 // setldt(tls#, &tls, sizeof tls)
586 PUSHAL // save registers
587 PUSHL $32 // sizeof tls
590 CALL runtime·setldt(SB)
596 // Now segment is established. Initialize m, g.
601 CALL runtime·stackcheck(SB) // smashes AX, CX
602 MOVL 0(DX), DX // paranoia; check they are not nil
605 // more paranoia; check that stack splitting code works
607 CALL runtime·emptyfunc(SB)
615 TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
616 MOVL $SYS_sigaltstack, AX
626 // struct user_desc {
627 // unsigned int entry_number;
628 // unsigned long base_addr;
629 // unsigned int limit;
630 // unsigned int seg_32bit:1;
631 // unsigned int contents:2;
632 // unsigned int read_exec_only:1;
633 // unsigned int limit_in_pages:1;
634 // unsigned int seg_not_present:1;
635 // unsigned int useable:1;
637 #define SEG_32BIT 0x01
638 // contents are the 2 bits 0x02 and 0x04.
639 #define CONTENTS_DATA 0x00
640 #define CONTENTS_STACK 0x02
641 #define CONTENTS_CODE 0x04
642 #define READ_EXEC_ONLY 0x08
643 #define LIMIT_IN_PAGES 0x10
644 #define SEG_NOT_PRESENT 0x20
647 // `-1` means the kernel will pick a TLS entry on the first setldt call,
648 // which happens during runtime init. We then store the entry the kernel
649 // chose and reuse it on subsequent calls when creating new threads.
650 DATA runtime·tls_entry_number+0(SB)/4, $-1
651 GLOBL runtime·tls_entry_number(SB), NOPTR, $4
653 // setldt(int entry, int address, int limit)
654 // We use set_thread_area, which mucks with the GDT, instead of modify_ldt,
655 // which would modify the LDT, but is disabled on some kernels.
656 // The name setldt is therefore a misnomer, but we keep it for
657 // compatibility with other platforms.
658 TEXT runtime·setldt(SB),NOSPLIT,$32
662 // Android stores the TLS offset in runtime·tls_g.
663 SUBL runtime·tls_g(SB), DX
667 * When linking against the system libraries,
668 * we use its pthread_create and let it set up %gs
669 * for us. When we do that, the private storage
670 * we get is not at 0(GS), but -4(GS).
671 * To insulate the rest of the tool chain from this
672 * ugliness, 8l rewrites 0(TLS) into -4(GS) for us.
673 * To accommodate that rewrite, we translate
674 * the address here and bump the limit to 0xffffffff (no limit)
675 * so that -4(GS) maps to 0(address).
676 * Also, the final 0(GS) (current 4(DX)) has to point
677 * to itself, to mimic ELF.
679 ADDL $0x4, DX // address
684 MOVL runtime·tls_entry_number(SB), CX
687 LEAL 16(SP), AX // struct user_desc
688 MOVL CX, 0(AX) // unsigned int entry_number
689 MOVL DX, 4(AX) // unsigned long base_addr
690 MOVL $0xfffff, 8(AX) // unsigned int limit
691 MOVL $(SEG_32BIT|LIMIT_IN_PAGES|USEABLE|CONTENTS_DATA), 12(AX) // flag bits
693 // call set_thread_area
694 MOVL AX, BX // user_desc
695 MOVL $SYS_set_thread_area, AX
696 // We can't call this via 0x10(GS) because this is called from setldt0 to set that up.
699 // breakpoint on error
704 // read allocated entry number back out of user_desc
705 LEAL 16(SP), AX // get our user_desc back
708 // store entry number if the kernel allocated it
711 MOVL AX, runtime·tls_entry_number(SB)
713 // compute segment selector - (entry*8+3)
720 TEXT runtime·osyield(SB),NOSPLIT,$0
721 MOVL $SYS_sched_yield, AX
725 TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
726 MOVL $SYS_sched_getaffinity, AX
734 // int access(const char *name, int mode)
735 TEXT runtime·access(SB),NOSPLIT,$0
743 // int connect(int fd, const struct sockaddr *addr, socklen_t addrlen)
744 TEXT runtime·connect(SB),NOSPLIT,$0-16
745 // connect is implemented as socketcall(NR_socket, 3, *(rest of args))
746 // stack already should have fd, addr, addrlen.
747 MOVL $SYS_socketcall, AX
748 MOVL $3, BX // connect
754 // int socket(int domain, int type, int protocol)
755 TEXT runtime·socket(SB),NOSPLIT,$0-16
756 // socket is implemented as socketcall(NR_socket, 1, *(rest of args))
757 // stack already should have domain, type, protocol.
758 MOVL $SYS_socketcall, AX
759 MOVL $1, BX // socket
760 LEAL domain+0(FP), CX
765 // func sbrk0() uintptr
766 TEXT runtime·sbrk0(SB),NOSPLIT,$0-4
767 // Implemented as brk(NULL).