1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
10 // _rt0_386 is common startup code for most 386 systems when using
11 // internal linking. This is the entry point for the program from the
12 // kernel for an ordinary -buildmode=exe program. The stack holds the
13 // number of arguments and the C-style argv.
14 TEXT _rt0_386(SB),NOSPLIT,$8
15 MOVL 8(SP), AX // AX = argc, read from 8(SP)
16 LEAL 12(SP), BX // BX = argv: LEAL takes the address 12(SP), it does not load from it
19 JMP runtime·rt0_go(SB) // tail-jump, not a CALL: rt0_go expects to be jumped to
21 // _rt0_386_lib is common startup code for most 386 systems when
22 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
23 // arrange to invoke this function as a global constructor (for
24 // c-archive) or when the shared library is loaded (for c-shared).
25 // We expect argc and argv to be passed on the stack following the
27 TEXT _rt0_386_lib(SB),NOSPLIT,$0
35 MOVL AX, _rt0_386_lib_argc<>(SB) // fill the argc slot that _rt0_386_lib_go reads back
37 MOVL AX, _rt0_386_lib_argv<>(SB) // fill the argv slot that _rt0_386_lib_go reads back
39 // Synchronous initialization.
40 CALL runtime·libpreinit(SB)
44 // Create a new thread to do the runtime initialization.
45 MOVL _cgo_sys_thread_create(SB), AX // AX = word stored at _cgo_sys_thread_create
49 // Align stack to call C function.
50 // We moved SP to BP above, but BP was clobbered by the libpreinit call.
54 MOVL $_rt0_386_lib_go(SB), BX // BX = address of _rt0_386_lib_go, the new thread's entry point
65 MOVL $0x800000, 0(SP) // stacksize = 8192KB (8 MiB)
66 MOVL $_rt0_386_lib_go(SB), AX // AX = address of _rt0_386_lib_go, the new thread's entry point
68 CALL runtime·newosproc0(SB)
78 // _rt0_386_lib_go initializes the Go runtime.
79 // This is started in a separate thread by _rt0_386_lib.
80 TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
81 MOVL _rt0_386_lib_argc<>(SB), AX // reload the argc slot filled by _rt0_386_lib
83 MOVL _rt0_386_lib_argv<>(SB), AX // reload the argv slot filled by _rt0_386_lib
85 JMP runtime·rt0_go(SB) // hand off to the common bootstrap by jumping, not calling
87 DATA _rt0_386_lib_argc<>(SB)/4, $0 // 4-byte slot, initial value 0
88 GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4 // file-local (<>); NOPTR: contains no Go pointers
89 DATA _rt0_386_lib_argv<>(SB)/4, $0 // 4-byte slot, initial value 0
90 GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4 // file-local (<>); NOPTR: contains no Go pointers
92 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
93 // Copy arguments forward on an even stack.
94 // Users of this function jump to it; they don't call it.
97 SUBL $128, SP // plenty of scratch
99 MOVL AX, 120(SP) // save argc, argv away
102 // set default stack bounds.
103 // _cgo_init may update stackguard.
104 MOVL $runtime·g0(SB), BP // BP = &g0
105 LEAL (-64*1024+104)(SP), BX // BX = SP - 64KB + 104, stored below as the provisional lower bound
106 MOVL BX, g_stackguard0(BP)
107 MOVL BX, g_stackguard1(BP)
108 MOVL BX, (g_stack+stack_lo)(BP)
109 MOVL SP, (g_stack+stack_hi)(BP) // g0.stack.hi = current SP
111 // find out information about the processor we're on
112 // first see if CPUID instruction is supported.
115 XORL $(1<<21), 0(SP) // flip ID bit
120 POPFL // restore EFLAGS
124 bad_proc: // show that the program requires MMX.
126 MOVL $bad_proc_msg<>(SB), 4(SP) // 4(SP) = address of the message text
128 CALL runtime·write(SB)
130 CALL runtime·exit(SB)
131 CALL runtime·abort(SB) // reached only if exit returns
140 CMPL BX, $0x756E6547 // "Genu"
142 CMPL DX, $0x49656E69 // "ineI"
144 CMPL CX, $0x6C65746E // "ntel"
146 MOVB $1, runtime·isIntel(SB) // isIntel = 1
149 // Load EAX=1 cpuid flags
152 MOVL CX, DI // Move to global variable clobbers CX when generating PIC
153 MOVL AX, runtime·processorVersionInfo(SB)
155 // Check for MMX support
156 TESTL $(1<<23), DX // MMX
160 // if there is a _cgo_init, call it to let it
161 // initialize and to set up GS. if not,
162 // we set up GS ourselves.
163 MOVL _cgo_init(SB), AX // AX = word stored at _cgo_init
167 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
168 // Compensate for tls_g (+8).
171 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
174 MOVL BX, 12(SP) // arg 4: not used when using platform's TLS
176 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
178 MOVL BX, 8(SP) // arg 3: not used when using platform's TLS
181 MOVL $setg_gcc<>(SB), BX
182 MOVL BX, 4(SP) // arg 2: setg_gcc
183 MOVL BP, 0(SP) // arg 1: g0
186 // update stackguard after _cgo_init
187 MOVL $runtime·g0(SB), CX // CX = &g0
188 MOVL (g_stack+stack_lo)(CX), AX // AX = g0.stack.lo
189 ADDL $const_stackGuard, AX // AX = g0.stack.lo + stackGuard
190 MOVL AX, g_stackguard0(CX)
191 MOVL AX, g_stackguard1(CX)
194 // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
199 // skip runtime·ldt0setup(SB) and tls test on OpenBSD in all cases
203 // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
210 // store through it, to make sure it works
213 MOVL runtime·m0+m_tls(SB), AX // AX = word read back from m0.tls
218 // set up m and g "registers"
220 LEAL runtime·g0(SB), DX // DX = &g0
222 LEAL runtime·m0(SB), AX // AX = &m0
229 CALL runtime·emptyfunc(SB) // fault if stack check is wrong
231 // convention is that the direction flag (D) is always cleared
234 CALL runtime·check(SB)
241 CALL runtime·args(SB)
242 CALL runtime·osinit(SB)
243 CALL runtime·schedinit(SB)
245 // create a new goroutine to start program
246 PUSHL $runtime·mainPC(SB) // entry
247 CALL runtime·newproc(SB)
251 CALL runtime·mstart(SB)
253 CALL runtime·abort(SB) // reached only if mstart returns
256 DATA bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
257 GLOBL bad_proc_msg<>(SB), RODATA, $61 // 61 bytes = the message including its trailing newline; read-only, file-local
259 DATA runtime·mainPC+0(SB)/4,$runtime·main(SB) // 4-byte word holding the address of runtime·main
260 GLOBL runtime·mainPC(SB),RODATA,$4 // read-only; rt0_go pushes &mainPC as the entry of the first goroutine
262 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0 // NOSPLIT: no stack-growth prologue; $0-0: no local frame, no argument/result bytes
266 TEXT runtime·asminit(SB),NOSPLIT,$0-0
267 // Linux and MinGW start the FPU in extended double precision.
268 // Other operating systems use double precision.
269 // Change to double precision to match them,
270 // and to match other hardware that only has double.
271 FLDCW runtime·controlWord64(SB) // load the x87 control word from runtime·controlWord64
274 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0 // TOPFRAME: marks this as the outermost frame, where stack unwinding stops
275 CALL runtime·mstart0(SB) // the real work is done by mstart0
283 // restore state from Gobuf; longjmp
284 TEXT runtime·gogo(SB), NOSPLIT, $0-4
285 MOVL buf+0(FP), BX // BX = buf, the gobuf pointer argument
287 MOVL 0(DX), CX // make sure g != nil: the load through DX faults on a nil pointer
290 TEXT gogo<>(SB), NOSPLIT, $0
293 MOVL gobuf_sp(BX), SP // restore SP
294 MOVL gobuf_ret(BX), AX // AX = saved ret
295 MOVL gobuf_ctxt(BX), DX // DX = saved ctxt
296 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
297 MOVL $0, gobuf_ret(BX)
298 MOVL $0, gobuf_ctxt(BX)
299 MOVL gobuf_pc(BX), BX // BX = saved pc; loaded last because it overwrites the gobuf pointer
302 // func mcall(fn func(*g))
303 // Switch to m->g0's stack, call fn(g).
304 // Fn must never return. It should gogo(&g->sched)
305 // to keep running g.
306 TEXT runtime·mcall(SB), NOSPLIT, $0-4
310 MOVL g(DX), AX // save state in g->sched
311 MOVL 0(SP), BX // caller's PC
312 MOVL BX, (g_sched+gobuf_pc)(AX)
313 LEAL fn+0(FP), BX // caller's SP
314 MOVL BX, (g_sched+gobuf_sp)(AX)
316 // switch to m->g0 & its stack, call fn
320 CMPL SI, AX // if g == m->g0 call badmcall
322 MOVL $runtime·badmcall(SB), AX // AX = address of runtime·badmcall
324 MOVL SI, g(DX) // g = m->g0
325 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
331 MOVL $runtime·badmcall2(SB), AX // AX = address of runtime·badmcall2
335 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
336 // of the G stack. We need to distinguish the routine that
337 // lives at the bottom of the G stack from the one that lives
338 // at the top of the system stack because the one at the top of
339 // the system stack terminates the stack walk (see topofstack()).
340 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0 // its address is the fake PC stored by gosave_systemstack_switch<>
343 // func systemstack(fn func())
344 TEXT runtime·systemstack(SB), NOSPLIT, $0-4
345 MOVL fn+0(FP), DI // DI = fn
347 MOVL g(CX), AX // AX = g
348 MOVL g_m(AX), BX // BX = m
350 CMPL AX, m_gsignal(BX) // is g the signal-handling g?
353 MOVL m_g0(BX), DX // DX = g0
361 // save our state in g->sched. Pretend to
362 // be systemstack_switch if the G stack is scanned.
363 CALL gosave_systemstack_switch<>(SB)
368 MOVL (g_sched+gobuf_sp)(DX), BX // BX = g0.sched.sp
371 // call target function
382 MOVL (g_sched+gobuf_sp)(AX), SP // SP = sched.sp of the g in AX
383 MOVL $0, (g_sched+gobuf_sp)(AX) // clear the saved sp once it has been consumed
387 // already on system stack; tail call the function
388 // Using a tail call here cleans up tracebacks since we won't stop
389 // at an intermediate systemstack.
395 // Bad: g is not gsignal, not g0, not curg. What is it?
396 // Hide call from linker nosplit analysis.
397 MOVL $runtime·badsystemstack(SB), AX // AX = address of runtime·badsystemstack
402 // support for morestack
405 // Called during function prolog when more stack is needed.
407 // The traceback routines see morestack on a g0 as being
408 // the top of a stack (for example, morestack calling newstack
409 // calling the scheduler calling newm calling gc), so we must
410 // record an argument size. For that purpose, it has no arguments.
411 TEXT runtime·morestack(SB),NOSPLIT,$0-0
412 // Cannot grow scheduler stack (m->g0).
419 CALL runtime·badmorestackg0(SB)
420 CALL runtime·abort(SB) // reached only if badmorestackg0 returns
422 // Cannot grow signal stack.
423 MOVL m_gsignal(BX), SI // SI = m.gsignal
426 CALL runtime·badmorestackgsignal(SB)
427 CALL runtime·abort(SB) // reached only if badmorestackgsignal returns
430 // Set m->morebuf to f's caller.
431 NOP SP // tell vet SP changed - stop checking offsets
432 MOVL 4(SP), DI // f's caller's PC
433 MOVL DI, (m_morebuf+gobuf_pc)(BX)
434 LEAL 8(SP), CX // f's caller's SP
435 MOVL CX, (m_morebuf+gobuf_sp)(BX)
438 MOVL SI, (m_morebuf+gobuf_g)(BX)
440 // Set g->sched to context in f.
441 MOVL 0(SP), AX // f's PC
442 MOVL AX, (g_sched+gobuf_pc)(SI)
443 LEAL 4(SP), AX // f's SP
444 MOVL AX, (g_sched+gobuf_sp)(SI)
445 MOVL DX, (g_sched+gobuf_ctxt)(SI) // sched.ctxt = DX
447 // Call newstack on m->g0's stack.
450 MOVL (g_sched+gobuf_sp)(BP), AX // AX = sched.sp of the g in BP
451 MOVL -4(AX), BX // fault if CALL would, before smashing SP
453 CALL runtime·newstack(SB)
454 CALL runtime·abort(SB) // crash if newstack returns
457 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
459 JMP runtime·morestack(SB) // tail-jump into morestack
461 // reflectcall: call a function with the given argument list
462 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
463 // we don't have variable-sized frames, so we use a small number
464 // of constant-sized-frame functions to encode a few bits of size in the pc.
465 // Caution: ugly multiline assembly macros in your future!
467 #define DISPATCH(NAME,MAXSIZE) \
470 MOVL $NAME(SB), AX; \
472 // Note: can't just "JMP NAME(SB)" - bad inlining results.
474 TEXT ·reflectcall(SB), NOSPLIT, $0-28
475 MOVL frameSize+20(FP), CX // CX = frameSize argument
476 DISPATCH(runtime·call16, 16) // one DISPATCH per frame size, doubling from 16 bytes up to 1 GiB
477 DISPATCH(runtime·call32, 32)
478 DISPATCH(runtime·call64, 64)
479 DISPATCH(runtime·call128, 128)
480 DISPATCH(runtime·call256, 256)
481 DISPATCH(runtime·call512, 512)
482 DISPATCH(runtime·call1024, 1024)
483 DISPATCH(runtime·call2048, 2048)
484 DISPATCH(runtime·call4096, 4096)
485 DISPATCH(runtime·call8192, 8192)
486 DISPATCH(runtime·call16384, 16384)
487 DISPATCH(runtime·call32768, 32768)
488 DISPATCH(runtime·call65536, 65536)
489 DISPATCH(runtime·call131072, 131072)
490 DISPATCH(runtime·call262144, 262144)
491 DISPATCH(runtime·call524288, 524288)
492 DISPATCH(runtime·call1048576, 1048576)
493 DISPATCH(runtime·call2097152, 2097152)
494 DISPATCH(runtime·call4194304, 4194304)
495 DISPATCH(runtime·call8388608, 8388608)
496 DISPATCH(runtime·call16777216, 16777216)
497 DISPATCH(runtime·call33554432, 33554432)
498 DISPATCH(runtime·call67108864, 67108864)
499 DISPATCH(runtime·call134217728, 134217728)
500 DISPATCH(runtime·call268435456, 268435456)
501 DISPATCH(runtime·call536870912, 536870912)
502 DISPATCH(runtime·call1073741824, 1073741824)
503 MOVL $runtime·badreflectcall(SB), AX // after the last DISPATCH entry: AX = address of runtime·badreflectcall
506 #define CALLFN(NAME,MAXSIZE) \
507 TEXT NAME(SB), WRAPPER, $MAXSIZE-28; \
509 /* copy arguments to stack: SI = source (stackArgs), CX = size (stackArgsSize) */ \
510 MOVL stackArgs+8(FP), SI; \
511 MOVL stackArgsSize+12(FP), CX; \
514 /* call function */ \
517 PCDATA $PCDATA_StackMapIndex, $0; \
519 /* copy return values back: load the operands callRet<> expects in registers */ \
520 MOVL stackArgsType+0(FP), DX; \
521 MOVL stackArgs+8(FP), DI; \
522 MOVL stackArgsSize+12(FP), CX; \
523 MOVL stackRetOffset+16(FP), BX; \
528 CALL callRet<>(SB); \
531 // callRet copies return values back at the end of call*. This is a
532 // separate function so it can allocate stack space for the arguments
533 // to reflectcallmove. It does not follow the Go ABI; it expects its
534 // arguments in registers.
535 TEXT callRet<>(SB), NOSPLIT, $20-0 // $20: 20-byte frame for the outgoing reflectcallmove arguments; -0: no stack arguments of its own
541 CALL runtime·reflectcallmove(SB)
547 CALLFN(·call128, 128) // each CALLFN line defines one WRAPPER function NAME with a fixed MAXSIZE-byte frame
548 CALLFN(·call256, 256)
549 CALLFN(·call512, 512)
550 CALLFN(·call1024, 1024)
551 CALLFN(·call2048, 2048)
552 CALLFN(·call4096, 4096)
553 CALLFN(·call8192, 8192)
554 CALLFN(·call16384, 16384)
555 CALLFN(·call32768, 32768)
556 CALLFN(·call65536, 65536)
557 CALLFN(·call131072, 131072)
558 CALLFN(·call262144, 262144)
559 CALLFN(·call524288, 524288)
560 CALLFN(·call1048576, 1048576)
561 CALLFN(·call2097152, 2097152)
562 CALLFN(·call4194304, 4194304)
563 CALLFN(·call8388608, 8388608)
564 CALLFN(·call16777216, 16777216)
565 CALLFN(·call33554432, 33554432)
566 CALLFN(·call67108864, 67108864)
567 CALLFN(·call134217728, 134217728)
568 CALLFN(·call268435456, 268435456)
569 CALLFN(·call536870912, 536870912)
570 CALLFN(·call1073741824, 1073741824)
572 TEXT runtime·procyield(SB),NOSPLIT,$0-0
573 MOVL cycles+0(FP), AX // AX = cycles argument
580 TEXT ·publicationBarrier(SB),NOSPLIT,$0-0 // $0-0: no local frame, no argument/result bytes
581 // Stores are already ordered on x86, so this is just a
585 // Save state of caller into g->sched,
586 // but using fake PC from systemstack_switch.
587 // Must only be called from functions with no locals ($0)
588 // or else unwinding from systemstack_switch is incorrect.
589 TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
595 MOVL AX, (g_sched+gobuf_sp)(BX) // sched.sp of the g in BX = AX
596 MOVL $runtime·systemstack_switch(SB), AX // AX = address of systemstack_switch
597 MOVL AX, (g_sched+gobuf_pc)(BX) // sched.pc = that address, the fake PC described above
598 MOVL $0, (g_sched+gobuf_ret)(BX) // sched.ret = 0
599 // Assert ctxt is zero. See func save.
600 MOVL (g_sched+gobuf_ctxt)(BX), AX // AX = sched.ctxt, the value being asserted
603 CALL runtime·abort(SB) // crash path of the assertion
608 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
609 // Call fn(arg) aligned appropriately for the gcc ABI.
610 // Called on a system stack, and there may be no g yet (during needm).
611 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8
616 ANDL $~15, SP // alignment, perhaps unnecessary (rounds SP down to a multiple of 16)
617 MOVL DX, 8(SP) // save old SP
618 MOVL BX, 0(SP) // first argument in x86-32 ABI
624 // func asmcgocall(fn, arg unsafe.Pointer) int32
625 // Call fn(arg) on the scheduler stack,
626 // aligned appropriately for the gcc ABI.
627 // See cgocall.go for more details.
628 TEXT ·asmcgocall(SB),NOSPLIT,$0-12
634 // Figure out if we need to switch to m->g0 stack.
635 // We get called to create new OS threads too, and those
636 // come in on the m->g0 stack already. Or we might already
637 // be on the m->gsignal stack.
641 JEQ nosave // Don't even have a G yet.
643 CMPL DI, m_gsignal(BP) // is the g in DI the m's gsignal?
648 CALL gosave_systemstack_switch<>(SB)
651 MOVL (g_sched+gobuf_sp)(SI), SP // SP = sched.sp of the g in SI
654 // Now on a scheduling stack (a pthread-created stack).
656 ANDL $~15, SP // alignment, perhaps unnecessary (rounds SP down to a multiple of 16)
657 MOVL DI, 8(SP) // save g
658 MOVL (g_stack+stack_hi)(DI), DI // DI = g.stack.hi
660 MOVL DI, 4(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
661 MOVL BX, 0(SP) // first argument in x86-32 ABI
664 // Restore registers, g, stack pointer.
667 MOVL (g_stack+stack_hi)(DI), SI // SI = stack.hi of the g in DI
675 // Now on a scheduling stack (a pthread-created stack).
677 ANDL $~15, SP // alignment, perhaps unnecessary (rounds SP down to a multiple of 16)
678 MOVL DX, 4(SP) // save original stack pointer
679 MOVL BX, 0(SP) // first argument in x86-32 ABI
682 MOVL 4(SP), CX // restore original stack pointer
687 // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
688 // See cgocall.go for more details.
689 TEXT ·cgocallback(SB),NOSPLIT,$12-12 // Frame size must match commented places below
692 // If g is nil, Go did not create the current thread.
693 // Call needm to obtain one for temporary use.
694 // In this case, we're running on the thread stack, so there's
695 // lots of space, but the linker doesn't know. Hide the call from
696 // the linker analysis by using an indirect call through AX.
707 MOVL BP, savedm-4(SP) // saved copy of oldm
710 MOVL $runtime·needm(SB), AX // AX = address of needm, for the indirect call described above
712 MOVL $0, savedm-4(SP) // dropm on return
717 // Set m->sched.sp = SP, so that if a panic happens
718 // during the function we are about to execute, it will
719 // have a valid SP to run on the g0 stack.
720 // The next few lines (after the havem label)
721 // will save this SP onto the stack and then write
722 // the same SP back to m->sched.sp. That seems redundant,
723 // but if an unrecovered panic happens, unwindm will
724 // restore the g->sched.sp from the stack location
725 // and then systemstack will try to use it. If we don't set it here,
726 // that restored SP will be uninitialized (typically 0) and
727 // will not be usable.
729 MOVL SP, (g_sched+gobuf_sp)(SI)
732 // Now there's a valid m, and we're running on its m->g0.
733 // Save current m->g0->sched.sp on stack and then set it to SP.
734 // Save current sp in m->g0->sched.sp in preparation for
735 // switch back to m->curg stack.
736 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
738 MOVL (g_sched+gobuf_sp)(SI), AX // AX = previous sched.sp
740 MOVL SP, (g_sched+gobuf_sp)(SI) // sched.sp = current SP
742 // Switch to m->curg stack and call runtime.cgocallbackg.
743 // Because we are taking over the execution of m->curg
744 // but *not* resuming what had been running, we need to
745 // save that information (m->curg->sched) so we can restore it.
746 // We can restore m->curg->sched.sp easily, because calling
747 // runtime.cgocallbackg leaves SP unchanged upon return.
748 // To save m->curg->sched.pc, we push it onto the curg stack and
749 // open a frame the same size as cgocallback's g0 frame.
750 // Once we switch to the curg stack, the pushed PC will appear
751 // to be the return PC of cgocallback, so that the traceback
752 // will seamlessly trace back into the earlier calls.
755 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
756 MOVL (g_sched+gobuf_pc)(SI), BP // BP = saved sched.pc
757 MOVL BP, -4(DI) // "push" return PC on the g stack
758 // Gather our arguments into registers.
762 LEAL -(4+12)(DI), SP // Must match declared frame size: 4 bytes for the pushed PC + the 12-byte frame
766 CALL runtime·cgocallbackg(SB)
768 // Restore g->sched (== m->curg->sched) from saved values.
771 MOVL 12(SP), BP // Must match declared frame size: 12(SP) is the PC stored at -4(DI) above
772 MOVL BP, (g_sched+gobuf_pc)(SI)
773 LEAL (12+4)(SP), DI // Must match declared frame size: undoes the -(4+12) adjustment above
774 MOVL DI, (g_sched+gobuf_sp)(SI)
776 // Switch back to m->g0's stack and restore m->g0->sched.sp.
777 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
778 // so we do not have to restore it.)
783 MOVL (g_sched+gobuf_sp)(SI), SP
785 MOVL AX, (g_sched+gobuf_sp)(SI)
787 // If the m on entry was nil, we called needm above to borrow an m
788 // for the duration of the call. Since the call is over, return it with dropm.
789 MOVL savedm-4(SP), DX // DX = the saved m recorded on entry (0 means dropm is needed)
792 MOVL $runtime·dropm(SB), AX // AX = address of dropm
798 // void setg(G*); set g. for use by needm.
799 TEXT runtime·setg(SB), NOSPLIT, $0-4
802 MOVL runtime·tls_g(SB), CX // CX = value of runtime·tls_g
816 // void setg_gcc(G*); set g. for use by gcc
817 TEXT setg_gcc<>(SB), NOSPLIT, $0 // file-local; rt0_go passes its address to _cgo_init as arg 2
823 TEXT runtime·abort(SB),NOSPLIT,$0-0 // crash routine: this file calls it after calls that must not return (e.g. newstack)
828 // check that SP is in range [g->stack.lo, g->stack.hi)
829 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
832 CMPL (g_stack+stack_hi)(AX), SP // compare stack.hi of the g in AX against SP
834 CALL runtime·abort(SB) // crash path for the stack.hi bound
835 CMPL SP, (g_stack+stack_lo)(AX) // compare SP against stack.lo
837 CALL runtime·abort(SB) // crash path for the stack.lo bound
840 // func cputicks() int64
841 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
842 // LFENCE/MFENCE instruction support is dependent on SSE2.
843 // When no SSE2 support is present do not enforce any serialization
844 // since using CPUID to serialize the instruction stream is
846 #ifdef GO386_softfloat
847 JMP rdtsc // no fence instructions available
849 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1 // is the HasRDTSCP feature flag set?
851 // Instruction stream serializing RDTSCP is supported.
852 // RDTSCP is supported by Intel Nehalem (2008) and
853 // AMD K8 Rev. F (2006) and newer.
856 MOVL AX, ret_lo+0(FP) // low 32 bits of the int64 result
857 MOVL DX, ret_hi+4(FP) // high 32 bits of the int64 result
860 // MFENCE is instruction stream serializing and flushes the
861 // store buffers on AMD. The serialization semantics of LFENCE on AMD
862 // are dependent on MSR C001_1029 and CPU generation.
863 // LFENCE on Intel does wait for all previous instructions to have executed.
864 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
865 // previous instructions executed and all previous loads and stores to be globally visible.
866 // Using MFENCE;LFENCE here aligns the serializing properties without
867 // runtime detection of CPU manufacturer.
874 TEXT ldt0setup<>(SB),NOSPLIT,$16-0 // $16: 16-byte frame for outgoing call arguments; no arguments of its own
876 CALL runtime·wintls(SB)
878 // set up ldt 7 to point at m0.tls
879 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
880 // the entry number is just a hint. setldt will set up GS with what it used.
882 LEAL runtime·m0+m_tls(SB), AX // AX = &m0.tls
884 MOVL $32, 8(SP) // sizeof(tls array)
885 CALL runtime·setldt(SB)
888 TEXT runtime·emptyfunc(SB),0,$0-0 // flags 0 (not NOSPLIT), so the assembler may add the stack-split check; rt0_go calls this to exercise that check
891 // hash function using AES hardware instructions
892 TEXT runtime·memhash(SB),NOSPLIT,$0-16
893 CMPB runtime·useAeshash(SB), $0 // is AES hashing enabled?
895 MOVL p+0(FP), AX // ptr to data
896 MOVL s+8(FP), BX // size
898 JMP aeshashbody<>(SB) // tail-jump to the shared AES hashing body (AX = data, BX = size)
900 JMP runtime·memhashFallback(SB) // tail-jump to the fallback implementation
902 TEXT runtime·strhash(SB),NOSPLIT,$0-12
903 CMPB runtime·useAeshash(SB), $0 // is AES hashing enabled?
905 MOVL p+0(FP), AX // ptr to string object
906 MOVL 4(AX), BX // length of string
907 MOVL (AX), AX // string data
909 JMP aeshashbody<>(SB) // tail-jump to the shared AES hashing body (AX = data, BX = length)
911 JMP runtime·strhashFallback(SB) // tail-jump to the fallback implementation
915 // DX: address to put return value
916 TEXT aeshashbody<>(SB),NOSPLIT,$0-0
917 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
918 PINSRW $4, BX, X0 // 16 bits of length
919 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
920 MOVO X0, X1 // save unscrambled seed
921 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
922 AESENC X0, X0 // scramble seed
941 // 16 bytes loaded at this address won't cross
942 // a page boundary, so we can load it directly.
945 PAND masks<>(SB)(BX*8), X1 // clear the unwanted high bytes using an entry of the masks<> table
948 PXOR X0, X1 // xor data with seed
949 AESENC X1, X1 // scramble combo 3 times
956 // address ends in 1111xxxx. Might be up against
957 // a page boundary, so load ending at last byte.
958 // Then shift bytes down using pshufb.
959 MOVOU -32(AX)(BX*1), X1 // unaligned 16-byte load from AX+BX-32
961 PSHUFB shifts<>(SB)(BX*8), X1 // move the wanted bytes down using an entry of the shifts<> table
965 // Return scrambled input seed
975 // make second starting seed
976 PXOR runtime·aeskeysched+16(SB), X1
979 // load data to be hashed
981 MOVOU -16(AX)(BX*1), X3 // 16 bytes ending at AX+BX
1001 // make 3 more starting seeds
1004 PXOR runtime·aeskeysched+16(SB), X1
1005 PXOR runtime·aeskeysched+32(SB), X2
1006 PXOR runtime·aeskeysched+48(SB), X3
1013 MOVOU -32(AX)(BX*1), X6 // 16 bytes starting at AX+BX-32
1014 MOVOU -16(AX)(BX*1), X7 // 16 bytes ending at AX+BX
1043 // make 3 more starting seeds
1046 PXOR runtime·aeskeysched+16(SB), X1
1047 PXOR runtime·aeskeysched+32(SB), X2
1048 PXOR runtime·aeskeysched+48(SB), X3
1053 // start with last (possibly overlapping) block
1054 MOVOU -64(AX)(BX*1), X4 // X4..X7 = the 64 bytes ending at AX+BX
1055 MOVOU -48(AX)(BX*1), X5
1056 MOVOU -32(AX)(BX*1), X6
1057 MOVOU -16(AX)(BX*1), X7
1059 // scramble state once
1065 // compute number of remaining 64-byte blocks
1070 // scramble state, xor in a block
1090 // 3 more scrambles to finish
1112 TEXT runtime·memhash32(SB),NOSPLIT,$0-12
1113 CMPB runtime·useAeshash(SB), $0 // is AES hashing enabled?
1115 MOVL p+0(FP), AX // ptr to data
1116 MOVL h+4(FP), X0 // seed
1117 PINSRD $1, (AX), X0 // data
1118 AESENC runtime·aeskeysched+0(SB), X0 // three AES rounds, keyed by successive 16-byte chunks of aeskeysched
1119 AESENC runtime·aeskeysched+16(SB), X0
1120 AESENC runtime·aeskeysched+32(SB), X0
1124 JMP runtime·memhash32Fallback(SB) // tail-jump to the fallback implementation
1126 TEXT runtime·memhash64(SB),NOSPLIT,$0-12
1127 CMPB runtime·useAeshash(SB), $0 // is AES hashing enabled?
1129 MOVL p+0(FP), AX // ptr to data
1130 MOVQ (AX), X0 // data
1131 PINSRD $2, h+4(FP), X0 // seed
1132 AESENC runtime·aeskeysched+0(SB), X0 // three AES rounds, keyed by successive 16-byte chunks of aeskeysched
1133 AESENC runtime·aeskeysched+16(SB), X0
1134 AESENC runtime·aeskeysched+32(SB), X0
1138 JMP runtime·memhash64Fallback(SB) // tail-jump to the fallback implementation
1140 // simple mask to get rid of data in the high part of the register.
1141 DATA masks<>+0x00(SB)/4, $0x00000000
1142 DATA masks<>+0x04(SB)/4, $0x00000000
1143 DATA masks<>+0x08(SB)/4, $0x00000000
1144 DATA masks<>+0x0c(SB)/4, $0x00000000
1146 DATA masks<>+0x10(SB)/4, $0x000000ff
1147 DATA masks<>+0x14(SB)/4, $0x00000000
1148 DATA masks<>+0x18(SB)/4, $0x00000000
1149 DATA masks<>+0x1c(SB)/4, $0x00000000
1151 DATA masks<>+0x20(SB)/4, $0x0000ffff
1152 DATA masks<>+0x24(SB)/4, $0x00000000
1153 DATA masks<>+0x28(SB)/4, $0x00000000
1154 DATA masks<>+0x2c(SB)/4, $0x00000000
1156 DATA masks<>+0x30(SB)/4, $0x00ffffff
1157 DATA masks<>+0x34(SB)/4, $0x00000000
1158 DATA masks<>+0x38(SB)/4, $0x00000000
1159 DATA masks<>+0x3c(SB)/4, $0x00000000
1161 DATA masks<>+0x40(SB)/4, $0xffffffff
1162 DATA masks<>+0x44(SB)/4, $0x00000000
1163 DATA masks<>+0x48(SB)/4, $0x00000000
1164 DATA masks<>+0x4c(SB)/4, $0x00000000
1166 DATA masks<>+0x50(SB)/4, $0xffffffff
1167 DATA masks<>+0x54(SB)/4, $0x000000ff
1168 DATA masks<>+0x58(SB)/4, $0x00000000
1169 DATA masks<>+0x5c(SB)/4, $0x00000000
1171 DATA masks<>+0x60(SB)/4, $0xffffffff
1172 DATA masks<>+0x64(SB)/4, $0x0000ffff
1173 DATA masks<>+0x68(SB)/4, $0x00000000
1174 DATA masks<>+0x6c(SB)/4, $0x00000000
1176 DATA masks<>+0x70(SB)/4, $0xffffffff
1177 DATA masks<>+0x74(SB)/4, $0x00ffffff
1178 DATA masks<>+0x78(SB)/4, $0x00000000
1179 DATA masks<>+0x7c(SB)/4, $0x00000000
1181 DATA masks<>+0x80(SB)/4, $0xffffffff
1182 DATA masks<>+0x84(SB)/4, $0xffffffff
1183 DATA masks<>+0x88(SB)/4, $0x00000000
1184 DATA masks<>+0x8c(SB)/4, $0x00000000
1186 DATA masks<>+0x90(SB)/4, $0xffffffff
1187 DATA masks<>+0x94(SB)/4, $0xffffffff
1188 DATA masks<>+0x98(SB)/4, $0x000000ff
1189 DATA masks<>+0x9c(SB)/4, $0x00000000
1191 DATA masks<>+0xa0(SB)/4, $0xffffffff
1192 DATA masks<>+0xa4(SB)/4, $0xffffffff
1193 DATA masks<>+0xa8(SB)/4, $0x0000ffff
1194 DATA masks<>+0xac(SB)/4, $0x00000000
1196 DATA masks<>+0xb0(SB)/4, $0xffffffff
1197 DATA masks<>+0xb4(SB)/4, $0xffffffff
1198 DATA masks<>+0xb8(SB)/4, $0x00ffffff
1199 DATA masks<>+0xbc(SB)/4, $0x00000000
1201 DATA masks<>+0xc0(SB)/4, $0xffffffff
1202 DATA masks<>+0xc4(SB)/4, $0xffffffff
1203 DATA masks<>+0xc8(SB)/4, $0xffffffff
1204 DATA masks<>+0xcc(SB)/4, $0x00000000
1206 DATA masks<>+0xd0(SB)/4, $0xffffffff
1207 DATA masks<>+0xd4(SB)/4, $0xffffffff
1208 DATA masks<>+0xd8(SB)/4, $0xffffffff
1209 DATA masks<>+0xdc(SB)/4, $0x000000ff
1211 DATA masks<>+0xe0(SB)/4, $0xffffffff
1212 DATA masks<>+0xe4(SB)/4, $0xffffffff
1213 DATA masks<>+0xe8(SB)/4, $0xffffffff
1214 DATA masks<>+0xec(SB)/4, $0x0000ffff
1216 DATA masks<>+0xf0(SB)/4, $0xffffffff
1217 DATA masks<>+0xf4(SB)/4, $0xffffffff
1218 DATA masks<>+0xf8(SB)/4, $0xffffffff
1219 DATA masks<>+0xfc(SB)/4, $0x00ffffff
1221 GLOBL masks<>(SB),RODATA,$256 // 16 entries of 16 bytes; entry i (at offset i*16) keeps the low i bytes and clears the rest
1223 // these are arguments to pshufb. They move data down from
1224 // the high bytes of the register to the low bytes of the register.
1225 // index is how many bytes to move.
1226 DATA shifts<>+0x00(SB)/4, $0x00000000
1227 DATA shifts<>+0x04(SB)/4, $0x00000000
1228 DATA shifts<>+0x08(SB)/4, $0x00000000
1229 DATA shifts<>+0x0c(SB)/4, $0x00000000
1231 DATA shifts<>+0x10(SB)/4, $0xffffff0f
1232 DATA shifts<>+0x14(SB)/4, $0xffffffff
1233 DATA shifts<>+0x18(SB)/4, $0xffffffff
1234 DATA shifts<>+0x1c(SB)/4, $0xffffffff
1236 DATA shifts<>+0x20(SB)/4, $0xffff0f0e
1237 DATA shifts<>+0x24(SB)/4, $0xffffffff
1238 DATA shifts<>+0x28(SB)/4, $0xffffffff
1239 DATA shifts<>+0x2c(SB)/4, $0xffffffff
1241 DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
1242 DATA shifts<>+0x34(SB)/4, $0xffffffff
1243 DATA shifts<>+0x38(SB)/4, $0xffffffff
1244 DATA shifts<>+0x3c(SB)/4, $0xffffffff
1246 DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
1247 DATA shifts<>+0x44(SB)/4, $0xffffffff
1248 DATA shifts<>+0x48(SB)/4, $0xffffffff
1249 DATA shifts<>+0x4c(SB)/4, $0xffffffff
1251 DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
1252 DATA shifts<>+0x54(SB)/4, $0xffffff0f
1253 DATA shifts<>+0x58(SB)/4, $0xffffffff
1254 DATA shifts<>+0x5c(SB)/4, $0xffffffff
1256 DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
1257 DATA shifts<>+0x64(SB)/4, $0xffff0f0e
1258 DATA shifts<>+0x68(SB)/4, $0xffffffff
1259 DATA shifts<>+0x6c(SB)/4, $0xffffffff
1261 DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
1262 DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
1263 DATA shifts<>+0x78(SB)/4, $0xffffffff
1264 DATA shifts<>+0x7c(SB)/4, $0xffffffff
1266 DATA shifts<>+0x80(SB)/4, $0x0b0a0908
1267 DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
1268 DATA shifts<>+0x88(SB)/4, $0xffffffff
1269 DATA shifts<>+0x8c(SB)/4, $0xffffffff
1271 DATA shifts<>+0x90(SB)/4, $0x0a090807
1272 DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
1273 DATA shifts<>+0x98(SB)/4, $0xffffff0f
1274 DATA shifts<>+0x9c(SB)/4, $0xffffffff
1276 DATA shifts<>+0xa0(SB)/4, $0x09080706
1277 DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
1278 DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
1279 DATA shifts<>+0xac(SB)/4, $0xffffffff
1281 DATA shifts<>+0xb0(SB)/4, $0x08070605
1282 DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
1283 DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
1284 DATA shifts<>+0xbc(SB)/4, $0xffffffff
1286 DATA shifts<>+0xc0(SB)/4, $0x07060504
1287 DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
1288 DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
1289 DATA shifts<>+0xcc(SB)/4, $0xffffffff
1291 DATA shifts<>+0xd0(SB)/4, $0x06050403
1292 DATA shifts<>+0xd4(SB)/4, $0x0a090807
1293 DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
1294 DATA shifts<>+0xdc(SB)/4, $0xffffff0f
1296 DATA shifts<>+0xe0(SB)/4, $0x05040302
1297 DATA shifts<>+0xe4(SB)/4, $0x09080706
1298 DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
1299 DATA shifts<>+0xec(SB)/4, $0xffff0f0e
1301 DATA shifts<>+0xf0(SB)/4, $0x04030201
1302 DATA shifts<>+0xf4(SB)/4, $0x08070605
1303 DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
1304 DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
1306 GLOBL shifts<>(SB),RODATA,$256 // 16 entries of 16 bytes; entry i maps source bytes 16-i..15 to result bytes 0..i-1, and 0xff indices make PSHUFB write zero
1308 TEXT ·checkASM(SB),NOSPLIT,$0-1 // $0-1: no local frame, 1 byte of argument/result space
1309 // check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
1310 MOVL $masks<>(SB), AX // AX = address of masks<>
1311 MOVL $shifts<>(SB), BX // BX = address of shifts<>
1317 TEXT runtime·return0(SB), NOSPLIT, $0 // NOSPLIT: no stack-growth prologue; $0: no local frame
1321 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1322 // Must obey the gcc calling convention.
1323 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1328 MOVL (g_stack+stack_hi)(AX), AX // AX = stack.hi of the g in AX; AX carries the result
1331 // The top-most function running on a goroutine
1332 // returns to goexit+PCQuantum.
1333 TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0 // TOPFRAME: marks this as the outermost frame, where stack unwinding stops
1335 CALL runtime·goexit1(SB) // does not return
1336 // traceback from goexit1 must hit code range of goexit
1339 // Add a module's moduledata to the linked list of moduledata objects. This
1340 // is called from .init_array by a function generated in the linker and so
1341 // follows the platform ABI wrt register preservation -- it only touches AX,
1342 // CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
1343 // instead the pointer to the moduledata is passed in AX.
1344 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1345 MOVL runtime·lastmoduledatap(SB), DX // DX = current tail of the list
1346 MOVL AX, moduledata_next(DX) // tail.next = the new moduledata
1347 MOVL AX, runtime·lastmoduledatap(SB) // the new moduledata becomes the tail
1350 TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12 // $8-12: 8-byte local frame, 12 bytes of arguments + result
1355 FMOVDP F0, ret+4(FP) // store the x87 top-of-stack as the float64 result and pop it
1358 TEXT runtime·float64touint32(SB),NOSPLIT,$12-12 // $12-12: 12-byte local frame, 12 bytes of arguments + result
1361 FLDCW runtime·controlWord64trunc(SB) // load the x87 control word from runtime·controlWord64trunc
1368 // gcWriteBarrier informs the GC about heap pointer writes.
1370 // gcWriteBarrier returns space in a write barrier buffer which
1371 // should be filled in by the caller.
1372 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
1373 // number of bytes of buffer needed in DI, and returns a pointer
1374 // to the buffer space in DI.
1375 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1376 // but may clobber others (e.g., SSE registers).
1377 // Typical use would be, when doing *(CX+88) = AX
1378 // CMPL $0, runtime.writeBarrier(SB)
1380 // CALL runtime.gcWriteBarrier2(SB)
1386 TEXT gcWriteBarrier<>(SB),NOSPLIT,$28
1387 // Save the registers clobbered by the fast path. This is slightly
1388 // faster than having the caller spill these.
1392 // TODO: Consider passing g.m.p in as an argument so they can be shared
1393 // across a sequence of write barriers.
1398 // Get current buffer write position.
1399 MOVL (p_wbBuf+wbBuf_next)(BX), CX // original next position
1400 ADDL DI, CX // new next position
1401 // Is the buffer full?
1402 CMPL CX, (p_wbBuf+wbBuf_end)(BX) // compare the new next position with the end of the buffer
1404 // Commit to the larger buffer.
1405 MOVL CX, (p_wbBuf+wbBuf_next)(BX)
1406 // Make return value (the original next position)
1409 // Restore registers.
1415 // Save all general purpose registers since these could be
1416 // clobbered by wbBufFlush and were not saved by the caller.
1426 CALL runtime·wbBufFlush(SB)
1435 TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0 // gcWriteBarrier1..8: frameless entry points that all tail-jump to gcWriteBarrier<>
1437 JMP gcWriteBarrier<>(SB)
1438 TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
1440 JMP gcWriteBarrier<>(SB)
1441 TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
1443 JMP gcWriteBarrier<>(SB)
1444 TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
1446 JMP gcWriteBarrier<>(SB)
1447 TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
1449 JMP gcWriteBarrier<>(SB)
1450 TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
1452 JMP gcWriteBarrier<>(SB)
1453 TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
1455 JMP gcWriteBarrier<>(SB)
1456 TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
1458 JMP gcWriteBarrier<>(SB)
1460 // Note: these functions use a special calling convention to save generated code space.
1461 // Arguments are passed in registers, but the space for those arguments is allocated
1462 // in the caller's stack frame. These stubs write the args into that stack space and
1463 // then tail call to the corresponding runtime handler.
1464 // The tail call makes these stubs disappear in backtraces.
1465 TEXT runtime·panicIndex(SB),NOSPLIT,$0-8 // $0-8: no local frame; 8 bytes of argument space (owned by the caller)
1468 JMP runtime·goPanicIndex(SB) // tail call to the handler goPanic<Name> for stub panic<Name>
1469 TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
1472 JMP runtime·goPanicIndexU(SB)
1473 TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
1476 JMP runtime·goPanicSliceAlen(SB)
1477 TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
1480 JMP runtime·goPanicSliceAlenU(SB)
1481 TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
1484 JMP runtime·goPanicSliceAcap(SB)
1485 TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
1488 JMP runtime·goPanicSliceAcapU(SB)
1489 TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
1492 JMP runtime·goPanicSliceB(SB)
1493 TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
1496 JMP runtime·goPanicSliceBU(SB)
1497 TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
1500 JMP runtime·goPanicSlice3Alen(SB)
1501 TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
1504 JMP runtime·goPanicSlice3AlenU(SB)
1505 TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
1508 JMP runtime·goPanicSlice3Acap(SB)
1509 TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
1512 JMP runtime·goPanicSlice3AcapU(SB)
1513 TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
1516 JMP runtime·goPanicSlice3B(SB)
1517 TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
1520 JMP runtime·goPanicSlice3BU(SB)
1521 TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
1524 JMP runtime·goPanicSlice3C(SB)
1525 TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
1528 JMP runtime·goPanicSlice3CU(SB)
1529 TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-8
1532 JMP runtime·goPanicSliceConvert(SB)
1534 // Extended versions for 64-bit indexes.
1535 TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12 // $0-12: no local frame; 12 bytes of argument space, 4 more than the 32-bit index stubs
1539 JMP runtime·goPanicExtendIndex(SB) // tail call to the handler goPanicExtend<Name> for stub panicExtend<Name>
1540 TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
1544 JMP runtime·goPanicExtendIndexU(SB)
1545 TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
1549 JMP runtime·goPanicExtendSliceAlen(SB)
1550 TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
1554 JMP runtime·goPanicExtendSliceAlenU(SB)
1555 TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
1559 JMP runtime·goPanicExtendSliceAcap(SB)
1560 TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
1564 JMP runtime·goPanicExtendSliceAcapU(SB)
1565 TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
1569 JMP runtime·goPanicExtendSliceB(SB)
1570 TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
1574 JMP runtime·goPanicExtendSliceBU(SB)
1575 TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
1579 JMP runtime·goPanicExtendSlice3Alen(SB)
1580 TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
1584 JMP runtime·goPanicExtendSlice3AlenU(SB)
1585 TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
1589 JMP runtime·goPanicExtendSlice3Acap(SB)
1590 TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
1594 JMP runtime·goPanicExtendSlice3AcapU(SB)
1595 TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
1599 JMP runtime·goPanicExtendSlice3B(SB)
1600 TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
1604 JMP runtime·goPanicExtendSlice3BU(SB)
1605 TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
1609 JMP runtime·goPanicExtendSlice3C(SB)
1610 TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
1614 JMP runtime·goPanicExtendSlice3CU(SB)
1617 // Use the free TLS_SLOT_APP slot #2 on Android Q.
1618 // Earlier androids are set up in gcc_android.c.
1619 DATA runtime·tls_g+0(SB)/4, $8 // initial value 8 -- presumably slot #2 times the 4-byte slot size; confirm
1620 GLOBL runtime·tls_g+0(SB), NOPTR, $4 // 4-byte word, no Go pointers
1623 GLOBL runtime·tls_g+0(SB), NOPTR, $4 // NOTE(review): second GLOBL for the same symbol -- presumably for a different build configuration; confirm the guards