1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include "zasm_GOOS_GOARCH.h"
// rt0_go is the runtime bootstrap. In order, the visible steps are: derive
// g0's stack bounds from the OS-provided stack, record CPUID output, give
// _cgo_init a chance to run, set up TLS (skipped on some systems), call
// check, osinit and schedinit, hand runtime·main·f to newproc, and enter
// mstart.
9 TEXT runtime·rt0_go(SB),NOSPLIT,$0
10 // copy arguments forward on an even stack
13 SUBQ $(4*8+7), SP // 2args 2auto
18 // create istack out of the given (operating system) stack.
19 // _cgo_init may update stackguard.
20 MOVQ $runtime·g0(SB), DI
// BX = SP - 64KB + 104; used below as both stack guards and as stack.lo of g0.
21 LEAQ (-64*1024+104)(SP), BX
22 MOVQ BX, g_stackguard0(DI)
23 MOVQ BX, g_stackguard1(DI)
24 MOVQ BX, (g_stack+stack_lo)(DI)
25 MOVQ SP, (g_stack+stack_hi)(DI)
27 // find out information about the processor we're on
// NOTE(review): CX/DX presumably hold CPUID feature words here — confirm.
34 MOVL CX, runtime·cpuid_ecx(SB)
35 MOVL DX, runtime·cpuid_edx(SB)
38 // if there is an _cgo_init, call it.
39 MOVQ _cgo_init(SB), AX
43 MOVQ DI, CX // Win64 uses CX for first parameter
44 MOVQ $setg_gcc<>(SB), SI
47 // update stackguard after _cgo_init
// Both guards become stack.lo + StackGuard, using whatever stack.lo holds now.
48 MOVQ $runtime·g0(SB), CX
49 MOVQ (g_stack+stack_lo)(CX), AX
50 ADDQ $const_StackGuard, AX
51 MOVQ AX, g_stackguard0(CX)
52 MOVQ AX, g_stackguard1(CX)
54 CMPL runtime·iswindows(SB), $0
57 // skip TLS setup on Plan 9
58 CMPL runtime·isplan9(SB), $1
60 // skip TLS setup on Solaris
61 CMPL runtime·issolaris(SB), $1
// settls receives the address of runtime·tls0 in DI.
64 LEAQ runtime·tls0(SB), DI
65 CALL runtime·settls(SB)
67 // store through it, to make sure it works
70 MOVQ runtime·tls0(SB), AX
75 // set the per-goroutine and per-mach "registers"
77 LEAQ runtime·g0(SB), CX
79 LEAQ runtime·m0(SB), AX
86 CLD // convention is D is always left cleared
87 CALL runtime·check(SB)
89 MOVL 16(SP), AX // copy argc
91 MOVQ 24(SP), AX // copy argv
94 CALL runtime·osinit(SB)
95 CALL runtime·schedinit(SB)
97 // create a new goroutine to start program
98 MOVQ $runtime·main·f(SB), BP // entry
101 CALL runtime·newproc(SB)
106 CALL runtime·mstart(SB)
// Deliberate store to absolute address 0xf1 to force a fault; presumably
// reached only if mstart returns.
108 MOVL $0xf1, 0xf1 // crash
// runtime·main·f is an 8-byte read-only word holding the address of
// runtime·main; rt0_go loads its address as the entry for the first goroutine.
111 DATA runtime·main·f+0(SB)/8,$runtime·main(SB)
112 GLOBL runtime·main·f(SB),RODATA,$8
// breakpoint is declared with no frame and no arguments ($0-0).
114 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
// asminit is declared with no frame and no arguments ($0-0).
118 TEXT runtime·asminit(SB),NOSPLIT,$0-0
119 // No per-thread init.
126 // void gosave(Gobuf*)
127 // save state in Gobuf; setjmp
// Stores the address of the first argument slot (the caller's SP at the
// call) in buf.sp and the return address found at 0(SP) in buf.pc, then
// zeroes buf.ret and buf.ctxt.
128 TEXT runtime·gosave(SB), NOSPLIT, $0-8
129 MOVQ buf+0(FP), AX // gobuf
130 LEAQ buf+0(FP), BX // caller's SP
131 MOVQ BX, gobuf_sp(AX)
132 MOVQ 0(SP), BX // caller's PC
133 MOVQ BX, gobuf_pc(AX)
134 MOVQ $0, gobuf_ret(AX)
135 MOVQ $0, gobuf_ctxt(AX)
142 // restore state from Gobuf; longjmp
// Takes one 8-byte argument, the Gobuf pointer ($0-8). Loads SP, AX (ret)
// and DX (ctxt) from the Gobuf, zeroes those three fields, and finally
// fetches the saved PC.
143 TEXT runtime·gogo(SB), NOSPLIT, $0-8
144 MOVQ buf+0(FP), BX // gobuf
// The loaded value is not used; the load exists to fault if DX is nil.
146 MOVQ 0(DX), CX // make sure g != nil
149 MOVQ gobuf_sp(BX), SP // restore SP
150 MOVQ gobuf_ret(BX), AX
151 MOVQ gobuf_ctxt(BX), DX
152 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
153 MOVQ $0, gobuf_ret(BX)
154 MOVQ $0, gobuf_ctxt(BX)
// Overwrites the Gobuf pointer in BX with the saved PC, so this load must
// come after every other use of BX as the Gobuf pointer.
155 MOVQ gobuf_pc(BX), BX
158 // func mcall(fn func(*g))
159 // Switch to m->g0's stack, call fn(g).
160 // Fn must never return. It should gogo(&g->sched)
161 // to keep running g.
// Before switching, the caller's PC and SP and the g pointer itself are
// recorded in g->sched so that g can be resumed later.
162 TEXT runtime·mcall(SB), NOSPLIT, $0-8
166 MOVQ g(CX), AX // save state in g->sched
167 MOVQ 0(SP), BX // caller's PC
168 MOVQ BX, (g_sched+gobuf_pc)(AX)
169 LEAQ fn+0(FP), BX // caller's SP
170 MOVQ BX, (g_sched+gobuf_sp)(AX)
171 MOVQ AX, (g_sched+gobuf_g)(AX)
173 // switch to m->g0 & its stack, call fn
177 CMPQ SI, AX // if g == m->g0 call badmcall
// The address of badmcall is placed in AX rather than named directly in a
// CALL/JMP; presumably an indirect transfer through AX follows.
179 MOVQ $runtime·badmcall(SB), AX
181 MOVQ SI, g(CX) // g = m->g0
182 MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
// NOTE(review): presumably reached only if fn returns, which the contract
// above forbids — confirm.
188 MOVQ $runtime·badmcall2(SB), AX
192 // switchtoM is a dummy routine that onM leaves at the bottom
193 // of the G stack. We need to distinguish the routine that
194 // lives at the bottom of the G stack from the one that lives
195 // at the top of the M stack because the one at the top of
196 // the M stack terminates the stack walk (see topofstack()).
// onM stores this symbol's address as the saved PC in g->sched.
// Declared with no frame and no arguments ($0-0).
197 TEXT runtime·switchtoM(SB), NOSPLIT, $0-0
200 // func onM_signalok(fn func())
// Loads g, g.m and m.gsignal, then fn.
// NOTE(review): judging by the name, this is the variant of onM that may be
// called while running on gsignal — confirm against the branch logic.
201 TEXT runtime·onM_signalok(SB), NOSPLIT, $0-8
203 MOVQ g(CX), AX // AX = g
204 MOVQ g_m(AX), BX // BX = m
205 MOVQ m_gsignal(BX), DX // DX = gsignal
211 MOVQ fn+0(FP), DI // DI = fn
217 // func onM(fn func())
// Runs fn on the m's g0 stack. Three paths are described below: an error
// path through badonm, a switching path that saves state in g->sched and
// later restores SP from it, and a direct call when already on the m stack.
218 TEXT runtime·onM(SB), NOSPLIT, $0-8
219 MOVQ fn+0(FP), DI // DI = fn
221 MOVQ g(CX), AX // AX = g
222 MOVQ g_m(AX), BX // BX = m
224 MOVQ m_g0(BX), DX // DX = g0
232 // Not g0, not curg. Must be gsignal, but that's not allowed.
233 // Hide call from linker nosplit analysis.
234 MOVQ $runtime·badonm(SB), AX
238 // save our state in g->sched. Pretend to
239 // be switchtoM if the G stack is scanned.
240 MOVQ $runtime·switchtoM(SB), BP
241 MOVQ BP, (g_sched+gobuf_pc)(AX)
242 MOVQ SP, (g_sched+gobuf_sp)(AX)
243 MOVQ AX, (g_sched+gobuf_g)(AX)
// BX = g0's saved stack pointer.
247 MOVQ (g_sched+gobuf_sp)(DX), BX
248 // make it look like mstart called onM on g0, to stop traceback
250 MOVQ $runtime·mstart(SB), DX
254 // call target function
// Back from fn: restore SP from g->sched and clear the saved copy.
265 MOVQ (g_sched+gobuf_sp)(AX), SP
266 MOVQ $0, (g_sched+gobuf_sp)(AX)
270 // already on m stack, just call directly
277 * support for morestack
280 // Called during function prolog when more stack is needed.
282 // The traceback routines see morestack on a g0 as being
283 // the top of a stack (for example, morestack calling newstack
284 // calling the scheduler calling newm calling gc), so we must
285 // record an argument size. For that purpose, it has no arguments.
//
// Stack layout on entry, where f is the function whose prolog called here:
//   0(SP)  return PC into f
//   8(SP)  f's return PC into its caller
//   16(SP) first word of f's caller's frame (its SP)
286 TEXT runtime·morestack(SB),NOSPLIT,$0-0
287 // Cannot grow scheduler stack (m->g0).
296 // Cannot grow signal stack (m->gsignal).
297 MOVQ m_gsignal(BX), SI
303 // Set m->morebuf to f's caller.
304 MOVQ 8(SP), AX // f's caller's PC
305 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
306 LEAQ 16(SP), AX // f's caller's SP
307 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
310 MOVQ SI, (m_morebuf+gobuf_g)(BX)
312 // Set g->sched to context in f.
313 MOVQ 0(SP), AX // f's PC
314 MOVQ AX, (g_sched+gobuf_pc)(SI)
315 MOVQ SI, (g_sched+gobuf_g)(SI)
316 LEAQ 8(SP), AX // f's SP
317 MOVQ AX, (g_sched+gobuf_sp)(SI)
// DX is saved as the ctxt field of g->sched.
318 MOVQ DX, (g_sched+gobuf_ctxt)(SI)
320 // Call newstack on m->g0's stack.
323 MOVQ (g_sched+gobuf_sp)(BP), SP
324 CALL runtime·newstack(SB)
// Store to absolute address 0x1003 to force a fault.
325 MOVQ $0, 0x1003 // crash if newstack returns
328 // morestack but not preserving ctxt.
// Transfers to morestack with a JMP rather than a CALL, so no extra return
// address is pushed and morestack sees this function's caller's stack layout.
329 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
331 JMP runtime·morestack(SB)
333 // reflectcall: call a function with the given argument list
334 // func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
335 // we don't have variable-sized frames, so we use a small number
336 // of constant-sized-frame functions to encode a few bits of size in the pc.
337 // Caution: ugly multiline assembly macros in your future!
//
// DISPATCH(NAME, MAXSIZE) puts the address of NAME in AX.
// NOTE(review): presumably it first compares the argument size in CX with
// MAXSIZE and skips to the next DISPATCH when the arguments do not fit —
// confirm against the full macro body.
//
// reflectcall itself zero-extends the 32-bit argsize into CX and walks the
// power-of-two sizes from 16 bytes up to 1GB; the address of
// badreflectcall is loaded after the last DISPATCH.
339 #define DISPATCH(NAME,MAXSIZE) \
342 MOVQ $NAME(SB), AX; \
344 // Note: can't just "JMP NAME(SB)" - bad inlining results.
346 TEXT ·reflectcall(SB), NOSPLIT, $0-24
347 MOVLQZX argsize+16(FP), CX
348 DISPATCH(runtime·call16, 16)
349 DISPATCH(runtime·call32, 32)
350 DISPATCH(runtime·call64, 64)
351 DISPATCH(runtime·call128, 128)
352 DISPATCH(runtime·call256, 256)
353 DISPATCH(runtime·call512, 512)
354 DISPATCH(runtime·call1024, 1024)
355 DISPATCH(runtime·call2048, 2048)
356 DISPATCH(runtime·call4096, 4096)
357 DISPATCH(runtime·call8192, 8192)
358 DISPATCH(runtime·call16384, 16384)
359 DISPATCH(runtime·call32768, 32768)
360 DISPATCH(runtime·call65536, 65536)
361 DISPATCH(runtime·call131072, 131072)
362 DISPATCH(runtime·call262144, 262144)
363 DISPATCH(runtime·call524288, 524288)
364 DISPATCH(runtime·call1048576, 1048576)
365 DISPATCH(runtime·call2097152, 2097152)
366 DISPATCH(runtime·call4194304, 4194304)
367 DISPATCH(runtime·call8388608, 8388608)
368 DISPATCH(runtime·call16777216, 16777216)
369 DISPATCH(runtime·call33554432, 33554432)
370 DISPATCH(runtime·call67108864, 67108864)
371 DISPATCH(runtime·call134217728, 134217728)
372 DISPATCH(runtime·call268435456, 268435456)
373 DISPATCH(runtime·call536870912, 536870912)
374 DISPATCH(runtime·call1073741824, 1073741824)
375 MOVQ $runtime·badreflectcall(SB), AX
// CALLFN(NAME, MAXSIZE) defines a WRAPPER function NAME with a MAXSIZE-byte
// frame and 24 bytes of arguments. Its steps, as labelled inside the macro:
// load argptr and argsize to copy the arguments onto the stack, call the
// function (with PCDATA stack-map index 0), then reload argptr, argsize and
// retoffset to copy the return values back.
// The invocations below instantiate one such function per power-of-two
// frame size; reflectcall's DISPATCH list names the same sizes.
378 #define CALLFN(NAME,MAXSIZE) \
379 TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
381 /* copy arguments to stack */ \
382 MOVQ argptr+8(FP), SI; \
383 MOVLQZX argsize+16(FP), CX; \
386 /* call function */ \
388 PCDATA $PCDATA_StackMapIndex, $0; \
390 /* copy return values back */ \
391 MOVQ argptr+8(FP), DI; \
392 MOVLQZX argsize+16(FP), CX; \
393 MOVLQZX retoffset+20(FP), BX; \
404 CALLFN(·call128, 128)
405 CALLFN(·call256, 256)
406 CALLFN(·call512, 512)
407 CALLFN(·call1024, 1024)
408 CALLFN(·call2048, 2048)
409 CALLFN(·call4096, 4096)
410 CALLFN(·call8192, 8192)
411 CALLFN(·call16384, 16384)
412 CALLFN(·call32768, 32768)
413 CALLFN(·call65536, 65536)
414 CALLFN(·call131072, 131072)
415 CALLFN(·call262144, 262144)
416 CALLFN(·call524288, 524288)
417 CALLFN(·call1048576, 1048576)
418 CALLFN(·call2097152, 2097152)
419 CALLFN(·call4194304, 4194304)
420 CALLFN(·call8388608, 8388608)
421 CALLFN(·call16777216, 16777216)
422 CALLFN(·call33554432, 33554432)
423 CALLFN(·call67108864, 67108864)
424 CALLFN(·call134217728, 134217728)
425 CALLFN(·call268435456, 268435456)
426 CALLFN(·call536870912, 536870912)
427 CALLFN(·call1073741824, 1073741824)
429 // bool cas(int32 *val, int32 old, int32 new)
// $0-17: 8-byte pointer + two 4-byte ints = 16 bytes of arguments, followed
// by a 1-byte bool result.
436 TEXT runtime·cas(SB), NOSPLIT, $0-17
450 // bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
// $0-25: three 8-byte arguments = 24 bytes, followed by a 1-byte bool result.
458 TEXT runtime·cas64(SB), NOSPLIT, $0-25
// The uintptr/uint variants below forward to the 64-bit primitives with a
// JMP. No new frame is created and each stub declares the same argument
// size as its target, so the target reads the caller's arguments in place.
473 TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
474 JMP runtime·cas64(SB)
476 TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-16
477 JMP runtime·atomicload64(SB)
479 TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-16
480 JMP runtime·atomicload64(SB)
482 TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
483 JMP runtime·atomicstore64(SB)
485 // bool casp(void **val, void *old, void *new)
// $0-25: three 8-byte pointers followed by a 1-byte bool result.
492 TEXT runtime·casp(SB), NOSPLIT, $0-25
506 // uint32 xadd(uint32 volatile *val, int32 delta)
// $0-20: 8-byte pointer, 4-byte delta, 4 bytes of padding so the result is
// 8-byte aligned, then a 4-byte result.
510 TEXT runtime·xadd(SB), NOSPLIT, $0-20
// The 64-bit and pointer variants take two 8-byte arguments and return an
// 8-byte result ($0-24); the 32-bit xchg uses the same 20-byte layout as xadd.
520 TEXT runtime·xadd64(SB), NOSPLIT, $0-24
530 TEXT runtime·xchg(SB), NOSPLIT, $0-20
537 TEXT runtime·xchg64(SB), NOSPLIT, $0-24
544 TEXT runtime·xchgp(SB), NOSPLIT, $0-24
// xchguintptr forwards to xchg64 with a JMP; the arguments stay in place.
551 TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24
552 JMP runtime·xchg64(SB)
// procyield reads a 32-bit count from its first argument slot into AX.
// NOTE(review): the TEXT line declares 0 bytes of arguments ($0-0) although
// a 4-byte argument `cycles` is read at 0(FP); $0-4 would match the access —
// confirm before changing the declaration.
554 TEXT runtime·procyield(SB),NOSPLIT,$0-0
555 MOVL cycles+0(FP), AX
// Atomic stores. Declared argument sizes: pointer and 64-bit variants take
// a pointer plus an 8-byte value (16 bytes); the 32-bit variant takes a
// pointer plus a 4-byte value (12 bytes). None declares a result.
562 TEXT runtime·atomicstorep(SB), NOSPLIT, $0-16
568 TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
574 TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
580 // void runtime·atomicor8(byte volatile*, byte);
// $0-9: 8-byte pointer followed by a 1-byte value.
581 TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
588 // void jmpdefer(fn, sp);
589 // called from deferreturn.
591 // 2. sub 5 bytes from the caller's return
592 // 3. jmp to the argument
//
// SP is reset to just below the supplied caller SP, i.e. to the slot holding
// the return address pushed by the caller's CALL. That return address is
// then moved back by 5 bytes.
// NOTE(review): 5 is presumably the encoded length of that CALL instruction,
// so returning there executes the CALL again — confirm.
593 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
594 MOVQ fv+0(FP), DX // fn
595 MOVQ argp+8(FP), BX // caller sp
596 LEAQ -8(BX), SP // caller sp after CALL
597 SUBQ $5, (SP) // return to CALL again
599 JMP BX // but first run the deferred function
601 // Save state of caller into g->sched. Smashes R8, R9.
// File-local helper (the <> suffix). R8 addresses the g whose sched is
// written; R9 is scratch for the PC and SP values being stored. sched.ret
// and sched.ctxt are zeroed.
602 TEXT gosave<>(SB),NOSPLIT,$0
606 MOVQ R9, (g_sched+gobuf_pc)(R8)
608 MOVQ R9, (g_sched+gobuf_sp)(R8)
609 MOVQ $0, (g_sched+gobuf_ret)(R8)
610 MOVQ $0, (g_sched+gobuf_ctxt)(R8)
613 // asmcgocall(void(*fn)(void*), void *arg)
614 // Call fn(arg) on the scheduler stack,
615 // aligned appropriately for the gcc ABI.
616 // See cgocall.c for more details.
// Both entry points delegate to the file-local asmcgocall<> helper.
// $0-16: two 8-byte arguments and no result.
617 TEXT ·asmcgocall(SB),NOSPLIT,$0-16
620 CALL asmcgocall<>(SB)
// asmcgocall_errno declares 4 more bytes than asmcgocall ($0-20),
// presumably a 32-bit errno result taken from AX — TODO confirm.
623 TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-20
626 CALL asmcgocall<>(SB)
630 // asmcgocall common code. fn in AX, arg in BX. returns errno in AX.
631 TEXT asmcgocall<>(SB),NOSPLIT,$0-0
634 // Figure out if we need to switch to m->g0 stack.
635 // We get called to create new OS threads too, and those
636 // come in on the m->g0 stack already.
644 MOVQ m_gsignal(BP), SI
651 MOVQ (g_sched+gobuf_sp)(SI), SP
654 // Now on a scheduling stack (a pthread-created stack).
655 // Make sure we have enough room for 4 stack-backed fast-call
656 // registers as per windows amd64 calling convention.
// The two save slots used below, 40(SP) and 48(SP), lie above the first
// 32 bytes (4 registers x 8 bytes) of the aligned frame.
658 ANDQ $~15, SP // alignment for gcc ABI
659 MOVQ DI, 48(SP) // save g
660 MOVQ (g_stack+stack_hi)(DI), DI
662 MOVQ DI, 40(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
// The argument goes in both DI and CX so the same sequence serves the
// System V and Win64 calling conventions.
663 MOVQ BX, DI // DI = first argument in AMD64 ABI
664 MOVQ BX, CX // CX = first argument in Win64
667 // Restore registers, g, stack pointer.
// SI = g.stack.hi, the base against which the saved depth is interpreted.
670 MOVQ (g_stack+stack_hi)(DI), SI
676 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
677 // Turn the fn into a Go func (by taking its address) and call
678 // cgocallback_gofunc.
// The 24-byte local frame equals the 24 bytes of incoming arguments;
// presumably it holds the outgoing (FuncVal*, frame, framesize) triple.
// cgocallback_gofunc's address is loaded into AX rather than named in a
// direct CALL.
679 TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
684 MOVQ framesize+16(FP), AX
686 MOVQ $runtime·cgocallback_gofunc(SB), AX
690 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
691 // See cgocall.c for more details.
// Outline, following the section comments below: obtain an m if the thread
// has none (needm), save g0's sched.sp, switch to m->curg and call
// cgocallbackg, restore curg's sched.pc/sp, switch back to g0, and finally
// release a borrowed m (dropm).
692 TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24
695 // If g is nil, Go did not create the current thread.
696 // Call needm to obtain one m for temporary use.
697 // In this case, we're running on the thread stack, so there's
698 // lots of space, but the linker doesn't know. Hide the call from
699 // the linker analysis by using an indirect call through AX.
710 MOVQ BP, R8 // holds oldm until end of function
714 MOVQ $runtime·needm(SB), AX
721 // Set m->sched.sp = SP, so that if a panic happens
722 // during the function we are about to execute, it will
723 // have a valid SP to run on the g0 stack.
724 // The next few lines (after the havem label)
725 // will save this SP onto the stack and then write
726 // the same SP back to m->sched.sp. That seems redundant,
727 // but if an unrecovered panic happens, unwindm will
728 // restore the g->sched.sp from the stack location
729 // and then onM will try to use it. If we don't set it here,
730 // that restored SP will be uninitialized (typically 0) and
731 // will not be usable.
733 MOVQ SP, (g_sched+gobuf_sp)(SI)
736 // Now there's a valid m, and we're running on its m->g0.
737 // Save current m->g0->sched.sp on stack and then set it to SP.
738 // Save current sp in m->g0->sched.sp in preparation for
739 // switch back to m->curg stack.
740 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
742 MOVQ (g_sched+gobuf_sp)(SI), AX
744 MOVQ SP, (g_sched+gobuf_sp)(SI)
746 // Switch to m->curg stack and call runtime.cgocallbackg.
747 // Because we are taking over the execution of m->curg
748 // but *not* resuming what had been running, we need to
749 // save that information (m->curg->sched) so we can restore it.
750 // We can restore m->curg->sched.sp easily, because calling
751 // runtime.cgocallbackg leaves SP unchanged upon return.
752 // To save m->curg->sched.pc, we push it onto the stack.
753 // This has the added benefit that it looks to the traceback
754 // routine like cgocallbackg is going to return to that
755 // PC (because the frame we allocate below has the same
756 // size as cgocallback_gofunc's frame declared above)
757 // so that the traceback will seamlessly trace back into
758 // the earlier calls.
760 // In the new goroutine, 0(SP) holds the saved R8.
763 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
// BP = curg's saved PC, kept so it can be written back after the call.
764 MOVQ (g_sched+gobuf_pc)(SI), BP
768 CALL runtime·cgocallbackg(SB)
771 // Restore g->sched (== m->curg->sched) from saved values.
775 MOVQ BP, (g_sched+gobuf_pc)(SI)
777 MOVQ DI, (g_sched+gobuf_sp)(SI)
779 // Switch back to m->g0's stack and restore m->g0->sched.sp.
780 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
781 // so we do not have to restore it.)
786 MOVQ (g_sched+gobuf_sp)(SI), SP
788 MOVQ AX, (g_sched+gobuf_sp)(SI)
790 // If the m on entry was nil, we called needm above to borrow an m
791 // for the duration of the call. Since the call is over, return it with dropm.
// As with needm, dropm's address is loaded into AX instead of being named
// in a direct CALL.
794 MOVQ $runtime·dropm(SB), AX
800 // void setg(G*); set g. for use by needm.
// $0-8: one 8-byte G* argument, no frame.
801 TEXT runtime·setg(SB), NOSPLIT, $0-8
817 // void setg_gcc(G*); set g called from gcc.
// File-local symbol (<>); rt0_go loads its address into SI before the
// _cgo_init step.
818 TEXT setg_gcc<>(SB),NOSPLIT,$0
823 // check that SP is in range [g->stack.lo, g->stack.hi)
// One compare per bound: first against stack.hi, then against stack.lo.
// NOTE(review): AX presumably holds the current g here — confirm.
824 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
827 CMPQ (g_stack+stack_hi)(AX), SP
830 CMPQ SP, (g_stack+stack_lo)(AX)
// Caller PC/SP accessors. Each takes the address of its caller's first
// argument. The PC routines read or write the 8-byte word immediately below
// that address, which holds the return address ("calling pc").
835 TEXT runtime·getcallerpc(SB),NOSPLIT,$0-16
836 MOVQ argp+0(FP),AX // addr of first arg
837 MOVQ -8(AX),AX // get calling pc
// Same load sequence as getcallerpc, with the parameter named p.
841 TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-16
842 MOVQ p+0(FP),AX // addr of first arg
843 MOVQ -8(AX),AX // get calling pc
// setcallerpc overwrites that return-address slot with the value in BX.
847 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
848 MOVQ argp+0(FP),AX // addr of first arg
850 MOVQ BX, -8(AX) // set calling pc
853 TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
858 // func gogetcallersp(p unsafe.Pointer) uintptr
859 TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-16
860 MOVQ p+0(FP),AX // addr of first arg
864 // int64 runtime·cputicks(void)
// NOTE(review): the prototype above returns an int64, but the TEXT line
// declares 0 bytes of arguments/results ($0-0); $0-8 would match — confirm.
865 TEXT runtime·cputicks(SB),NOSPLIT,$0-0
872 // hash function using AES hardware instructions
// Both entry points set AX = data pointer and CX = byte length, then jump
// to aeshashbody, which reads the seed from the caller's argument frame.
873 TEXT runtime·aeshash(SB),NOSPLIT,$0-32
874 MOVQ p+0(FP), AX // ptr to data
875 MOVQ s+8(FP), CX // size
876 JMP runtime·aeshashbody(SB)
// aeshashstr receives a pointer to a string header and unpacks its length
// (at offset 8) and data pointer (at offset 0). The length is loaded first
// because the second load overwrites AX.
878 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-32
879 MOVQ p+0(FP), AX // ptr to string struct
880 // s+8(FP) is ignored, it is always sizeof(String)
881 MOVQ 8(AX), CX // length of string
882 MOVQ (AX), AX // string data
883 JMP runtime·aeshashbody(SB)
// aeshashbody: common tail of aeshash/aeshashstr.
// In: AX = data pointer, CX = length in bytes, seed at h+16(FP) (the frame
// is the original caller's, since this routine is entered with JMP).
// X0 starts as (seed, length); X2 and X3 hold the two 16-byte round keys
// from runtime·aeskeysched.
887 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-32
888 MOVQ h+16(FP), X0 // seed to low 64 bits of xmm0
889 PINSRQ $1, CX, X0 // size to high 64 bits of xmm0
890 MOVO runtime·aeskeysched+0(SB), X2
891 MOVO runtime·aeskeysched+16(SB), X3
903 // 1-16 bytes remaining
905 // This load may overlap with the previous load above.
906 // We'll hash some bytes twice, but that's ok.
// Loads the final 16 bytes of the input (ending at AX+CX).
907 MOVOU -16(AX)(CX*1), X1
912 JE finalize // 0 bytes
917 // 16 bytes loaded at this address won't cross
918 // a page boundary, so we can load it directly.
921 MOVQ $masks<>(SB), BP
925 // address ends in 1111xxxx. Might be up against
926 // a page boundary, so load ending at last byte.
927 // Then shift bytes down using pshufb.
928 MOVOU -16(AX)(CX*1), X1
930 MOVQ $shifts<>(SB), BP
// NOTE(review): shifts<> entries are 16 bytes apart while the index scale
// here is 8, so CX has presumably been doubled beforehand — confirm.
931 PSHUFB (BP)(CX*8), X1
933 // incorporate partial block into hash
// aeshash32: hash of a 4-byte value. X0 = seed in the low 64 bits with the
// data inserted as 32-bit lane 2 (bits 64-95), then three AESENC rounds
// keyed with aeskeysched[0:16], [16:32] and [0:16] again.
944 TEXT runtime·aeshash32(SB),NOSPLIT,$0-32
945 MOVQ p+0(FP), AX // ptr to data
946 // s+8(FP) is ignored, it is always sizeof(int32)
947 MOVQ h+16(FP), X0 // seed
948 PINSRD $2, (AX), X0 // data
949 AESENC runtime·aeskeysched+0(SB), X0
950 AESENC runtime·aeskeysched+16(SB), X0
951 AESENC runtime·aeskeysched+0(SB), X0
// aeshash64: hash of an 8-byte value. Same scheme as aeshash32, with the
// data inserted as the high 64 bits of X0.
955 TEXT runtime·aeshash64(SB),NOSPLIT,$0-32
956 MOVQ p+0(FP), AX // ptr to data
957 // s+8(FP) is ignored, it is always sizeof(int64)
958 MOVQ h+16(FP), X0 // seed
959 PINSRQ $1, (AX), X0 // data
960 AESENC runtime·aeskeysched+0(SB), X0
961 AESENC runtime·aeskeysched+16(SB), X0
962 AESENC runtime·aeskeysched+0(SB), X0
966 // simple mask to get rid of data in the high part of the register.
// Sixteen 16-byte entries (256 bytes, read-only). Entry i, at byte offset
// i*16, has its low i bytes set to 0xff and the remaining bytes zero, so
// ANDing a 16-byte value with entry i keeps only its first i bytes.
967 DATA masks<>+0x00(SB)/8, $0x0000000000000000
968 DATA masks<>+0x08(SB)/8, $0x0000000000000000
969 DATA masks<>+0x10(SB)/8, $0x00000000000000ff
970 DATA masks<>+0x18(SB)/8, $0x0000000000000000
971 DATA masks<>+0x20(SB)/8, $0x000000000000ffff
972 DATA masks<>+0x28(SB)/8, $0x0000000000000000
973 DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
974 DATA masks<>+0x38(SB)/8, $0x0000000000000000
975 DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
976 DATA masks<>+0x48(SB)/8, $0x0000000000000000
977 DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
978 DATA masks<>+0x58(SB)/8, $0x0000000000000000
979 DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
980 DATA masks<>+0x68(SB)/8, $0x0000000000000000
981 DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
982 DATA masks<>+0x78(SB)/8, $0x0000000000000000
983 DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
984 DATA masks<>+0x88(SB)/8, $0x0000000000000000
985 DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
986 DATA masks<>+0x98(SB)/8, $0x00000000000000ff
987 DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
988 DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
989 DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
990 DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
991 DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
992 DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
993 DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
994 DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
995 DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
996 DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
997 DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
998 DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
999 GLOBL masks<>(SB),RODATA,$256
1001 // these are arguments to pshufb. They move data down from
1002 // the high bytes of the register to the low bytes of the register.
1003 // index is how many bytes to move.
// Sixteen 16-byte entries (256 bytes, read-only). In entry i, at byte
// offset i*16, the low i control bytes select source bytes 16-i through 15,
// and every other control byte is 0xff. PSHUFB zeroes a destination byte
// whose control byte has its top bit set, so the result is the top i bytes
// of the source moved to the bottom with the rest cleared.
1004 DATA shifts<>+0x00(SB)/8, $0x0000000000000000
1005 DATA shifts<>+0x08(SB)/8, $0x0000000000000000
1006 DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
1007 DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
1008 DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
1009 DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
1010 DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
1011 DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
1012 DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
1013 DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
1014 DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
1015 DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
1016 DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
1017 DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
1018 DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
1019 DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
1020 DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
1021 DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
1022 DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
1023 DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
1024 DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
1025 DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
1026 DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
1027 DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
1028 DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
1029 DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
1030 DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
1031 DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
1032 DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
1033 DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
1034 DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
1035 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
1036 GLOBL shifts<>(SB),RODATA,$256
// memeq passes the byte count from size+16(FP) in BX to memeqbody.
// $0-25: with size at offset 16, the layout is presumably two 8-byte
// pointers, the 8-byte size, and a 1-byte bool result — TODO confirm.
1038 TEXT runtime·memeq(SB),NOSPLIT,$0-25
1041 MOVQ size+16(FP), BX
1042 CALL runtime·memeqbody(SB)
1046 // eqstring tests whether two strings are equal.
1047 // See runtime_test.go:eqstring_generic for
1048 // equivalent Go code.
// $0-33: two 16-byte string headers followed by a 1-byte bool result.
// Lengths are loaded into AX and BX, data pointers into SI and DI, and the
// byte comparison is delegated to memeqbody.
1049 TEXT runtime·eqstring(SB),NOSPLIT,$0-33
1050 MOVQ s1len+8(FP), AX
1051 MOVQ s2len+24(FP), BX
1054 MOVQ s1str+0(FP), SI
1055 MOVQ s2str+16(FP), DI
1058 CALL runtime·memeqbody(SB)
// memeqbody: shared comparison core, called with the two data pointers in
// SI and DI and the byte count in BX (see memeq, eqstring, bytes·Equal).
// It works in three tiers by remaining length, as labelled below.
1071 TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
1077 // 64 bytes at a time using xmm registers
1104 // 8 bytes at a time using 64-bit register
1117 // remaining 0-8 bytes
// Compare the final 8 bytes of each buffer (those ending at ptr+BX).
1119 MOVQ -8(SI)(BX*1), CX
1120 MOVQ -8(DI)(BX*1), DX
1135 // load at SI won't cross a page boundary.
1139 // address ends in 11111xxx. Load up to bytes we want, move to correct position.
1140 MOVQ -8(SI)(BX*1), SI
1150 MOVQ -8(DI)(BX*1), DI
// cmpstring loads SI/BX = base/length of s1 and DI/DX = base/length of s2,
// then calls cmpbody.
// $0-40: two 16-byte string headers followed by an 8-byte result.
1160 TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
1161 MOVQ s1_base+0(FP), SI
1162 MOVQ s1_len+8(FP), BX
1163 MOVQ s2_base+16(FP), DI
1164 MOVQ s2_len+24(FP), DX
1165 CALL runtime·cmpbody(SB)
// cmpbytes also calls cmpbody. $0-56 is 16 bytes larger than cmpstring's
// $0-40, consistent with two 24-byte slice headers plus an 8-byte result.
1169 TEXT runtime·cmpbytes(SB),NOSPLIT,$0-56
1174 CALL runtime·cmpbody(SB)
// cmpbody: shared three-way comparison core.
// In: SI, DI = pointers to a and b; BX = alen, DX = blen (as loaded by
// cmpstring). Out: AX = +1, 0 or -1.
// Only the first min(alen, blen) bytes are compared; if they are all equal
// the result is decided by the lengths (final block).
1185 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
1190 CMOVQLT BX, BP // BP = min(alen, blen) = # of bytes to compare
1201 XORQ $0xffff, AX // convert EQ to NE
1202 JNE diff16 // branch if at least one byte is not equal
1208 // AX = bit mask of differences
1210 BSFQ AX, BX // index of first byte that differs
1215 LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
1218 // 0 through 16 bytes left, alen>=8, blen>=8
// Compare the final 8 bytes of the common prefix (ending at ptr+BP).
1227 MOVQ -8(SI)(BP*1), AX
1228 MOVQ -8(DI)(BP*1), CX
1232 // AX and CX contain parts of a and b that differ.
// Byte-swapping puts the first memory byte in the most significant
// position, so the highest differing bit belongs to the earliest
// differing byte.
1234 BSWAPQ AX // reverse order of bytes
1237 BSRQ CX, CX // index of highest bit difference
1238 SHRQ CX, AX // move a's bit to bottom
1239 ANDQ $1, AX // mask bit
1240 LEAQ -1(AX*2), AX // 1/0 => +1/-1
1243 // 0-7 bytes in common
1245 LEAQ (BP*8), CX // bytes left -> bits left
1246 NEGQ CX // - bits left (== 64 - bits left mod 64)
1249 // load bytes of a into high bytes of SI
1255 MOVQ -8(SI)(BP*1), SI
1260 // load bytes of b into high bytes of DI
1266 MOVQ -8(DI)(BP*1), DI
1271 BSWAPQ SI // reverse order of bytes
1273 XORQ SI, DI // find bit differences
1275 BSRQ DI, CX // index of highest bit difference
1276 SHRQ CX, SI // move a's bit to bottom
1277 ANDQ $1, SI // mask bit
1278 LEAQ -1(SI*2), AX // 1/0 => +1/-1
// Result from lengths alone: AX = 2*(alen > blen) + (alen == blen) - 1.
1285 SETGT AX // 1 if alen > blen
1286 SETEQ CX // 1 if alen == blen
1287 LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
// bytes·IndexByte and strings·IndexByte are thin wrappers: each loads the
// length of s into BX and calls the shared indexbytebody.
1290 TEXT bytes·IndexByte(SB),NOSPLIT,$0
1292 MOVQ s_len+8(FP), BX
1294 CALL runtime·indexbytebody(SB)
1298 TEXT strings·IndexByte(SB),NOSPLIT,$0
1300 MOVQ s_len+8(FP), BX
1302 CALL runtime·indexbytebody(SB)
// indexbytebody: shared byte-search core; BX holds the length (set by the
// IndexByte wrappers). Per the step comments below, it searches any
// unaligned head, then scans 16 bytes at a time with SSE: the target byte
// c is broadcast to every lane of X0, each chunk is compared against it,
// and the per-byte top bits are collected into DX to find the first match.
1312 TEXT runtime·indexbytebody(SB),NOSPLIT,$0
1318 // round up to first 16-byte boundary
1325 // search the beginning
1330 // DI is 16-byte aligned; get ready to search using SSE instructions
1332 // round down to last 16-byte boundary
1337 // shuffle X0 around so that each byte contains c
1345 // move the next 16-byte chunk of the buffer into X1
1347 // compare bytes in X0 to X1
1349 // take the top bit of each byte in X1 and put the result in DX
1363 // if CX == 0, the zero flag will be set and we'll end up
1364 // returning a false success
1373 // handle for lengths < 16
1381 // we've found the chunk containing the byte
1382 // now just figure out which specific byte it is
1384 // get the index of the least significant set bit
// bytes·Equal loads the two slice lengths into BX and CX and delegates the
// byte comparison to memeqbody.
// $0-49: with b_len at offset 32, each slice header is 24 bytes, leaving a
// 1-byte bool result at offset 48.
1397 TEXT bytes·Equal(SB),NOSPLIT,$0-49
1398 MOVQ a_len+8(FP), BX
1399 MOVQ b_len+32(FP), CX
1405 CALL runtime·memeqbody(SB)
1410 // A Duff's device for zeroing memory.
1411 // The compiler jumps to computed addresses within
1412 // this routine to zero chunks of memory. Do not
1413 // change this code without also changing the code
1414 // in ../../cmd/6g/ggen.c:clearfat.
1416 // DI: ptr to memory to be zeroed
1417 // DI is updated as a side effect.
// Because callers enter at computed offsets, the size and order of the
// instructions in the body are part of the interface.
1418 TEXT runtime·duffzero(SB), NOSPLIT, $0-0
1549 // A Duff's device for copying memory.
1550 // The compiler jumps to computed addresses within
1551 // this routine to copy chunks of memory. Source
1552 // and destination must not overlap. Do not
1553 // change this code without also changing the code
1554 // in ../../cmd/6g/cgen.c:sgen.
1555 // SI: ptr to source memory
1556 // DI: ptr to destination memory
1557 // SI and DI are updated as a side effect.
1559 // NOTE: this is equivalent to a sequence of MOVSQ but
1560 // for some reason that is 3.5x slower than this code.
1561 // The STOSQ above seems fine, though.
1562 TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
// fastrand1 updates the generator state stored in m.fastrand in place and
// declares a 4-byte result ($0-4).
// NOTE(review): AX presumably holds the current m, and the XOR with
// 0x88888eef is presumably applied conditionally after a doubling step —
// confirm against the full body.
2205 TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
2209 MOVL m_fastrand(AX), DX
2212 XORL $0x88888eef, DX
2214 MOVL DX, m_fastrand(AX)
// NOTE(review): judging by the name, return0 presumably sets AX to 0 and
// returns — confirm.
2218 TEXT runtime·return0(SB), NOSPLIT, $0
2223 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
2224 // Must obey the gcc calling convention.
// The symbol carries no package prefix, and the result is left in AX.
2225 TEXT _cgo_topofstack(SB),NOSPLIT,$0
2230 MOVQ (g_stack+stack_hi)(AX), AX