1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include "zasm_GOOS_GOARCH.h"
// runtime·rt0_go: process bootstrap for the Go runtime on 386.
// Establishes g0's stack bounds from the OS-provided stack, records
// CPUID feature bits, lets _cgo_init (if present) take over TLS setup,
// wires up the m0/g0 "registers", runs osinit/schedinit, queues
// runtime.main as the first goroutine, and enters the scheduler via
// mstart, which does not return.
// NOTE(review): this listing is elided — instructions between the
// visible lines (argc/argv reloading, the CPUID instruction itself,
// conditional branches, TLS stores, the final crash trap) are not
// shown; do not assume adjacent lines here were adjacent in the file.
9 TEXT runtime·rt0_go(SB),NOSPLIT,$0
10 // copy arguments forward on an even stack
13 SUBL $128, SP // plenty of scratch
15 MOVL AX, 120(SP) // save argc, argv away
18 // set default stack bounds.
19 // _cgo_init may update stackguard.
20 MOVL $runtime·g0(SB), BP
// Assume a 64 kB system stack; +104 leaves the guard slack above stack_lo.
21 LEAL (-64*1024+104)(SP), BX
22 MOVL BX, g_stackguard0(BP)
23 MOVL BX, g_stackguard1(BP)
24 MOVL BX, (g_stack+stack_lo)(BP)
25 MOVL SP, (g_stack+stack_hi)(BP)
27 // find out information about the processor we're on
// (CPUID execution elided) — save feature words for later SSE2/AES checks.
34 MOVL CX, runtime·cpuid_ecx(SB)
35 MOVL DX, runtime·cpuid_edx(SB)
38 // if there is an _cgo_init, call it to let it
39 // initialize and to set up GS. if not,
40 // we set up GS ourselves.
41 MOVL _cgo_init(SB), AX
// Pass setg_gcc so C code can update the g register via gcc-compiled code.
44 MOVL $setg_gcc<>(SB), BX
49 // update stackguard after _cgo_init
50 MOVL $runtime·g0(SB), CX
51 MOVL (g_stack+stack_lo)(CX), AX
52 ADDL $const_StackGuard, AX
53 MOVL AX, g_stackguard0(CX)
54 MOVL AX, g_stackguard1(CX)
56 // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
57 CMPL runtime·iswindows(SB), $0
60 // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
61 CMPL runtime·isplan9(SB), $1
65 CALL runtime·ldt0setup(SB)
67 // store through it, to make sure it works
70 MOVL runtime·tls0(SB), AX
75 // set up m and g "registers"
77 LEAL runtime·g0(SB), CX
79 LEAL runtime·m0(SB), AX
// (m0<->g0 cross-linking elided)
86 CALL runtime·emptyfunc(SB) // fault if stack check is wrong
88 // convention is D is always cleared
91 CALL runtime·check(SB)
99 CALL runtime·osinit(SB)
100 CALL runtime·schedinit(SB)
102 // create a new goroutine to start program
103 PUSHL $runtime·main·f(SB) // entry
105 CALL runtime·newproc(SB)
110 CALL runtime·mstart(SB)
// runtime·main·f is a FuncVal wrapper so newproc can take runtime.main's address.
115 DATA runtime·main·f+0(SB)/4,$runtime·main(SB)
116 GLOBL runtime·main·f(SB),RODATA,$4
// runtime·breakpoint: trigger a debugger trap.
// NOTE(review): body elided in this view (presumably INT $3 + RET — confirm).
118 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
// runtime·asminit: per-thread architecture setup.
// NOTE(review): FPU control-word instructions elided in this view.
122 TEXT runtime·asminit(SB),NOSPLIT,$0-0
123 // Linux and MinGW start the FPU in extended double precision.
124 // Other operating systems use double precision.
125 // Change to double precision to match them,
126 // and to match other hardware that only has double.
136 // void gosave(Gobuf*)
137 // save state in Gobuf; setjmp
// Records the caller's SP and PC plus zeroed ret/ctxt into *buf so a
// later gogo can resume at the call site. Analogous to setjmp.
138 TEXT runtime·gosave(SB), NOSPLIT, $0-4
139 MOVL buf+0(FP), AX // gobuf
140 LEAL buf+0(FP), BX // caller's SP
141 MOVL BX, gobuf_sp(AX)
142 MOVL 0(SP), BX // caller's PC
143 MOVL BX, gobuf_pc(AX)
144 MOVL $0, gobuf_ret(AX)
145 MOVL $0, gobuf_ctxt(AX)
// NOTE(review): gobuf_g store and RET elided in this view.
152 // restore state from Gobuf; longjmp
// Loads SP/ret/ctxt from *buf, clears the Gobuf's pointer fields so the
// garbage collector does not retain them, then jumps to the saved PC.
153 TEXT runtime·gogo(SB), NOSPLIT, $0-4
154 MOVL buf+0(FP), BX // gobuf
// (g load from gobuf and TLS update elided)
156 MOVL 0(DX), CX // make sure g != nil
159 MOVL gobuf_sp(BX), SP // restore SP
160 MOVL gobuf_ret(BX), AX
161 MOVL gobuf_ctxt(BX), DX
162 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
163 MOVL $0, gobuf_ret(BX)
164 MOVL $0, gobuf_ctxt(BX)
165 MOVL gobuf_pc(BX), BX
// NOTE(review): the final JMP BX is elided in this view.
168 // func mcall(fn func(*g))
169 // Switch to m->g0's stack, call fn(g).
170 // Fn must never return. It should gogo(&g->sched)
171 // to keep running g.
172 TEXT runtime·mcall(SB), NOSPLIT, $0-4
// (TLS g load elided) AX = current g; save resume point into g->sched.
176 MOVL g(CX), AX // save state in g->sched
177 MOVL 0(SP), BX // caller's PC
178 MOVL BX, (g_sched+gobuf_pc)(AX)
179 LEAL fn+0(FP), BX // caller's SP
180 MOVL BX, (g_sched+gobuf_sp)(AX)
181 MOVL AX, (g_sched+gobuf_g)(AX)
183 // switch to m->g0 & its stack, call fn
// (m/g0 loads elided; SI presumably holds m->g0 here — confirm against full source)
187 CMPL SI, AX // if g == m->g0 call badmcall
189 MOVL $runtime·badmcall(SB), AX
191 MOVL SI, g(CX) // g = m->g0
192 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
// (push of g, fn load, CALL elided)
// badmcall2 is reached only if fn returns, which it must never do.
198 MOVL $runtime·badmcall2(SB), AX
202 // switchtoM is a dummy routine that onM leaves at the bottom
203 // of the G stack. We need to distinguish the routine that
204 // lives at the bottom of the G stack from the one that lives
205 // at the top of the M stack because the one at the top of
206 // the M stack terminates the stack walk (see topofstack()).
207 TEXT runtime·switchtoM(SB), NOSPLIT, $0-0
210 // func onM_signalok(fn func())
// Like onM, but also permits running on the signal stack (gsignal).
// NOTE(review): the gsignal comparison/branch and tail behavior are
// elided in this view.
211 TEXT runtime·onM_signalok(SB), NOSPLIT, $0-4
213 MOVL g(CX), AX // AX = g
214 MOVL g_m(AX), BX // BX = m
215 MOVL m_gsignal(BX), DX // DX = gsignal
221 MOVL fn+0(FP), DI // DI = fn
227 // func onM(fn func())
// Run fn on the m->g0 (scheduler) stack, then switch back.
228 TEXT runtime·onM(SB), NOSPLIT, $0-4
229 MOVL fn+0(FP), DI // DI = fn
231 MOVL g(CX), AX // AX = g
232 MOVL g_m(AX), BX // BX = m
234 MOVL m_g0(BX), DX // DX = g0
// (g==g0 / g==curg checks elided)
242 // Not g0, not curg. Must be gsignal, but that's not allowed.
243 // Hide call from linker nosplit analysis.
244 MOVL $runtime·badonm(SB), AX
248 // save our state in g->sched. Pretend to
249 // be switchtoM if the G stack is scanned.
250 MOVL $runtime·switchtoM(SB), (g_sched+gobuf_pc)(AX)
251 MOVL SP, (g_sched+gobuf_sp)(AX)
252 MOVL AX, (g_sched+gobuf_g)(AX)
// switch to g0's stack:
256 MOVL (g_sched+gobuf_sp)(DX), BX
257 // make it look like mstart called onM on g0, to stop traceback
259 MOVL $runtime·mstart(SB), DX
263 // call target function
// (indirect call through fn's FuncVal elided)
// switch back to g and restore its stack pointer; clear the saved
// sp so the GC does not treat it as live.
274 MOVL (g_sched+gobuf_sp)(AX), SP
275 MOVL $0, (g_sched+gobuf_sp)(AX)
279 // already on m stack, just call directly
286 * support for morestack
289 // Called during function prolog when more stack is needed.
291 // The traceback routines see morestack on a g0 as being
292 // the top of a stack (for example, morestack calling newstack
293 // calling the scheduler calling newm calling gc), so we must
294 // record an argument size. For that purpose, it has no arguments.
295 TEXT runtime·morestack(SB),NOSPLIT,$0-0
296 // Cannot grow scheduler stack (m->g0).
// (g0 comparison and abort path elided)
305 // Cannot grow signal stack.
306 MOVL m_gsignal(BX), SI
// (gsignal comparison and abort path elided)
312 // Set m->morebuf to f's caller.
313 MOVL 4(SP), DI // f's caller's PC
314 MOVL DI, (m_morebuf+gobuf_pc)(BX)
315 LEAL 8(SP), CX // f's caller's SP
316 MOVL CX, (m_morebuf+gobuf_sp)(BX)
// SI here holds the current g (reloaded between the elided lines — confirm).
319 MOVL SI, (m_morebuf+gobuf_g)(BX)
321 // Set g->sched to context in f.
322 MOVL 0(SP), AX // f's PC
323 MOVL AX, (g_sched+gobuf_pc)(SI)
324 MOVL SI, (g_sched+gobuf_g)(SI)
325 LEAL 4(SP), AX // f's SP
326 MOVL AX, (g_sched+gobuf_sp)(SI)
327 MOVL DX, (g_sched+gobuf_ctxt)(SI)
329 // Call newstack on m->g0's stack.
// (switch of g to g0 elided; BP holds g0 here — confirm)
332 MOVL (g_sched+gobuf_sp)(BP), AX
333 MOVL -4(AX), BX // fault if CALL would, before smashing SP
335 CALL runtime·newstack(SB)
336 MOVL $0, 0x1003 // crash if newstack returns
// morestack_noctxt: morestack entry for functions with no closure
// context; clears DX (ctxt) before falling into morestack.
339 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
// (MOVL $0, DX elided in this view)
341 JMP runtime·morestack(SB)
343 // reflectcall: call a function with the given argument list
344 // func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
345 // we don't have variable-sized frames, so we use a small number
346 // of constant-sized-frame functions to encode a few bits of size in the pc.
347 // Caution: ugly multiline assembly macros in your future!
// DISPATCH: if argsize (CX) fits MAXSIZE, tail-jump to the fixed-frame
// NAME variant. NOTE(review): the CMPL/JLS guard lines of the macro are
// elided in this view; only the target load is visible.
349 #define DISPATCH(NAME,MAXSIZE) \
352 MOVL $NAME(SB), AX; \
354 // Note: can't just "JMP NAME(SB)" - bad inlining results.
// Dispatch by size class, doubling from 16 bytes up to 1 GB.
356 TEXT ·reflectcall(SB), NOSPLIT, $0-16
357 MOVL argsize+8(FP), CX
358 DISPATCH(runtime·call16, 16)
359 DISPATCH(runtime·call32, 32)
360 DISPATCH(runtime·call64, 64)
361 DISPATCH(runtime·call128, 128)
362 DISPATCH(runtime·call256, 256)
363 DISPATCH(runtime·call512, 512)
364 DISPATCH(runtime·call1024, 1024)
365 DISPATCH(runtime·call2048, 2048)
366 DISPATCH(runtime·call4096, 4096)
367 DISPATCH(runtime·call8192, 8192)
368 DISPATCH(runtime·call16384, 16384)
369 DISPATCH(runtime·call32768, 32768)
370 DISPATCH(runtime·call65536, 65536)
371 DISPATCH(runtime·call131072, 131072)
372 DISPATCH(runtime·call262144, 262144)
373 DISPATCH(runtime·call524288, 524288)
374 DISPATCH(runtime·call1048576, 1048576)
375 DISPATCH(runtime·call2097152, 2097152)
376 DISPATCH(runtime·call4194304, 4194304)
377 DISPATCH(runtime·call8388608, 8388608)
378 DISPATCH(runtime·call16777216, 16777216)
379 DISPATCH(runtime·call33554432, 33554432)
380 DISPATCH(runtime·call67108864, 67108864)
381 DISPATCH(runtime·call134217728, 134217728)
382 DISPATCH(runtime·call268435456, 268435456)
383 DISPATCH(runtime·call536870912, 536870912)
384 DISPATCH(runtime·call1073741824, 1073741824)
// No size class fit: abort.
385 MOVL $runtime·badreflectcall(SB), AX
// CALLFN: define one fixed-frame call variant. Copies args to the
// frame, calls the function, then copies results back out.
// NOTE(review): the copy loops, the indirect CALL, and the RET are
// elided in this view.
388 #define CALLFN(NAME,MAXSIZE) \
389 TEXT NAME(SB), WRAPPER, $MAXSIZE-16; \
391 /* copy arguments to stack */ \
392 MOVL argptr+4(FP), SI; \
393 MOVL argsize+8(FP), CX; \
396 /* call function */ \
399 PCDATA $PCDATA_StackMapIndex, $0; \
401 /* copy return values back */ \
402 MOVL argptr+4(FP), DI; \
403 MOVL argsize+8(FP), CX; \
404 MOVL retoffset+12(FP), BX; \
// NOTE(review): CALLFN invocations for call16..call64 are elided here.
415 CALLFN(·call128, 128)
416 CALLFN(·call256, 256)
417 CALLFN(·call512, 512)
418 CALLFN(·call1024, 1024)
419 CALLFN(·call2048, 2048)
420 CALLFN(·call4096, 4096)
421 CALLFN(·call8192, 8192)
422 CALLFN(·call16384, 16384)
423 CALLFN(·call32768, 32768)
424 CALLFN(·call65536, 65536)
425 CALLFN(·call131072, 131072)
426 CALLFN(·call262144, 262144)
427 CALLFN(·call524288, 524288)
428 CALLFN(·call1048576, 1048576)
429 CALLFN(·call2097152, 2097152)
430 CALLFN(·call4194304, 4194304)
431 CALLFN(·call8388608, 8388608)
432 CALLFN(·call16777216, 16777216)
433 CALLFN(·call33554432, 33554432)
434 CALLFN(·call67108864, 67108864)
435 CALLFN(·call134217728, 134217728)
436 CALLFN(·call268435456, 268435456)
437 CALLFN(·call536870912, 536870912)
438 CALLFN(·call1073741824, 1073741824)
440 // bool cas(int32 *val, int32 old, int32 new)
// Compare-and-swap; returns 1 on success, 0 on failure.
// NOTE(review): the LOCK CMPXCHGL body is elided in this view.
447 TEXT runtime·cas(SB), NOSPLIT, $0-13
// casuintptr: on 386 uintptr is 32 bits, so this is the same as cas.
461 TEXT runtime·casuintptr(SB), NOSPLIT, $0-13
// uintptr/uint loads and stores are 32-bit on this arch, so these
// simply tail-jump to the 32-bit atomic implementations.
464 TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8
465 JMP runtime·atomicload(SB)
467 TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8
468 JMP runtime·atomicload(SB)
470 TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-8
471 JMP runtime·atomicstore(SB)
473 // bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
// 64-bit CAS built from DX:AX (old) and CX:BX (new).
// NOTE(review): the LOCK CMPXCHG8B and result-setting code is elided.
481 TEXT runtime·cas64(SB), NOSPLIT, $0-21
483 MOVL old_lo+4(FP), AX
484 MOVL old_hi+8(FP), DX
485 MOVL new_lo+12(FP), BX
486 MOVL new_hi+16(FP), CX
498 // bool casp(void **p, void *old, void *new)
// Pointer CAS; pointers are 32-bit here. Body elided in this view.
505 TEXT runtime·casp1(SB), NOSPLIT, $0-13
519 // uint32 xadd(uint32 volatile *val, int32 delta)
// Atomic add returning the new value. Body (LOCK XADDL) elided.
523 TEXT runtime·xadd(SB), NOSPLIT, $0-12
// Atomic exchange variants; bodies (XCHGL) elided in this view.
533 TEXT runtime·xchg(SB), NOSPLIT, $0-12
540 TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
547 TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
// procyield(cycles): spin-wait hint loop; PAUSE loop body elided.
550 TEXT runtime·procyield(SB),NOSPLIT,$0-0
551 MOVL cycles+0(FP), AX
// Atomic 32-bit/pointer stores; bodies (XCHGL as a store-with-fence) elided.
558 TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
564 TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
570 // uint64 atomicload64(uint64 volatile* addr);
// Uses MMX MOVQ for a single atomic 8-byte load, then EMMS to clear
// MMX state before returning to code that may use the x87 FPU.
571 TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
573 LEAL ret_lo+4(FP), BX
// MOVQ (%EAX), %MM0 — hand-assembled because the assembler of this
// era lacked the MMX mnemonics.
575 BYTE $0x0f; BYTE $0x6f; BYTE $0x00
576 // MOVQ %MM0, 0(%EBX)
577 BYTE $0x0f; BYTE $0x7f; BYTE $0x03
// EMMS
579 BYTE $0x0F; BYTE $0x77
582 // void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
583 TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
585 // MOVQ and EMMS were introduced on the Pentium MMX.
586 // MOVQ 0x8(%ESP), %MM0
587 BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
// MOVQ %MM0, (%EAX)
589 BYTE $0x0f; BYTE $0x7f; BYTE $0x00
// EMMS
591 BYTE $0x0F; BYTE $0x77
592 // This is essentially a no-op, but it provides required memory fencing.
593 // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
599 // void runtime·atomicor8(byte volatile*, byte);
// Atomic OR of a byte; body (LOCK ORB) elided in this view.
600 TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
607 // void jmpdefer(fn, sp);
608 // called from deferreturn.
// Steps: 1. (elided) pop our own frame;
610 // 2. sub 5 bytes from the callers return
611 // 3. jmp to the argument
// The -5 rewinds the return address to the CALL instruction itself
// (a 386 CALL rel32 is 5 bytes), so deferreturn runs again after fn.
612 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
613 MOVL fv+0(FP), DX // fn
614 MOVL argp+4(FP), BX // caller sp
615 LEAL -4(BX), SP // caller sp after CALL
616 SUBL $5, (SP) // return to CALL again
// (load of fn's code pointer into BX elided)
618 JMP BX // but first run the deferred function
620 // Save state of caller into g->sched.
// Internal helper used by the cgo call path; mirrors gosave but
// operates on the current g fetched from TLS (loads elided).
621 TEXT gosave<>(SB),NOSPLIT,$0
627 MOVL AX, (g_sched+gobuf_sp)(BX)
629 MOVL AX, (g_sched+gobuf_pc)(BX)
630 MOVL $0, (g_sched+gobuf_ret)(BX)
631 MOVL $0, (g_sched+gobuf_ctxt)(BX)
636 // asmcgocall(void(*fn)(void*), void *arg)
637 // Call fn(arg) on the scheduler stack,
638 // aligned appropriately for the gcc ABI.
639 // See cgocall.c for more details.
640 TEXT ·asmcgocall(SB),NOSPLIT,$0-8
// (fn/arg loads into AX/BX elided)
643 CALL asmcgocall<>(SB)
// asmcgocall_errno: same, but also returns the C errno value.
646 TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-12
649 CALL asmcgocall<>(SB)
// Shared implementation: switch to m->g0's stack (unless already on
// it), align for gcc, call fn(arg), then switch back.
653 TEXT asmcgocall<>(SB),NOSPLIT,$0-0
654 // fn in AX, arg in BX
657 // Figure out if we need to switch to m->g0 stack.
658 // We get called to create new OS threads too, and those
659 // come in on the m->g0 stack already.
// (g/g0 comparisons and gosave<> call elided)
669 MOVL (g_sched+gobuf_sp)(SI), SP
671 // Now on a scheduling stack (a pthread-created stack).
// (frame reservation elided)
673 ANDL $~15, SP // alignment, perhaps unnecessary
674 MOVL DI, 8(SP) // save g
675 MOVL (g_stack+stack_hi)(DI), DI
// (subtraction producing stack depth elided)
677 MOVL DI, 4(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
678 MOVL BX, 0(SP) // first argument in x86-32 ABI
// (CALL AX elided)
681 // Restore registers, g, stack pointer.
// Recompute SP from stack_hi minus the saved depth, in case the
// stack moved during a callback.
684 MOVL (g_stack+stack_hi)(DI), SI
690 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
691 // Turn the fn into a Go func (by taking its address) and call
692 // cgocallback_gofunc.
693 TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
// (fn/frame argument copying elided)
698 MOVL framesize+8(FP), AX
700 MOVL $runtime·cgocallback_gofunc(SB), AX
// (indirect CALL and RET elided)
704 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
705 // See cgocall.c for more details.
// Entry point for C code calling back into Go. Borrows an m via needm
// if the thread has none, switches to m->curg, runs cgocallbackg,
// then restores everything and (if borrowed) returns the m via dropm.
706 TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
709 // If g is nil, Go did not create the current thread.
710 // Call needm to obtain one for temporary use.
711 // In this case, we're running on the thread stack, so there's
712 // lots of space, but the linker doesn't know. Hide the call from
713 // the linker analysis by using an indirect call through AX.
// (g nil-check elided)
724 MOVL BP, DX // saved copy of oldm
728 MOVL $runtime·needm(SB), AX
735 // Set m->sched.sp = SP, so that if a panic happens
736 // during the function we are about to execute, it will
737 // have a valid SP to run on the g0 stack.
738 // The next few lines (after the havem label)
739 // will save this SP onto the stack and then write
740 // the same SP back to m->sched.sp. That seems redundant,
741 // but if an unrecovered panic happens, unwindm will
742 // restore the g->sched.sp from the stack location
743 // and then onM will try to use it. If we don't set it here,
744 // that restored SP will be uninitialized (typically 0) and
745 // will not be usable.
747 MOVL SP, (g_sched+gobuf_sp)(SI)
750 // Now there's a valid m, and we're running on its m->g0.
751 // Save current m->g0->sched.sp on stack and then set it to SP.
752 // Save current sp in m->g0->sched.sp in preparation for
753 // switch back to m->curg stack.
754 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
756 MOVL (g_sched+gobuf_sp)(SI), AX
// (push of AX onto the stack elided)
758 MOVL SP, (g_sched+gobuf_sp)(SI)
760 // Switch to m->curg stack and call runtime.cgocallbackg.
761 // Because we are taking over the execution of m->curg
762 // but *not* resuming what had been running, we need to
763 // save that information (m->curg->sched) so we can restore it.
764 // We can restore m->curg->sched.sp easily, because calling
765 // runtime.cgocallbackg leaves SP unchanged upon return.
766 // To save m->curg->sched.pc, we push it onto the stack.
767 // This has the added benefit that it looks to the traceback
768 // routine like cgocallbackg is going to return to that
769 // PC (because the frame we allocate below has the same
770 // size as cgocallback_gofunc's frame declared above)
771 // so that the traceback will seamlessly trace back into
772 // the earlier calls.
774 // In the new goroutine, 0(SP) holds the saved oldm (DX) register.
775 // 4(SP) and 8(SP) are unused.
// (curg load and g switch elided)
778 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
779 MOVL (g_sched+gobuf_pc)(SI), BP
// (frame construction and PC push elided)
783 CALL runtime·cgocallbackg(SB)
786 // Restore g->sched (== m->curg->sched) from saved values.
// (pops restoring BP/DI elided)
790 MOVL BP, (g_sched+gobuf_pc)(SI)
792 MOVL DI, (g_sched+gobuf_sp)(SI)
794 // Switch back to m->g0's stack and restore m->g0->sched.sp.
795 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
796 // so we do not have to restore it.)
// (g switch back to g0 elided)
801 MOVL (g_sched+gobuf_sp)(SI), SP
// (pop of the saved sched.sp into AX elided)
803 MOVL AX, (g_sched+gobuf_sp)(SI)
805 // If the m on entry was nil, we called needm above to borrow an m
806 // for the duration of the call. Since the call is over, return it with dropm.
// (oldm nil-check elided)
809 MOVL $runtime·dropm(SB), AX
// (indirect CALL and final RET elided)
815 // void setg(G*); set g. for use by needm.
// Stores the g pointer into thread-local storage. Body elided
// (per-OS TLS write paths not visible here).
816 TEXT runtime·setg(SB), NOSPLIT, $0-4
832 // void setg_gcc(G*); set g. for use by gcc
// gcc-ABI wrapper around the same TLS store; body elided.
833 TEXT setg_gcc<>(SB), NOSPLIT, $0
839 // check that SP is in range [g->stack.lo, g->stack.hi)
// Aborts (elided) if SP is outside the current g's stack bounds.
840 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
843 CMPL (g_stack+stack_hi)(AX), SP
846 CMPL SP, (g_stack+stack_lo)(AX)
// getcallerpc(argp): the return PC sits one word below the address
// of the first argument on 386.
851 TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
852 MOVL argp+0(FP),AX // addr of first arg
853 MOVL -4(AX),AX // get calling pc
// gogetcallerpc: Go-callable variant of the same computation.
857 TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8
858 MOVL p+0(FP),AX // addr of first arg
859 MOVL -4(AX),AX // get calling pc
// setcallerpc: overwrite the caller's saved return PC (used by panic
// machinery). The new PC load into BX is elided in this view.
863 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
864 MOVL argp+0(FP),AX // addr of first arg
866 MOVL BX, -4(AX) // set calling pc
// getcallersp: body elided in this view.
869 TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
874 // func gogetcallersp(p unsafe.Pointer) uintptr
875 TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-8
876 MOVL p+0(FP),AX // addr of first arg
880 // int64 runtime·cputicks(void), so really
881 // void runtime·cputicks(int64 *ticks)
// Returns the TSC; the RDTSC instruction itself is elided here.
882 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
884 MOVL AX, ret_lo+0(FP)
885 MOVL DX, ret_hi+4(FP)
// ldt0setup: install an LDT entry pointing at tls0 so the g register
// can live behind a segment register.
888 TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
889 // set up ldt 7 to point at tls0
890 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
891 // the entry number is just a hint. setldt will set up GS with what it used.
893 LEAL runtime·tls0(SB), AX
895 MOVL $32, 8(SP) // sizeof(tls array)
896 CALL runtime·setldt(SB)
// emptyfunc: does nothing; exists so rt0_go can probe the stack check.
899 TEXT runtime·emptyfunc(SB),0,$0-0
// abort: deliberately faults; body (e.g. INT $3) elided.
902 TEXT runtime·abort(SB),NOSPLIT,$0-0
905 // hash function using AES hardware instructions
// aeshash(p, s, h): hash s bytes at p with seed h (from +8(FP),
// loaded inside aeshashbody).
906 TEXT runtime·aeshash(SB),NOSPLIT,$0-16
907 MOVL p+0(FP), AX // ptr to data
908 MOVL s+4(FP), CX // size
909 JMP runtime·aeshashbody(SB)
// aeshashstr: unpack the string header (data ptr, length) first.
911 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-16
912 MOVL p+0(FP), AX // ptr to string object
913 // s+4(FP) is ignored, it is always sizeof(String)
914 MOVL 4(AX), CX // length of string
915 MOVL (AX), AX // string data
916 JMP runtime·aeshashbody(SB)
// aeshashbody: AX = data ptr, CX = length, seed at h+8(FP).
// Mixes seed+size into X0, then AESENC rounds with the precomputed
// key schedule. NOTE(review): the main >16-byte loop, tail handling
// branches, and the final result store are elided in this view.
920 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-16
921 MOVL h+8(FP), X0 // seed to low 32 bits of xmm0
922 PINSRD $1, CX, X0 // size to next 32 bits of xmm0
923 MOVO runtime·aeskeysched+0(SB), X2
924 MOVO runtime·aeskeysched+16(SB), X3
936 // 1-16 bytes remaining
938 // This load may overlap with the previous load above.
939 // We'll hash some bytes twice, but that's ok.
940 MOVOU -16(AX)(CX*1), X1
945 JE finalize // 0 bytes
950 // 16 bytes loaded at this address won't cross
951 // a page boundary, so we can load it directly.
// Mask off the bytes beyond the requested length (masks<> keeps the
// low CX*... bytes; see the masks<> table below in the file).
954 PAND masks<>(SB)(CX*8), X1
957 // address ends in 1111xxxx. Might be up against
958 // a page boundary, so load ending at last byte.
959 // Then shift bytes down using pshufb.
960 MOVOU -16(AX)(CX*1), X1
962 PSHUFB shifts<>(SB)(CX*8), X1
964 // incorporate partial block into hash
// aeshash32: fixed 4-byte hash — seed in X0 low dword, data in the
// next dword, three AES rounds. Result store elided in this view.
975 TEXT runtime·aeshash32(SB),NOSPLIT,$0-16
976 MOVL p+0(FP), AX // ptr to data
977 // s+4(FP) is ignored, it is always sizeof(int32)
978 MOVL h+8(FP), X0 // seed
979 PINSRD $1, (AX), X0 // data
980 AESENC runtime·aeskeysched+0(SB), X0
981 AESENC runtime·aeskeysched+16(SB), X0
982 AESENC runtime·aeskeysched+0(SB), X0
// aeshash64: fixed 8-byte hash — data in X0 low qword, seed above it.
986 TEXT runtime·aeshash64(SB),NOSPLIT,$0-16
987 MOVL p+0(FP), AX // ptr to data
988 // s+4(FP) is ignored, it is always sizeof(int64)
989 MOVQ (AX), X0 // data
990 PINSRD $2, h+8(FP), X0 // seed
991 AESENC runtime·aeskeysched+0(SB), X0
992 AESENC runtime·aeskeysched+16(SB), X0
993 AESENC runtime·aeskeysched+0(SB), X0
997 // simple mask to get rid of data in the high part of the register.
// masks<> is 16 entries of 16 bytes; entry i keeps the low i bytes
// (0x00..0xff per byte) and zeroes the rest. Indexed as (CX*8) above
// NOTE(review): that stride implies 8-byte indexing into 16-byte
// rows — confirm against the elided surrounding code.
998 DATA masks<>+0x00(SB)/4, $0x00000000
999 DATA masks<>+0x04(SB)/4, $0x00000000
1000 DATA masks<>+0x08(SB)/4, $0x00000000
1001 DATA masks<>+0x0c(SB)/4, $0x00000000
1003 DATA masks<>+0x10(SB)/4, $0x000000ff
1004 DATA masks<>+0x14(SB)/4, $0x00000000
1005 DATA masks<>+0x18(SB)/4, $0x00000000
1006 DATA masks<>+0x1c(SB)/4, $0x00000000
1008 DATA masks<>+0x20(SB)/4, $0x0000ffff
1009 DATA masks<>+0x24(SB)/4, $0x00000000
1010 DATA masks<>+0x28(SB)/4, $0x00000000
1011 DATA masks<>+0x2c(SB)/4, $0x00000000
1013 DATA masks<>+0x30(SB)/4, $0x00ffffff
1014 DATA masks<>+0x34(SB)/4, $0x00000000
1015 DATA masks<>+0x38(SB)/4, $0x00000000
1016 DATA masks<>+0x3c(SB)/4, $0x00000000
1018 DATA masks<>+0x40(SB)/4, $0xffffffff
1019 DATA masks<>+0x44(SB)/4, $0x00000000
1020 DATA masks<>+0x48(SB)/4, $0x00000000
1021 DATA masks<>+0x4c(SB)/4, $0x00000000
1023 DATA masks<>+0x50(SB)/4, $0xffffffff
1024 DATA masks<>+0x54(SB)/4, $0x000000ff
1025 DATA masks<>+0x58(SB)/4, $0x00000000
1026 DATA masks<>+0x5c(SB)/4, $0x00000000
1028 DATA masks<>+0x60(SB)/4, $0xffffffff
1029 DATA masks<>+0x64(SB)/4, $0x0000ffff
1030 DATA masks<>+0x68(SB)/4, $0x00000000
1031 DATA masks<>+0x6c(SB)/4, $0x00000000
1033 DATA masks<>+0x70(SB)/4, $0xffffffff
1034 DATA masks<>+0x74(SB)/4, $0x00ffffff
1035 DATA masks<>+0x78(SB)/4, $0x00000000
1036 DATA masks<>+0x7c(SB)/4, $0x00000000
1038 DATA masks<>+0x80(SB)/4, $0xffffffff
1039 DATA masks<>+0x84(SB)/4, $0xffffffff
1040 DATA masks<>+0x88(SB)/4, $0x00000000
1041 DATA masks<>+0x8c(SB)/4, $0x00000000
1043 DATA masks<>+0x90(SB)/4, $0xffffffff
1044 DATA masks<>+0x94(SB)/4, $0xffffffff
1045 DATA masks<>+0x98(SB)/4, $0x000000ff
1046 DATA masks<>+0x9c(SB)/4, $0x00000000
1048 DATA masks<>+0xa0(SB)/4, $0xffffffff
1049 DATA masks<>+0xa4(SB)/4, $0xffffffff
1050 DATA masks<>+0xa8(SB)/4, $0x0000ffff
1051 DATA masks<>+0xac(SB)/4, $0x00000000
1053 DATA masks<>+0xb0(SB)/4, $0xffffffff
1054 DATA masks<>+0xb4(SB)/4, $0xffffffff
1055 DATA masks<>+0xb8(SB)/4, $0x00ffffff
1056 DATA masks<>+0xbc(SB)/4, $0x00000000
1058 DATA masks<>+0xc0(SB)/4, $0xffffffff
1059 DATA masks<>+0xc4(SB)/4, $0xffffffff
1060 DATA masks<>+0xc8(SB)/4, $0xffffffff
1061 DATA masks<>+0xcc(SB)/4, $0x00000000
1063 DATA masks<>+0xd0(SB)/4, $0xffffffff
1064 DATA masks<>+0xd4(SB)/4, $0xffffffff
1065 DATA masks<>+0xd8(SB)/4, $0xffffffff
1066 DATA masks<>+0xdc(SB)/4, $0x000000ff
1068 DATA masks<>+0xe0(SB)/4, $0xffffffff
1069 DATA masks<>+0xe4(SB)/4, $0xffffffff
1070 DATA masks<>+0xe8(SB)/4, $0xffffffff
1071 DATA masks<>+0xec(SB)/4, $0x0000ffff
1073 DATA masks<>+0xf0(SB)/4, $0xffffffff
1074 DATA masks<>+0xf4(SB)/4, $0xffffffff
1075 DATA masks<>+0xf8(SB)/4, $0xffffffff
1076 DATA masks<>+0xfc(SB)/4, $0x00ffffff
1078 GLOBL masks<>(SB),RODATA,$256
1080 // these are arguments to pshufb. They move data down from
1081 // the high bytes of the register to the low bytes of the register.
1082 // index is how many bytes to move.
// shifts<> is 16 entries of 16 bytes; entry i selects the top i bytes
// of the source register into the bottom of the destination, with
// 0xff (zero the lane) everywhere else.
1083 DATA shifts<>+0x00(SB)/4, $0x00000000
1084 DATA shifts<>+0x04(SB)/4, $0x00000000
1085 DATA shifts<>+0x08(SB)/4, $0x00000000
1086 DATA shifts<>+0x0c(SB)/4, $0x00000000
1088 DATA shifts<>+0x10(SB)/4, $0xffffff0f
1089 DATA shifts<>+0x14(SB)/4, $0xffffffff
1090 DATA shifts<>+0x18(SB)/4, $0xffffffff
1091 DATA shifts<>+0x1c(SB)/4, $0xffffffff
1093 DATA shifts<>+0x20(SB)/4, $0xffff0f0e
1094 DATA shifts<>+0x24(SB)/4, $0xffffffff
1095 DATA shifts<>+0x28(SB)/4, $0xffffffff
1096 DATA shifts<>+0x2c(SB)/4, $0xffffffff
1098 DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
1099 DATA shifts<>+0x34(SB)/4, $0xffffffff
1100 DATA shifts<>+0x38(SB)/4, $0xffffffff
1101 DATA shifts<>+0x3c(SB)/4, $0xffffffff
1103 DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
1104 DATA shifts<>+0x44(SB)/4, $0xffffffff
1105 DATA shifts<>+0x48(SB)/4, $0xffffffff
1106 DATA shifts<>+0x4c(SB)/4, $0xffffffff
1108 DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
1109 DATA shifts<>+0x54(SB)/4, $0xffffff0f
1110 DATA shifts<>+0x58(SB)/4, $0xffffffff
1111 DATA shifts<>+0x5c(SB)/4, $0xffffffff
1113 DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
1114 DATA shifts<>+0x64(SB)/4, $0xffff0f0e
1115 DATA shifts<>+0x68(SB)/4, $0xffffffff
1116 DATA shifts<>+0x6c(SB)/4, $0xffffffff
1118 DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
1119 DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
1120 DATA shifts<>+0x78(SB)/4, $0xffffffff
1121 DATA shifts<>+0x7c(SB)/4, $0xffffffff
1123 DATA shifts<>+0x80(SB)/4, $0x0b0a0908
1124 DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
1125 DATA shifts<>+0x88(SB)/4, $0xffffffff
1126 DATA shifts<>+0x8c(SB)/4, $0xffffffff
1128 DATA shifts<>+0x90(SB)/4, $0x0a090807
1129 DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
1130 DATA shifts<>+0x98(SB)/4, $0xffffff0f
1131 DATA shifts<>+0x9c(SB)/4, $0xffffffff
1133 DATA shifts<>+0xa0(SB)/4, $0x09080706
1134 DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
1135 DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
1136 DATA shifts<>+0xac(SB)/4, $0xffffffff
1138 DATA shifts<>+0xb0(SB)/4, $0x08070605
1139 DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
1140 DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
1141 DATA shifts<>+0xbc(SB)/4, $0xffffffff
1143 DATA shifts<>+0xc0(SB)/4, $0x07060504
1144 DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
1145 DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
1146 DATA shifts<>+0xcc(SB)/4, $0xffffffff
1148 DATA shifts<>+0xd0(SB)/4, $0x06050403
1149 DATA shifts<>+0xd4(SB)/4, $0x0a090807
1150 DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
1151 DATA shifts<>+0xdc(SB)/4, $0xffffff0f
1153 DATA shifts<>+0xe0(SB)/4, $0x05040302
1154 DATA shifts<>+0xe4(SB)/4, $0x09080706
1155 DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
1156 DATA shifts<>+0xec(SB)/4, $0xffff0f0e
1158 DATA shifts<>+0xf0(SB)/4, $0x04030201
1159 DATA shifts<>+0xf4(SB)/4, $0x08070605
1160 DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
1161 DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
1163 GLOBL shifts<>(SB),RODATA,$256
// memeq(a, b, size) bool — delegates to memeqbody.
// NOTE(review): argument loads and result store are elided in this view.
1165 TEXT runtime·memeq(SB),NOSPLIT,$0-13
1169 CALL runtime·memeqbody(SB)
1173 // eqstring tests whether two strings are equal.
1174 // See runtime_test.go:eqstring_generic for
1175 // equivalent Go code.
// Fast paths (length mismatch, identical pointers) are elided here.
1176 TEXT runtime·eqstring(SB),NOSPLIT,$0-17
1177 MOVL s1len+4(FP), AX
1178 MOVL s2len+12(FP), BX
1181 MOVL s1str+0(FP), SI
1182 MOVL s2str+8(FP), DI
1185 CALL runtime·memeqbody(SB)
// bytes.Equal implemented here for speed; compares lengths then bodies.
1195 TEXT bytes·Equal(SB),NOSPLIT,$0-25
1196 MOVL a_len+4(FP), BX
1197 MOVL b_len+16(FP), CX
// (length comparison, pointer loads elided)
1203 CALL runtime·memeqbody(SB)
// memeqbody: a in SI, b in DI, count in BX (register convention
// inferred from the visible tail loads — confirm against full source);
// result presumably in AX. Main loops elided.
1211 TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
1217 // 64 bytes at a time using xmm registers
// Bit 26 of CPUID EDX = SSE2 availability.
1221 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
1246 // 4 bytes at a time using 32-bit register
1259 // remaining 0-4 bytes
// Overlapping tail load: reads the last 4 bytes of each buffer.
1261 MOVL -4(SI)(BX*1), CX
1262 MOVL -4(DI)(BX*1), DX
1278 // load at SI won't cross a page boundary.
1282 // address ends in 111111xx. Load up to bytes we want, move to correct position.
1283 MOVL -4(SI)(BX*1), SI
1294 MOVL -4(DI)(BX*1), DI
// cmpstring(s1, s2) int — three-way compare; delegates to cmpbody.
1304 TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
1305 MOVL s1_base+0(FP), SI
1306 MOVL s1_len+4(FP), BX
1307 MOVL s2_base+8(FP), DI
1308 MOVL s2_len+12(FP), DX
1309 CALL runtime·cmpbody(SB)
// cmpbytes: slice variant; argument loads elided in this view.
1313 TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
1318 CALL runtime·cmpbody(SB)
// bytes.IndexByte: returns index of first occurrence or -1.
// Search loop elided; only the not-found path is visible.
1322 TEXT bytes·IndexByte(SB),NOSPLIT,$0
1324 MOVL s_len+4(FP), CX
1329 MOVL $-1, ret+16(FP)
// strings.IndexByte: same, with the string layout's result offset.
1336 TEXT strings·IndexByte(SB),NOSPLIT,$0
1338 MOVL s_len+4(FP), CX
1343 MOVL $-1, ret+12(FP)
// cmpbody: SI/BX = a base/len, DI/DX = b base/len; result in AX
// (-1/0/+1). Compares min(alen, blen) bytes, then lengths.
// NOTE(review): the SSE2 16-byte loop and 4-byte loop bodies are
// largely elided in this view.
1357 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
1362 CMOVLLT BX, BP // BP = min(alen, blen)
1365 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
// PMOVMSKB produces a 16-bit equality mask; invert to find mismatches.
1374 XORL $0xffff, AX // convert EQ to NE
1375 JNE diff16 // branch if at least one byte is not equal
1382 BSFL AX, BX // index of first byte that differs
1387 LEAL -1(AX*2), AX // convert 1/0 to +1/-1
// Tail: compare the last 4 bytes of the common prefix bytewise via
// byte-swap + highest differing bit.
1403 MOVL -4(SI)(BP*1), AX
1404 MOVL -4(DI)(BP*1), CX
1409 BSWAPL AX // reverse order of bytes
1411 XORL AX, CX // find bit differences
1412 BSRL CX, CX // index of highest bit difference
1413 SHRL CX, AX // move a's bit to bottom
1414 ANDL $1, AX // mask bit
1415 LEAL -1(AX*2), AX // 1/0 => +1/-1
1418 // 0-3 bytes in common
1430 MOVL -4(SI)(BP*1), SI
1441 MOVL -4(DI)(BP*1), DI
1446 BSWAPL SI // reverse order of bytes
1448 XORL SI, DI // find bit differences
1450 BSRL DI, CX // index of highest bit difference
1451 SHRL CX, SI // move a's bit to bottom
1452 ANDL $1, SI // mask bit
1453 LEAL -1(SI*2), AX // 1/0 => +1/-1
1456 // all the bytes in common are the same, so we just need
1457 // to compare the lengths.
// (length compare elided) flags from alen-blen drive the SETcc pair:
1462 SETGT AX // 1 if alen > blen
1463 SETEQ CX // 1 if alen == blen
1464 LEAL -1(CX)(AX*2), AX // 1,0,-1 result
1467 // A Duff's device for zeroing memory.
1468 // The compiler jumps to computed addresses within
1469 // this routine to zero chunks of memory. Do not
1470 // change this code without also changing the code
1471 // in ../../cmd/8g/ggen.c:clearfat.
1473 // DI: ptr to memory to be zeroed
1474 // DI is updated as a side effect.
// NOTE(review): the repeated STOSL sequence is elided in this view;
// the compiler computes entry offsets into it, so its exact layout
// must not be altered.
1475 TEXT runtime·duffzero(SB), NOSPLIT, $0-0
1606 // A Duff's device for copying memory.
1607 // The compiler jumps to computed addresses within
1608 // this routine to copy chunks of memory. Source
1609 // and destination must not overlap. Do not
1610 // change this code without also changing the code
1611 // in ../../cmd/6g/cgen.c:sgen.
1612 // SI: ptr to source memory
1613 // DI: ptr to destination memory
1614 // SI and DI are updated as a side effect.
1616 // NOTE: this is equivalent to a sequence of MOVSL but
1617 // for some reason MOVSL is really slow.
// NOTE(review): the repeated load/store sequence is elided in this view.
1618 TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
// fastrand1: xorshift-style PRNG over m->fastrand; shift step elided.
2261 TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
2265 MOVL m_fastrand(AX), DX
2268 XORL $0x88888eef, DX
2270 MOVL DX, m_fastrand(AX)
// return0: returns 0 in AX (used as a trampoline); body elided.
2274 TEXT runtime·return0(SB), NOSPLIT, $0
2278 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
2279 // Must obey the gcc calling convention.
2280 TEXT _cgo_topofstack(SB),NOSPLIT,$0
// (g/m/curg chain loads elided)
2285 MOVL (g_stack+stack_hi)(AX), AX
2288 // The top-most function running on a goroutine
2289 // returns to goexit+PCQuantum.
2290 TEXT runtime·goexit(SB),NOSPLIT,$0-0
2292 CALL runtime·goexit1(SB) // does not return