1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include "zasm_GOOS_GOARCH.h"
9 TEXT runtime·rt0_go(SB),NOSPLIT,$0
10 // copy arguments forward on an even stack
14 SUBL $128, SP // plenty of scratch
21 // create istack out of the given (operating system) stack.
22 MOVL $runtime·g0(SB), DI
23 LEAL (-64*1024+104)(SP), BX
24 MOVL BX, g_stackguard0(DI)
25 MOVL BX, g_stackguard1(DI)
26 MOVL BX, (g_stack+stack_lo)(DI)
27 MOVL SP, (g_stack+stack_hi)(DI)
29 // find out information about the processor we're on
36 MOVL CX, runtime·cpuid_ecx(SB)
37 MOVL DX, runtime·cpuid_edx(SB)
41 LEAL runtime·tls0(SB), DI
42 CALL runtime·settls(SB)
44 // store through it, to make sure it works
47 MOVQ runtime·tls0(SB), AX
52 // set the per-goroutine and per-mach "registers"
54 LEAL runtime·g0(SB), CX
56 LEAL runtime·m0(SB), AX
63 CLD // convention is D is always left cleared
64 CALL runtime·check(SB)
66 MOVL 16(SP), AX // copy argc
68 MOVL 24(SP), AX // copy argv
71 CALL runtime·osinit(SB)
72 CALL runtime·schedinit(SB)
74 // create a new goroutine to start program
75 MOVL $runtime·main·f(SB), AX // entry
78 CALL runtime·newproc(SB)
81 CALL runtime·mstart(SB)
83 MOVL $0xf1, 0xf1 // crash
86 DATA runtime·main·f+0(SB)/4,$runtime·main(SB)
87 GLOBL runtime·main·f(SB),RODATA,$4
89 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
93 TEXT runtime·asminit(SB),NOSPLIT,$0-0
94 // No per-thread init.
101 // void gosave(Gobuf*)
102 // Save the caller's SP and PC in the Gobuf and zero its ctxt and ret fields; setjmp
103 TEXT runtime·gosave(SB), NOSPLIT, $0-4
104 MOVL buf+0(FP), AX // AX = gobuf pointer argument
105 LEAL buf+0(FP), BX // address of the first argument = caller's SP
106 MOVL BX, gobuf_sp(AX) // gobuf.sp = caller's SP
107 MOVL 0(SP), BX // word at the top of the stack = caller's PC
108 MOVL BX, gobuf_pc(AX) // gobuf.pc = caller's PC
109 MOVL $0, gobuf_ctxt(AX) // gobuf.ctxt = 0
110 MOVQ $0, gobuf_ret(AX) // gobuf.ret = 0 (64-bit store)
117 // Restore state from Gobuf (longjmp): reload SP, ctxt, ret and PC, clearing the saved sp/ret/ctxt fields.
118 TEXT runtime·gogo(SB), NOSPLIT, $0-4
119 MOVL buf+0(FP), BX // BX = gobuf pointer argument
121 MOVL 0(DX), CX // make sure g != nil
124 MOVL gobuf_sp(BX), SP // restore SP
125 MOVL gobuf_ctxt(BX), DX // DX = saved ctxt
126 MOVQ gobuf_ret(BX), AX // AX = saved ret (64-bit load)
127 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
128 MOVQ $0, gobuf_ret(BX) // clear saved ret
129 MOVL $0, gobuf_ctxt(BX) // clear saved ctxt
130 MOVL gobuf_pc(BX), BX // BX = saved PC (replaces the gobuf pointer)
133 // func mcall(fn func(*g))
134 // Switch to m->g0's stack, call fn(g).
135 // Fn must never return. It should gogo(&g->sched)
136 // to keep running g.
137 TEXT runtime·mcall(SB), NOSPLIT, $0-4
141 MOVL g(CX), AX // AX = g; save state in g->sched
142 MOVL 0(SP), BX // caller's PC
143 MOVL BX, (g_sched+gobuf_pc)(AX) // g->sched.pc = caller's PC
144 LEAL fn+0(FP), BX // caller's SP
145 MOVL BX, (g_sched+gobuf_sp)(AX) // g->sched.sp = caller's SP
146 MOVL AX, (g_sched+gobuf_g)(AX) // g->sched.g = g
148 // switch to m->g0 & its stack, call fn
152 CMPL SI, AX // if g == m->g0 call badmcall
154 MOVL $runtime·badmcall(SB), AX // AX = address of badmcall
156 MOVL SI, g(CX) // g = m->g0
157 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
163 MOVL $runtime·badmcall2(SB), AX // AX = address of badmcall2
167 // switchtoM is a dummy routine that onM leaves at the bottom
168 // of the G stack. We need to distinguish the routine that
169 // lives at the bottom of the G stack from the one that lives
170 // at the top of the M stack because the one at the top of
171 // the M stack terminates the stack walk (see topofstack()).
172 TEXT runtime·switchtoM(SB), NOSPLIT, $0-0
175 // func onM_signalok(fn func())
176 TEXT runtime·onM_signalok(SB), NOSPLIT, $0-4
178 MOVL g(CX), AX // AX = g
179 MOVL g_m(AX), BX // BX = m
180 MOVL m_gsignal(BX), DX // DX = gsignal
186 MOVL fn+0(FP), DI // DI = fn
192 // func onM(fn func())
193 TEXT runtime·onM(SB), NOSPLIT, $0-4
194 MOVL fn+0(FP), DI // DI = fn
196 MOVL g(CX), AX // AX = g
197 MOVL g_m(AX), BX // BX = m
199 MOVL m_g0(BX), DX // DX = g0
207 // Not g0, not curg. Must be gsignal, but that's not allowed.
208 // Hide call from linker nosplit analysis.
209 MOVL $runtime·badonm(SB), AX
213 // save our state in g->sched. Pretend to
214 // be switchtoM if the G stack is scanned.
215 MOVL $runtime·switchtoM(SB), SI
216 MOVL SI, (g_sched+gobuf_pc)(AX)
217 MOVL SP, (g_sched+gobuf_sp)(AX)
218 MOVL AX, (g_sched+gobuf_g)(AX)
222 MOVL (g_sched+gobuf_sp)(DX), SP
224 // call target function
235 MOVL (g_sched+gobuf_sp)(AX), SP
236 MOVL $0, (g_sched+gobuf_sp)(AX)
240 // already on m stack, just call directly
247 * support for morestack
250 // Called during function prolog when more stack is needed.
252 // The traceback routines see morestack on a g0 as being
253 // the top of a stack (for example, morestack calling newstack
254 // calling the scheduler calling newm calling gc), so we must
255 // record an argument size. For that purpose, it has no arguments.
256 TEXT runtime·morestack(SB),NOSPLIT,$0-0
261 // Cannot grow scheduler stack (m->g0).
267 // Cannot grow signal stack (m->gsignal).
268 MOVL m_gsignal(BX), SI
274 // Set m->morebuf to f's caller.
275 MOVL 8(SP), AX // f's caller's PC
276 MOVL AX, (m_morebuf+gobuf_pc)(BX)
277 LEAL 16(SP), AX // f's caller's SP
278 MOVL AX, (m_morebuf+gobuf_sp)(BX)
281 MOVL SI, (m_morebuf+gobuf_g)(BX)
283 // Set g->sched to context in f.
284 MOVL 0(SP), AX // f's PC
285 MOVL AX, (g_sched+gobuf_pc)(SI)
286 MOVL SI, (g_sched+gobuf_g)(SI)
287 LEAL 8(SP), AX // f's SP
288 MOVL AX, (g_sched+gobuf_sp)(SI)
289 MOVL DX, (g_sched+gobuf_ctxt)(SI)
291 // Call newstack on m->g0's stack.
294 MOVL (g_sched+gobuf_sp)(BX), SP
295 CALL runtime·newstack(SB)
296 MOVL $0, 0x1003 // crash if newstack returns
299 // morestack trampolines
300 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
302 JMP runtime·morestack(SB)
304 // reflectcall: call a function with the given argument list
305 // func reflectcall(f *FuncVal, arg *byte, argsize, retoffset uint32).
306 // we don't have variable-sized frames, so we use a small number
307 // of constant-sized-frame functions to encode a few bits of size in the pc.
308 // Caution: ugly multiline assembly macros in your future!
310 #define DISPATCH(NAME,MAXSIZE) \
313 MOVL $NAME(SB), AX; \
315 // Note: can't just "JMP NAME(SB)" - bad inlining results.
317 TEXT ·reflectcall(SB), NOSPLIT, $0-16
318 MOVLQZX argsize+8(FP), CX
319 DISPATCH(runtime·call16, 16)
320 DISPATCH(runtime·call32, 32)
321 DISPATCH(runtime·call64, 64)
322 DISPATCH(runtime·call128, 128)
323 DISPATCH(runtime·call256, 256)
324 DISPATCH(runtime·call512, 512)
325 DISPATCH(runtime·call1024, 1024)
326 DISPATCH(runtime·call2048, 2048)
327 DISPATCH(runtime·call4096, 4096)
328 DISPATCH(runtime·call8192, 8192)
329 DISPATCH(runtime·call16384, 16384)
330 DISPATCH(runtime·call32768, 32768)
331 DISPATCH(runtime·call65536, 65536)
332 DISPATCH(runtime·call131072, 131072)
333 DISPATCH(runtime·call262144, 262144)
334 DISPATCH(runtime·call524288, 524288)
335 DISPATCH(runtime·call1048576, 1048576)
336 DISPATCH(runtime·call2097152, 2097152)
337 DISPATCH(runtime·call4194304, 4194304)
338 DISPATCH(runtime·call8388608, 8388608)
339 DISPATCH(runtime·call16777216, 16777216)
340 DISPATCH(runtime·call33554432, 33554432)
341 DISPATCH(runtime·call67108864, 67108864)
342 DISPATCH(runtime·call134217728, 134217728)
343 DISPATCH(runtime·call268435456, 268435456)
344 DISPATCH(runtime·call536870912, 536870912)
345 DISPATCH(runtime·call1073741824, 1073741824)
346 MOVL $runtime·badreflectcall(SB), AX
349 #define CALLFN(NAME,MAXSIZE) \
350 TEXT NAME(SB), WRAPPER, $MAXSIZE-16; \
352 /* copy arguments to stack */ \
353 MOVL argptr+4(FP), SI; \
354 MOVL argsize+8(FP), CX; \
357 /* call function */ \
361 /* copy return values back */ \
362 MOVL argptr+4(FP), DI; \
363 MOVL argsize+8(FP), CX; \
364 MOVL retoffset+12(FP), BX; \
375 CALLFN(·call128, 128)
376 CALLFN(·call256, 256)
377 CALLFN(·call512, 512)
378 CALLFN(·call1024, 1024)
379 CALLFN(·call2048, 2048)
380 CALLFN(·call4096, 4096)
381 CALLFN(·call8192, 8192)
382 CALLFN(·call16384, 16384)
383 CALLFN(·call32768, 32768)
384 CALLFN(·call65536, 65536)
385 CALLFN(·call131072, 131072)
386 CALLFN(·call262144, 262144)
387 CALLFN(·call524288, 524288)
388 CALLFN(·call1048576, 1048576)
389 CALLFN(·call2097152, 2097152)
390 CALLFN(·call4194304, 4194304)
391 CALLFN(·call8388608, 8388608)
392 CALLFN(·call16777216, 16777216)
393 CALLFN(·call33554432, 33554432)
394 CALLFN(·call67108864, 67108864)
395 CALLFN(·call134217728, 134217728)
396 CALLFN(·call268435456, 268435456)
397 CALLFN(·call536870912, 536870912)
398 CALLFN(·call1073741824, 1073741824)
400 // bool cas(int32 *val, int32 old, int32 new)
407 TEXT runtime·cas(SB), NOSPLIT, $0-17
421 TEXT runtime·casuintptr(SB), NOSPLIT, $0-17
424 TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-12
425 JMP runtime·atomicload(SB)
427 TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-12
428 JMP runtime·atomicload(SB)
430 TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-12
431 JMP runtime·atomicstore(SB)
433 // bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
441 TEXT runtime·cas64(SB), NOSPLIT, $0-25
456 // bool casp(void **val, void *old, void *new)
463 TEXT runtime·casp(SB), NOSPLIT, $0-17
477 // uint32 xadd(uint32 volatile *val, int32 delta)
481 TEXT runtime·xadd(SB), NOSPLIT, $0-12
491 TEXT runtime·xadd64(SB), NOSPLIT, $0-24
501 TEXT runtime·xchg(SB), NOSPLIT, $0-12
508 TEXT runtime·xchg64(SB), NOSPLIT, $0-24
515 TEXT runtime·xchgp(SB), NOSPLIT, $0-12
522 TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
525 TEXT runtime·procyield(SB),NOSPLIT,$0-0
526 MOVL cycles+0(FP), AX
533 TEXT runtime·atomicstorep(SB), NOSPLIT, $0-8
539 TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
545 TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
551 // void runtime·atomicor8(byte volatile*, byte);
552 TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
559 // void jmpdefer(fn, sp);
560 // called from deferreturn.
562 // 2. sub 5 bytes from the caller's return address
563 // 3. jmp to the argument
564 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
567 LEAL -8(BX), SP // SP = BX-8: caller sp after CALL
568 SUBL $5, (SP) // subtract 5 from the return address at the top of the stack: return to CALL again
570 JMP BX // but first run the deferred function
572 // asmcgocall(void(*fn)(void*), void *arg)
574 TEXT runtime·asmcgocall(SB),NOSPLIT,$0-8
578 // asmcgocall_errno(void(*fn)(void*), void *arg)
580 TEXT runtime·asmcgocall_errno(SB),NOSPLIT,$0-12
584 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
586 TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
590 // void setg(G*); set g. for use by needm.
592 TEXT runtime·setg(SB), NOSPLIT, $0-4
596 // check that SP is in range [g->stack.lo, g->stack.hi)
597 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
600 CMPL (g_stack+stack_hi)(AX), SP
603 CMPL SP, (g_stack+stack_lo)(AX)
608 TEXT runtime·memclr(SB),NOSPLIT,$0-8
623 TEXT runtime·getcallerpc(SB),NOSPLIT,$0-12
624 MOVL argp+0(FP),AX // AX = argp: addr of first arg
625 MOVL -8(AX),AX // AX = word 8 bytes below argp: the calling pc
629 TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-12
630 MOVL p+0(FP),AX // AX = p: addr of first arg
631 MOVL -8(AX),AX // AX = word 8 bytes below p: the calling pc
635 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
636 MOVL argp+0(FP),AX // AX = argp: addr of first arg
637 MOVL pc+4(FP), BX // BX = pc to set
638 MOVQ BX, -8(AX) // set calling pc: 64-bit store to the slot 8 bytes below argp
641 TEXT runtime·getcallersp(SB),NOSPLIT,$0-12
646 // func gogetcallersp(p unsafe.Pointer) uintptr
647 TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-12
648 MOVL p+0(FP),AX // addr of first arg
652 // int64 runtime·cputicks(void)
653 TEXT runtime·cputicks(SB),NOSPLIT,$0-0
660 // hash function using AES hardware instructions
661 // For now, our one amd64p32 system (NaCl) does not
662 // support using AES instructions, so we have not bothered to
663 // write the implementations. Can copy and adjust the ones
664 // in asm_amd64.s when the time comes.
666 TEXT runtime·aeshash(SB),NOSPLIT,$0-20
670 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20
674 TEXT runtime·aeshash32(SB),NOSPLIT,$0-20
678 TEXT runtime·aeshash64(SB),NOSPLIT,$0-20
682 TEXT runtime·memeq(SB),NOSPLIT,$0-17
686 CALL runtime·memeqbody(SB)
690 // eqstring tests whether two strings are equal.
691 // See runtime_test.go:eqstring_generic for
692 // equivalent Go code.
693 TEXT runtime·eqstring(SB),NOSPLIT,$0-17
695 MOVL s2len+12(FP), BX
702 CALL runtime·memeqbody(SB)
715 TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
721 // 64 bytes at a time using xmm registers
748 // 8 bytes at a time using 64-bit register
761 // remaining 0-8 bytes
781 // load at SI won't cross a page boundary.
785 // address ends in 11111xxx. Load up to bytes we want, move to correct position.
810 TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
811 MOVL s1_base+0(FP), SI // SI = s1 data pointer
812 MOVL s1_len+4(FP), BX // BX = s1 length
813 MOVL s2_base+8(FP), DI // DI = s2 data pointer
814 MOVL s2_len+12(FP), DX // DX = s2 length
815 CALL runtime·cmpbody(SB) // cmpbody is declared $0-0 (no stack arguments); presumably it consumes the registers loaded above
819 TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
824 CALL runtime·cmpbody(SB)
835 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
840 CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
851 XORQ $0xffff, AX // convert EQ to NE
852 JNE diff16 // branch if at least one byte is not equal
858 // AX = bit mask of differences
860 BSFQ AX, BX // index of first byte that differs
867 LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
870 // 0 through 16 bytes left, alen>=8, blen>=8
886 // AX and CX contain parts of a and b that differ.
888 BSWAPQ AX // reverse order of bytes
891 BSRQ CX, CX // index of highest bit difference
892 SHRQ CX, AX // move a's bit to bottom
893 ANDQ $1, AX // mask bit
894 LEAQ -1(AX*2), AX // 1/0 => +1/-1
897 // 0-7 bytes in common
899 LEAQ (R8*8), CX // bytes left -> bits left
900 NEGQ CX // - bits left (== 64 - bits left mod 64)
903 // load bytes of a into high bytes of AX
915 // load bytes of b into high bytes of BX
927 BSWAPQ SI // reverse order of bytes
929 XORQ SI, DI // find bit differences
931 BSRQ DI, CX // index of highest bit difference
932 SHRQ CX, SI // move a's bit to bottom
933 ANDQ $1, SI // mask bit
934 LEAQ -1(SI*2), AX // 1/0 => +1/-1
941 SETGT AX // 1 if alen > blen
942 SETEQ CX // 1 if alen == blen
943 LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
946 TEXT bytes·IndexByte(SB),NOSPLIT,$0
950 CALL runtime·indexbytebody(SB)
954 TEXT strings·IndexByte(SB),NOSPLIT,$0
958 CALL runtime·indexbytebody(SB)
968 TEXT runtime·indexbytebody(SB),NOSPLIT,$0
974 // round up to first 16-byte boundary
981 // search the beginning
986 // DI is 16-byte aligned; get ready to search using SSE instructions
988 // round down to last 16-byte boundary
993 // shuffle X0 around so that each byte contains c
1001 // move the next 16-byte chunk of the buffer into X1
1003 // compare bytes in X0 to X1
1005 // take the top bit of each byte in X1 and put the result in DX
1019 // if CX == 0, the zero flag will be set and we'll end up
1020 // returning a false success
1029 // handle for lengths < 16
1037 // we've found the chunk containing the byte
1038 // now just figure out which specific byte it is
1040 // get the index of the least significant set bit
1053 TEXT bytes·Equal(SB),NOSPLIT,$0-25
1054 MOVL a_len+4(FP), BX
1055 MOVL b_len+16(FP), CX
1061 CALL runtime·memeqbody(SB)
1066 TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
1070 MOVL m_fastrand(AX), DX
1073 XORL $0x88888eef, DX
1075 MOVL DX, m_fastrand(AX)
1079 TEXT runtime·return0(SB), NOSPLIT, $0
1083 // The top-most function running on a goroutine
1084 // returns to goexit+PCQuantum.
1085 TEXT runtime·goexit(SB),NOSPLIT,$0-0
1087 CALL runtime·goexit1(SB) // does not return