1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// rt0_go is the runtime's startup path. The instructions visible here reserve
// scratch space on the stack, record g0's stack bounds, save two CPUID words,
// call settls with tls0, run check/osinit/schedinit, call newproc with
// runtime·main·f as the entry, and then call mstart.
10 TEXT runtime·rt0_go(SB),NOSPLIT,$0
11 // copy arguments forward on an even stack
15 SUBL $128, SP // plenty of scratch
22 // create istack out of the given (operating system) stack.
23 MOVL $runtime·g0(SB), DI
// BX = SP - 64KB + 104; the same value seeds both stack guards and stack.lo.
24 LEAL (-64*1024+104)(SP), BX
25 MOVL BX, g_stackguard0(DI)
26 MOVL BX, g_stackguard1(DI)
27 MOVL BX, (g_stack+stack_lo)(DI)
// The current SP becomes the upper bound of g0's stack.
28 MOVL SP, (g_stack+stack_hi)(DI)
30 // find out information about the processor we're on
// NOTE(review): CX/DX presumably hold CPUID output at this point — confirm.
37 MOVL CX, runtime·cpuid_ecx(SB)
38 MOVL DX, runtime·cpuid_edx(SB)
// settls is called with the address of runtime·tls0 in DI.
42 LEAL runtime·tls0(SB), DI
43 CALL runtime·settls(SB)
45 // store through it, to make sure it works
// Read tls0 back into AX as part of the TLS sanity check.
48 MOVQ runtime·tls0(SB), AX
53 // set the per-goroutine and per-mach "registers"
55 LEAL runtime·g0(SB), CX
57 LEAL runtime·m0(SB), AX
64 CLD // convention is D is always left cleared
65 CALL runtime·check(SB)
// argc and argv are fetched from the scratch slots at 16(SP) and 24(SP).
67 MOVL 16(SP), AX // copy argc
69 MOVL 24(SP), AX // copy argv
72 CALL runtime·osinit(SB)
73 CALL runtime·schedinit(SB)
75 // create a new goroutine to start program
76 MOVL $runtime·main·f(SB), AX // entry
79 CALL runtime·newproc(SB)
82 CALL runtime·mstart(SB)
// Store to absolute address 0xf1 to fault deliberately; it follows the
// mstart call, so it only executes if mstart returns.
84 MOVL $0xf1, 0xf1 // crash
// runtime·main·f is a 4-byte read-only word holding the address of
// runtime·main; rt0_go loads its address as the entry for the first goroutine.
87 DATA runtime·main·f+0(SB)/4,$runtime·main(SB)
88 GLOBL runtime·main·f(SB),RODATA,$4
90 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
94 TEXT runtime·asminit(SB),NOSPLIT,$0-0
95 // No per-thread init.
102 // void gosave(Gobuf*)
103 // save state in Gobuf; setjmp
// Stores the caller's SP and PC into the Gobuf passed as buf and zeroes its
// ctxt and ret fields.
104 TEXT runtime·gosave(SB), NOSPLIT, $0-4
105 MOVL buf+0(FP), AX // gobuf
106 LEAL buf+0(FP), BX // caller's SP
107 MOVL BX, gobuf_sp(AX)
// The frame size is 0, so 0(SP) is the return address pushed by the CALL.
108 MOVL 0(SP), BX // caller's PC
109 MOVL BX, gobuf_pc(AX)
110 MOVL $0, gobuf_ctxt(AX)
// ret is cleared with a 64-bit store (MOVQ), unlike the 32-bit stores above.
111 MOVQ $0, gobuf_ret(AX)
118 // restore state from Gobuf; longjmp
// Reloads SP, DX (ctxt) and AX (ret) from the Gobuf, zeroes those saved
// fields, and finally fetches the saved PC into BX.
119 TEXT runtime·gogo(SB), NOSPLIT, $0-4
120 MOVL buf+0(FP), BX // gobuf
// Dereference DX so that a nil g faults at this instruction.
122 MOVL 0(DX), CX // make sure g != nil
125 MOVL gobuf_sp(BX), SP // restore SP
126 MOVL gobuf_ctxt(BX), DX
127 MOVQ gobuf_ret(BX), AX
128 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
129 MOVQ $0, gobuf_ret(BX)
130 MOVL $0, gobuf_ctxt(BX)
// Load the PC last: this overwrites BX, which held the Gobuf pointer until now.
131 MOVL gobuf_pc(BX), BX
134 // func mcall(fn func(*g))
135 // Switch to m->g0's stack, call fn(g).
136 // Fn must never return. It should gogo(&g->sched)
137 // to keep running g.
138 TEXT runtime·mcall(SB), NOSPLIT, $0-4
// NOTE(review): CX presumably holds the TLS base here — confirm.
142 MOVL g(CX), AX // save state in g->sched
// The frame size is 0, so 0(SP) is the return address of the mcall call.
143 MOVL 0(SP), BX // caller's PC
144 MOVL BX, (g_sched+gobuf_pc)(AX)
145 LEAL fn+0(FP), BX // caller's SP
146 MOVL BX, (g_sched+gobuf_sp)(AX)
// g->sched.g points back at g itself.
147 MOVL AX, (g_sched+gobuf_g)(AX)
149 // switch to m->g0 & its stack, call fn
153 CMPL SI, AX // if g == m->g0 call badmcall
155 MOVL $runtime·badmcall(SB), AX
157 MOVL SI, g(CX) // g = m->g0
158 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
// NOTE(review): badmcall2 is presumably the target used if fn returns — confirm.
164 MOVL $runtime·badmcall2(SB), AX
168 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
169 // of the G stack. We need to distinguish the routine that
170 // lives at the bottom of the G stack from the one that lives
171 // at the top of the system stack because the one at the top of
172 // the system stack terminates the stack walk (see topofstack()).
173 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
176 // func systemstack(fn func())
// The visible code loads fn, g, m, gsignal and g0, saves the current state in
// g->sched with systemstack_switch as the recorded PC, switches SP to the
// saved SP of the g held in DX, and later restores SP from g->sched.
177 TEXT runtime·systemstack(SB), NOSPLIT, $0-4
178 MOVL fn+0(FP), DI // DI = fn
180 MOVL g(CX), AX // AX = g
181 MOVL g_m(AX), BX // BX = m
183 MOVL m_gsignal(BX), DX // DX = gsignal
187 MOVL m_g0(BX), DX // DX = g0
195 // Not g0, not curg. Must be gsignal, but that's not allowed.
196 // Hide call from linker nosplit analysis.
// The address is placed in AX rather than named in a direct CALL.
197 MOVL $runtime·badsystemstack(SB), AX
201 // save our state in g->sched. Pretend to
202 // be systemstack_switch if the G stack is scanned.
203 MOVL $runtime·systemstack_switch(SB), SI
204 MOVL SI, (g_sched+gobuf_pc)(AX)
205 MOVL SP, (g_sched+gobuf_sp)(AX)
206 MOVL AX, (g_sched+gobuf_g)(AX)
// Switch SP to the stack pointer saved in DX's sched (DX was loaded with g0 above).
210 MOVL (g_sched+gobuf_sp)(DX), SP
212 // call target function
// Restore SP from the sched of the g in AX, then zero the saved value.
223 MOVL (g_sched+gobuf_sp)(AX), SP
224 MOVL $0, (g_sched+gobuf_sp)(AX)
228 // already on m stack, just call directly
235 * support for morestack
238 // Called during function prolog when more stack is needed.
240 // The traceback routines see morestack on a g0 as being
241 // the top of a stack (for example, morestack calling newstack
242 // calling the scheduler calling newm calling gc), so we must
243 // record an argument size. For that purpose, it has no arguments.
// The visible code records f's caller in m->morebuf and f's own context in
// g->sched, then switches SP and calls newstack.
244 TEXT runtime·morestack(SB),NOSPLIT,$0-0
249 // Cannot grow scheduler stack (m->g0).
255 // Cannot grow signal stack (m->gsignal).
256 MOVL m_gsignal(BX), SI
262 // Set m->morebuf to f's caller.
263 MOVL 8(SP), AX // f's caller's PC
264 MOVL AX, (m_morebuf+gobuf_pc)(BX)
265 LEAL 16(SP), AX // f's caller's SP
266 MOVL AX, (m_morebuf+gobuf_sp)(BX)
269 MOVL SI, (m_morebuf+gobuf_g)(BX)
271 // Set g->sched to context in f.
272 MOVL 0(SP), AX // f's PC
273 MOVL AX, (g_sched+gobuf_pc)(SI)
274 MOVL SI, (g_sched+gobuf_g)(SI)
275 LEAL 8(SP), AX // f's SP
276 MOVL AX, (g_sched+gobuf_sp)(SI)
// DX is saved as the context word of g->sched.
277 MOVL DX, (g_sched+gobuf_ctxt)(SI)
279 // Call newstack on m->g0's stack.
// NOTE(review): BX presumably holds m->g0 by this point — confirm.
282 MOVL (g_sched+gobuf_sp)(BX), SP
283 CALL runtime·newstack(SB)
// Store to absolute address 0x1003 to fault deliberately.
284 MOVL $0, 0x1003 // crash if newstack returns
287 // morestack trampolines
// morestack_noctxt transfers control to runtime·morestack with a JMP, so no
// new return address is pushed.
288 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
290 JMP runtime·morestack(SB)
292 // reflectcall: call a function with the given argument list
293 // func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
294 // we don't have variable-sized frames, so we use a small number
295 // of constant-sized-frame functions to encode a few bits of size in the pc.
296 // Caution: ugly multiline assembly macros in your future!
298 #define DISPATCH(NAME,MAXSIZE) \
301 MOVL $NAME(SB), AX; \
303 // Note: can't just "JMP NAME(SB)" - bad inlining results.
// reflectcall loads argsize, zero-extended, into CX and walks the DISPATCH
// ladder from call16 up to call1073741824, with MAXSIZE doubling at each step.
// NOTE(review): each DISPATCH presumably transfers control to NAME when
// argsize fits within MAXSIZE — confirm against the macro body.
305 TEXT ·reflectcall(SB), NOSPLIT, $0-16
306 MOVLQZX argsize+8(FP), CX
307 DISPATCH(runtime·call16, 16)
308 DISPATCH(runtime·call32, 32)
309 DISPATCH(runtime·call64, 64)
310 DISPATCH(runtime·call128, 128)
311 DISPATCH(runtime·call256, 256)
312 DISPATCH(runtime·call512, 512)
313 DISPATCH(runtime·call1024, 1024)
314 DISPATCH(runtime·call2048, 2048)
315 DISPATCH(runtime·call4096, 4096)
316 DISPATCH(runtime·call8192, 8192)
317 DISPATCH(runtime·call16384, 16384)
318 DISPATCH(runtime·call32768, 32768)
319 DISPATCH(runtime·call65536, 65536)
320 DISPATCH(runtime·call131072, 131072)
321 DISPATCH(runtime·call262144, 262144)
322 DISPATCH(runtime·call524288, 524288)
323 DISPATCH(runtime·call1048576, 1048576)
324 DISPATCH(runtime·call2097152, 2097152)
325 DISPATCH(runtime·call4194304, 4194304)
326 DISPATCH(runtime·call8388608, 8388608)
327 DISPATCH(runtime·call16777216, 16777216)
328 DISPATCH(runtime·call33554432, 33554432)
329 DISPATCH(runtime·call67108864, 67108864)
330 DISPATCH(runtime·call134217728, 134217728)
331 DISPATCH(runtime·call268435456, 268435456)
332 DISPATCH(runtime·call536870912, 536870912)
333 DISPATCH(runtime·call1073741824, 1073741824)
// Reached after the last DISPATCH: AX receives the address of badreflectcall.
334 MOVL $runtime·badreflectcall(SB), AX
// CALLFN defines a WRAPPER function NAME with a MAXSIZE-byte frame and 16
// bytes of arguments. The visible steps load argptr and argsize before the
// call, then reload argptr, argsize and retoffset for copying results back.
337 #define CALLFN(NAME,MAXSIZE) \
338 TEXT NAME(SB), WRAPPER, $MAXSIZE-16; \
340 /* copy arguments to stack */ \
341 MOVL argptr+4(FP), SI; \
342 MOVL argsize+8(FP), CX; \
345 /* call function */ \
349 /* copy return values back */ \
350 MOVL argptr+4(FP), DI; \
351 MOVL argsize+8(FP), CX; \
352 MOVL retoffset+12(FP), BX; \
363 CALLFN(·call128, 128)
364 CALLFN(·call256, 256)
365 CALLFN(·call512, 512)
366 CALLFN(·call1024, 1024)
367 CALLFN(·call2048, 2048)
368 CALLFN(·call4096, 4096)
369 CALLFN(·call8192, 8192)
370 CALLFN(·call16384, 16384)
371 CALLFN(·call32768, 32768)
372 CALLFN(·call65536, 65536)
373 CALLFN(·call131072, 131072)
374 CALLFN(·call262144, 262144)
375 CALLFN(·call524288, 524288)
376 CALLFN(·call1048576, 1048576)
377 CALLFN(·call2097152, 2097152)
378 CALLFN(·call4194304, 4194304)
379 CALLFN(·call8388608, 8388608)
380 CALLFN(·call16777216, 16777216)
381 CALLFN(·call33554432, 33554432)
382 CALLFN(·call67108864, 67108864)
383 CALLFN(·call134217728, 134217728)
384 CALLFN(·call268435456, 268435456)
385 CALLFN(·call536870912, 536870912)
386 CALLFN(·call1073741824, 1073741824)
388 // bool cas(int32 *val, int32 old, int32 new)
395 TEXT runtime·cas(SB), NOSPLIT, $0-17
409 TEXT runtime·casuintptr(SB), NOSPLIT, $0-17
// atomicloaduintptr forwards to runtime·atomicload with a tail jump.
// NOTE(review): presumably valid because uintptr is 32 bits on this target — confirm.
412 TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-12
413 JMP runtime·atomicload(SB)
// atomicloaduint forwards to runtime·atomicload with a tail jump.
415 TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-12
416 JMP runtime·atomicload(SB)
// atomicstoreuintptr forwards to runtime·atomicstore with a tail jump.
418 TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-12
419 JMP runtime·atomicstore(SB)
421 // bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
429 TEXT runtime·cas64(SB), NOSPLIT, $0-25
444 // bool casp(void **val, void *old, void *new)
451 TEXT runtime·casp1(SB), NOSPLIT, $0-17
465 // uint32 xadd(uint32 volatile *val, int32 delta)
469 TEXT runtime·xadd(SB), NOSPLIT, $0-12
479 TEXT runtime·xadd64(SB), NOSPLIT, $0-24
489 TEXT runtime·xchg(SB), NOSPLIT, $0-12
496 TEXT runtime·xchg64(SB), NOSPLIT, $0-24
503 TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
510 TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
// procyield begins by loading its cycles argument into AX.
// NOTE(review): the frame declares 0 argument bytes ($0-0) although
// cycles+0(FP) is read below — confirm whether $0-4 was intended.
513 TEXT runtime·procyield(SB),NOSPLIT,$0-0
514 MOVL cycles+0(FP), AX
521 TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
527 TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
533 TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
539 // void runtime·atomicor8(byte volatile*, byte);
540 TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
547 // void jmpdefer(fn, sp);
548 // called from deferreturn.
550 // 2. sub 5 bytes from the caller's return address
551 // 3. jmp to the argument
552 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
// SP is set to 8 bytes below BX, so (SP) is the slot holding the return address.
555 LEAL -8(BX), SP // caller sp after CALL
// Rewind the return address by 5 bytes so the CALL executes again on return.
// NOTE(review): 5 is presumably the encoded length of that CALL — confirm.
556 SUBL $5, (SP) // return to CALL again
558 JMP BX // but first run the deferred function
560 // asmcgocall(void(*fn)(void*), void *arg)
562 TEXT runtime·asmcgocall(SB),NOSPLIT,$0-8
566 // asmcgocall_errno(void(*fn)(void*), void *arg)
568 TEXT runtime·asmcgocall_errno(SB),NOSPLIT,$0-12
572 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
574 TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
578 // void setg(G*); set g. for use by needm.
580 TEXT runtime·setg(SB), NOSPLIT, $0-4
584 // check that SP is in range [g->stack.lo, g->stack.hi)
585 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
// Upper-bound check: compare stack.hi with SP.
// NOTE(review): AX presumably holds g here — confirm.
588 CMPL (g_stack+stack_hi)(AX), SP
// Lower-bound check: compare SP with stack.lo.
591 CMPL SP, (g_stack+stack_lo)(AX)
596 TEXT runtime·memclr(SB),NOSPLIT,$0-8
// getcallerpc takes the address of its caller's first argument and reads the
// 32-bit word stored 8 bytes below it, which is the calling PC.
611 TEXT runtime·getcallerpc(SB),NOSPLIT,$0-12
612 MOVL argp+0(FP),AX // addr of first arg
613 MOVL -8(AX),AX // get calling pc
// gogetcallerpc performs the same two loads as getcallerpc, with the argument
// named p: it reads the 32-bit word stored 8 bytes below the address in p.
617 TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-12
618 MOVL p+0(FP),AX // addr of first arg
619 MOVL -8(AX),AX // get calling pc
// setcallerpc overwrites the calling PC stored 8 bytes below the address of
// the caller's first argument with the pc argument.
623 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
624 MOVL argp+0(FP),AX // addr of first arg
625 MOVL pc+4(FP), BX // pc to set
// A full 64-bit store: the 32-bit load above zero-extends into BX, so the
// upper half of the slot is written as zero.
626 MOVQ BX, -8(AX) // set calling pc
629 TEXT runtime·getcallersp(SB),NOSPLIT,$0-12
634 // func gogetcallersp(p unsafe.Pointer) uintptr
// The visible code loads p, the address of the caller's first argument, into AX.
635 TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-12
636 MOVL p+0(FP),AX // addr of first arg
640 // int64 runtime·cputicks(void)
641 TEXT runtime·cputicks(SB),NOSPLIT,$0-0
648 // hash function using AES hardware instructions
649 // For now, our one amd64p32 system (NaCl) does not
650 // support using AES instructions, so have not bothered to
651 // write the implementations. Can copy and adjust the ones
652 // in asm_amd64.s when the time comes.
654 TEXT runtime·aeshash(SB),NOSPLIT,$0-20
658 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20
662 TEXT runtime·aeshash32(SB),NOSPLIT,$0-20
666 TEXT runtime·aeshash64(SB),NOSPLIT,$0-20
// memeq delegates the comparison to runtime·memeqbody.
670 TEXT runtime·memeq(SB),NOSPLIT,$0-17
674 CALL runtime·memeqbody(SB)
678 // eqstring tests whether two strings are equal.
679 // See runtime_test.go:eqstring_generic for
680 // equivalent Go code.
681 TEXT runtime·eqstring(SB),NOSPLIT,$0-17
// BX = length of the second string.
683 MOVL s2len+12(FP), BX
// The comparison itself is delegated to memeqbody.
690 CALL runtime·memeqbody(SB)
703 TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
709 // 64 bytes at a time using xmm registers
736 // 8 bytes at a time using 64-bit register
749 // remaining 0-8 bytes
769 // load at SI won't cross a page boundary.
773 // address ends in 11111xxx. Load up to bytes we want, move to correct position.
// cmpstring loads the base and length of s1 into SI/BX and of s2 into DI/DX,
// then calls the shared comparison routine cmpbody.
798 TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
799 MOVL s1_base+0(FP), SI
800 MOVL s1_len+4(FP), BX
801 MOVL s2_base+8(FP), DI
802 MOVL s2_len+12(FP), DX
803 CALL runtime·cmpbody(SB)
// cmpbytes calls the same comparison routine, cmpbody, that cmpstring uses.
807 TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
812 CALL runtime·cmpbody(SB)
// cmpbody is the comparison routine called by cmpstring and cmpbytes.
// cmpstring passes the base/length of the first operand in SI/BX and of the
// second in DI/DX. The result paths below leave +1, 0 or -1 in AX.
823 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
828 CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
// Flip the low 16 bits: AX becomes nonzero exactly when one of them was 0.
839 XORQ $0xffff, AX // convert EQ to NE
840 JNE diff16 // branch if at least one byte is not equal
846 // AX = bit mask of differences
848 BSFQ AX, BX // index of first byte that differs
// AX = 2*AX - 1, mapping 1 to +1 and 0 to -1.
855 LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
858 // 0 through 16 bytes left, alen>=8, blen>=8
874 // AX and CX contain parts of a and b that differ.
// NOTE(review): the byte swap presumably makes the earliest byte in memory the
// most significant one, so the highest differing bit decides the order — confirm.
876 BSWAPQ AX // reverse order of bytes
879 BSRQ CX, CX // index of highest bit difference
880 SHRQ CX, AX // move a's bit to bottom
881 ANDQ $1, AX // mask bit
882 LEAQ -1(AX*2), AX // 1/0 => +1/-1
885 // 0-7 bytes in common
// CX = R8 * 8.
887 LEAQ (R8*8), CX // bytes left -> bits left
888 NEGQ CX // - bits left (== 64 - bits left mod 64)
891 // load bytes of a into high bytes of AX
903 // load bytes of b into high bytes of BX
915 BSWAPQ SI // reverse order of bytes
917 XORQ SI, DI // find bit differences
919 BSRQ DI, CX // index of highest bit difference
920 SHRQ CX, SI // move a's bit to bottom
921 ANDQ $1, SI // mask bit
922 LEAQ -1(SI*2), AX // 1/0 => +1/-1
929 SETGT AX // 1 if alen > blen
930 SETEQ CX // 1 if alen == blen
// AX = 2*AX + CX - 1: +1 when alen > blen, 0 when equal, -1 otherwise.
931 LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
// bytes·IndexByte delegates the search to runtime·indexbytebody.
934 TEXT bytes·IndexByte(SB),NOSPLIT,$0
938 CALL runtime·indexbytebody(SB)
// strings·IndexByte delegates the search to runtime·indexbytebody.
942 TEXT strings·IndexByte(SB),NOSPLIT,$0
946 CALL runtime·indexbytebody(SB)
956 TEXT runtime·indexbytebody(SB),NOSPLIT,$0
962 // round up to first 16-byte boundary
969 // search the beginning
974 // DI is 16-byte aligned; get ready to search using SSE instructions
976 // round down to last 16-byte boundary
981 // shuffle X0 around so that each byte contains c
989 // move the next 16-byte chunk of the buffer into X1
991 // compare bytes in X0 to X1
993 // take the top bit of each byte in X1 and put the result in DX
1007 // if CX == 0, the zero flag will be set and we'll end up
1008 // returning a false success
1017 // handle for lengths < 16
1025 // we've found the chunk containing the byte
1026 // now just figure out which specific byte it is
1028 // get the index of the least significant set bit
// bytes·Equal loads the lengths of a and b into BX and CX and delegates the
// comparison to runtime·memeqbody.
1041 TEXT bytes·Equal(SB),NOSPLIT,$0-25
1042 MOVL a_len+4(FP), BX
1043 MOVL b_len+16(FP), CX
1049 CALL runtime·memeqbody(SB)
// fastrand1 reads the m_fastrand field into DX, XORs it with the constant
// 0x88888eef, and stores the result back into m_fastrand.
// NOTE(review): AX presumably holds the current m — confirm.
1054 TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
1058 MOVL m_fastrand(AX), DX
1061 XORL $0x88888eef, DX
1063 MOVL DX, m_fastrand(AX)
1067 TEXT runtime·return0(SB), NOSPLIT, $0
1071 // The top-most function running on a goroutine
1072 // returns to goexit+PCQuantum.
// goexit hands control to runtime·goexit1.
1073 TEXT runtime·goexit(SB),NOSPLIT,$0-0
1075 CALL runtime·goexit1(SB) // does not return
1077 TEXT runtime·getg(SB),NOSPLIT,$0-4
1083 TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
1088 TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
1094 TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
1099 TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4