1 // Copyright 2019 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // mkpreempt generates the asyncPreempt functions for each
20 // Copied from cmd/compile/internal/ssa/gen/*Ops.go
22 var regNames386 = []string{
41 var regNamesAMD64 = []string{
78 var arches = map[string]func(){
83 "loong64": genLoong64,
84 "mips64x": func() { genMIPS(true) },
85 "mipsx": func() { genMIPS(false) },
87 "riscv64": genRISCV64,
91 var beLe = map[string]bool{"mips64x": true, "mipsx": true, "ppc64x": true}
97 for _, arch := range flag.Args() {
98 gen, ok := arches[arch]
100 log.Fatalf("unknown arch %s", arch)
108 for arch, gen := range arches {
109 f, err := os.Create(fmt.Sprintf("preempt_%s.s", arch))
116 if err := f.Close(); err != nil {
122 func header(arch string) {
123 fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
125 base := arch[:len(arch)-1]
126 fmt.Fprintf(out, "//go:build %s || %sle\n\n", base, base)
128 fmt.Fprintf(out, "#include \"go_asm.h\"\n")
130 fmt.Fprintf(out, "#include \"asm_amd64.h\"\n")
132 fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
133 fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
136 func p(f string, args ...any) {
137 fmted := fmt.Sprintf(f, args...)
138 fmt.Fprintf(out, "\t%s\n", strings.ReplaceAll(fmted, "\n", "\n\t"))
141 func label(l string) {
142 fmt.Fprintf(out, "%s\n", l)
148 sp string // stack pointer register
158 // If this register requires special save and restore, these
159 // give those operations with a %d placeholder for the stack
164 func (l *layout) add(op, reg string, size int) {
165 l.regs = append(l.regs, regPos{saveOp: op, restoreOp: op, reg: reg, pos: l.stack})
169 func (l *layout) add2(sop, rop, reg string, size int) {
170 l.regs = append(l.regs, regPos{saveOp: sop, restoreOp: rop, reg: reg, pos: l.stack})
174 func (l *layout) addSpecial(save, restore string, size int) {
175 l.regs = append(l.regs, regPos{save: save, restore: restore, pos: l.stack})
179 func (l *layout) save() {
180 for _, reg := range l.regs {
184 p("%s %s, %d(%s)", reg.saveOp, reg.reg, reg.pos, l.sp)
189 func (l *layout) restore() {
190 for i := len(l.regs) - 1; i >= 0; i-- {
192 if reg.restore != "" {
193 p(reg.restore, reg.pos)
195 p("%s %d(%s), %s", reg.restoreOp, reg.pos, l.sp, reg.reg)
202 // Save general purpose registers.
203 var l = layout{sp: "SP"}
204 for _, reg := range regNames386 {
205 if reg == "SP" || strings.HasPrefix(reg, "X") {
208 l.add("MOVL", reg, 4)
211 softfloat := "GO386_softfloat"
213 // Save SSE state only if supported.
214 lSSE := layout{stack: l.stack, sp: "SP"}
215 for i := 0; i < 8; i++ {
216 lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16)
219 p("ADJSP $%d", lSSE.stack)
222 p("#ifndef %s", softfloat)
225 p("CALL ·asyncPreempt2(SB)")
226 p("#ifndef %s", softfloat)
230 p("ADJSP $%d", -lSSE.stack)
237 // Assign stack offsets.
238 var l = layout{sp: "SP"}
239 for _, reg := range regNamesAMD64 {
240 if reg == "SP" || reg == "BP" {
243 if !strings.HasPrefix(reg, "X") {
244 l.add("MOVQ", reg, 8)
247 lSSE := layout{stack: l.stack, sp: "SP"}
248 for _, reg := range regNamesAMD64 {
249 if strings.HasPrefix(reg, "X") {
250 lSSE.add("MOVUPS", reg, 16)
254 // TODO: MXCSR register?
258 p("// Save flags before clobbering them")
260 p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
261 p("ADJSP $%d", lSSE.stack)
262 p("// But vet doesn't know ADJSP, so suppress vet stack checking")
267 // Apparently, the signal handling code path in darwin kernel leaves
268 // the upper bits of Y registers in a dirty state, which causes
269 // many SSE operations (128-bit and narrower) to become much slower.
270 // Clear the upper bits to get to a clean state. See issue #37174.
271 // It is safe here as Go code doesn't use the upper bits of Y registers.
272 p("#ifdef GOOS_darwin")
274 p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
281 p("CALL ·asyncPreempt2(SB)")
284 p("ADJSP $%d", -lSSE.stack)
291 // Add integer registers R0-R12.
292 // R13 (SP), R14 (LR), R15 (PC) are special and not saved here.
293 var l = layout{sp: "R13", stack: 4} // add LR slot
294 for i := 0; i <= 12; i++ {
295 reg := fmt.Sprintf("R%d", i)
297 continue // R10 is g register, no need to save/restore
299 l.add("MOVW", reg, 4)
301 // Add flag register.
303 "MOVW CPSR, R0\nMOVW R0, %d(R13)",
304 "MOVW %d(R13), R0\nMOVW R0, CPSR",
307 // Add floating point registers F0-F15 and flag register.
308 var lfp = layout{stack: l.stack, sp: "R13"}
310 "MOVW FPCR, R0\nMOVW R0, %d(R13)",
311 "MOVW %d(R13), R0\nMOVW R0, FPCR",
313 for i := 0; i <= 15; i++ {
314 reg := fmt.Sprintf("F%d", i)
315 lfp.add("MOVD", reg, 8)
318 p("MOVW.W R14, -%d(R13)", lfp.stack) // allocate frame, save LR
320 p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp") // test goarm, and skip FP registers if goarm=5.
323 p("CALL ·asyncPreempt2(SB)")
324 p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp2") // test goarm, and skip FP registers if goarm=5.
329 p("MOVW %d(R13), R14", lfp.stack) // sigctxt.pushCall pushes LR on stack, restore it
330 p("MOVW.P %d(R13), R15", lfp.stack+4) // load PC, pop frame (including the space pushed by sigctxt.pushCall)
331 p("UNDEF") // shouldn't get here
335 // Add integer registers R0-R26
336 // R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
337 // and not saved here.
338 var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
339 for i := 0; i < 26; i += 2 {
342 continue // R18 is not used, skip
344 reg := fmt.Sprintf("(R%d, R%d)", i, i+1)
345 l.add2("STP", "LDP", reg, 16)
347 // Add flag registers.
349 "MOVD NZCV, R0\nMOVD R0, %d(RSP)",
350 "MOVD %d(RSP), R0\nMOVD R0, NZCV",
353 "MOVD FPSR, R0\nMOVD R0, %d(RSP)",
354 "MOVD %d(RSP), R0\nMOVD R0, FPSR",
356 // TODO: FPCR? I don't think we'll change it, so no need to save.
357 // Add floating point registers F0-F31.
358 for i := 0; i < 31; i += 2 {
359 reg := fmt.Sprintf("(F%d, F%d)", i, i+1)
360 l.add2("FSTPD", "FLDPD", reg, 16)
363 l.stack += 8 // SP needs 16-byte alignment
366 // allocate frame, save PC of interrupted instruction (in LR)
367 p("MOVD R30, %d(RSP)", -l.stack)
368 p("SUB $%d, RSP", l.stack)
369 p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
370 p("SUB $8, RSP, R29") // set up new frame pointer
371 // On iOS, save the LR again after decrementing SP. We run the
372 // signal handler on the G stack (as it doesn't support sigaltstack),
373 // so any writes below SP may be clobbered.
379 p("CALL ·asyncPreempt2(SB)")
382 p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
383 p("MOVD -8(RSP), R29") // restore frame pointer
384 p("MOVD (RSP), R27") // load PC to REGTMP
385 p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
389 func genMIPS(_64bit bool) {
396 softfloat := "GOMIPS_softfloat"
404 softfloat = "GOMIPS64_softfloat"
407 // Add integer registers R1-R22, R24-R25, R28
408 // R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
409 // and not saved here. R26 and R27 are reserved by kernel and not used.
410 var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
411 for i := 1; i <= 25; i++ {
413 continue // R23 is REGTMP
415 reg := fmt.Sprintf("R%d", i)
416 l.add(mov, reg, regsize)
418 l.add(mov, r28, regsize)
420 mov+" HI, R1\n"+mov+" R1, %d(R29)",
421 mov+" %d(R29), R1\n"+mov+" R1, HI",
424 mov+" LO, R1\n"+mov+" R1, %d(R29)",
425 mov+" %d(R29), R1\n"+mov+" R1, LO",
428 // Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant)
429 var lfp = layout{sp: "R29", stack: l.stack}
431 mov+" FCR31, R1\n"+mov+" R1, %d(R29)",
432 mov+" %d(R29), R1\n"+mov+" R1, FCR31",
434 // Add floating point registers F0-F31.
435 for i := 0; i <= 31; i++ {
436 reg := fmt.Sprintf("F%d", i)
437 lfp.add(movf, reg, regsize)
440 // allocate frame, save PC of interrupted instruction (in LR)
441 p(mov+" R31, -%d(R29)", lfp.stack)
442 p(sub+" $%d, R29", lfp.stack)
445 p("#ifndef %s", softfloat)
448 p("CALL ·asyncPreempt2(SB)")
449 p("#ifndef %s", softfloat)
454 p(mov+" %d(R29), R31", lfp.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
455 p(mov + " (R29), R23") // load PC to REGTMP
456 p(add+" $%d, R29", lfp.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
467 // Add integer registers r4-r21 r23-r29 r31
468 // R0 (zero), R30 (REGTMP), R2 (tp), R3 (SP), R22 (g), R1 (LR) are special, and not saved here.
469 var l = layout{sp: "R3", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
470 for i := 4; i <= 31; i++ {
471 if i == 22 || i == 30 {
474 reg := fmt.Sprintf("R%d", i)
475 l.add(mov, reg, regsize)
478 // Add floating point registers F0-F31.
479 for i := 0; i <= 31; i++ {
480 reg := fmt.Sprintf("F%d", i)
481 l.add(movf, reg, regsize)
486 mov+" FCC0, R4\n"+mov+" R4, %d(R3)",
487 mov+" %d(R3), R4\n"+mov+" R4, FCC0",
490 // allocate frame, save PC of interrupted instruction (in LR)
491 p(mov+" R1, -%d(R3)", l.stack)
492 p(sub+" $%d, R3", l.stack)
495 p("CALL ·asyncPreempt2(SB)")
498 p(mov+" %d(R3), R1", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
499 p(mov + " (R3), R30") // load PC to REGTMP
500 p(add+" $%d, R3", l.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
505 // Add integer registers R3-R29
506 // R0 (zero), R1 (SP), R30 (g) are special and not saved here.
507 // R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
508 // R31 (REGTMP) will be saved manually.
509 var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
510 for i := 3; i <= 29; i++ {
511 if i == 12 || i == 13 {
512 // R12 has been saved in sigctxt.pushCall.
513 // R13 is TLS pointer, not used by Go code. We must NOT
514 // restore it, otherwise if we parked and resumed on a
515 // different thread we'll mess up TLS addresses.
518 reg := fmt.Sprintf("R%d", i)
519 l.add("MOVD", reg, 8)
522 "MOVW CR, R31\nMOVW R31, %d(R1)",
523 "MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
524 8) // CR is 4-byte wide, but just keep the alignment
526 "MOVD XER, R31\nMOVD R31, %d(R1)",
527 "MOVD %d(R1), R31\nMOVD R31, XER",
529 // Add floating point registers F0-F31.
530 for i := 0; i <= 31; i++ {
531 reg := fmt.Sprintf("F%d", i)
532 l.add("FMOVD", reg, 8)
534 // Add floating point control/status register FPSCR.
536 "MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
537 "FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
540 p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
542 p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)
545 p("CALL ·asyncPreempt2(SB)")
548 p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
550 p("MOVD %d(R1), R2", l.stack+8)
551 p("MOVD %d(R1), R12", l.stack+16)
552 p("MOVD (R1), R31") // load PC to CTR
554 p("MOVD 32(R1), R31") // restore R31
555 p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
560 // X0 (zero), X1 (LR), X2 (SP), X3 (GP), X4 (TP), X27 (g), X31 (TMP) are special.
561 var l = layout{sp: "X2", stack: 8}
563 // Add integer registers (X5-X26, X28-X30).
564 for i := 5; i < 31; i++ {
568 reg := fmt.Sprintf("X%d", i)
572 // Add floating point registers (F0-F31).
573 for i := 0; i <= 31; i++ {
574 reg := fmt.Sprintf("F%d", i)
575 l.add("MOVD", reg, 8)
578 p("MOV X1, -%d(X2)", l.stack)
579 p("ADD $-%d, X2", l.stack)
581 p("CALL ·asyncPreempt2(SB)")
583 p("MOV %d(X2), X1", l.stack)
585 p("ADD $%d, X2", l.stack+8)
590 // Add integer registers R0-R12
591 // R13 (g), R14 (LR), R15 (SP) are special, and not saved here.
592 // Saving R10 (REGTMP) is not necessary, but it is saved anyway.
593 var l = layout{sp: "R15", stack: 16} // add slot to save PC of interrupted instruction and flags
595 "STMG R0, R12, %d(R15)",
596 "LMG %d(R15), R0, R12",
598 // Add floating point registers F0-F15.
599 for i := 0; i <= 15; i++ {
600 reg := fmt.Sprintf("F%d", i)
601 l.add("FMOVD", reg, 8)
604 // allocate frame, save PC of interrupted instruction (in LR) and flags (condition code)
605 p("IPM R10") // save flags upfront, as ADD will clobber flags
606 p("MOVD R14, -%d(R15)", l.stack)
607 p("ADD $-%d, R15", l.stack)
608 p("MOVW R10, 8(R15)") // save flags
611 p("CALL ·asyncPreempt2(SB)")
614 p("MOVD %d(R15), R14", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
615 p("ADD $%d, R15", l.stack+8) // pop frame (including the space pushed by sigctxt.pushCall)
616 p("MOVWZ -%d(R15), R10", l.stack) // load flags to REGTMP
617 p("TMLH R10, $(3<<12)") // restore flags
618 p("MOVD -%d(R15), R10", l.stack+8) // load PC to REGTMP
623 p("// No async preemption on wasm")
627 func notImplemented() {
628 p("// Not implemented yet")