src/runtime/proc1.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package runtime
   6
   7 import "unsafe"
   8
var (
	// m0 is a statically allocated m. mstart1 compares the running m against
	// &m0 to decide whether to install signal handlers (initsig) and whether
	// a P has to be acquired from nextp before scheduling.
	m0 m
	// g0 is a statically allocated g. The legacy C declaration kept in the
	// comment below describes it as the "idle goroutine for m0" — presumably
	// still its role here; TODO confirm against the bootstrap assembly.
	g0 g
)
  13
  14 // Goroutine scheduler
  15 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
  16 //
  17 // The main concepts are:
  18 // G - goroutine.
  19 // M - worker thread, or machine.
  20 // P - processor, a resource that is required to execute Go code.
  21 //     M must have an associated P to execute Go code, however it can be
  22 //     blocked or in a syscall w/o an associated P.
  23 //
  24 // Design doc at http://golang.org/s/go11sched.
  25
const (
	// _GoidCacheBatch is the number of goroutine ids to grab from
	// sched.goidgen into the local per-P cache at once.
	// 16 seems to provide enough amortization; beyond that the value is
	// mostly arbitrary.
	_GoidCacheBatch = 16
)
  31
  32 /*
  33 SchedT  sched;
  34 int32   gomaxprocs;
  35 uint32  needextram;
  36 bool    iscgo;
  37 M       m0;
  38 G       g0;     // idle goroutine for m0
  39 G*      lastg;
  40 M*      allm;
  41 M*      extram;
  42 P*      allp[MaxGomaxprocs+1];
  43 int8*   goos;
  44 int32   ncpu;
  45 int32   newprocs;
  46
  47 Mutex allglock; // the following vars are protected by this lock or by stoptheworld
  48 G**     allg;
  49 Slice   allgs;
  50 uintptr allglen;
  51 ForceGCState    forcegc;
  52
  53 void mstart(void);
  54 static void runqput(P*, G*);
  55 static G* runqget(P*);
  56 static bool runqputslow(P*, G*, uint32, uint32);
  57 static G* runqsteal(P*, P*);
  58 static void mput(M*);
  59 static M* mget(void);
  60 static void mcommoninit(M*);
  61 static void schedule(void);
  62 static void procresize(int32);
  63 static void acquirep(P*);
  64 static P* releasep(void);
  65 static void newm(void(*)(void), P*);
  66 static void stopm(void);
  67 static void startm(P*, bool);
  68 static void handoffp(P*);
  69 static void wakep(void);
  70 static void stoplockedm(void);
  71 static void startlockedm(G*);
  72 static void sysmon(void);
  73 static uint32 retake(int64);
  74 static void incidlelocked(int32);
  75 static void checkdead(void);
  76 static void exitsyscall0(G*);
  77 void park_m(G*);
  78 static void goexit0(G*);
  79 static void gfput(P*, G*);
  80 static G* gfget(P*);
  81 static void gfpurge(P*);
  82 static void globrunqput(G*);
  83 static void globrunqputbatch(G*, G*, int32);
  84 static G* globrunqget(P*, int32);
  85 static P* pidleget(void);
  86 static void pidleput(P*);
  87 static void injectglist(G*);
  88 static bool preemptall(void);
  89 static bool preemptone(P*);
  90 static bool exitsyscallfast(void);
  91 static bool haveexperiment(int8*);
  92 void allgadd(G*);
  93 static void dropg(void);
  94
  95 extern String buildVersion;
  96 */
  97
  98 // The bootstrap sequence is:
  99 //
 100 //      call osinit
 101 //      call schedinit
 102 //      make & queue new G
 103 //      call runtime·mstart
 104 //
 105 // The new G calls runtime·main.
 106 func schedinit() {
 107         // raceinit must be the first call to race detector.
 108         // In particular, it must be done before mallocinit below calls racemapshadow.
 109         _g_ := getg()
 110         if raceenabled {
 111                 _g_.racectx = raceinit()
 112         }
 113
 114         sched.maxmcount = 10000
 115
 116         tracebackinit()
 117         symtabinit()
 118         stackinit()
 119         mallocinit()
 120         mcommoninit(_g_.m)
 121
 122         goargs()
 123         goenvs()
 124         parsedebugvars()
 125         gcinit()
 126
 127         sched.lastpoll = uint64(nanotime())
 128         procs := 1
 129         if n := goatoi(gogetenv("GOMAXPROCS")); n > 0 {
 130                 if n > _MaxGomaxprocs {
 131                         n = _MaxGomaxprocs
 132                 }
 133                 procs = n
 134         }
 135         procresize(int32(procs))
 136
 137         if buildVersion == "" {
 138                 // Condition should never trigger.  This code just serves
 139                 // to ensure runtime·buildVersion is kept in the resulting binary.
 140                 buildVersion = "unknown"
 141         }
 142 }
 143
// newsysmon asks _newm to start a new M with sysmon as its start function
// and no P (nil) attached.
func newsysmon() {
	_newm(sysmon, nil)
}
 147
 148 func dumpgstatus(gp *g) {
 149         _g_ := getg()
 150         print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
 151         print("runtime:  g:  g=", _g_, ", goid=", _g_.goid, ",  g->atomicstatus=", readgstatus(_g_), "\n")
 152 }
 153
 154 func checkmcount() {
 155         // sched lock is held
 156         if sched.mcount > sched.maxmcount {
 157                 print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
 158                 gothrow("thread exhaustion")
 159         }
 160 }
 161
 162 func mcommoninit(mp *m) {
 163         _g_ := getg()
 164
 165         // g0 stack won't make sense for user (and is not necessary unwindable).
 166         if _g_ != _g_.m.g0 {
 167                 callers(1, &mp.createstack[0], len(mp.createstack))
 168         }
 169
 170         mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
 171         if mp.fastrand == 0 {
 172                 mp.fastrand = 0x49f6428a
 173         }
 174
 175         lock(&sched.lock)
 176         mp.id = sched.mcount
 177         sched.mcount++
 178         checkmcount()
 179         mpreinit(mp)
 180         if mp.gsignal != nil {
 181                 mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard
 182         }
 183
 184         // Add to allm so garbage collector doesn't free g->m
 185         // when it is just in a register or thread-local storage.
 186         mp.alllink = allm
 187
 188         // NumCgoCall() iterates over allm w/o schedlock,
 189         // so we need to publish it safely.
 190         atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
 191         unlock(&sched.lock)
 192 }
 193
 194 // Mark gp ready to run.
 195 func ready(gp *g) {
 196         status := readgstatus(gp)
 197
 198         // Mark runnable.
 199         _g_ := getg()
 200         _g_.m.locks++ // disable preemption because it can be holding p in a local var
 201         if status&^_Gscan != _Gwaiting {
 202                 dumpgstatus(gp)
 203                 gothrow("bad g->status in ready")
 204         }
 205
 206         // status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
 207         casgstatus(gp, _Gwaiting, _Grunnable)
 208         runqput(_g_.m.p, gp)
 209         if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 { // TODO: fast atomic
 210                 wakep()
 211         }
 212         _g_.m.locks--
 213         if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
 214                 _g_.stackguard0 = stackPreempt
 215         }
 216 }
 217
 218 func gcprocs() int32 {
 219         // Figure out how many CPUs to use during GC.
 220         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
 221         lock(&sched.lock)
 222         n := gomaxprocs
 223         if n > ncpu {
 224                 n = ncpu
 225         }
 226         if n > _MaxGcproc {
 227                 n = _MaxGcproc
 228         }
 229         if n > sched.nmidle+1 { // one M is currently running
 230                 n = sched.nmidle + 1
 231         }
 232         unlock(&sched.lock)
 233         return n
 234 }
 235
 236 func needaddgcproc() bool {
 237         lock(&sched.lock)
 238         n := gomaxprocs
 239         if n > ncpu {
 240                 n = ncpu
 241         }
 242         if n > _MaxGcproc {
 243                 n = _MaxGcproc
 244         }
 245         n -= sched.nmidle + 1 // one M is currently running
 246         unlock(&sched.lock)
 247         return n > 0
 248 }
 249
 250 func helpgc(nproc int32) {
 251         _g_ := getg()
 252         lock(&sched.lock)
 253         pos := 0
 254         for n := int32(1); n < nproc; n++ { // one M is currently running
 255                 if allp[pos].mcache == _g_.m.mcache {
 256                         pos++
 257                 }
 258                 mp := mget()
 259                 if mp == nil {
 260                         gothrow("gcprocs inconsistency")
 261                 }
 262                 mp.helpgc = n
 263                 mp.mcache = allp[pos].mcache
 264                 pos++
 265                 notewakeup(&mp.park)
 266         }
 267         unlock(&sched.lock)
 268 }
 269
 270 // Similar to stoptheworld but best-effort and can be called several times.
 271 // There is no reverse operation, used during crashing.
 272 // This function must not lock any mutexes.
 273 func freezetheworld() {
 274         if gomaxprocs == 1 {
 275                 return
 276         }
 277         // stopwait and preemption requests can be lost
 278         // due to races with concurrently executing threads,
 279         // so try several times
 280         for i := 0; i < 5; i++ {
 281                 // this should tell the scheduler to not start any new goroutines
 282                 sched.stopwait = 0x7fffffff
 283                 atomicstore(&sched.gcwaiting, 1)
 284                 // this should stop running goroutines
 285                 if !preemptall() {
 286                         break // no running goroutines
 287                 }
 288                 usleep(1000)
 289         }
 290         // to be sure
 291         usleep(1000)
 292         preemptall()
 293         usleep(1000)
 294 }
 295
 296 func isscanstatus(status uint32) bool {
 297         if status == _Gscan {
 298                 gothrow("isscanstatus: Bad status Gscan")
 299         }
 300         return status&_Gscan == _Gscan
 301 }
 302
// readgstatus returns gp's status word, read with an atomic load.
// All reads and writes of g's status go through readgstatus, casgstatus,
// castogscanstatus, casfrom_Gscanstatus.
//go:nosplit
func readgstatus(gp *g) uint32 {
	return atomicload(&gp.atomicstatus)
}
 309
 310 // The Gscanstatuses are acting like locks and this releases them.
 311 // If it proves to be a performance hit we should be able to make these
 312 // simple atomic stores but for now we are going to throw if
 313 // we see an inconsistent state.
 314 func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
 315         success := false
 316
 317         // Check that transition is valid.
 318         switch oldval {
 319         default:
 320                 print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
 321                 dumpgstatus(gp)
 322                 gothrow("casfrom_Gscanstatus:top gp->status is not in scan state")
 323         case _Gscanrunnable,
 324                 _Gscanwaiting,
 325                 _Gscanrunning,
 326                 _Gscansyscall:
 327                 if newval == oldval&^_Gscan {
 328                         success = cas(&gp.atomicstatus, oldval, newval)
 329                 }
 330         case _Gscanenqueue:
 331                 if newval == _Gwaiting {
 332                         success = cas(&gp.atomicstatus, oldval, newval)
 333                 }
 334         }
 335         if !success {
 336                 print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
 337                 dumpgstatus(gp)
 338                 gothrow("casfrom_Gscanstatus: gp->status is not in scan state")
 339         }
 340 }
 341
 342 // This will return false if the gp is not in the expected status and the cas fails.
 343 // This acts like a lock acquire while the casfromgstatus acts like a lock release.
 344 func castogscanstatus(gp *g, oldval, newval uint32) bool {
 345         switch oldval {
 346         case _Grunnable,
 347                 _Gwaiting,
 348                 _Gsyscall:
 349                 if newval == oldval|_Gscan {
 350                         return cas(&gp.atomicstatus, oldval, newval)
 351                 }
 352         case _Grunning:
 353                 if newval == _Gscanrunning || newval == _Gscanenqueue {
 354                         return cas(&gp.atomicstatus, oldval, newval)
 355                 }
 356         }
 357         print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
 358         gothrow("castogscanstatus")
 359         panic("not reached")
 360 }
 361
// casgstatus moves gp from oldval to newval with a compare-and-swap,
// retrying until it succeeds.
// If asked to move to or from a Gscanstatus this will throw. Use
// castogscanstatus and casfrom_Gscanstatus instead. It also throws when
// oldval == newval.
// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
// put it in the Gscan state is finished.
//go:nosplit
func casgstatus(gp *g, oldval, newval uint32) {
	// Reject scan statuses on either side and no-op transitions.
	// The print and throw are executed via systemstack.
	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
		systemstack(func() {
			print("casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
			gothrow("casgstatus: bad incoming values")
		})
	}

	// loop if gp->atomicstatus is in a scan state giving
	// GC time to finish and change the state to oldval.
	for !cas(&gp.atomicstatus, oldval, newval) {
		// Waiting for Gwaiting while gp is already Grunnable is treated
		// as fatal rather than spinning on it.
		if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
			systemstack(func() {
				gothrow("casgstatus: waiting for Gwaiting but is Grunnable")
			})
		}
		// Help GC if needed.
		// if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
		//      gp.preemptscan = false
		//      systemstack(func() {
		//              gcphasework(gp)
		//      })
		// }
	}
}
 392
 393 // casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
 394 // Returns old status. Cannot call casgstatus directly, because we are racing with an
 395 // async wakeup that might come in from netpoll. If we see Gwaiting from the readgstatus,
 396 // it might have become Grunnable by the time we get to the cas. If we called casgstatus,
 397 // it would loop waiting for the status to go back to Gwaiting, which it never will.
 398 //go:nosplit
 399 func casgcopystack(gp *g) uint32 {
 400         for {
 401                 oldstatus := readgstatus(gp) &^ _Gscan
 402                 if oldstatus != _Gwaiting && oldstatus != _Grunnable {
 403                         gothrow("copystack: bad status, not Gwaiting or Grunnable")
 404                 }
 405                 if cas(&gp.atomicstatus, oldstatus, _Gcopystack) {
 406                         return oldstatus
 407                 }
 408         }
 409 }
 410
 411 // stopg ensures that gp is stopped at a GC safe point where its stack can be scanned
 412 // or in the context of a moving collector the pointers can be flipped from pointing
 413 // to old object to pointing to new objects.
 414 // If stopg returns true, the caller knows gp is at a GC safe point and will remain there until
 415 // the caller calls restartg.
 416 // If stopg returns false, the caller is not responsible for calling restartg. This can happen
 417 // if another thread, either the gp itself or another GC thread is taking the responsibility
 418 // to do the GC work related to this thread.
 419 func stopg(gp *g) bool {
 420         for {
 421                 if gp.gcworkdone {
 422                         return false
 423                 }
 424
 425                 switch s := readgstatus(gp); s {
 426                 default:
 427                         dumpgstatus(gp)
 428                         gothrow("stopg: gp->atomicstatus is not valid")
 429
 430                 case _Gdead:
 431                         return false
 432
 433                 case _Gcopystack:
 434                         // Loop until a new stack is in place.
 435
 436                 case _Grunnable,
 437                         _Gsyscall,
 438                         _Gwaiting:
 439                         // Claim goroutine by setting scan bit.
 440                         if !castogscanstatus(gp, s, s|_Gscan) {
 441                                 break
 442                         }
 443                         // In scan state, do work.
 444                         gcphasework(gp)
 445                         return true
 446
 447                 case _Gscanrunnable,
 448                         _Gscanwaiting,
 449                         _Gscansyscall:
 450                         // Goroutine already claimed by another GC helper.
 451                         return false
 452
 453                 case _Grunning:
 454                         // Claim goroutine, so we aren't racing with a status
 455                         // transition away from Grunning.
 456                         if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
 457                                 break
 458                         }
 459
 460                         // Mark gp for preemption.
 461                         if !gp.gcworkdone {
 462                                 gp.preemptscan = true
 463                                 gp.preempt = true
 464                                 gp.stackguard0 = stackPreempt
 465                         }
 466
 467                         // Unclaim.
 468                         casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
 469                         return false
 470                 }
 471         }
 472 }
 473
 474 // The GC requests that this routine be moved from a scanmumble state to a mumble state.
 475 func restartg(gp *g) {
 476         s := readgstatus(gp)
 477         switch s {
 478         default:
 479                 dumpgstatus(gp)
 480                 gothrow("restartg: unexpected status")
 481
 482         case _Gdead:
 483                 // ok
 484
 485         case _Gscanrunnable,
 486                 _Gscanwaiting,
 487                 _Gscansyscall:
 488                 casfrom_Gscanstatus(gp, s, s&^_Gscan)
 489
 490         // Scan is now completed.
 491         // Goroutine now needs to be made runnable.
 492         // We put it on the global run queue; ready blocks on the global scheduler lock.
 493         case _Gscanenqueue:
 494                 casfrom_Gscanstatus(gp, _Gscanenqueue, _Gwaiting)
 495                 if gp != getg().m.curg {
 496                         gothrow("processing Gscanenqueue on wrong m")
 497                 }
 498                 dropg()
 499                 ready(gp)
 500         }
 501 }
 502
 503 func stopscanstart(gp *g) {
 504         _g_ := getg()
 505         if _g_ == gp {
 506                 gothrow("GC not moved to G0")
 507         }
 508         if stopg(gp) {
 509                 if !isscanstatus(readgstatus(gp)) {
 510                         dumpgstatus(gp)
 511                         gothrow("GC not in scan state")
 512                 }
 513                 restartg(gp)
 514         }
 515 }
 516
 517 // Runs on g0 and does the actual work after putting the g back on the run queue.
 518 func mquiesce(gpmaster *g) {
 519         // enqueue the calling goroutine.
 520         restartg(gpmaster)
 521
 522         activeglen := len(allgs)
 523         for i := 0; i < activeglen; i++ {
 524                 gp := allgs[i]
 525                 if readgstatus(gp) == _Gdead {
 526                         gp.gcworkdone = true // noop scan.
 527                 } else {
 528                         gp.gcworkdone = false
 529                 }
 530                 stopscanstart(gp)
 531         }
 532
 533         // Check that the G's gcwork (such as scanning) has been done. If not do it now.
 534         // You can end up doing work here if the page trap on a Grunning Goroutine has
 535         // not been sprung or in some race situations. For example a runnable goes dead
 536         // and is started up again with a gp->gcworkdone set to false.
 537         for i := 0; i < activeglen; i++ {
 538                 gp := allgs[i]
 539                 for !gp.gcworkdone {
 540                         status := readgstatus(gp)
 541                         if status == _Gdead {
 542                                 //do nothing, scan not needed.
 543                                 gp.gcworkdone = true // scan is a noop
 544                                 break
 545                         }
 546                         if status == _Grunning && gp.stackguard0 == uintptr(stackPreempt) && notetsleep(&sched.stopnote, 100*1000) { // nanosecond arg
 547                                 noteclear(&sched.stopnote)
 548                         } else {
 549                                 stopscanstart(gp)
 550                         }
 551                 }
 552         }
 553
 554         for i := 0; i < activeglen; i++ {
 555                 gp := allgs[i]
 556                 status := readgstatus(gp)
 557                 if isscanstatus(status) {
 558                         print("mstopandscang:bottom: post scan bad status gp=", gp, " has status ", hex(status), "\n")
 559                         dumpgstatus(gp)
 560                 }
 561                 if !gp.gcworkdone && status != _Gdead {
 562                         print("mstopandscang:bottom: post scan gp=", gp, "->gcworkdone still false\n")
 563                         dumpgstatus(gp)
 564                 }
 565         }
 566
 567         schedule() // Never returns.
 568 }
 569
// quiesce moves all the goroutines to a GC safepoint, which for now is a preemption point.
// If the global gcphase is GCmark quiesce will ensure that all of the goroutine's stacks
// have been scanned before it returns.
func quiesce(mastergp *g) {
	// Claim mastergp by moving it from Grunning to Gscanenqueue; restartg
	// (called first thing by mquiesce) handles that status by making the
	// goroutine runnable again.
	castogscanstatus(mastergp, _Grunning, _Gscanenqueue)
	// Now move this to the g0 (aka m) stack.
	// g0 will potentially scan this thread and put mastergp on the runqueue
	mcall(mquiesce)
}
 579
 580 // This is used by the GC as well as the routines that do stack dumps. In the case
 581 // of GC all the routines can be reliably stopped. This is not always the case
 582 // when the system is in panic or being exited.
 583 func stoptheworld() {
 584         _g_ := getg()
 585
 586         // If we hold a lock, then we won't be able to stop another M
 587         // that is blocked trying to acquire the lock.
 588         if _g_.m.locks > 0 {
 589                 gothrow("stoptheworld: holding locks")
 590         }
 591
 592         lock(&sched.lock)
 593         sched.stopwait = gomaxprocs
 594         atomicstore(&sched.gcwaiting, 1)
 595         preemptall()
 596         // stop current P
 597         _g_.m.p.status = _Pgcstop // Pgcstop is only diagnostic.
 598         sched.stopwait--
 599         // try to retake all P's in Psyscall status
 600         for i := 0; i < int(gomaxprocs); i++ {
 601                 p := allp[i]
 602                 s := p.status
 603                 if s == _Psyscall && cas(&p.status, s, _Pgcstop) {
 604                         sched.stopwait--
 605                 }
 606         }
 607         // stop idle P's
 608         for {
 609                 p := pidleget()
 610                 if p == nil {
 611                         break
 612                 }
 613                 p.status = _Pgcstop
 614                 sched.stopwait--
 615         }
 616         wait := sched.stopwait > 0
 617         unlock(&sched.lock)
 618
 619         // wait for remaining P's to stop voluntarily
 620         if wait {
 621                 for {
 622                         // wait for 100us, then try to re-preempt in case of any races
 623                         if notetsleep(&sched.stopnote, 100*1000) {
 624                                 noteclear(&sched.stopnote)
 625                                 break
 626                         }
 627                         preemptall()
 628                 }
 629         }
 630         if sched.stopwait != 0 {
 631                 gothrow("stoptheworld: not stopped")
 632         }
 633         for i := 0; i < int(gomaxprocs); i++ {
 634                 p := allp[i]
 635                 if p.status != _Pgcstop {
 636                         gothrow("stoptheworld: not stopped")
 637                 }
 638         }
 639 }
 640
 641 func mhelpgc() {
 642         _g_ := getg()
 643         _g_.m.helpgc = -1
 644 }
 645
 646 func starttheworld() {
 647         _g_ := getg()
 648
 649         _g_.m.locks++        // disable preemption because it can be holding p in a local var
 650         gp := netpoll(false) // non-blocking
 651         injectglist(gp)
 652         add := needaddgcproc()
 653         lock(&sched.lock)
 654         if newprocs != 0 {
 655                 procresize(newprocs)
 656                 newprocs = 0
 657         } else {
 658                 procresize(gomaxprocs)
 659         }
 660         sched.gcwaiting = 0
 661
 662         var p1 *p
 663         for {
 664                 p := pidleget()
 665                 if p == nil {
 666                         break
 667                 }
 668                 // procresize() puts p's with work at the beginning of the list.
 669                 // Once we reach a p without a run queue, the rest don't have one either.
 670                 if p.runqhead == p.runqtail {
 671                         pidleput(p)
 672                         break
 673                 }
 674                 p.m = mget()
 675                 p.link = p1
 676                 p1 = p
 677         }
 678         if sched.sysmonwait != 0 {
 679                 sched.sysmonwait = 0
 680                 notewakeup(&sched.sysmonnote)
 681         }
 682         unlock(&sched.lock)
 683
 684         for p1 != nil {
 685                 p := p1
 686                 p1 = p1.link
 687                 if p.m != nil {
 688                         mp := p.m
 689                         p.m = nil
 690                         if mp.nextp != nil {
 691                                 gothrow("starttheworld: inconsistent mp->nextp")
 692                         }
 693                         mp.nextp = p
 694                         notewakeup(&mp.park)
 695                 } else {
 696                         // Start M to run P.  Do not start another M below.
 697                         _newm(nil, p)
 698                         add = false
 699                 }
 700         }
 701
 702         if add {
 703                 // If GC could have used another helper proc, start one now,
 704                 // in the hope that it will be available next time.
 705                 // It would have been even better to start it before the collection,
 706                 // but doing so requires allocating memory, so it's tricky to
 707                 // coordinate.  This lazy approach works out in practice:
 708                 // we don't mind if the first couple gc rounds don't have quite
 709                 // the maximum number of procs.
 710                 _newm(mhelpgc, nil)
 711         }
 712         _g_.m.locks--
 713         if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
 714                 _g_.stackguard0 = stackPreempt
 715         }
 716 }
 717
// Called to start an M.
// mstart fills in the current g's stack bounds when they are unset,
// initializes both stack guards, and then calls mstart1.
//go:nosplit
func mstart() {
	_g_ := getg()

	if _g_.stack.lo == 0 {
		// Initialize stack bounds from system stack.
		// Cgo may have left stack size in stack.hi.
		size := _g_.stack.hi
		if size == 0 {
			size = 8192 // no size was left in stack.hi; fall back to 8192
		}
		// The address of the local variable size is taken as the upper
		// bound of the stack; do not restructure this without care.
		_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
		// Lower bound: size bytes below hi, raised by 1024.
		_g_.stack.lo = _g_.stack.hi - size + 1024
	}
	// Initialize stack guards so that we can start calling
	// both Go and C functions with stack growth prologues.
	_g_.stackguard0 = _g_.stack.lo + _StackGuard
	_g_.stackguard1 = _g_.stackguard0
	mstart1()
}
 739
 740 func mstart1() {
 741         _g_ := getg()
 742
 743         if _g_ != _g_.m.g0 {
 744                 gothrow("bad runtime·mstart")
 745         }
 746
 747         // Record top of stack for use by mcall.
 748         // Once we call schedule we're never coming back,
 749         // so other calls can reuse this stack space.
 750         gosave(&_g_.m.g0.sched)
 751         _g_.m.g0.sched.pc = ^uintptr(0) // make sure it is never used
 752         asminit()
 753         minit()
 754
 755         // Install signal handlers; after minit so that minit can
 756         // prepare the thread to be able to handle the signals.
 757         if _g_.m == &m0 {
 758                 initsig()
 759         }
 760
 761         if _g_.m.mstartfn != nil {
 762                 fn := *(*func())(unsafe.Pointer(&_g_.m.mstartfn))
 763                 fn()
 764         }
 765
 766         if _g_.m.helpgc != 0 {
 767                 _g_.m.helpgc = 0
 768                 stopm()
 769         } else if _g_.m != &m0 {
 770                 acquirep(_g_.m.nextp)
 771                 _g_.m.nextp = nil
 772         }
 773         schedule()
 774
 775         // TODO(brainman): This point is never reached, because scheduler
 776         // does not release os threads at the moment. But once this path
 777         // is enabled, we must remove our seh here.
 778 }
 779
// When running with cgo, we call _cgo_thread_start
// to start threads for us so that we can play nicely with
// foreign code.
// cgoThreadStart presumably holds the address of that function when cgo is
// linked in — it is not assigned anywhere in the visible code; TODO confirm.
var cgoThreadStart unsafe.Pointer
 784
// cgothreadstart is presumably the argument record handed to
// _cgo_thread_start when a thread is created through cgo — it is not used in
// the visible code; TODO confirm against the thread-creation path.
type cgothreadstart struct {
	g   *g             // presumably the g0 of the m being started — TODO confirm
	tls *uint64        // presumably the thread-local storage slots for the new thread — TODO confirm
	fn  unsafe.Pointer // presumably the entry point the new thread runs — TODO confirm
}
 790
 791 // Allocate a new m unassociated with any thread.
 792 // Can use p for allocation context if needed.
 793 func allocm(_p_ *p) *m {
 794         _g_ := getg()
 795         _g_.m.locks++ // disable GC because it can be called from sysmon
 796         if _g_.m.p == nil {
 797                 acquirep(_p_) // temporarily borrow p for mallocs in this function
 798         }
 799         mp := newM()
 800         mcommoninit(mp)
 801
 802         // In case of cgo or Solaris, pthread_create will make us a stack.
 803         // Windows and Plan 9 will layout sched stack on OS stack.
 804         if iscgo || GOOS == "solaris" || GOOS == "windows" || GOOS == "plan9" {
 805                 mp.g0 = malg(-1)
 806         } else {
 807                 mp.g0 = malg(8192)
 808         }
 809         mp.g0.m = mp
 810
 811         if _p_ == _g_.m.p {
 812                 releasep()
 813         }
 814         _g_.m.locks--
 815         if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
 816                 _g_.stackguard0 = stackPreempt
 817         }
 818
 819         return mp
 820 }
 821
 822 func allocg() *g {
 823         return newG()
 824 }
 825
// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
// and return with m, g initialized correctly.
// Since m and g are not set now (likely nil, but see below)
// needm is limited in what routines it can call. In particular
// it can only call nosplit functions (textflag 7) and cannot
// do any scheduling that requires an m.
//
// In order to avoid needing heavy lifting here, we adopt
// the following strategy: there is a stack of available m's
// that can be stolen. Using compare-and-swap
// to pop from the stack has ABA races, so we simulate
// a lock by doing an exchange (via casp) to steal the stack
// head and replace the top pointer with MLOCKED (1).
// This serves as a simple spin lock that we can use even
// without an m. The thread that locks the stack in this way
// unlocks the stack by storing a valid stack head pointer.
//
// In order to make sure that there is always an m structure
// available to be stolen, we maintain the invariant that there
// is always one more than needed. At the beginning of the
// program (if cgo is in use) the list is seeded with a single m.
// If needm finds that it has taken the last m off the list, its job
// is - once it has installed its own m so that it can do things like
// allocate memory - to create a spare m and put it on the list.
//
// Each of these extra m's also has a g0 and a curg that are
// pressed into service as the scheduling stack and current
// goroutine for the duration of the cgo callback.
//
// When the callback is done with the m, it calls dropm to
// put the m back on the list.
//
// The value of the argument x is never read; only its address is used,
// as an approximation of the current stack pointer when the stack
// bounds of g0 are filled in below.
//go:nosplit
func needm(x byte) {
	if needextram != 0 {
		// Can happen if C/C++ code calls Go from a global ctor.
		// Can not throw, because scheduler is not initialized yet.
		// XXX
		// write(2, unsafe.Pointer("fatal error: cgo callback before cgo call\n"), sizeof("fatal error: cgo callback before cgo call\n") - 1)
		exit(1)
	}

	// Lock extra list, take head, unlock popped list.
	// nilokay=false is safe here because of the invariant above,
	// that the extra list always contains or will soon contain
	// at least one m.
	mp := lockextra(false)

	// Set needextram when we've just emptied the list,
	// so that the eventual call into cgocallbackg will
	// allocate a new m for the extra list. We delay the
	// allocation until then so that it can be done
	// after exitsyscall makes sure it is okay to be
	// running at all (that is, there's no garbage collection
	// running right now).
	mp.needextram = mp.schedlink == nil
	// Storing the remainder of the list as the new head also releases the lock.
	unlockextra(mp.schedlink)

	// Install g (= m->g0) and set the stack bounds
	// to match the current stack. We don't actually know
	// how big the stack is, like we don't know how big any
	// scheduling stack is, but we assume there's at least 32 kB,
	// which is more than enough for us.
	setg(mp.g0)
	_g_ := getg()
	// The bounds are placed 1024 bytes above and 32 kB below the address of x.
	_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&x))) + 1024
	_g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024
	_g_.stackguard0 = _g_.stack.lo + _StackGuard

	// Initialize this thread to use the m.
	asminit()
	minit()
}
 900
 901 // newextram allocates an m and puts it on the extra list.
 902 // It is called with a working local m, so that it can do things
 903 // like call schedlock and allocate.
 904 func newextram() {
 905         // Create extra goroutine locked to extra m.
 906         // The goroutine is the context in which the cgo callback will run.
 907         // The sched.pc will never be returned to, but setting it to
 908         // goexit makes clear to the traceback routines where
 909         // the goroutine stack ends.
 910         mp := allocm(nil)
 911         gp := malg(4096)
 912         gp.sched.pc = funcPC(goexit) + _PCQuantum
 913         gp.sched.sp = gp.stack.hi
 914         gp.sched.sp -= 4 * regSize // extra space in case of reads slightly beyond frame
 915         gp.sched.lr = 0
 916         gp.sched.g = gp
 917         gp.syscallpc = gp.sched.pc
 918         gp.syscallsp = gp.sched.sp
 919         // malg returns status as Gidle, change to Gsyscall before adding to allg
 920         // where GC will see it.
 921         casgstatus(gp, _Gidle, _Gsyscall)
 922         gp.m = mp
 923         mp.curg = gp
 924         mp.locked = _LockInternal
 925         mp.lockedg = gp
 926         gp.lockedm = mp
 927         gp.goid = int64(xadd64(&sched.goidgen, 1))
 928         if raceenabled {
 929                 gp.racectx = racegostart(funcPC(newextram))
 930         }
 931         // put on allg for garbage collector
 932         allgadd(gp)
 933
 934         // Add m to the extra list.
 935         mnext := lockextra(true)
 936         mp.schedlink = mnext
 937         unlockextra(mp)
 938 }
 939
// dropm is called when a cgo callback has called needm but is now
// done with the callback and returning back into the non-Go thread.
// It puts the current m back onto the extra list.
//
// The main expense here is the call to signalstack to release the
// m's signal stack, and then the call to needm on the next callback
// from this thread. It is tempting to try to save the m for next time,
// which would eliminate both these costs, but there might not be
// a next time: the current thread (which Go does not control) might exit.
// If we saved the m for that thread, there would be an m leak each time
// such a thread exited. Instead, we acquire and release an m on each
// call. These should typically not be scheduling operations, just a few
// atomics, so the cost should be small.
//
// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
// variable using pthread_key_create. Unlike the pthread keys we already use
// on OS X, this dummy key would never be read by Go code. It would exist
// only so that we could register a thread-exit-time destructor.
// That destructor would put the m back onto the extra list.
// This is purely a performance optimization. The current version,
// in which dropm happens on each cgo call, is still correct too.
// We may have to keep the current version on systems with cgo
// but without pthreads, like Windows.
func dropm() {
	// Undo whatever initialization minit did during needm.
	unminit()

	// Clear m and g, and return m to the extra list.
	// After the call to setg we can only call nosplit functions.
	// The m is read first because it is no longer reachable through
	// getg once g has been cleared.
	mp := getg().m
	setg(nil)

	// Push mp on the front of the extra list; storing it as the new
	// head is what releases the list lock.
	mnext := lockextra(true)
	mp.schedlink = mnext
	unlockextra(mp)
}
 976
// extram is the head of the list of extra m's, stored as a uintptr so
// that it can be manipulated with atomic operations. lockextra stores
// the value 1 in it while the list is locked; 0 is an empty list and
// any other value is the address of the first m.
var extram uintptr
 978
// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
// to extram. If nilokay is true, then lockextra will
// return a nil list head if that's what it finds. If nilokay is false,
// lockextra will keep waiting until the list head is no longer nil.
//
// The lock is a spin lock: the head word is swapped for the sentinel
// value 1 with a compare-and-swap, and the loop retries until that
// succeeds.
//go:nosplit
func lockextra(nilokay bool) *m {
	// Sentinel stored in extram while some thread holds the list.
	const locked = 1

	for {
		old := atomicloaduintptr(&extram)
		if old == locked {
			// Another thread holds the list; give up the CPU and retry.
			// NOTE(review): osyield is invoked through a local function
			// value rather than directly; presumably deliberate for a
			// nosplit function - confirm before simplifying.
			yield := osyield
			yield()
			continue
		}
		if old == 0 && !nilokay {
			// The list is empty but the caller needs an m: sleep
			// briefly and look again.
			usleep(1)
			continue
		}
		if casuintptr(&extram, old, locked) {
			// We own the list now; old is the head that was replaced.
			return (*m)(unsafe.Pointer(old))
		}
		// The head changed between the load and the CAS; yield and retry.
		yield := osyield
		yield()
		continue
	}
}
1007
// unlockextra releases the extra list by atomically storing mp as the
// new list head in extram, replacing the locked sentinel written by
// lockextra. mp may be nil, which leaves the list empty.
//go:nosplit
func unlockextra(mp *m) {
	atomicstoreuintptr(&extram, uintptr(unsafe.Pointer(mp)))
}
1012
1013 // Create a new m.  It will start off with a call to fn, or else the scheduler.
1014 func _newm(fn func(), _p_ *p) {
1015         mp := allocm(_p_)
1016         mp.nextp = _p_
1017         mp.mstartfn = *(*unsafe.Pointer)(unsafe.Pointer(&fn))
1018
1019         if iscgo {
1020                 var ts cgothreadstart
1021                 if _cgo_thread_start == nil {
1022                         gothrow("_cgo_thread_start missing")
1023                 }
1024                 ts.g = mp.g0
1025                 ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0]))
1026                 ts.fn = unsafe.Pointer(funcPC(mstart))
1027                 asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts))
1028                 return
1029         }
1030         newosproc(mp, unsafe.Pointer(mp.g0.stack.hi))
1031 }
1032
1033 // Stops execution of the current m until new work is available.
1034 // Returns with acquired P.
1035 func stopm() {
1036         _g_ := getg()
1037
1038         if _g_.m.locks != 0 {
1039                 gothrow("stopm holding locks")
1040         }
1041         if _g_.m.p != nil {
1042                 gothrow("stopm holding p")
1043         }
1044         if _g_.m.spinning {
1045                 _g_.m.spinning = false
1046                 xadd(&sched.nmspinning, -1)
1047         }
1048
1049 retry:
1050         lock(&sched.lock)
1051         mput(_g_.m)
1052         unlock(&sched.lock)
1053         notesleep(&_g_.m.park)
1054         noteclear(&_g_.m.park)
1055         if _g_.m.helpgc != 0 {
1056                 gchelper()
1057                 _g_.m.helpgc = 0
1058                 _g_.m.mcache = nil
1059                 goto retry
1060         }
1061         acquirep(_g_.m.nextp)
1062         _g_.m.nextp = nil
1063 }
1064
1065 func mspinning() {
1066         getg().m.spinning = true
1067 }
1068
1069 // Schedules some M to run the p (creates an M if necessary).
1070 // If p==nil, tries to get an idle P, if no idle P's does nothing.
1071 func startm(_p_ *p, spinning bool) {
1072         lock(&sched.lock)
1073         if _p_ == nil {
1074                 _p_ = pidleget()
1075                 if _p_ == nil {
1076                         unlock(&sched.lock)
1077                         if spinning {
1078                                 xadd(&sched.nmspinning, -1)
1079                         }
1080                         return
1081                 }
1082         }
1083         mp := mget()
1084         unlock(&sched.lock)
1085         if mp == nil {
1086                 var fn func()
1087                 if spinning {
1088                         fn = mspinning
1089                 }
1090                 _newm(fn, _p_)
1091                 return
1092         }
1093         if mp.spinning {
1094                 gothrow("startm: m is spinning")
1095         }
1096         if mp.nextp != nil {
1097                 gothrow("startm: m has p")
1098         }
1099         mp.spinning = spinning
1100         mp.nextp = _p_
1101         notewakeup(&mp.park)
1102 }
1103
1104 // Hands off P from syscall or locked M.
1105 func handoffp(_p_ *p) {
1106         // if it has local work, start it straight away
1107         if _p_.runqhead != _p_.runqtail || sched.runqsize != 0 {
1108                 startm(_p_, false)
1109                 return
1110         }
1111         // no local work, check that there are no spinning/idle M's,
1112         // otherwise our help is not required
1113         if atomicload(&sched.nmspinning)+atomicload(&sched.npidle) == 0 && cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
1114                 startm(_p_, true)
1115                 return
1116         }
1117         lock(&sched.lock)
1118         if sched.gcwaiting != 0 {
1119                 _p_.status = _Pgcstop
1120                 sched.stopwait--
1121                 if sched.stopwait == 0 {
1122                         notewakeup(&sched.stopnote)
1123                 }
1124                 unlock(&sched.lock)
1125                 return
1126         }
1127         if sched.runqsize != 0 {
1128                 unlock(&sched.lock)
1129                 startm(_p_, false)
1130                 return
1131         }
1132         // If this is the last running P and nobody is polling network,
1133         // need to wakeup another M to poll network.
1134         if sched.npidle == uint32(gomaxprocs-1) && atomicload64(&sched.lastpoll) != 0 {
1135                 unlock(&sched.lock)
1136                 startm(_p_, false)
1137                 return
1138         }
1139         pidleput(_p_)
1140         unlock(&sched.lock)
1141 }
1142
1143 // Tries to add one more P to execute G's.
1144 // Called when a G is made runnable (newproc, ready).
1145 func wakep() {
1146         // be conservative about spinning threads
1147         if !cas(&sched.nmspinning, 0, 1) {
1148                 return
1149         }
1150         startm(nil, true)
1151 }
1152
1153 // Stops execution of the current m that is locked to a g until the g is runnable again.
1154 // Returns with acquired P.
1155 func stoplockedm() {
1156         _g_ := getg()
1157
1158         if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m {
1159                 gothrow("stoplockedm: inconsistent locking")
1160         }
1161         if _g_.m.p != nil {
1162                 // Schedule another M to run this p.
1163                 _p_ := releasep()
1164                 handoffp(_p_)
1165         }
1166         incidlelocked(1)
1167         // Wait until another thread schedules lockedg again.
1168         notesleep(&_g_.m.park)
1169         noteclear(&_g_.m.park)
1170         status := readgstatus(_g_.m.lockedg)
1171         if status&^_Gscan != _Grunnable {
1172                 print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
1173                 dumpgstatus(_g_)
1174                 gothrow("stoplockedm: not runnable")
1175         }
1176         acquirep(_g_.m.nextp)
1177         _g_.m.nextp = nil
1178 }
1179
1180 // Schedules the locked m to run the locked gp.
1181 func startlockedm(gp *g) {
1182         _g_ := getg()
1183
1184         mp := gp.lockedm
1185         if mp == _g_.m {
1186                 gothrow("startlockedm: locked to me")
1187         }
1188         if mp.nextp != nil {
1189                 gothrow("startlockedm: m has p")
1190         }
1191         // directly handoff current P to the locked m
1192         incidlelocked(-1)
1193         _p_ := releasep()
1194         mp.nextp = _p_
1195         notewakeup(&mp.park)
1196         stopm()
1197 }
1198
1199 // Stops the current m for stoptheworld.
1200 // Returns when the world is restarted.
1201 func gcstopm() {
1202         _g_ := getg()
1203
1204         if sched.gcwaiting == 0 {
1205                 gothrow("gcstopm: not waiting for gc")
1206         }
1207         if _g_.m.spinning {
1208                 _g_.m.spinning = false
1209                 xadd(&sched.nmspinning, -1)
1210         }
1211         _p_ := releasep()
1212         lock(&sched.lock)
1213         _p_.status = _Pgcstop
1214         sched.stopwait--
1215         if sched.stopwait == 0 {
1216                 notewakeup(&sched.stopnote)
1217         }
1218         unlock(&sched.lock)
1219         stopm()
1220 }
1221
1222 // Schedules gp to run on the current M.
1223 // Never returns.
1224 func execute(gp *g) {
1225         _g_ := getg()
1226
1227         casgstatus(gp, _Grunnable, _Grunning)
1228         gp.waitsince = 0
1229         gp.preempt = false
1230         gp.stackguard0 = gp.stack.lo + _StackGuard
1231         _g_.m.p.schedtick++
1232         _g_.m.curg = gp
1233         gp.m = _g_.m
1234
1235         // Check whether the profiler needs to be turned on or off.
1236         hz := sched.profilehz
1237         if _g_.m.profilehz != hz {
1238                 resetcpuprofiler(hz)
1239         }
1240
1241         gogo(&gp.sched)
1242 }
1243
// Finds a runnable goroutine to execute.
// Tries to steal from other P's, get g from global queue, poll network.
//
// The search proceeds in order: the local run queue, the global run
// queue, a non-blocking network poll, and then random stealing from
// other P's. If all of that fails, the P is put on the idle list, the
// run queues are checked once more, the network may be polled in
// blocking mode, and finally the m is parked with stopm before the
// whole search restarts from the top.
func findrunnable() *g {
	_g_ := getg()

top:
	// A pending stop-the-world takes priority over looking for work.
	if sched.gcwaiting != 0 {
		gcstopm()
		goto top
	}
	if fingwait && fingwake {
		if gp := wakefing(); gp != nil {
			ready(gp)
		}
	}

	// local runq
	if gp := runqget(_g_.m.p); gp != nil {
		return gp
	}

	// global runq
	if sched.runqsize != 0 {
		lock(&sched.lock)
		gp := globrunqget(_g_.m.p, 0)
		unlock(&sched.lock)
		if gp != nil {
			return gp
		}
	}

	// poll network - returns list of goroutines
	// The first goroutine of the list is returned to the caller; the
	// rest of the list is handed to injectglist.
	if gp := netpoll(false); gp != nil { // non-blocking
		injectglist(gp.schedlink)
		casgstatus(gp, _Gwaiting, _Grunnable)
		return gp
	}

	// If number of spinning M's >= number of busy P's, block.
	// This is necessary to prevent excessive CPU consumption
	// when GOMAXPROCS>>1 but the program parallelism is low.
	if !_g_.m.spinning && 2*atomicload(&sched.nmspinning) >= uint32(gomaxprocs)-atomicload(&sched.npidle) { // TODO: fast atomic
		goto stop
	}
	if !_g_.m.spinning {
		_g_.m.spinning = true
		xadd(&sched.nmspinning, 1)
	}
	// random steal from other P's
	// Up to 2*gomaxprocs randomly chosen P's are tried.
	for i := 0; i < int(2*gomaxprocs); i++ {
		if sched.gcwaiting != 0 {
			goto top
		}
		_p_ := allp[fastrand1()%uint32(gomaxprocs)]
		var gp *g
		if _p_ == _g_.m.p {
			// The random pick was our own P: just look at its queue.
			gp = runqget(_p_)
		} else {
			gp = runqsteal(_g_.m.p, _p_)
		}
		if gp != nil {
			return gp
		}
	}
stop:

	// return P and block
	// Under sched.lock, re-check for a pending GC and for global work
	// before the P is given up.
	lock(&sched.lock)
	if sched.gcwaiting != 0 {
		unlock(&sched.lock)
		goto top
	}
	if sched.runqsize != 0 {
		gp := globrunqget(_g_.m.p, 0)
		unlock(&sched.lock)
		return gp
	}
	_p_ := releasep()
	pidleput(_p_)
	unlock(&sched.lock)
	if _g_.m.spinning {
		_g_.m.spinning = false
		xadd(&sched.nmspinning, -1)
	}

	// check all runqueues once again
	// If some P has work queued, try to take an idle P and restart the
	// search; if no idle P is available, stop looking.
	for i := 0; i < int(gomaxprocs); i++ {
		_p_ := allp[i]
		if _p_ != nil && _p_.runqhead != _p_.runqtail {
			lock(&sched.lock)
			_p_ = pidleget()
			unlock(&sched.lock)
			if _p_ != nil {
				acquirep(_p_)
				goto top
			}
			break
		}
	}

	// poll network
	// The exchange sets sched.lastpoll to 0 for the duration of the
	// blocking poll; only an m that observed a non-zero value polls.
	if xchg64(&sched.lastpoll, 0) != 0 {
		if _g_.m.p != nil {
			gothrow("findrunnable: netpoll with p")
		}
		if _g_.m.spinning {
			gothrow("findrunnable: netpoll with spinning")
		}
		gp := netpoll(true) // block until new work is available
		atomicstore64(&sched.lastpoll, uint64(nanotime()))
		if gp != nil {
			lock(&sched.lock)
			_p_ = pidleget()
			unlock(&sched.lock)
			if _p_ != nil {
				// Got a P: run the first goroutine ourselves and
				// inject the remainder of the list.
				acquirep(_p_)
				injectglist(gp.schedlink)
				casgstatus(gp, _Gwaiting, _Grunnable)
				return gp
			}
			// No P available: hand the whole list to injectglist.
			injectglist(gp)
		}
	}
	stopm()
	goto top
}
1370
1371 func resetspinning() {
1372         _g_ := getg()
1373
1374         var nmspinning uint32
1375         if _g_.m.spinning {
1376                 _g_.m.spinning = false
1377                 nmspinning = xadd(&sched.nmspinning, -1)
1378                 if nmspinning < 0 {
1379                         gothrow("findrunnable: negative nmspinning")
1380                 }
1381         } else {
1382                 nmspinning = atomicload(&sched.nmspinning)
1383         }
1384
1385         // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1386         // so see if we need to wakeup another P here.
1387         if nmspinning == 0 && atomicload(&sched.npidle) > 0 {
1388                 wakep()
1389         }
1390 }
1391
1392 // Injects the list of runnable G's into the scheduler.
1393 // Can run concurrently with GC.
1394 func injectglist(glist *g) {
1395         if glist == nil {
1396                 return
1397         }
1398         lock(&sched.lock)
1399         var n int
1400         for n = 0; glist != nil; n++ {
1401                 gp := glist
1402                 glist = gp.schedlink
1403                 casgstatus(gp, _Gwaiting, _Grunnable)
1404                 globrunqput(gp)
1405         }
1406         unlock(&sched.lock)
1407         for ; n != 0 && sched.npidle != 0; n-- {
1408                 startm(nil, false)
1409         }
1410 }
1411
1412 // One round of scheduler: find a runnable goroutine and execute it.
1413 // Never returns.
1414 func schedule() {
1415         _g_ := getg()
1416
1417         if _g_.m.locks != 0 {
1418                 gothrow("schedule: holding locks")
1419         }
1420
1421         if _g_.m.lockedg != nil {
1422                 stoplockedm()
1423                 execute(_g_.m.lockedg) // Never returns.
1424         }
1425
1426 top:
1427         if sched.gcwaiting != 0 {
1428                 gcstopm()
1429                 goto top
1430         }
1431
1432         var gp *g
1433         // Check the global runnable queue once in a while to ensure fairness.
1434         // Otherwise two goroutines can completely occupy the local runqueue
1435         // by constantly respawning each other.
1436         tick := _g_.m.p.schedtick
1437         // This is a fancy way to say tick%61==0,
1438         // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
1439         if uint64(tick)-((uint64(tick)*0x4325c53f)>>36)*61 == 0 && sched.runqsize > 0 {
1440                 lock(&sched.lock)
1441                 gp = globrunqget(_g_.m.p, 1)
1442                 unlock(&sched.lock)
1443                 if gp != nil {
1444                         resetspinning()
1445                 }
1446         }
1447         if gp == nil {
1448                 gp = runqget(_g_.m.p)
1449                 if gp != nil && _g_.m.spinning {
1450                         gothrow("schedule: spinning with local work")
1451                 }
1452         }
1453         if gp == nil {
1454                 gp = findrunnable() // blocks until work is available
1455                 resetspinning()
1456         }
1457
1458         if gp.lockedm != nil {
1459                 // Hands off own p to the locked m,
1460                 // then blocks waiting for a new p.
1461                 startlockedm(gp)
1462                 goto top
1463         }
1464
1465         execute(gp)
1466 }
1467
1468 // dropg removes the association between m and the current goroutine m->curg (gp for short).
1469 // Typically a caller sets gp's status away from Grunning and then
1470 // immediately calls dropg to finish the job. The caller is also responsible
1471 // for arranging that gp will be restarted using ready at an
1472 // appropriate time. After calling dropg and arranging for gp to be
1473 // readied later, the caller can do other work but eventually should
1474 // call schedule to restart the scheduling of goroutines on this m.
1475 func dropg() {
1476         _g_ := getg()
1477
1478         if _g_.m.lockedg == nil {
1479                 _g_.m.curg.m = nil
1480                 _g_.m.curg = nil
1481         }
1482 }
1483
1484 // Puts the current goroutine into a waiting state and calls unlockf.
1485 // If unlockf returns false, the goroutine is resumed.
1486 func park(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string) {
1487         _g_ := getg()
1488
1489         _g_.m.waitlock = lock
1490         _g_.m.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf))
1491         _g_.waitreason = reason
1492         mcall(park_m)
1493 }
1494
1495 func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
1496         unlock((*mutex)(lock))
1497         return true
1498 }
1499
1500 // Puts the current goroutine into a waiting state and unlocks the lock.
1501 // The goroutine can be made runnable again by calling ready(gp).
1502 func parkunlock(lock *mutex, reason string) {
1503         park(parkunlock_c, unsafe.Pointer(lock), reason)
1504 }
1505
1506 // park continuation on g0.
1507 func park_m(gp *g) {
1508         _g_ := getg()
1509
1510         casgstatus(gp, _Grunning, _Gwaiting)
1511         dropg()
1512
1513         if _g_.m.waitunlockf != nil {
1514                 fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
1515                 ok := fn(gp, _g_.m.waitlock)
1516                 _g_.m.waitunlockf = nil
1517                 _g_.m.waitlock = nil
1518                 if !ok {
1519                         casgstatus(gp, _Gwaiting, _Grunnable)
1520                         execute(gp) // Schedule it back, never returns.
1521                 }
1522         }
1523         schedule()
1524 }
1525
1526 // Gosched continuation on g0.
1527 func gosched_m(gp *g) {
1528         status := readgstatus(gp)
1529         if status&^_Gscan != _Grunning {
1530                 dumpgstatus(gp)
1531                 gothrow("bad g status")
1532         }
1533         casgstatus(gp, _Grunning, _Grunnable)
1534         dropg()
1535         lock(&sched.lock)
1536         globrunqput(gp)
1537         unlock(&sched.lock)
1538
1539         schedule()
1540 }
1541
// Finishes execution of the current goroutine.
// When the race detector is enabled it is told that the goroutine has
// ended; the rest of the work is done by goexit0, which is run via mcall.
// Must be NOSPLIT because it is called from Go. (TODO - probably not anymore)
//go:nosplit
func goexit1() {
	if raceenabled {
		racegoend()
	}
	mcall(goexit0)
}
1551
1552 // goexit continuation on g0.
1553 func goexit0(gp *g) {
1554         _g_ := getg()
1555
1556         casgstatus(gp, _Grunning, _Gdead)
1557         gp.m = nil
1558         gp.lockedm = nil
1559         _g_.m.lockedg = nil
1560         gp.paniconfault = false
1561         gp._defer = nil // should be true already but just in case.
1562         gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
1563         gp.writebuf = nil
1564         gp.waitreason = ""
1565         gp.param = nil
1566
1567         dropg()
1568
1569         if _g_.m.locked&^_LockExternal != 0 {
1570                 print("invalid m->locked = ", _g_.m.locked, "\n")
1571                 gothrow("internal lockOSThread error")
1572         }
1573         _g_.m.locked = 0
1574         gfput(_g_.m.p, gp)
1575         schedule()
1576 }
1577
// save records pc and sp in the sched field of the current g and
// resets the remaining fields of that record: lr and ret to 0, ctxt to
// nil, and g to the current g itself.
//go:nosplit
func save(pc, sp uintptr) {
	_g_ := getg()

	_g_.sched.pc = pc
	_g_.sched.sp = sp
	_g_.sched.lr = 0
	_g_.sched.ret = 0
	_g_.sched.ctxt = nil
	// write as uintptr to avoid write barrier, which will smash _g_.sched.
	*(*uintptr)(unsafe.Pointer(&_g_.sched.g)) = uintptr(unsafe.Pointer(_g_))
}
1590
// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
// not from the low-level system calls used by the runtime.
//
// Entersyscall cannot split the stack: the gosave must
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
//
// Nothing entersyscall calls can split the stack either.
// We cannot safely move the stack during an active call to syscall,
// because we do not know which of the uintptr arguments are
// really pointers (back into the stack).
// In practice, this means that we make the fast path run through
// entersyscall doing no-split things, and the slow path has to use systemstack
// to run bigger things on the system stack.
//
// reentersyscall is the entry point used by cgo callbacks, where explicitly
// saved SP and PC are restored. This is needed when exitsyscall will be called
// from a function further up in the call stack than the parent, as g->syscallsp
// must always point to a valid stack frame. entersyscall below is the normal
// entry point for syscalls, which obtains the SP and PC from the caller.
//go:nosplit
func reentersyscall(pc, sp uintptr) {
	_g_ := getg()

	// Disable preemption because during this function g is in Gsyscall status,
	// but can have inconsistent g->sched, do not let GC observe it.
	_g_.m.locks++

	// Entersyscall must not call any function that might split/grow the stack.
	// (See details in comment above.)
	// Catch calls that might, by replacing the stack guard with something that
	// will trip any stack check and leaving a flag to tell newstack to die.
	_g_.stackguard0 = stackPreempt
	_g_.throwsplit = true

	// Leave SP around for GC and traceback.
	save(pc, sp)
	_g_.syscallsp = sp
	_g_.syscallpc = pc
	casgstatus(_g_, _Grunning, _Gsyscall)
	// Sanity check: the recorded SP must lie within this g's stack bounds.
	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
		systemstack(func() {
			print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
			gothrow("entersyscall")
		})
	}

	if atomicload(&sched.sysmonwait) != 0 { // TODO: fast atomic
		// sysmon is waiting: wake it on the system stack, then record
		// pc and sp in g->sched again.
		systemstack(entersyscall_sysmon)
		save(pc, sp)
	}

	// Detach the m from its P and mark the P as being in a syscall.
	_g_.m.mcache = nil
	_g_.m.p.m = nil
	atomicstore(&_g_.m.p.status, _Psyscall)
	if sched.gcwaiting != 0 {
		// A stop-the-world is pending: run entersyscall_gcwait on the
		// system stack, then record pc and sp in g->sched again.
		systemstack(entersyscall_gcwait)
		save(pc, sp)
	}

	// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
	// We set _StackGuard to StackPreempt so that first split stack check calls morestack.
	// Morestack detects this case and throws.
	_g_.stackguard0 = stackPreempt
	_g_.m.locks--
}
1659
// Standard syscall entry used by the go syscall library and normal cgo calls.
// The value of dummy is not used; only its address is taken, to obtain
// the caller's PC and SP, which are passed on to reentersyscall.
//go:nosplit
func entersyscall(dummy int32) {
	reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
}
1665
1666 func entersyscall_sysmon() {
1667         lock(&sched.lock)
1668         if atomicload(&sched.sysmonwait) != 0 {
1669                 atomicstore(&sched.sysmonwait, 0)
1670                 notewakeup(&sched.sysmonnote)
1671         }
1672         unlock(&sched.lock)
1673 }
1674
1675 func entersyscall_gcwait() {
1676         _g_ := getg()
1677
1678         lock(&sched.lock)
1679         if sched.stopwait > 0 && cas(&_g_.m.p.status, _Psyscall, _Pgcstop) {
1680                 if sched.stopwait--; sched.stopwait == 0 {
1681                         notewakeup(&sched.stopnote)
1682                 }
1683         }
1684         unlock(&sched.lock)
1685 }
1686
// The same as entersyscall(), but with a hint that the syscall is blocking.
// Rather than keeping the P attached while in the syscall, the P is released
// and handed off right away (see entersyscallblock_handoff).
// dummy is never read: only its address is used to find the caller's PC and SP.
//go:nosplit
func entersyscallblock(dummy int32) {
	_g_ := getg()

	_g_.m.locks++ // see comment in entersyscall
	_g_.throwsplit = true
	_g_.stackguard0 = stackPreempt // see comment in entersyscall

	// Leave SP around for GC and traceback.
	pc := getcallerpc(unsafe.Pointer(&dummy))
	sp := getcallersp(unsafe.Pointer(&dummy))
	save(pc, sp)
	_g_.syscallsp = _g_.sched.sp
	_g_.syscallpc = _g_.sched.pc
	// Sanity check before the status change: the recorded syscall SP must
	// lie within this goroutine's stack bounds.
	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
		// Snapshot the three SP values so the report shows what was
		// observed at the time of the failed check.
		sp1 := sp
		sp2 := _g_.sched.sp
		sp3 := _g_.syscallsp
		systemstack(func() {
			print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
			gothrow("entersyscallblock")
		})
	}
	casgstatus(_g_, _Grunning, _Gsyscall)
	// Repeat the same bounds check now that the goroutine is in _Gsyscall.
	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
		systemstack(func() {
			print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
			gothrow("entersyscallblock")
		})
	}

	// Give up the P on the system stack.
	systemstack(entersyscallblock_handoff)

	// Resave for traceback during blocked call.
	save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))

	_g_.m.locks--
}
1726
1727 func entersyscallblock_handoff() {
1728         handoffp(releasep())
1729 }
1730
// The goroutine g exited its system call.
// Arrange for it to run on a cpu again.
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.
//
// Fast path: exitsyscallfast obtains a P and the goroutine keeps running
// on this M. Slow path: exitsyscall0 is invoked via mcall and control
// comes back here only once the goroutine has been scheduled again.
// dummy is never read: only its address is used to obtain the caller's SP.
//go:nosplit
func exitsyscall(dummy int32) {
	_g_ := getg()

	_g_.m.locks++ // see comment in entersyscall
	// The caller's SP must not be above the SP recorded at syscall entry;
	// otherwise the frame saved for GC/traceback is stale.
	if getcallersp(unsafe.Pointer(&dummy)) > _g_.syscallsp {
		gothrow("exitsyscall: syscall frame is no longer valid")
	}

	_g_.waitsince = 0
	if exitsyscallfast() {
		if _g_.m.mcache == nil {
			gothrow("lost mcache")
		}
		// There's a cpu for us, so we can run.
		_g_.m.p.syscalltick++
		// We need to cas the status and scan before resuming...
		casgstatus(_g_, _Gsyscall, _Grunning)

		// Garbage collector isn't running (since we are),
		// so okay to clear syscallsp.
		_g_.syscallsp = 0
		_g_.m.locks--
		if _g_.preempt {
			// restore the preemption request in case we've cleared it in newstack
			_g_.stackguard0 = stackPreempt
		} else {
			// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
			_g_.stackguard0 = _g_.stack.lo + _StackGuard
		}
		_g_.throwsplit = false
		return
	}

	_g_.m.locks--

	// Call the scheduler.
	mcall(exitsyscall0)

	if _g_.m.mcache == nil {
		gothrow("lost mcache")
	}

	// Scheduler returned, so we're allowed to run now.
	// Delete the syscallsp information that we left for
	// the garbage collector during the system call.
	// Must wait until now because until gosched returns
	// we don't know for sure that the garbage collector
	// is not running.
	_g_.syscallsp = 0
	_g_.m.p.syscalltick++
	_g_.throwsplit = false
}
1788
// exitsyscallfast tries to get a P for the current M without going through
// the scheduler. It reports true if the M now holds a P (with m.mcache set),
// and false otherwise; on failure m.p and m.mcache are left nil.
//go:nosplit
func exitsyscallfast() bool {
	_g_ := getg()

	// Freezetheworld sets stopwait but does not retake P's.
	if sched.stopwait != 0 {
		_g_.m.mcache = nil
		_g_.m.p = nil
		return false
	}

	// Try to re-acquire the last P.
	// The plain status read is a cheap pre-check; the cas decides ownership.
	if _g_.m.p != nil && _g_.m.p.status == _Psyscall && cas(&_g_.m.p.status, _Psyscall, _Prunning) {
		// There's a cpu for us, so we can run.
		_g_.m.mcache = _g_.m.p.mcache
		_g_.m.p.m = _g_.m
		return true
	}

	// Try to get any other idle P.
	_g_.m.mcache = nil
	_g_.m.p = nil
	// sched.pidle is read without the lock here; exitsyscallfast_pidle
	// takes sched.lock before actually popping a P.
	if sched.pidle != nil {
		var ok bool
		systemstack(func() {
			ok = exitsyscallfast_pidle()
		})
		if ok {
			return true
		}
	}
	return false
}
1822
1823 func exitsyscallfast_pidle() bool {
1824         lock(&sched.lock)
1825         _p_ := pidleget()
1826         if _p_ != nil && atomicload(&sched.sysmonwait) != 0 {
1827                 atomicstore(&sched.sysmonwait, 0)
1828                 notewakeup(&sched.sysmonnote)
1829         }
1830         unlock(&sched.lock)
1831         if _p_ != nil {
1832                 acquirep(_p_)
1833                 return true
1834         }
1835         return false
1836 }
1837
1838 // exitsyscall slow path on g0.
1839 // Failed to acquire P, enqueue gp as runnable.
1840 func exitsyscall0(gp *g) {
1841         _g_ := getg()
1842
1843         casgstatus(gp, _Gsyscall, _Grunnable)
1844         dropg()
1845         lock(&sched.lock)
1846         _p_ := pidleget()
1847         if _p_ == nil {
1848                 globrunqput(gp)
1849         } else if atomicload(&sched.sysmonwait) != 0 {
1850                 atomicstore(&sched.sysmonwait, 0)
1851                 notewakeup(&sched.sysmonnote)
1852         }
1853         unlock(&sched.lock)
1854         if _p_ != nil {
1855                 acquirep(_p_)
1856                 execute(gp) // Never returns.
1857         }
1858         if _g_.m.lockedg != nil {
1859                 // Wait until another thread schedules gp and so m again.
1860                 stoplockedm()
1861                 execute(gp) // Never returns.
1862         }
1863         stopm()
1864         schedule() // Never returns.
1865 }
1866
1867 func beforefork() {
1868         gp := getg().m.curg
1869
1870         // Fork can hang if preempted with signals frequently enough (see issue 5517).
1871         // Ensure that we stay on the same M where we disable profiling.
1872         gp.m.locks++
1873         if gp.m.profilehz != 0 {
1874                 resetcpuprofiler(0)
1875         }
1876
1877         // This function is called before fork in syscall package.
1878         // Code between fork and exec must not allocate memory nor even try to grow stack.
1879         // Here we spoil g->_StackGuard to reliably detect any attempts to grow stack.
1880         // runtime_AfterFork will undo this in parent process, but not in child.
1881         gp.stackguard0 = stackFork
1882 }
1883
// Called from syscall package before fork.
// Runs beforefork on the system stack.
//go:nosplit
func syscall_BeforeFork() {
	systemstack(beforefork)
}
1889
1890 func afterfork() {
1891         gp := getg().m.curg
1892
1893         // See the comment in beforefork.
1894         gp.stackguard0 = gp.stack.lo + _StackGuard
1895
1896         hz := sched.profilehz
1897         if hz != 0 {
1898                 resetcpuprofiler(hz)
1899         }
1900         gp.m.locks--
1901 }
1902
// Called from syscall package after fork in parent.
// Runs afterfork on the system stack.
//go:nosplit
func syscall_AfterFork() {
	systemstack(afterfork)
}
1908
1909 // Allocate a new g, with a stack big enough for stacksize bytes.
1910 func malg(stacksize int32) *g {
1911         newg := allocg()
1912         if stacksize >= 0 {
1913                 stacksize = round2(_StackSystem + stacksize)
1914                 systemstack(func() {
1915                         newg.stack = stackalloc(uint32(stacksize))
1916                 })
1917                 newg.stackguard0 = newg.stack.lo + _StackGuard
1918                 newg.stackguard1 = ^uintptr(0)
1919         }
1920         return newg
1921 }
1922
// Create a new g running fn with siz bytes of arguments.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
// Cannot split the stack because it assumes that the arguments
// are available sequentially after &fn; they would not be
// copied if a stack split occurred.
//go:nosplit
func newproc(siz int32, fn *funcval) {
	// The arguments for fn start one pointer-sized slot past &fn.
	argp := add(unsafe.Pointer(&fn), ptrSize)
	if hasLinkRegister {
		argp = add(argp, ptrSize) // skip caller's saved LR
	}

	// Record the PC of the go statement; newproc1 stores it as the new g's gopc.
	pc := getcallerpc(unsafe.Pointer(&siz))
	// Do the real work on the system stack; a go statement has no results (nret = 0).
	systemstack(func() {
		newproc1(fn, (*uint8)(argp), siz, 0, pc)
	})
}
1941
// Create a new g running fn with narg bytes of arguments starting
// at argp and returning nret bytes of results.  callerpc is the
// address of the go statement that created this.  The new g is put
// on the queue of g's waiting to run.
//
// It throws if fn is nil or if the arguments do not fit in a minimum-size
// stack. The returned value is the newly created (runnable) g.
func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g {
	_g_ := getg()

	if fn == nil {
		_g_.m.throwing = -1 // do not dump full stacks
		gothrow("go of nil func value")
	}
	_g_.m.locks++ // disable preemption because it can be holding p in a local var
	siz := narg + nret
	siz = (siz + 7) &^ 7 // round up to a multiple of 8 bytes

	// We could allocate a larger initial stack if necessary.
	// Not worth it: this is almost always an error.
	// 4*sizeof(uintreg): extra space added below
	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
	if siz >= _StackMin-4*regSize-regSize {
		gothrow("newproc: function arguments too large for new goroutine")
	}

	// Reuse a dead g from this P's free list if possible; otherwise
	// allocate a fresh one with a minimum-size stack.
	_p_ := _g_.m.p
	newg := gfget(_p_)
	if newg == nil {
		newg = malg(_StackMin)
		casgstatus(newg, _Gidle, _Gdead)
		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
	}
	if newg.stack.hi == 0 {
		gothrow("newproc1: newg missing stack")
	}

	if readgstatus(newg) != _Gdead {
		gothrow("newproc1: new g is not Gdead")
	}

	// Lay out the initial frame at the top of the new stack and copy
	// the narg bytes of arguments into it.
	sp := newg.stack.hi
	sp -= 4 * regSize // extra space in case of reads slightly beyond frame
	sp -= uintptr(siz)
	memmove(unsafe.Pointer(sp), unsafe.Pointer(argp), uintptr(narg))
	if hasLinkRegister {
		// caller's LR
		sp -= ptrSize
		*(*unsafe.Pointer)(unsafe.Pointer(sp)) = nil
	}

	// Start from a zeroed scheduling context whose PC points into goexit;
	// gostartcallfn then rewrites it for fn.
	// NOTE(review): presumably this makes fn begin executing with goexit
	// as its return address — confirm against gostartcallfn.
	memclr(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
	newg.sched.sp = sp
	newg.sched.pc = funcPC(goexit) + _PCQuantum // +PCQuantum so that previous instruction is in same function
	newg.sched.g = newg
	gostartcallfn(&newg.sched, fn)
	newg.gopc = callerpc
	casgstatus(newg, _Gdead, _Grunnable)

	// Refill this P's goroutine-id cache from the global generator when
	// it has been used up.
	if _p_.goidcache == _p_.goidcacheend {
		// Sched.goidgen is the last allocated id,
		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
		// At startup sched.goidgen=0, so main goroutine receives goid=1.
		_p_.goidcache = xadd64(&sched.goidgen, _GoidCacheBatch)
		_p_.goidcache -= _GoidCacheBatch - 1
		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
	}
	newg.goid = int64(_p_.goidcache)
	_p_.goidcache++
	if raceenabled {
		newg.racectx = racegostart(callerpc)
	}
	runqput(_p_, newg)

	// If there are idle P's and no spinning M's, call wakep so the new g
	// can be picked up. This is skipped when fn is main.
	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 && unsafe.Pointer(fn.fn) != unsafe.Pointer(funcPC(main)) { // TODO: fast atomic
		wakep()
	}
	_g_.m.locks--
	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
		_g_.stackguard0 = stackPreempt
	}
	return newg
}
2022
2023 // Put on gfree list.
2024 // If local list is too long, transfer a batch to the global list.
2025 func gfput(_p_ *p, gp *g) {
2026         if readgstatus(gp) != _Gdead {
2027                 gothrow("gfput: bad status (not Gdead)")
2028         }
2029
2030         stksize := gp.stack.hi - gp.stack.lo
2031
2032         if stksize != _FixedStack {
2033                 // non-standard stack size - free it.
2034                 stackfree(gp.stack)
2035                 gp.stack.lo = 0
2036                 gp.stack.hi = 0
2037                 gp.stackguard0 = 0
2038         }
2039
2040         gp.schedlink = _p_.gfree
2041         _p_.gfree = gp
2042         _p_.gfreecnt++
2043         if _p_.gfreecnt >= 64 {
2044                 lock(&sched.gflock)
2045                 for _p_.gfreecnt >= 32 {
2046                         _p_.gfreecnt--
2047                         gp = _p_.gfree
2048                         _p_.gfree = gp.schedlink
2049                         gp.schedlink = sched.gfree
2050                         sched.gfree = gp
2051                         sched.ngfree++
2052                 }
2053                 unlock(&sched.gflock)
2054         }
2055 }
2056
2057 // Get from gfree list.
2058 // If local list is empty, grab a batch from global list.
2059 func gfget(_p_ *p) *g {
2060 retry:
2061         gp := _p_.gfree
2062         if gp == nil && sched.gfree != nil {
2063                 lock(&sched.gflock)
2064                 for _p_.gfreecnt < 32 && sched.gfree != nil {
2065                         _p_.gfreecnt++
2066                         gp = sched.gfree
2067                         sched.gfree = gp.schedlink
2068                         sched.ngfree--
2069                         gp.schedlink = _p_.gfree
2070                         _p_.gfree = gp
2071                 }
2072                 unlock(&sched.gflock)
2073                 goto retry
2074         }
2075         if gp != nil {
2076                 _p_.gfree = gp.schedlink
2077                 _p_.gfreecnt--
2078                 if gp.stack.lo == 0 {
2079                         // Stack was deallocated in gfput.  Allocate a new one.
2080                         systemstack(func() {
2081                                 gp.stack = stackalloc(_FixedStack)
2082                         })
2083                         gp.stackguard0 = gp.stack.lo + _StackGuard
2084                 } else {
2085                         if raceenabled {
2086                                 racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
2087                         }
2088                 }
2089         }
2090         return gp
2091 }
2092
2093 // Purge all cached G's from gfree list to the global list.
2094 func gfpurge(_p_ *p) {
2095         lock(&sched.gflock)
2096         for _p_.gfreecnt != 0 {
2097                 _p_.gfreecnt--
2098                 gp := _p_.gfree
2099                 _p_.gfree = gp.schedlink
2100                 gp.schedlink = sched.gfree
2101                 sched.gfree = gp
2102                 sched.ngfree++
2103         }
2104         unlock(&sched.gflock)
2105 }
2106
2107 // Breakpoint executes a breakpoint trap.
2108 func Breakpoint() {
2109         breakpoint()
2110 }
2111
// dolockOSThread is called by LockOSThread and lockOSThread below
// after they modify m.locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
// It links the current g and its m to each other via m.lockedg and g.lockedm.
//go:nosplit
func dolockOSThread() {
	_g_ := getg()
	_g_.m.lockedg = _g_
	_g_.lockedm = _g_.m
}
2121
//go:nosplit

// LockOSThread wires the calling goroutine to its current operating system thread.
// Until the calling goroutine exits or calls UnlockOSThread, it will always
// execute in that thread, and no other goroutine can.
func LockOSThread() {
	// _LockExternal is a flag bit in m.locked: setting it repeatedly has
	// no further effect, and UnlockOSThread clears it in one step.
	getg().m.locked |= _LockExternal
	dolockOSThread()
}
2131
// lockOSThread is the runtime-internal counterpart of LockOSThread.
// Each call adds _LockInternal to m.locked, so calls nest and must be
// balanced by unlockOSThread.
//go:nosplit
func lockOSThread() {
	getg().m.locked += _LockInternal
	dolockOSThread()
}
2137
// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
// after they update m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
// The g/m link is dissolved only when m.locked has dropped to zero, i.e.
// no external or internal lock remains.
//go:nosplit
func dounlockOSThread() {
	_g_ := getg()
	if _g_.m.locked != 0 {
		// Still locked by some other holder; keep the wiring.
		return
	}
	_g_.m.lockedg = nil
	_g_.lockedm = nil
}
2150
//go:nosplit

// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
func UnlockOSThread() {
	// Clear only the external flag bit; internal lockOSThread counts in
	// m.locked are left untouched.
	getg().m.locked &^= _LockExternal
	dounlockOSThread()
}
2159
// unlockOSThread undoes one lockOSThread call by subtracting _LockInternal
// from m.locked. If m.locked is already below _LockInternal the calls are
// unbalanced, and badunlockosthread is run on the system stack to throw.
//go:nosplit
func unlockOSThread() {
	_g_ := getg()
	if _g_.m.locked < _LockInternal {
		systemstack(badunlockosthread)
	}
	_g_.m.locked -= _LockInternal
	dounlockOSThread()
}
2169
2170 func badunlockosthread() {
2171         gothrow("runtime: internal error: misuse of lockOSThread/unlockOSThread")
2172 }
2173
2174 func gcount() int32 {
2175         n := int32(allglen) - sched.ngfree
2176         for i := 0; ; i++ {
2177                 _p_ := allp[i]
2178                 if _p_ == nil {
2179                         break
2180                 }
2181                 n -= _p_.gfreecnt
2182         }
2183
2184         // All these variables can be changed concurrently, so the result can be inconsistent.
2185         // But at least the current goroutine is running.
2186         if n < 1 {
2187                 n = 1
2188         }
2189         return n
2190 }
2191
2192 func mcount() int32 {
2193         return sched.mcount
2194 }
2195
// prof holds the CPU profiler state shared by sigprof and setcpuprofilerate_m.
var prof struct {
	lock uint32 // cas spin lock (0 = free, 1 = held) guarding hz updates and cpuproftick calls
	hz   int32  // profiling rate set by setcpuprofilerate_m; 0 means profiling is off
}
2200
// _System, _ExternalCode and _GC are placeholder functions. They are not
// meant to be called: sigprof records their PCs as synthetic stack frames
// when no real traceback is available, so such samples are attributed to
// "System", "ExternalCode" or "GC".
// NOTE(review): each body calls itself, presumably just to give the
// function a non-trivial body with a distinct PC — confirm.
func _System()       { _System() }
func _ExternalCode() { _ExternalCode() }
func _GC()           { _GC() }
2204
// etext is used only for its address: sigprof reports a PC above &etext
// as _ExternalCode.
// NOTE(review): presumably the linker places this symbol at the end of
// the program text — confirm.
var etext struct{}
2206
// Called if we receive a SIGPROF signal.
//
// pc, sp and lr are the program counter, stack pointer and link register
// passed in for the interrupted code; gp is the g and mp the m that were
// running. sigprof collects a stack trace into a fixed-size local buffer
// (falling back to the cgo/libcall entry stack or to a synthetic two-frame
// stack when unwinding is not possible) and hands it to cpuproftick.
// It returns immediately if profiling is disabled (prof.hz == 0).
func sigprof(pc *uint8, sp *uint8, lr *uint8, gp *g, mp *m) {
	var n int32
	var traceback bool
	var stk [100]uintptr

	if prof.hz == 0 {
		return
	}

	// Profiling runs concurrently with GC, so it must not allocate.
	mp.mallocing++

	// Define that a "user g" is a user-created goroutine, and a "system g"
	// is one that is m->g0 or m->gsignal. We've only made sure that we
	// can unwind user g's, so exclude the system g's.
	//
	// It is not quite as easy as testing gp == m->curg (the current user g)
	// because we might be interrupted for profiling halfway through a
	// goroutine switch. The switch involves updating three (or four) values:
	// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
	// because once it gets updated the new g is running.
	//
	// When switching from a user g to a system g, LR is not considered live,
	// so the update only affects g, SP, and PC. Since PC must be last,
	// the possible partial transitions in ordinary execution are (1) g alone is updated,
	// (2) both g and SP are updated, and (3) SP alone is updated.
	// If g is updated, we'll see a system g and not look closer.
	// If SP alone is updated, we can detect the partial transition by checking
	// whether the SP is within g's stack bounds. (We could also require that SP
	// be changed only after g, but the stack bounds check is needed by other
	// cases, so there is no need to impose an additional requirement.)
	//
	// There is one exceptional transition to a system g, not in ordinary execution.
	// When a signal arrives, the operating system starts the signal handler running
	// with an updated PC and SP. The g is updated last, at the beginning of the
	// handler. There are two reasons this is okay. First, until g is updated the
	// g and SP do not match, so the stack bounds check detects the partial transition.
	// Second, signal handlers currently run with signals disabled, so a profiling
	// signal cannot arrive during the handler.
	//
	// When switching from a system g to a user g, there are three possibilities.
	//
	// First, it may be that the g switch has no PC update, because the SP
	// either corresponds to a user g throughout (as in asmcgocall)
	// or because it has been arranged to look like a user g frame
	// (as in cgocallback_gofunc). In this case, since the entire
	// transition is a g+SP update, a partial transition updating just one of
	// those will be detected by the stack bounds check.
	//
	// Second, when returning from a signal handler, the PC and SP updates
	// are performed by the operating system in an atomic update, so the g
	// update must be done before them. The stack bounds check detects
	// the partial transition here, and (again) signal handlers run with signals
	// disabled, so a profiling signal cannot arrive then anyway.
	//
	// Third, the common case: it may be that the switch updates g, SP, and PC
	// separately, as in gogo.
	//
	// Because gogo is the only instance, we check whether the PC lies
	// within that function, and if so, not ask for a traceback. This approach
	// requires knowing the size of the gogo function, which we
	// record in arch_*.h and check in runtime_test.go.
	//
	// There is another apparently viable approach, recorded here in case
	// the "PC within gogo" check turns out not to be usable.
	// It would be possible to delay the update of either g or SP until immediately
	// before the PC update instruction. Then, because of the stack bounds check,
	// the only problematic interrupt point is just before that PC update instruction,
	// and the sigprof handler can detect that instruction and simulate stepping past
	// it in order to reach a consistent state. On ARM, the update of g must be made
	// in two places (in R10 and also in a TLS slot), so the delayed update would
	// need to be the SP update. The sigprof handler must read the instruction at
	// the current PC and if it was the known instruction (for example, JMP BX or
	// MOV R2, PC), use that other register in place of the PC value.
	// The biggest drawback to this solution is that it requires that we can tell
	// whether it's safe to read from the memory pointed at by PC.
	// In a correct program, we can test PC == nil and otherwise read,
	// but if a profiling signal happens at the instant that a program executes
	// a bad jump (before the program manages to handle the resulting fault)
	// the profiling handler could fault trying to read nonexistent memory.
	//
	// To recap, there are no constraints on the assembly being used for the
	// transition. We simply require that g and SP match and that the PC is not
	// in gogo.
	traceback = true
	usp := uintptr(unsafe.Pointer(sp))
	gogo := funcPC(gogo)
	// No traceback if gp is not the current user g, if SP lies outside
	// gp's stack, or if the PC falls inside gogo.
	if gp == nil || gp != mp.curg ||
		usp < gp.stack.lo || gp.stack.hi < usp ||
		(gogo <= uintptr(unsafe.Pointer(pc)) && uintptr(unsafe.Pointer(pc)) < gogo+_RuntimeGogoBytes) {
		traceback = false
	}

	n = 0
	if traceback {
		n = int32(gentraceback(uintptr(unsafe.Pointer(pc)), uintptr(unsafe.Pointer(sp)), uintptr(unsafe.Pointer(lr)), gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap))
	}
	if !traceback || n <= 0 {
		// Normal traceback is impossible or has failed.
		// See if it falls into several common cases.
		n = 0
		if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
			// Cgo, we can't unwind and symbolize arbitrary C code,
			// so instead collect Go stack that leads to the cgo call.
			// This is especially important on windows, since all syscalls are cgo calls.
			n = int32(gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0))
		}
		if GOOS == "windows" && n == 0 && mp.libcallg != nil && mp.libcallpc != 0 && mp.libcallsp != 0 {
			// Libcall, i.e. runtime syscall on windows.
			// Collect Go stack that leads to the call.
			n = int32(gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg, 0, &stk[0], len(stk), nil, nil, 0))
		}
		if n == 0 {
			// If all of the above has failed, account it against abstract "System" or "GC".
			n = 2
			// "ExternalCode" is better than "etext".
			if uintptr(unsafe.Pointer(pc)) > uintptr(unsafe.Pointer(&etext)) {
				pc = (*uint8)(unsafe.Pointer(uintptr(funcPC(_ExternalCode) + _PCQuantum)))
			}
			// Synthetic two-frame stack: the (possibly replaced) PC,
			// "called from" either _GC or _System.
			stk[0] = uintptr(unsafe.Pointer(pc))
			if mp.gcing != 0 || mp.helpgc != 0 {
				stk[1] = funcPC(_GC) + _PCQuantum
			} else {
				stk[1] = funcPC(_System) + _PCQuantum
			}
		}
	}

	if prof.hz != 0 {
		// Simple cas-lock to coordinate with setcpuprofilerate.
		for !cas(&prof.lock, 0, 1) {
			osyield()
		}
		// Re-check under the lock: profiling may have been turned off
		// while we were spinning.
		if prof.hz != 0 {
			cpuproftick(&stk[0], n)
		}
		atomicstore(&prof.lock, 0)
	}
	mp.mallocing--
}
2348
2349 // Arrange to call fn with a traceback hz times a second.
2350 func setcpuprofilerate_m(hz int32) {
2351         // Force sane arguments.
2352         if hz < 0 {
2353                 hz = 0
2354         }
2355
2356         // Disable preemption, otherwise we can be rescheduled to another thread
2357         // that has profiling enabled.
2358         _g_ := getg()
2359         _g_.m.locks++
2360
2361         // Stop profiler on this thread so that it is safe to lock prof.
2362         // if a profiling signal came in while we had prof locked,
2363         // it would deadlock.
2364         resetcpuprofiler(0)
2365
2366         for !cas(&prof.lock, 0, 1) {
2367                 osyield()
2368         }
2369         prof.hz = hz
2370         atomicstore(&prof.lock, 0)
2371
2372         lock(&sched.lock)
2373         sched.profilehz = hz
2374         unlock(&sched.lock)
2375
2376         if hz != 0 {
2377                 resetcpuprofiler(hz)
2378         }
2379
2380         _g_.m.locks--
2381 }
2382
2383 // Change number of processors.  The world is stopped, sched is locked.
2384 // gcworkbufs are not being modified by either the GC or
2385 // the write barrier code.
2386 func procresize(new int32) {
2387         old := gomaxprocs
2388         if old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs {
2389                 gothrow("procresize: invalid arg")
2390         }
2391
2392         // initialize new P's
2393         for i := int32(0); i < new; i++ {
2394                 p := allp[i]
2395                 if p == nil {
2396                         p = newP()
2397                         p.id = i
2398                         p.status = _Pgcstop
2399                         atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(p))
2400                 }
2401                 if p.mcache == nil {
2402                         if old == 0 && i == 0 {
2403                                 if getg().m.mcache == nil {
2404                                         gothrow("missing mcache?")
2405                                 }
2406                                 p.mcache = getg().m.mcache // bootstrap
2407                         } else {
2408                                 p.mcache = allocmcache()
2409                         }
2410                 }
2411         }
2412
2413         // redistribute runnable G's evenly
2414         // collect all runnable goroutines in global queue preserving FIFO order
2415         // FIFO order is required to ensure fairness even during frequent GCs
2416         // see http://golang.org/issue/7126
2417         empty := false
2418         for !empty {
2419                 empty = true
2420                 for i := int32(0); i < old; i++ {
2421                         p := allp[i]
2422                         if p.runqhead == p.runqtail {
2423                                 continue
2424                         }
2425                         empty = false
2426                         // pop from tail of local queue
2427                         p.runqtail--
2428                         gp := p.runq[p.runqtail%uint32(len(p.runq))]
2429                         // push onto head of global queue
2430                         gp.schedlink = sched.runqhead
2431                         sched.runqhead = gp
2432                         if sched.runqtail == nil {
2433                                 sched.runqtail = gp
2434                         }
2435                         sched.runqsize++
2436                 }
2437         }
2438
2439         // fill local queues with at most len(p.runq)/2 goroutines
2440         // start at 1 because current M already executes some G and will acquire allp[0] below,
2441         // so if we have a spare G we want to put it into allp[1].
2442         var _p_ p
2443         for i := int32(1); i < new*int32(len(_p_.runq))/2 && sched.runqsize > 0; i++ {
2444                 gp := sched.runqhead
2445                 sched.runqhead = gp.schedlink
2446                 if sched.runqhead == nil {
2447                         sched.runqtail = nil
2448                 }
2449                 sched.runqsize--
2450                 runqput(allp[i%new], gp)
2451         }
2452
2453         // free unused P's
2454         for i := new; i < old; i++ {
2455                 p := allp[i]
2456                 freemcache(p.mcache)
2457                 p.mcache = nil
2458                 gfpurge(p)
2459                 p.status = _Pdead
2460                 // can't free P itself because it can be referenced by an M in syscall
2461         }
2462
2463         _g_ := getg()
2464         if _g_.m.p != nil {
2465                 _g_.m.p.m = nil
2466         }
2467         _g_.m.p = nil
2468         _g_.m.mcache = nil
2469         p := allp[0]
2470         p.m = nil
2471         p.status = _Pidle
2472         acquirep(p)
2473         for i := new - 1; i > 0; i-- {
2474                 p := allp[i]
2475                 p.status = _Pidle
2476                 pidleput(p)
2477         }
2478         var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
2479         atomicstore((*uint32)(unsafe.Pointer(int32p)), uint32(new))
2480 }
2481
2482 // Associate p and the current m.
2483 func acquirep(_p_ *p) {
2484         _g_ := getg()
2485
2486         if _g_.m.p != nil || _g_.m.mcache != nil {
2487                 gothrow("acquirep: already in go")
2488         }
2489         if _p_.m != nil || _p_.status != _Pidle {
2490                 id := int32(0)
2491                 if _p_.m != nil {
2492                         id = _p_.m.id
2493                 }
2494                 print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
2495                 gothrow("acquirep: invalid p state")
2496         }
2497         _g_.m.mcache = _p_.mcache
2498         _g_.m.p = _p_
2499         _p_.m = _g_.m
2500         _p_.status = _Prunning
2501 }
2502
2503 // Disassociate p and the current m.
2504 func releasep() *p {
2505         _g_ := getg()
2506
2507         if _g_.m.p == nil || _g_.m.mcache == nil {
2508                 gothrow("releasep: invalid arg")
2509         }
2510         _p_ := _g_.m.p
2511         if _p_.m != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning {
2512                 print("releasep: m=", _g_.m, " m->p=", _g_.m.p, " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
2513                 gothrow("releasep: invalid p state")
2514         }
2515         _g_.m.p = nil
2516         _g_.m.mcache = nil
2517         _p_.m = nil
2518         _p_.status = _Pidle
2519         return _p_
2520 }
2521
2522 func incidlelocked(v int32) {
2523         lock(&sched.lock)
2524         sched.nmidlelocked += v
2525         if v > 0 {
2526                 checkdead()
2527         }
2528         unlock(&sched.lock)
2529 }
2530
2531 // Check for deadlock situation.
2532 // The check is based on number of running M's, if 0 -> deadlock.
2533 func checkdead() {
2534         // If we are dying because of a signal caught on an already idle thread,
2535         // freezetheworld will cause all running threads to block.
2536         // And runtime will essentially enter into deadlock state,
2537         // except that there is a thread that will call exit soon.
2538         if panicking > 0 {
2539                 return
2540         }
2541
2542         // -1 for sysmon
2543         run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1
2544         if run > 0 {
2545                 return
2546         }
2547         if run < 0 {
2548                 print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n")
2549                 gothrow("checkdead: inconsistent counts")
2550         }
2551
2552         grunning := 0
2553         lock(&allglock)
2554         for i := 0; i < len(allgs); i++ {
2555                 gp := allgs[i]
2556                 if gp.issystem {
2557                         continue
2558                 }
2559                 s := readgstatus(gp)
2560                 switch s &^ _Gscan {
2561                 case _Gwaiting:
2562                         grunning++
2563                 case _Grunnable,
2564                         _Grunning,
2565                         _Gsyscall:
2566                         unlock(&allglock)
2567                         print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
2568                         gothrow("checkdead: runnable g")
2569                 }
2570         }
2571         unlock(&allglock)
2572         if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
2573                 gothrow("no goroutines (main called runtime.Goexit) - deadlock!")
2574         }
2575
2576         // Maybe jump time forward for playground.
2577         gp := timejump()
2578         if gp != nil {
2579                 casgstatus(gp, _Gwaiting, _Grunnable)
2580                 globrunqput(gp)
2581                 _p_ := pidleget()
2582                 if _p_ == nil {
2583                         gothrow("checkdead: no p for timer")
2584                 }
2585                 mp := mget()
2586                 if mp == nil {
2587                         _newm(nil, _p_)
2588                 } else {
2589                         mp.nextp = _p_
2590                         notewakeup(&mp.park)
2591                 }
2592                 return
2593         }
2594
2595         getg().m.throwing = -1 // do not dump full stacks
2596         gothrow("all goroutines are asleep - deadlock!")
2597 }
2598
2599 func sysmon() {
2600         // If we go two minutes without a garbage collection, force one to run.
2601         forcegcperiod := int64(2 * 60 * 1e9)
2602
2603         // If a heap span goes unused for 5 minutes after a garbage collection,
2604         // we hand it back to the operating system.
2605         scavengelimit := int64(5 * 60 * 1e9)
2606
2607         if debug.scavenge > 0 {
2608                 // Scavenge-a-lot for testing.
2609                 forcegcperiod = 10 * 1e6
2610                 scavengelimit = 20 * 1e6
2611         }
2612
2613         lastscavenge := nanotime()
2614         nscavenge := 0
2615
2616         // Make wake-up period small enough for the sampling to be correct.
2617         maxsleep := forcegcperiod / 2
2618         if scavengelimit < forcegcperiod {
2619                 maxsleep = scavengelimit / 2
2620         }
2621
2622         lasttrace := int64(0)
2623         idle := 0 // how many cycles in succession we had not wokeup somebody
2624         delay := uint32(0)
2625         for {
2626                 if idle == 0 { // start with 20us sleep...
2627                         delay = 20
2628                 } else if idle > 50 { // start doubling the sleep after 1ms...
2629                         delay *= 2
2630                 }
2631                 if delay > 10*1000 { // up to 10ms
2632                         delay = 10 * 1000
2633                 }
2634                 usleep(delay)
2635                 if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs)) { // TODO: fast atomic
2636                         lock(&sched.lock)
2637                         if atomicload(&sched.gcwaiting) != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs) {
2638                                 atomicstore(&sched.sysmonwait, 1)
2639                                 unlock(&sched.lock)
2640                                 notetsleep(&sched.sysmonnote, maxsleep)
2641                                 lock(&sched.lock)
2642                                 atomicstore(&sched.sysmonwait, 0)
2643                                 noteclear(&sched.sysmonnote)
2644                                 idle = 0
2645                                 delay = 20
2646                         }
2647                         unlock(&sched.lock)
2648                 }
2649                 // poll network if not polled for more than 10ms
2650                 lastpoll := int64(atomicload64(&sched.lastpoll))
2651                 now := nanotime()
2652                 unixnow := unixnanotime()
2653                 if lastpoll != 0 && lastpoll+10*1000*1000 < now {
2654                         cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
2655                         gp := netpoll(false) // non-blocking - returns list of goroutines
2656                         if gp != nil {
2657                                 // Need to decrement number of idle locked M's
2658                                 // (pretending that one more is running) before injectglist.
2659                                 // Otherwise it can lead to the following situation:
2660                                 // injectglist grabs all P's but before it starts M's to run the P's,
2661                                 // another M returns from syscall, finishes running its G,
2662                                 // observes that there is no work to do and no other running M's
2663                                 // and reports deadlock.
2664                                 incidlelocked(-1)
2665                                 injectglist(gp)
2666                                 incidlelocked(1)
2667                         }
2668                 }
2669                 // retake P's blocked in syscalls
2670                 // and preempt long running G's
2671                 if retake(now) != 0 {
2672                         idle = 0
2673                 } else {
2674                         idle++
2675                 }
2676                 // check if we need to force a GC
2677                 lastgc := int64(atomicload64(&memstats.last_gc))
2678                 if lastgc != 0 && unixnow-lastgc > forcegcperiod && atomicload(&forcegc.idle) != 0 {
2679                         lock(&forcegc.lock)
2680                         forcegc.idle = 0
2681                         forcegc.g.schedlink = nil
2682                         injectglist(forcegc.g)
2683                         unlock(&forcegc.lock)
2684                 }
2685                 // scavenge heap once in a while
2686                 if lastscavenge+scavengelimit/2 < now {
2687                         mHeap_Scavenge(int32(nscavenge), uint64(now), uint64(scavengelimit))
2688                         lastscavenge = now
2689                         nscavenge++
2690                 }
2691                 if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace*1000000) <= now {
2692                         lasttrace = now
2693                         schedtrace(debug.scheddetail > 0)
2694                 }
2695         }
2696 }
2697
2698 var pdesc [_MaxGomaxprocs]struct {
2699         schedtick   uint32
2700         schedwhen   int64
2701         syscalltick uint32
2702         syscallwhen int64
2703 }
2704
2705 func retake(now int64) uint32 {
2706         n := 0
2707         for i := int32(0); i < gomaxprocs; i++ {
2708                 _p_ := allp[i]
2709                 if _p_ == nil {
2710                         continue
2711                 }
2712                 pd := &pdesc[i]
2713                 s := _p_.status
2714                 if s == _Psyscall {
2715                         // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
2716                         t := int64(_p_.syscalltick)
2717                         if int64(pd.syscalltick) != t {
2718                                 pd.syscalltick = uint32(t)
2719                                 pd.syscallwhen = now
2720                                 continue
2721                         }
2722                         // On the one hand we don't want to retake Ps if there is no other work to do,
2723                         // but on the other hand we want to retake them eventually
2724                         // because they can prevent the sysmon thread from deep sleep.
2725                         if _p_.runqhead == _p_.runqtail && atomicload(&sched.nmspinning)+atomicload(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
2726                                 continue
2727                         }
2728                         // Need to decrement number of idle locked M's
2729                         // (pretending that one more is running) before the CAS.
2730                         // Otherwise the M from which we retake can exit the syscall,
2731                         // increment nmidle and report deadlock.
2732                         incidlelocked(-1)
2733                         if cas(&_p_.status, s, _Pidle) {
2734                                 n++
2735                                 handoffp(_p_)
2736                         }
2737                         incidlelocked(1)
2738                 } else if s == _Prunning {
2739                         // Preempt G if it's running for more than 10ms.
2740                         t := int64(_p_.schedtick)
2741                         if int64(pd.schedtick) != t {
2742                                 pd.schedtick = uint32(t)
2743                                 pd.schedwhen = now
2744                                 continue
2745                         }
2746                         if pd.schedwhen+10*1000*1000 > now {
2747                                 continue
2748                         }
2749                         preemptone(_p_)
2750                 }
2751         }
2752         return uint32(n)
2753 }
2754
2755 // Tell all goroutines that they have been preempted and they should stop.
2756 // This function is purely best-effort.  It can fail to inform a goroutine if a
2757 // processor just started running it.
2758 // No locks need to be held.
2759 // Returns true if preemption request was issued to at least one goroutine.
2760 func preemptall() bool {
2761         res := false
2762         for i := int32(0); i < gomaxprocs; i++ {
2763                 _p_ := allp[i]
2764                 if _p_ == nil || _p_.status != _Prunning {
2765                         continue
2766                 }
2767                 if preemptone(_p_) {
2768                         res = true
2769                 }
2770         }
2771         return res
2772 }
2773
2774 // Tell the goroutine running on processor P to stop.
2775 // This function is purely best-effort.  It can incorrectly fail to inform the
2776 // goroutine.  It can send inform the wrong goroutine.  Even if it informs the
2777 // correct goroutine, that goroutine might ignore the request if it is
2778 // simultaneously executing newstack.
2779 // No lock needs to be held.
2780 // Returns true if preemption request was issued.
2781 // The actual preemption will happen at some point in the future
2782 // and will be indicated by the gp->status no longer being
2783 // Grunning
2784 func preemptone(_p_ *p) bool {
2785         mp := _p_.m
2786         if mp == nil || mp == getg().m {
2787                 return false
2788         }
2789         gp := mp.curg
2790         if gp == nil || gp == mp.g0 {
2791                 return false
2792         }
2793
2794         gp.preempt = true
2795
2796         // Every call in a go routine checks for stack overflow by
2797         // comparing the current stack pointer to gp->stackguard0.
2798         // Setting gp->stackguard0 to StackPreempt folds
2799         // preemption into the normal stack overflow check.
2800         gp.stackguard0 = stackPreempt
2801         return true
2802 }
2803
2804 var starttime int64
2805
2806 func schedtrace(detailed bool) {
2807         now := nanotime()
2808         if starttime == 0 {
2809                 starttime = now
2810         }
2811
2812         lock(&sched.lock)
2813         print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
2814         if detailed {
2815                 print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
2816         }
2817         // We must be careful while reading data from P's, M's and G's.
2818         // Even if we hold schedlock, most data can be changed concurrently.
2819         // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
2820         for i := int32(0); i < gomaxprocs; i++ {
2821                 _p_ := allp[i]
2822                 if _p_ == nil {
2823                         continue
2824                 }
2825                 mp := _p_.m
2826                 h := atomicload(&_p_.runqhead)
2827                 t := atomicload(&_p_.runqtail)
2828                 if detailed {
2829                         id := int32(-1)
2830                         if mp != nil {
2831                                 id = mp.id
2832                         }
2833                         print("  P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gfreecnt, "\n")
2834                 } else {
2835                         // In non-detailed mode format lengths of per-P run queues as:
2836                         // [len1 len2 len3 len4]
2837                         print(" ")
2838                         if i == 0 {
2839                                 print("[")
2840                         }
2841                         print(t - h)
2842                         if i == gomaxprocs-1 {
2843                                 print("]\n")
2844                         }
2845                 }
2846         }
2847
2848         if !detailed {
2849                 unlock(&sched.lock)
2850                 return
2851         }
2852
2853         for mp := allm; mp != nil; mp = mp.alllink {
2854                 _p_ := mp.p
2855                 gp := mp.curg
2856                 lockedg := mp.lockedg
2857                 id1 := int32(-1)
2858                 if _p_ != nil {
2859                         id1 = _p_.id
2860                 }
2861                 id2 := int64(-1)
2862                 if gp != nil {
2863                         id2 = gp.goid
2864                 }
2865                 id3 := int64(-1)
2866                 if lockedg != nil {
2867                         id3 = lockedg.goid
2868                 }
2869                 print("  M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " gcing=", mp.gcing, ""+" locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", getg().m.blocked, " lockedg=", id3, "\n")
2870         }
2871
2872         lock(&allglock)
2873         for gi := 0; gi < len(allgs); gi++ {
2874                 gp := allgs[gi]
2875                 mp := gp.m
2876                 lockedm := gp.lockedm
2877                 id1 := int32(-1)
2878                 if mp != nil {
2879                         id1 = mp.id
2880                 }
2881                 id2 := int32(-1)
2882                 if lockedm != nil {
2883                         id2 = lockedm.id
2884                 }
2885                 print("  G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n")
2886         }
2887         unlock(&allglock)
2888         unlock(&sched.lock)
2889 }
2890
2891 // Put mp on midle list.
2892 // Sched must be locked.
2893 func mput(mp *m) {
2894         mp.schedlink = sched.midle
2895         sched.midle = mp
2896         sched.nmidle++
2897         checkdead()
2898 }
2899
2900 // Try to get an m from midle list.
2901 // Sched must be locked.
2902 func mget() *m {
2903         mp := sched.midle
2904         if mp != nil {
2905                 sched.midle = mp.schedlink
2906                 sched.nmidle--
2907         }
2908         return mp
2909 }
2910
2911 // Put gp on the global runnable queue.
2912 // Sched must be locked.
2913 func globrunqput(gp *g) {
2914         gp.schedlink = nil
2915         if sched.runqtail != nil {
2916                 sched.runqtail.schedlink = gp
2917         } else {
2918                 sched.runqhead = gp
2919         }
2920         sched.runqtail = gp
2921         sched.runqsize++
2922 }
2923
2924 // Put a batch of runnable goroutines on the global runnable queue.
2925 // Sched must be locked.
2926 func globrunqputbatch(ghead *g, gtail *g, n int32) {
2927         gtail.schedlink = nil
2928         if sched.runqtail != nil {
2929                 sched.runqtail.schedlink = ghead
2930         } else {
2931                 sched.runqhead = ghead
2932         }
2933         sched.runqtail = gtail
2934         sched.runqsize += n
2935 }
2936
2937 // Try get a batch of G's from the global runnable queue.
2938 // Sched must be locked.
2939 func globrunqget(_p_ *p, max int32) *g {
2940         if sched.runqsize == 0 {
2941                 return nil
2942         }
2943
2944         n := sched.runqsize/gomaxprocs + 1
2945         if n > sched.runqsize {
2946                 n = sched.runqsize
2947         }
2948         if max > 0 && n > max {
2949                 n = max
2950         }
2951         if n > int32(len(_p_.runq))/2 {
2952                 n = int32(len(_p_.runq)) / 2
2953         }
2954
2955         sched.runqsize -= n
2956         if sched.runqsize == 0 {
2957                 sched.runqtail = nil
2958         }
2959
2960         gp := sched.runqhead
2961         sched.runqhead = gp.schedlink
2962         n--
2963         for ; n > 0; n-- {
2964                 gp1 := sched.runqhead
2965                 sched.runqhead = gp1.schedlink
2966                 runqput(_p_, gp1)
2967         }
2968         return gp
2969 }
2970
2971 // Put p to on _Pidle list.
2972 // Sched must be locked.
2973 func pidleput(_p_ *p) {
2974         _p_.link = sched.pidle
2975         sched.pidle = _p_
2976         xadd(&sched.npidle, 1) // TODO: fast atomic
2977 }
2978
2979 // Try get a p from _Pidle list.
2980 // Sched must be locked.
2981 func pidleget() *p {
2982         _p_ := sched.pidle
2983         if _p_ != nil {
2984                 sched.pidle = _p_.link
2985                 xadd(&sched.npidle, -1) // TODO: fast atomic
2986         }
2987         return _p_
2988 }
2989
2990 // Try to put g on local runnable queue.
2991 // If it's full, put onto global queue.
2992 // Executed only by the owner P.
2993 func runqput(_p_ *p, gp *g) {
2994 retry:
2995         h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
2996         t := _p_.runqtail
2997         if t-h < uint32(len(_p_.runq)) {
2998                 _p_.runq[t%uint32(len(_p_.runq))] = gp
2999                 atomicstore(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
3000                 return
3001         }
3002         if runqputslow(_p_, gp, h, t) {
3003                 return
3004         }
3005         // the queue is not full, now the put above must suceed
3006         goto retry
3007 }
3008
3009 // Put g and a batch of work from local runnable queue on global queue.
3010 // Executed only by the owner P.
3011 func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
3012         var batch [len(_p_.runq)/2 + 1]*g
3013
3014         // First, grab a batch from local queue.
3015         n := t - h
3016         n = n / 2
3017         if n != uint32(len(_p_.runq)/2) {
3018                 gothrow("runqputslow: queue is not full")
3019         }
3020         for i := uint32(0); i < n; i++ {
3021                 batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))]
3022         }
3023         if !cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
3024                 return false
3025         }
3026         batch[n] = gp
3027
3028         // Link the goroutines.
3029         for i := uint32(0); i < n; i++ {
3030                 batch[i].schedlink = batch[i+1]
3031         }
3032
3033         // Now put the batch on global queue.
3034         lock(&sched.lock)
3035         globrunqputbatch(batch[0], batch[n], int32(n+1))
3036         unlock(&sched.lock)
3037         return true
3038 }
3039
3040 // Get g from local runnable queue.
3041 // Executed only by the owner P.
3042 func runqget(_p_ *p) *g {
3043         for {
3044                 h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
3045                 t := _p_.runqtail
3046                 if t == h {
3047                         return nil
3048                 }
3049                 gp := _p_.runq[h%uint32(len(_p_.runq))]
3050                 if cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume
3051                         return gp
3052                 }
3053         }
3054 }
3055
3056 // Grabs a batch of goroutines from local runnable queue.
3057 // batch array must be of size len(p->runq)/2. Returns number of grabbed goroutines.
3058 // Can be executed by any P.
3059 func runqgrab(_p_ *p, batch []*g) uint32 {
3060         for {
3061                 h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
3062                 t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer
3063                 n := t - h
3064                 n = n - n/2
3065                 if n == 0 {
3066                         return 0
3067                 }
3068                 if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t
3069                         continue
3070                 }
3071                 for i := uint32(0); i < n; i++ {
3072                         batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))]
3073                 }
3074                 if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
3075                         return n
3076                 }
3077         }
3078 }
3079
3080 // Steal half of elements from local runnable queue of p2
3081 // and put onto local runnable queue of p.
3082 // Returns one of the stolen elements (or nil if failed).
3083 func runqsteal(_p_, p2 *p) *g {
3084         var batch [len(_p_.runq) / 2]*g
3085
3086         n := runqgrab(p2, batch[:])
3087         if n == 0 {
3088                 return nil
3089         }
3090         n--
3091         gp := batch[n]
3092         if n == 0 {
3093                 return gp
3094         }
3095         h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
3096         t := _p_.runqtail
3097         if t-h+n >= uint32(len(_p_.runq)) {
3098                 gothrow("runqsteal: runq overflow")
3099         }
3100         for i := uint32(0); i < n; i++ {
3101                 _p_.runq[(t+i)%uint32(len(_p_.runq))] = batch[i]
3102         }
3103         atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
3104         return gp
3105 }
3106
3107 func testSchedLocalQueue() {
3108         _p_ := new(p)
3109         gs := make([]g, len(_p_.runq))
3110         for i := 0; i < len(_p_.runq); i++ {
3111                 if runqget(_p_) != nil {
3112                         gothrow("runq is not empty initially")
3113                 }
3114                 for j := 0; j < i; j++ {
3115                         runqput(_p_, &gs[i])
3116                 }
3117                 for j := 0; j < i; j++ {
3118                         if runqget(_p_) != &gs[i] {
3119                                 print("bad element at iter ", i, "/", j, "\n")
3120                                 gothrow("bad element")
3121                         }
3122                 }
3123                 if runqget(_p_) != nil {
3124                         gothrow("runq is not empty afterwards")
3125                 }
3126         }
3127 }
3128
3129 func testSchedLocalQueueSteal() {
3130         p1 := new(p)
3131         p2 := new(p)
3132         gs := make([]g, len(p1.runq))
3133         for i := 0; i < len(p1.runq); i++ {
3134                 for j := 0; j < i; j++ {
3135                         gs[j].sig = 0
3136                         runqput(p1, &gs[j])
3137                 }
3138                 gp := runqsteal(p2, p1)
3139                 s := 0
3140                 if gp != nil {
3141                         s++
3142                         gp.sig++
3143                 }
3144                 for {
3145                         gp = runqget(p2)
3146                         if gp == nil {
3147                                 break
3148                         }
3149                         s++
3150                         gp.sig++
3151                 }
3152                 for {
3153                         gp = runqget(p1)
3154                         if gp == nil {
3155                                 break
3156                         }
3157                         gp.sig++
3158                 }
3159                 for j := 0; j < i; j++ {
3160                         if gs[j].sig != 1 {
3161                                 print("bad element ", j, "(", gs[j].sig, ") at iter ", i, "\n")
3162                                 gothrow("bad element")
3163                         }
3164                 }
3165                 if s != i/2 && s != i/2+1 {
3166                         print("bad steal ", s, ", want ", i/2, " or ", i/2+1, ", iter ", i, "\n")
3167                         gothrow("bad steal")
3168                 }
3169         }
3170 }
3171
3172 func setMaxThreads(in int) (out int) {
3173         lock(&sched.lock)
3174         out = int(sched.maxmcount)
3175         sched.maxmcount = int32(in)
3176         checkmcount()
3177         unlock(&sched.lock)
3178         return
3179 }
3180
3181 var goexperiment string = "GOEXPERIMENT" // TODO: defined in zaexperiment.h
3182
3183 func haveexperiment(name string) bool {
3184         x := goexperiment
3185         for x != "" {
3186                 xname := ""
3187                 i := index(x, ",")
3188                 if i < 0 {
3189                         xname, x = x, ""
3190                 } else {
3191                         xname, x = x[:i], x[i+1:]
3192                 }
3193                 if xname == name {
3194                         return true
3195                 }
3196         }
3197         return false
3198 }
3199
3200 //go:nosplit
3201 func sync_procPin() int {
3202         _g_ := getg()
3203         mp := _g_.m
3204
3205         mp.locks++
3206         return int(mp.p.id)
3207 }
3208
3209 //go:nosplit
3210 func sync_procUnpin() {
3211         _g_ := getg()
3212         _g_.m.locks--
3213 }