1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 //go:build unix || (js && wasm) || wasip1 || windows
10 "runtime/internal/atomic"
11 "runtime/internal/sys"
15 // Integrated network poller (platform-independent part).
16 // A particular implementation (epoll/kqueue/port/AIX/Windows)
17 // must define the following functions:
// func netpollinit()
20 // Initialize the poller. Only called once.
22 // func netpollopen(fd uintptr, pd *pollDesc) int32
23 // Arm edge-triggered notifications for fd. The pd argument is to pass
24 // back to netpollready when fd is ready. Return an errno value.
26 // func netpollclose(fd uintptr) int32
27 // Disable notifications for fd. Return an errno value.
29 // func netpoll(delta int64) gList
30 // Poll the network. If delta < 0, block indefinitely. If delta == 0,
31 // poll without blocking. If delta > 0, block for up to delta nanoseconds.
32 // Return a list of goroutines built by calling netpollready.
34 // func netpollBreak()
35 // Wake up the network poller, assumed to be blocked in netpoll.
37 // func netpollIsPollDescriptor(fd uintptr) bool
38 // Reports whether fd is a file descriptor used by the poller.
40 // Error codes returned by runtime_pollReset and runtime_pollWait.
41 // These must match the values in internal/poll/fd_poll_runtime.go.
// Within this file they are the possible results of netpollcheckerr.
43 pollNoError = 0 // no error
44 pollErrClosing = 1 // descriptor is closed
45 pollErrTimeout = 2 // I/O timeout (the deadline for the requested mode has expired)
46 pollErrNotPollable = 3 // general error polling descriptor (reported for read mode only)
49 // pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
50 // goroutines respectively. The semaphore can be in the following states:
52 // pdReady - io readiness notification is pending;
53 // a goroutine consumes the notification by changing the state to pdNil.
54 // pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
55 // the goroutine commits to park by changing the state to G pointer,
56 // or, alternatively, concurrent io notification changes the state to pdReady,
57 // or, alternatively, concurrent timeout/close changes the state to pdNil.
58 // G pointer - the goroutine is blocked on the semaphore;
59 // io notification or timeout/close changes the state to pdReady or pdNil respectively
60 // and unparks the goroutine.
61 // pdNil - none of the above.
// pollBlockSize bounds, in bytes, one batch of pollDesc allocations:
// pollCache.alloc carves pollBlockSize / unsafe.Sizeof(pollDesc{})
// descriptors out of each block obtained from persistentalloc.
68 const pollBlockSize = 4 * 1024
70 // Network poller descriptor.
// A pollDesc holds the polling state for one fd: the read and write
// semaphores (rg, wg), the read and write deadlines with their timers,
// and an atomically published summary (atomicInfo) for netpollcheckerr.
73 type pollDesc struct {
75 link *pollDesc // in pollcache, protected by pollcache.lock
76 fd uintptr // constant for pollDesc usage lifetime
77 fdseq atomic.Uintptr // protects against stale pollDesc; advanced by pollCache.free
79 // atomicInfo holds bits from closing, rd, and wd,
80 // which are only ever written while holding the lock,
81 // summarized for use by netpollcheckerr,
82 // which cannot acquire the lock.
83 // After writing these fields under lock in a way that
84 // might change the summary, code must call publishInfo
85 // before releasing the lock.
86 // Code that changes fields and then calls netpollunblock
87 // (while still holding the lock) must call publishInfo
88 // before calling netpollunblock, because publishInfo is what
89 // stops netpollblock from blocking anew
90 // (by changing the result of netpollcheckerr).
91 // atomicInfo also holds the eventErr bit,
92 // recording whether a poll event on the fd got an error;
93 // atomicInfo is the only source of truth for that bit.
94 atomicInfo atomic.Uint32 // atomic pollInfo
96 // rg, wg are accessed atomically and hold g pointers.
97 // (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
98 rg atomic.Uintptr // pdReady, pdWait, G waiting for read or pdNil
99 wg atomic.Uintptr // pdReady, pdWait, G waiting for write or pdNil
101 lock mutex // protects the following fields
103 user uint32 // user settable cookie
104 rseq uintptr // protects from stale read timers
105 rt timer // read deadline timer (set if rt.f != nil)
106 rd int64 // read deadline (a nanotime in the future, -1 when expired)
107 wseq uintptr // protects from stale write timers
108 wt timer // write deadline timer (armed only when the write deadline is not shared with the read deadline)
109 wd int64 // write deadline (a nanotime in the future, -1 when expired)
110 self *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
113 // pollInfo is the bits needed by netpollcheckerr, stored atomically,
114 // mostly duplicating state that is manipulated under lock in pollDesc.
115 // The one exception is the pollEventErr bit, which is maintained only
// in pollDesc.atomicInfo: it is written by setEventErr and merely
// carried over, never recomputed, by publishInfo.
120 pollClosing = 1 << iota // descriptor is closing; tested by pollInfo.closing
122 pollExpiredReadDeadline // read deadline has expired; tested by pollInfo.expiredReadDeadline
123 pollExpiredWriteDeadline // write deadline has expired; tested by pollInfo.expiredWriteDeadline
124 pollFDSeq // 20 bit field, low 20 bits of fdseq field; this constant is used as the field's shift
128 pollFDSeqBits = 20 // number of bits in pollFDSeq
129 pollFDSeqMask = 1<<pollFDSeqBits - 1 // mask for pollFDSeq
// closing reports whether the pollClosing bit is set in i.
132 func (i pollInfo) closing() bool { return i&pollClosing != 0 }
// eventErr reports whether the pollEventErr bit is set in i,
// i.e. whether a poll event on the fd got an error.
133 func (i pollInfo) eventErr() bool { return i&pollEventErr != 0 }
// expiredReadDeadline reports whether the pollExpiredReadDeadline bit is set in i.
134 func (i pollInfo) expiredReadDeadline() bool { return i&pollExpiredReadDeadline != 0 }
// expiredWriteDeadline reports whether the pollExpiredWriteDeadline bit is set in i.
135 func (i pollInfo) expiredWriteDeadline() bool { return i&pollExpiredWriteDeadline != 0 }
137 // info returns the pollInfo corresponding to pd.
// It is a single atomic load of pd.atomicInfo, so it reflects whatever
// publishInfo and setEventErr stored most recently.
138 func (pd *pollDesc) info() pollInfo {
139 return pollInfo(pd.atomicInfo.Load())
142 // publishInfo updates pd.atomicInfo (returned by pd.info)
143 // using the other values in pd.
144 // It must be called while holding pd.lock,
145 // and it must be called after changing anything
146 // that might affect the info bits.
147 // In practice this means after changing closing
148 // or changing rd or wd from < 0 to >= 0.
// The pollEventErr bit already stored in atomicInfo is carried over unchanged.
149 func (pd *pollDesc) publishInfo() {
155 info |= pollExpiredReadDeadline
158 info |= pollExpiredWriteDeadline
// Mirror the low pollFDSeqBits bits of fdseq into the pollFDSeq field.
160 info |= uint32(pd.fdseq.Load()&pollFDSeqMask) << pollFDSeq
162 // Set all of x except the pollEventErr bit.
// Reload and retry if atomicInfo changed between the Load and the
// CompareAndSwap (setEventErr also updates it with a CompareAndSwap).
163 x := pd.atomicInfo.Load()
164 for !pd.atomicInfo.CompareAndSwap(x, (x&pollEventErr)|info) {
165 x = pd.atomicInfo.Load()
169 // setEventErr sets the result of pd.info().eventErr() to b.
170 // We only change the error bit if seq == 0 or if seq matches pollFDSeq,
// that is, if the low pollFDSeqBits bits of seq equal the sequence bits
// currently stored in pd.atomicInfo. No other bit of atomicInfo is modified.
172 func (pd *pollDesc) setEventErr(b bool, seq uintptr) {
173 mSeq := uint32(seq & pollFDSeqMask)
174 x := pd.atomicInfo.Load()
175 xSeq := (x >> pollFDSeq) & pollFDSeqMask
176 if seq != 0 && xSeq != mSeq {
// Flip the bit only while it differs from b. After a failed CompareAndSwap,
// reload atomicInfo and repeat the sequence check against the new value.
179 for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) {
180 x = pd.atomicInfo.Load()
181 xSeq := (x >> pollFDSeq) & pollFDSeqMask
182 if seq != 0 && xSeq != mSeq {
// pollCache hands out and takes back pollDesc values; see its alloc and
// free methods.
188 type pollCache struct {
191 // PollDesc objects must be type-stable,
192 // because we can get ready notification from epoll/kqueue
193 // after the descriptor is closed/reused.
194 // Stale notifications are detected using the seq variables:
195 // rseq/wseq are incremented when deadlines are changed, and fdseq when the descriptor is freed.
199 netpollInitLock mutex // held by netpollGenericInit around the one-time initialization
200 netpollInited atomic.Uint32 // set to 1 by netpollGenericInit; read by netpollinited
203 netpollWaiters atomic.Uint32 // goroutines waiting for the poller: +1 in netpollblockcommit, -1 in netpollgoready
// poll_runtime_pollServerInit is exposed to package internal/poll as
// runtime_pollServerInit through the linkname directive below.
// NOTE(review): the body is not visible in this listing; presumably it
// delegates to netpollGenericInit — confirm.
206 //go:linkname poll_runtime_pollServerInit internal/poll.runtime_pollServerInit
207 func poll_runtime_pollServerInit() {
// netpollGenericInit performs the one-time poller initialization.
// netpollInited is tested once without the lock as a fast path and again
// with netpollInitLock held, so only one caller gets to store 1 into it.
211 func netpollGenericInit() {
212 if netpollInited.Load() == 0 {
213 lockInit(&netpollInitLock, lockRankNetpollInit)
214 lock(&netpollInitLock)
// Re-check under the lock: another caller may have finished initializing
// between the first Load and the lock acquisition.
215 if netpollInited.Load() == 0 {
217 netpollInited.Store(1)
219 unlock(&netpollInitLock)
// netpollinited reports whether netpollInited is nonzero, i.e. whether
// netpollGenericInit has completed its initialization.
223 func netpollinited() bool {
224 return netpollInited.Load() != 0
227 //go:linkname poll_runtime_isPollServerDescriptor internal/poll.runtime_isPollServerDescriptor
229 // poll_runtime_isPollServerDescriptor reports whether fd is a
230 // descriptor being used by netpoll.
// The answer comes from the platform-specific netpollIsPollDescriptor.
231 func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
232 return netpollIsPollDescriptor(fd)
// poll_runtime_pollOpen, which is internal/poll.runtime_pollOpen,
// takes a pollDesc from pollcache, verifies that no goroutine is still
// blocked on it, and registers fd with the platform poller via netpollopen.
// It returns the descriptor together with an errno-style code; the error
// return visible here yields a nil descriptor and netpollopen's errno.
235 //go:linkname poll_runtime_pollOpen internal/poll.runtime_pollOpen
236 func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
237 pd := pollcache.alloc()
// A descriptor coming out of the cache may only be in state pdNil or pdReady.
240 if wg != pdNil && wg != pdReady {
241 throw("runtime: blocked write on free polldesc")
244 if rg != pdNil && rg != pdReady {
245 throw("runtime: blocked read on free polldesc")
248 if pd.fdseq.Load() == 0 {
249 // The value 0 is special in setEventErr, so don't use it.
// Passing seq 0 to setEventErr below skips its sequence check, so the
// event error bit is cleared unconditionally.
253 pd.setEventErr(false, 0)
264 errno := netpollopen(fd, pd)
267 return nil, int(errno)
// poll_runtime_pollClose, which is internal/poll.runtime_pollClose,
// checks that pd is safe to release: it throws if the descriptor was not
// unblocked first or if a goroutine is still blocked on wg or rg.
// NOTE(review): the steps after these checks (unregistering the fd and
// returning pd to the cache) are not visible in this listing — confirm.
272 //go:linkname poll_runtime_pollClose internal/poll.runtime_pollClose
273 func poll_runtime_pollClose(pd *pollDesc) {
275 throw("runtime: close polldesc w/o unblock")
278 if wg != pdNil && wg != pdReady {
279 throw("runtime: blocked write on closing polldesc")
282 if rg != pdNil && rg != pdReady {
283 throw("runtime: blocked read on closing polldesc")
// free advances pd.fdseq so that notifications tagged with the previous
// sequence number are recognized as stale.
// NOTE(review): the code that puts pd back on c's free list is not visible
// in this listing — confirm.
289 func (c *pollCache) free(pd *pollDesc) {
290 // pd can't be shared here, but lock anyhow because
291 // that's what publishInfo documents.
294 // Increment the fdseq field, so that any currently
295 // running netpoll calls will not mark pd as ready.
296 fdseq := pd.fdseq.Load()
// Wrap the counter so it always fits in taggedPointerBits bits.
297 fdseq = (fdseq + 1) & (1<<taggedPointerBits - 1)
298 pd.fdseq.Store(fdseq)
310 // poll_runtime_pollReset, which is internal/poll.runtime_pollReset,
311 // prepares a descriptor for polling in mode, which is 'r' or 'w'.
312 // This returns an error code; the codes are defined above.
// The descriptor is checked with netpollcheckerr first.
314 //go:linkname poll_runtime_pollReset internal/poll.runtime_pollReset
315 func poll_runtime_pollReset(pd *pollDesc, mode int) int {
316 errcode := netpollcheckerr(pd, int32(mode))
317 if errcode != pollNoError {
322 } else if mode == 'w' {
328 // poll_runtime_pollWait, which is internal/poll.runtime_pollWait,
329 // waits for a descriptor to be ready for reading or writing,
330 // according to mode, which is 'r' or 'w'.
331 // This returns an error code; the codes are defined above.
333 //go:linkname poll_runtime_pollWait internal/poll.runtime_pollWait
334 func poll_runtime_pollWait(pd *pollDesc, mode int) int {
335 errcode := netpollcheckerr(pd, int32(mode))
336 if errcode != pollNoError {
339 // As for now only Solaris, illumos, and AIX use level-triggered IO.
340 if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" {
// netpollblock returns false when the goroutine was woken without IO being
// ready (timeout or close); the error state is re-checked in that case.
343 for !netpollblock(pd, int32(mode), false) {
344 errcode = netpollcheckerr(pd, int32(mode))
345 if errcode != pollNoError {
348 // Can happen if timeout has fired and unblocked us,
349 // but before we had a chance to run, timeout has been reset.
350 // Pretend it has not happened and retry.
// poll_runtime_pollWaitCanceled, which is internal/poll.runtime_pollWaitCanceled,
// keeps calling netpollblock with waitio=true until it reports IO readiness
// for mode.
355 //go:linkname poll_runtime_pollWaitCanceled internal/poll.runtime_pollWaitCanceled
356 func poll_runtime_pollWaitCanceled(pd *pollDesc, mode int) {
357 // This function is used only on windows after a failed attempt to cancel
358 // a pending async IO operation. Wait for ioready, ignore closing or timeouts.
359 for !netpollblock(pd, int32(mode), true) {
// poll_runtime_pollSetDeadline, which is internal/poll.runtime_pollSetDeadline,
// updates the read and/or write deadline of pd according to mode
// ('r', 'w', or 'r'+'w'), arms or invalidates the deadline timers to match,
// and wakes goroutines that the new deadline unblocks.
// NOTE(review): d appears to be a delay that is converted to an absolute
// time before being stored (see the overflow comment below) — confirm.
363 //go:linkname poll_runtime_pollSetDeadline internal/poll.runtime_pollSetDeadline
364 func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
// Remember the previous deadlines so changes can be detected below.
// combo0: before this call, read and write shared one positive deadline.
370 rd0, wd0 := pd.rd, pd.wd
371 combo0 := rd0 > 0 && rd0 == wd0
375 // If the user has a deadline in the future, but the delay calculation
376 // overflows, then set the deadline to the maximum possible value.
380 if mode == 'r' || mode == 'r'+'w' {
383 if mode == 'w' || mode == 'r'+'w' {
// combo: after this call, read and write share one positive deadline.
387 combo := pd.rd > 0 && pd.rd == pd.wd
// netpollReadDeadline expires only the read side; netpollDeadline expires
// both sides (see the arguments each passes to netpolldeadlineimpl).
388 rtf := netpollReadDeadline
390 rtf = netpollDeadline
395 // Copy current seq into the timer arg.
396 // Timer func will check the seq against current descriptor seq,
397 // if they differ the descriptor was reused or timers were reset.
398 pd.rt.arg = pd.makeArg()
400 resettimer(&pd.rt, pd.rd)
402 } else if pd.rd != rd0 || combo != combo0 {
403 pd.rseq++ // invalidate current timers
405 modtimer(&pd.rt, pd.rd, 0, rtf, pd.makeArg(), pd.rseq)
// The write timer is only used when the write deadline is not shared
// with the read deadline.
412 if pd.wd > 0 && !combo {
413 pd.wt.f = netpollWriteDeadline
414 pd.wt.arg = pd.makeArg()
416 resettimer(&pd.wt, pd.wd)
418 } else if pd.wd != wd0 || combo != combo0 {
419 pd.wseq++ // invalidate current timers
420 if pd.wd > 0 && !combo {
421 modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd.makeArg(), pd.wseq)
427 // If we set the new deadline in the past, unblock currently pending IO if any.
428 // Note that pd.publishInfo has already been called, above, immediately after modifying rd and wd.
431 rg = netpollunblock(pd, 'r', false)
434 wg = netpollunblock(pd, 'w', false)
// Make the unblocked goroutines runnable.
438 netpollgoready(rg, 3)
441 netpollgoready(wg, 3)
// poll_runtime_pollUnblock, which is internal/poll.runtime_pollUnblock,
// unblocks any goroutine parked on pd for reading or writing, without
// marking IO as ready (ioready is false), and makes it runnable.
// It throws if called on a descriptor that is already closing.
445 //go:linkname poll_runtime_pollUnblock internal/poll.runtime_pollUnblock
446 func poll_runtime_pollUnblock(pd *pollDesc) {
449 throw("runtime: unblock on closing polldesc")
456 rg = netpollunblock(pd, 'r', false)
457 wg = netpollunblock(pd, 'w', false)
468 netpollgoready(rg, 3)
471 netpollgoready(wg, 3)
475 // netpollready is called by the platform-specific netpoll function.
476 // It declares that the fd associated with pd is ready for I/O.
477 // The toRun argument is used to build a list of goroutines to return
478 // from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
479 // whether the fd is ready for reading or writing or both.
481 // This may run while the world is stopped, so write barriers are not allowed.
484 func netpollready(toRun *gList, pd *pollDesc, mode int32) {
// ioready is true in both calls below, so the notification is recorded as
// pdReady rather than dropped.
486 if mode == 'r' || mode == 'r'+'w' {
487 rg = netpollunblock(pd, 'r', true)
489 if mode == 'w' || mode == 'r'+'w' {
490 wg = netpollunblock(pd, 'w', true)
// netpollcheckerr returns the poll error code for pd in the given mode
// ('r' or 'w'), using the atomically published pollInfo rather than the
// lock-protected fields. Checks are made in order: closing, expired
// deadline for mode, and, for reads only, a recorded event error.
500 func netpollcheckerr(pd *pollDesc, mode int32) int {
503 return pollErrClosing
505 if (mode == 'r' && info.expiredReadDeadline()) || (mode == 'w' && info.expiredWriteDeadline()) {
506 return pollErrTimeout
508 // Report an event scanning error only on a read event.
509 // An error on a write event will be captured in a subsequent
510 // write call that is able to report a more specific error.
511 if mode == 'r' && info.eventErr() {
512 return pollErrNotPollable
// netpollblockcommit is the function netpollblock hands to gopark.
// It tries to move the semaphore at gpp from pdWait to the pointer of the
// parking goroutine gp; r records whether that compare-and-swap succeeded.
517 func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
518 r := atomic.Casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp)))
520 // Bump the count of goroutines waiting for the poller.
521 // The scheduler uses this to decide whether to block
522 // waiting for the poller if there is nothing else to do.
523 netpollWaiters.Add(1)
// netpollgoready decrements netpollWaiters (undoing the increment made in
// netpollblockcommit) and makes gp runnable via goready.
// traceskip+1 is passed on, presumably to skip this wrapper frame in traces.
528 func netpollgoready(gp *g, traceskip int) {
529 netpollWaiters.Add(-1)
530 goready(gp, traceskip+1)
533 // netpollblock returns true if IO is ready, or false if timed out or closed.
534 // waitio - wait only for completed IO, ignore errors
535 // Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
536 // can hold only a single waiting goroutine for each mode.
537 func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
543 // set the gpp semaphore to pdWait
545 // Consume notification if already ready.
546 if gpp.CompareAndSwap(pdReady, pdNil) {
549 if gpp.CompareAndSwap(pdNil, pdWait) {
553 // Double check that this isn't corrupt; otherwise we'd loop
// forever.
555 if v := gpp.Load(); v != pdReady && v != pdNil {
556 throw("runtime: double wait")
560 // need to recheck error states after setting gpp to pdWait
561 // this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
562 // do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg
563 if waitio || netpollcheckerr(pd, mode) == pollNoError {
564 gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceBlockNet, 5)
566 // be careful to not lose concurrent pdReady notification
// Swap reads the final state and resets the semaphore to pdNil in one step.
567 old := gpp.Swap(pdNil)
569 throw("runtime: corrupted polldesc")
571 return old == pdReady
// netpollunblock updates the read ('r') or write ('w') semaphore of pd and
// returns the goroutine that was recorded in it, converted from the stored
// uintptr; callers pass that goroutine to netpollgoready.
// ioready says whether the wakeup is due to IO readiness; only then is
// pdReady left behind (see the comment in the body).
574 func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
585 if old == pdNil && !ioready {
586 // Only set pdReady for ioready. runtime_pollWait
587 // will check for timeout/cancel before waiting.
// The CompareAndSwap only succeeds if the semaphore still holds old.
594 if gpp.CompareAndSwap(old, new) {
598 return (*g)(unsafe.Pointer(old))
// netpolldeadlineimpl is the shared implementation of the deadline timer
// functions netpollDeadline, netpollReadDeadline and netpollWriteDeadline.
// seq is the sequence number captured when the timer was set; read and
// write select which deadline(s) are being expired. Goroutines returned by
// netpollunblock are made runnable with netpollgoready.
603 func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
605 // Seq arg is seq when the timer was set.
606 // If it's stale, ignore the timer event.
607 currentSeq := pd.rseq
611 if seq != currentSeq {
612 // The descriptor was reused or timers were reset.
618 if pd.rd <= 0 || pd.rt.f == nil {
619 throw("runtime: inconsistent read deadline")
623 rg = netpollunblock(pd, 'r', false)
// && binds tighter than ||: a nil wt.f only counts as inconsistent when
// read is false, i.e. when this is not the combined read/write timer.
627 if pd.wd <= 0 || pd.wt.f == nil && !read {
628 throw("runtime: inconsistent write deadline")
632 wg = netpollunblock(pd, 'w', false)
636 netpollgoready(rg, 0)
639 netpollgoready(wg, 0)
// netpollDeadline is the timer function for a deadline shared by reads and
// writes: it expires both sides of the pollDesc passed in arg.
643 func netpollDeadline(arg any, seq uintptr) {
644 netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
// netpollReadDeadline is the timer function for a read-only deadline:
// it expires the read side of the pollDesc passed in arg.
647 func netpollReadDeadline(arg any, seq uintptr) {
648 netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
// netpollWriteDeadline is the timer function for a write-only deadline:
// it expires the write side of the pollDesc passed in arg.
651 func netpollWriteDeadline(arg any, seq uintptr) {
652 netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
// alloc hands out a pollDesc. New descriptors are carved out of a block of
// n*pdSize bytes obtained from persistentalloc, where
// n = pollBlockSize / pdSize. The lock of the descriptor being handed out
// is initialized with lockRankPollDesc.
// NOTE(review): the condition that triggers a new block and the free-list
// linking are not visible in this listing — confirm.
655 func (c *pollCache) alloc() *pollDesc {
658 const pdSize = unsafe.Sizeof(pollDesc{})
659 n := pollBlockSize / pdSize
663 // Must be in non-GC memory because it can be referenced
664 // only from epoll/kqueue internals.
665 mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
// Visit each pdSize-sized slot of the new block in turn.
666 for i := uintptr(0); i < n; i++ {
667 pd := (*pollDesc)(add(mem, i*pdSize))
674 lockInit(&pd.lock, lockRankPollDesc)
679 // makeArg converts pd to an interface{}.
680 // makeArg does not do any allocation. Normally, such
681 // a conversion requires an allocation because pointers to
682 // types which embed runtime/internal/sys.NotInHeap (which pollDesc is)
683 // must be stored in interfaces indirectly. See issue 42076.
684 func (pd *pollDesc) makeArg() (i any) {
// Reinterpret the named result i as an eface so its words can be set directly.
685 x := (*eface)(unsafe.Pointer(&i))
// Point the data word at the pd.self field rather than at pd itself; the
// field provides the indirect storage described above without allocating.
687 x.data = unsafe.Pointer(&pd.self)
// pdEface is an interface value holding a nil *pollDesc; pdType is the
// type word extracted from it, i.e. the *_type for *pollDesc.
// NOTE(review): presumably makeArg stores pdType into the eface it
// builds — that line is not visible in this listing; confirm.
692 pdEface any = (*pollDesc)(nil)
693 pdType *_type = efaceOf(&pdEface)._type