diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index e59c14fa9d17..cf64df7c811b 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -1570,7 +1570,6 @@ MLINKS+=timeout.9 callout.9 \ timeout.9 callout_active.9 \ timeout.9 callout_deactivate.9 \ timeout.9 callout_drain.9 \ - timeout.9 callout_drain_async.9 \ timeout.9 callout_handle_init.9 \ timeout.9 callout_init.9 \ timeout.9 callout_init_mtx.9 \ diff --git a/share/man/man9/timeout.9 b/share/man/man9/timeout.9 index 4f52a19cb61f..7202815a7824 100644 --- a/share/man/man9/timeout.9 +++ b/share/man/man9/timeout.9 @@ -29,14 +29,13 @@ .\" .\" $FreeBSD$ .\" -.Dd January 14, 2015 +.Dd October 8, 2014 .Dt TIMEOUT 9 .Os .Sh NAME .Nm callout_active , .Nm callout_deactivate , .Nm callout_drain , -.Nm callout_drain_async , .Nm callout_handle_init , .Nm callout_init , .Nm callout_init_mtx , @@ -64,232 +63,279 @@ .In sys/systm.h .Bd -literal typedef void timeout_t (void *); -typedef void callout_func_t (void *); .Ed +.Ft int +.Fn callout_active "struct callout *c" +.Ft void +.Fn callout_deactivate "struct callout *c" +.Ft int +.Fn callout_drain "struct callout *c" +.Ft void +.Fn callout_handle_init "struct callout_handle *handle" +.Bd -literal +struct callout_handle handle = CALLOUT_HANDLE_INITIALIZER(&handle); +.Ed +.Ft void +.Fn callout_init "struct callout *c" "int mpsafe" +.Ft void +.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" +.Ft void +.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" +.Ft void +.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" +.Ft int +.Fn callout_pending "struct callout *c" +.Ft int +.Fn callout_reset "struct callout *c" "int ticks" "timeout_t *func" "void *arg" +.Ft int +.Fn callout_reset_curcpu "struct callout *c" "int ticks" "timeout_t *func" \ +"void *arg" +.Ft int +.Fn callout_reset_on "struct callout *c" "int ticks" "timeout_t *func" \ +"void *arg" "int cpu" +.Ft int +.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "timeout_t *func" "void *arg" "int flags" +.Ft int +.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "timeout_t *func" "void *arg" "int flags" +.Ft int +.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "timeout_t *func" "void *arg" "int cpu" "int flags" +.Ft int +.Fn callout_schedule "struct callout *c" "int ticks" +.Ft int +.Fn callout_schedule_curcpu "struct callout *c" "int ticks" +.Ft int +.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu" +.Ft int +.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "int flags" +.Ft int +.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "int flags" +.Ft int +.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "int cpu" "int flags" +.Ft int +.Fn callout_stop "struct callout *c" +.Ft struct callout_handle +.Fn timeout "timeout_t *func" "void *arg" "int ticks" +.Ft void +.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" .Sh DESCRIPTION The .Nm callout API is used to schedule a call to an arbitrary function at a specific -time in the future in a single-shot fashion. -Consumers of this API are required to allocate a +time in the future. +Consumers of this API are required to allocate a callout structure .Pq struct callout -structure for each pending function invocation. 
-The -.Pq struct callout -structure stores the full state about any pending function call and -should be drained by a call to -.Fn callout_drain +for each pending function invocation. +This structure stores state about the pending function invocation including +the function to be called and the time at which the function should be invoked. +Pending function calls can be cancelled or rescheduled to a different time. +In addition, +a callout structure may be reused to schedule a new function call after a +scheduled call is completed. +.Pp +Callouts only provide a single-shot mode. +If a consumer requires a periodic timer, +it must explicitly reschedule each function call. +This is normally done by rescheduling the subsequent call within the called +function. +.Pp +Callout functions must not sleep. +They may not acquire sleepable locks, +wait on condition variables, +perform blocking allocation requests, +or invoke any other action that might sleep. +.Pp +Each callout structure must be initialized by +.Fn callout_init , +.Fn callout_init_mtx , +.Fn callout_init_rm , or -.Fn callout_drain_async -before freeing. -.Sh INITIALISATION -.Ft void -.Fn callout_handle_init "struct callout_handle *handle" -This function is deprecated and is used to prepare a -.Pq struct callout_handle -structure before it can be used the first time. -If this function is called on a pending timeout, the pending timeout -cannot be cancelled and the -.Fn untimeout -function will return as if there was no timeout pending. -.Pp -.Fn CALLOUT_HANDLE_INITIALIZER "&handle" -This macro is deprecated and can be used instead of -.Fn callout_handle_init -to assign the default state to the -.Pq struct callout_handle -structure when declaring static timeouts. -.Pp -.Ft void -.Fn callout_init "struct callout *c" "int mpsafe" -This function prepares a -.Pq struct callout -structure before it can be used. -This function should not be used when the callout is pending a timeout. +.Fn callout_init_rw +before it is passed to any of the other callout functions. +The +.Fn callout_init +function initializes a callout structure in +.Fa c +that is not associated with a specific lock. If the .Fa mpsafe -argument is non-zero, the callback function will be running unlocked. -Else the Giant mutex will be locked before calling the callback function. +argument is zero, +the callout structure is not considered to be +.Dq multi-processor safe ; +and the Giant lock will be acquired before calling the callout function +and released when the callout function returns. .Pp -.Ft void -.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" -This function prepares a -.Pq struct callout -structure before it can be used. -This function should not be used when the callout is pending a timeout. The -.Fa mtx -argument should be non-zero and should specify a pointer to a valid -spinlock type of mutex or a valid regular non-sleepable mutex which -the callback subsystem should lock before calling the callback -function. -Valid +.Fn callout_init_mtx , +.Fn callout_init_rm , +and +.Fn callout_init_rw +functions initialize a callout structure in +.Fa c +that is associated with a specific lock. +The lock is specified by the +.Fa mtx , +.Fa rm , +or +.Fa rw +parameter. +The associated lock must be held while stopping or rescheduling the +callout. +The callout subsystem acquires the associated lock before calling the +callout function and releases it after the function returns. 
+If the callout was cancelled while the callout subsystem waited for the +associated lock, +the callout function is not called, +and the associated lock is released. +This ensures that stopping or rescheduling the callout will abort any +previously scheduled invocation. +.Pp +Only regular mutexes may be used with +.Fn callout_init_mtx ; +spin mutexes are not supported. +A sleepable read-mostly lock +.Po +one initialized with the +.Dv RM_SLEEPABLE +flag +.Pc +may not be used with +.Fn callout_init_rm . +Similarly, other sleepable lock types such as +.Xr sx 9 +and +.Xr lockmgr 9 +cannot be used with callouts because sleeping is not permitted in +the callout subsystem. +.Pp +These .Fa flags -are: +may be specified for +.Fn callout_init_mtx , +.Fn callout_init_rm , +or +.Fn callout_init_rw : .Bl -tag -width ".Dv CALLOUT_RETURNUNLOCKED" .It Dv CALLOUT_RETURNUNLOCKED -It is assumed that the callout function has released the specified -mutex before returning. -Else the callout subsystem will release the specified mutex after the -callout function has returned. +The callout function will release the associated lock itself, +so the callout subsystem should not attempt to unlock it +after the callout function returns. +.It Dv CALLOUT_SHAREDLOCK +The lock is only acquired in read mode when running the callout handler. +This flag is ignored by +.Fn callout_init_mtx . .El .Pp -.Ft void -.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" -This function is the same like the -.Fn callout_init_mtx -function except it accepts a read-mostly type of lock. -The read-mostly lock must not be initialised with the -.Dv RM_SLEEPABLE -flag. +The function +.Fn callout_stop +cancels a callout +.Fa c +if it is currently pending. +If the callout is pending, then +.Fn callout_stop +returns a non-zero value. +If the callout is not set, +has already been serviced, +or is currently being serviced, +then zero will be returned. +If the callout has an associated lock, +then that lock must be held when this function is called. .Pp -.Ft void -.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" -This function is the same like the -.Fn callout_init_mtx -function except it accepts a reader-writer type of lock. -.Sh SCHEDULING CALLOUTS -.Ft struct callout_handle -.Fn timeout "timeout_t *func" "void *arg" "int ticks" -This function is deprecated and schedules a call to the function given by the argument -.Fa func -to take place after +The function +.Fn callout_drain +is identical to +.Fn callout_stop +except that it will wait for the callout +.Fa c +to complete if it is already in progress. +This function MUST NOT be called while holding any +locks on which the callout might block, or deadlock will result. +Note that if the callout subsystem has already begun processing this +callout, then the callout function may be invoked before +.Fn callout_drain +returns. +However, the callout subsystem does guarantee that the callout will be +fully stopped before +.Fn callout_drain +returns. +.Pp +The +.Fn callout_reset +and +.Fn callout_schedule +function families schedule a future function invocation for callout +.Fa c . +If +.Fa c +already has a pending callout, +it is cancelled before the new invocation is scheduled. +These functions return a non-zero value if a pending callout was cancelled +and zero if there was no pending callout. +If the callout has an associated lock, +then that lock must be held when any of these functions are called. 
+.Pp +The time at which the callout function will be invoked is determined by +either the +.Fa ticks +argument or the +.Fa sbt , +.Fa pr , +and +.Fa flags +arguments. +When +.Fa ticks +is used, +the callout is scheduled to execute after .Fa ticks Ns No /hz seconds. Non-positive values of .Fa ticks are silently converted to the value .Sq 1 . +.Pp The -.Fa func -argument should be a valid pointer to a function that takes a single -.Fa void * -argument. -Upon invocation, the -.Fa func -function will receive -.Fa arg -as its only argument. -The Giant lock is locked when the -.Fa arg -function is invoked and should not be unlocked by this function. -The returned value from -.Fn timeout -is a -.Ft struct callout_handle -structure which can be used in conjunction with the -.Fn untimeout -function to request that a scheduled timeout be cancelled. -As handles are recycled by the system, it is possible, although unlikely, -that a handle from one invocation of -.Fn timeout -may match the handle of another invocation of -.Fn timeout -if both calls used the same function pointer and argument, and the first -timeout is expired or canceled before the second call. -Please ensure that the function and argument pointers are unique when using this function. -.Pp -.Ft int -.Fn callout_reset "struct callout *c" "int ticks" "callout_func_t *func" "void *arg" -This function is used to schedule or re-schedule a callout. -This function at first stops the callout given by the -.Fa c -argument, if any. -Then it will start the callout given by the -.Fa c -argument. -The relative time until the timeout callback happens is given by the -.Fa ticks -argument. -The number of ticks in a second is defined by -.Dv hz -and can vary from system to system. -This function returns a non-zero value if the given callout was pending and -the callback function was prevented from being called. -Else a value of zero is returned. -If a lock is associated with the callout given by the -.Fa c -argument and it is exclusivly locked when this function is called this -function will always ensure that previous callback function, if any, -is never reached. -In other words the callout will be atomically restarted. -Else there is no such guarantee. -The callback function is given by the -.Fa func -argument and its function argument is given by the -.Fa arg -argument. -.Pp -.Ft int -.Fn callout_reset_curcpu "struct callout *c" "int ticks" "callout_func_t *func" \ -"void *arg" -This function works the same like the -.Fn callout_reset -function except the callback function given by the -.Fa func -argument will be executed on the same CPU which called this function. -A change in the CPU selection can happen if the callout has a lock -associated with it and is locked when this function is called. -A change in the CPU selection cannot happen if this function is -re-scheduled inside a callout function. -Else the callback function given by the -.Fa func -argument will be executed on the same CPU like previously done. -.Pp -.Ft int -.Fn callout_reset_on "struct callout *c" "int ticks" "callout_func_t *func" \ -"void *arg" "int cpu" -This function works the same like the -.Fn callout_reset -function except the callback function given by the -.Fa func -argument will be executed on the CPU given by the -.Fa cpu -argument. -A change in the CPU selection can happen if the callout has a lock -associated with it and is locked when this function is called. -A change in the CPU selection cannot happen if this function is -re-scheduled inside a callout function. 
-Else the callback function given by the -.Fa func -argument will be executed on the same CPU like previously done. -.Pp -.Ft int -.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" -This function works the same like the -.Fn callout_reset -function except the relative or absolute time after which the timeout -callback should happen is given by the +.Fa sbt , +.Fa pr , +and +.Fa flags +arguments provide more control over the scheduled time including +support for higher resolution times, +specifying the precision of the scheduled time, +and setting an absolute deadline instead of a relative timeout. +The callout is scheduled to execute in a time window which begins at +the time specified in .Fa sbt -argument and extends for the amount of time specified in the -.Fa pr -argument. -This function is used when you need high precision timeouts. -If the +and extends for the amount of time specified in +.Fa pr . +If .Fa sbt -argument specifies a time in the past, +specifies a time in the past, the window is adjusted to start at the current time. A non-zero value for .Fa pr allows the callout subsystem to coalesce callouts scheduled close to each other into fewer timer interrupts, reducing processing overhead and power consumption. -The +These .Fa flags -argument may be non-zero to adjust the interpretation of the +may be specified to adjust the interpretation of .Fa sbt -and the -.Fa pr -arguments: +and +.Fa pr : .Bl -tag -width ".Dv C_DIRECT_EXEC" .It Dv C_ABSOLUTE Handle the .Fa sbt argument as an absolute time since boot. -By default, the +By default, .Fa sbt -argument is treated like a relative amount of time, +is treated as a relative amount of time, similar to .Fa ticks . .It Dv C_DIRECT_EXEC @@ -301,7 +347,7 @@ Callout functions run in this context may use only spin mutexes for locking and should be as small as possible because they run with absolute priority. .It Fn C_PREL Specifies relative event time precision as binary logarithm of time interval -divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4 and so on. +divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, etc. Note that the larger of .Fa pr or this value is used as the length of the time window. @@ -314,215 +360,65 @@ Align the timeouts to calls if possible. .El .Pp -.Ft int -.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" -This function works the same like the -.Fn callout_reset_sbt -function except the callback function given by the -.Fa func -argument will be executed on the same CPU which called this function. -A change in the CPU selection can happen if the callout has a lock -associated with it and is locked when this function is called. -A change in the CPU selection cannot happen if this function is -re-scheduled inside a callout function. -Else the callback function given by the -.Fa func -argument will be executed on the same CPU like previously done. -.Pp -.Ft int -.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "callout_func_t *func" "void *arg" "int cpu" "int flags" -This function works the same like the -.Fn callout_reset_sbt -function except the callback function given by the -.Fa func -argument will be executed on the CPU given by the -.Fa cpu -argument. -A change in the CPU selection can happen if the callout has a lock -associated with it and is locked when this function is called. 
-A change in the CPU selection cannot happen if this function is -re-scheduled inside a callout function. -Else the callback function given by the -.Fa func -argument will be executed on the same CPU like previously done. -.Pp -.Ft int -.Fn callout_schedule "struct callout *c" "int ticks" -This function works the same like the +The .Fn callout_reset -function except it re-uses the callback function and the callback argument -already stored in the -.Pq struct callout -structure. -.Pp -.Ft int -.Fn callout_schedule_curcpu "struct callout *c" "int ticks" -This function works the same like the -.Fn callout_reset_curcpu -function except it re-uses the callback function and the callback argument -already stored in the -.Pq struct callout -structure. -.Pp -.Ft int -.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu" -This function works the same like the -.Fn callout_reset_on -function except it re-uses the callback function and the callback argument -already stored in the -.Pq struct callout -structure. -.Pp -.Ft int -.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int flags" -This function works the same like the -.Fn callout_reset_sbt -function except it re-uses the callback function and the callback argument -already stored in the -.Pq struct callout -structure. -.Pp -.Ft int -.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int flags" -This function works the same like the -.Fn callout_reset_sbt_curcpu -function except it re-uses the callback function and the callback argument -already stored in the -.Pq struct callout -structure. -.Pp -.Ft int -.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int cpu" "int flags" -This function works the same like the -.Fn callout_reset_sbt_on -function except it re-uses the callback function and the callback argument -already stored in the -.Pq struct callout -structure. -.Sh CHECKING THE STATE OF CALLOUTS -.Ft int -.Fn callout_pending "struct callout *c" -This function returns non-zero if the callout pointed to by the -.Fa c -argument is pending for callback. -Else this function returns zero. -This function returns zero when inside the callout function if the -callout is not re-scheduled. -.Pp -.Ft int -.Fn callout_active "struct callout *c" -This function is deprecated and returns non-zero if the callout -pointed to by the -.Fa c -argument was scheduled in the past. -Else this function returns zero. -This function also returns zero after the -.Fn callout_deactivate -or the -.Fn callout_stop -or the -.Fn callout_drain -or the -.Fn callout_drain_async -function is called on the same callout as given by the -.Fa c -argument. -.Pp -.Ft void -.Fn callout_deactivate "struct callout *c" -This function is deprecated and ensures that subsequent calls to the -.Fn callout_activate -function returns zero until the callout is scheduled again. -.Sh STOPPING CALLOUTS -.Ft void -.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" -This function is deprecated and cancels the timeout associated with the -.Fa handle -argument using the function pointed to by the +functions accept a .Fa func -argument and having the -.Fa arg -arguments to validate the handle. -If the handle does not correspond to a timeout with -the function -.Fa func -taking the argument -.Fa arg -no action is taken. 
The -.Fa handle -must be initialised by a previous call to -.Fn timeout , -.Fn callout_handle_init -or assigned the value of -.Fn CALLOUT_HANDLE_INITIALIZER "&handle" -before being passed to -.Fn untimeout . -The behavior of calling -.Fn untimeout -with an uninitialised handle -is undefined. -.Pp -.Ft int -.Fn callout_stop "struct callout *c" -This function is used to stop a timeout function invocation associated with the callout pointed to by the -.Fa c -argument, in a non-blocking fashion. -This function can be called multiple times in a row with no side effects, even if the callout is already stopped. This function however should not be called before the callout has been initialised. -This function returns a non-zero value if the given callout was pending and -the callback function was prevented from being called. -Else a value of zero is returned. -If a lock is associated with the callout given by the -.Fa c -argument and it is exclusivly locked when this function is called, the -.Fn callout_stop -function will always ensure that the callback function is never reached. -In other words the callout will be atomically stopped. -Else there is no such guarantee. -.Sh DRAINING CALLOUTS -.Ft int -.Fn callout_drain "struct callout *c" -This function works the same like the -.Fn callout_stop -function except it ensures that all callback functions have returned and there are no more references to the callout pointed to by the -.Fa c -argument inside the callout subsystem before it returns. -Also this function ensures that the lock, if any, associated with the -callout is no longer being used. -When this function returns, it is safe to free the callout structure pointed to by the -.Fa c +argument which identifies the function to be called when the time expires. +It must be a pointer to a function that takes a single +.Fa void * argument. -.Pp -.Ft int -.Fn callout_drain_async "struct callout *c" "callout_func_t *fn" "void *arg" -This function is non-blocking and works the same like the -.Fn callout_stop -function except if it returns non-zero it means the callback function pointed to by the -.Fa fn -argument will be called back with the +Upon invocation, +.Fa func +will receive .Fa arg -argument when all references to the callout pointed to by the -.Fa c -argument are gone. -If this function returns zero, it is safe to free the callout structure pointed to by the -.Fa c -argument right away. -.Sh CALLOUT FUNCTION RESTRICTIONS -Callout functions must not sleep. -They may not acquire sleepable locks, wait on condition variables, -perform blocking allocation requests, or invoke any other action that -might sleep. -.Sh CALLOUT SUBSYSTEM INTERNALS -The callout subsystem has its own set of spinlocks to protect its internal state. -The callout subsystem provides a softclock thread for each CPU in the -system. -Callouts are assigned to a single CPU and are executed by the -softclock thread for that CPU. -Initially, callouts are assigned to CPU 0. +as its only argument. +The +.Fn callout_schedule +functions reuse the +.Fa func +and +.Fa arg +arguments from the previous callout. +Note that one of the +.Fn callout_reset +functions must always be called to initialize +.Fa func +and +.Fa arg +before one of the +.Fn callout_schedule +functions can be used. +.Pp +The callout subsystem provides a softclock thread for each CPU in the system. +Callouts are assigned to a single CPU and are executed by the softclock thread +for that CPU. +Initially, +callouts are assigned to CPU 0. 
+The +.Fn callout_reset_on , +.Fn callout_reset_sbt_on , +.Fn callout_schedule_on +and +.Fn callout_schedule_sbt_on +functions assign the callout to CPU +.Fa cpu . +The +.Fn callout_reset_curcpu , +.Fn callout_reset_sbt_curpu , +.Fn callout_schedule_curcpu +and +.Fn callout_schedule_sbt_curcpu +functions assign the callout to the current CPU. +The +.Fn callout_reset , +.Fn callout_reset_sbt , +.Fn callout_schedule +and +.Fn callout_schedule_sbt +functions schedule the callout to execute in the softclock thread of the CPU +to which it is currently assigned. +.Pp Softclock threads are not pinned to their respective CPUs by default. The softclock thread for CPU 0 can be pinned to CPU 0 by setting the .Va kern.pin_default_swi @@ -531,7 +427,50 @@ Softclock threads for CPUs other than zero can be pinned to their respective CPUs by setting the .Va kern.pin_pcpu_swi loader tunable to a non-zero value. -.Sh "AVOIDING RACE CONDITIONS" +.Pp +The macros +.Fn callout_pending , +.Fn callout_active +and +.Fn callout_deactivate +provide access to the current state of the callout. +The +.Fn callout_pending +macro checks whether a callout is +.Em pending ; +a callout is considered +.Em pending +when a timeout has been set but the time has not yet arrived. +Note that once the timeout time arrives and the callout subsystem +starts to process this callout, +.Fn callout_pending +will return +.Dv FALSE +even though the callout function may not have finished +.Pq or even begun +executing. +The +.Fn callout_active +macro checks whether a callout is marked as +.Em active , +and the +.Fn callout_deactivate +macro clears the callout's +.Em active +flag. +The callout subsystem marks a callout as +.Em active +when a timeout is set and it clears the +.Em active +flag in +.Fn callout_stop +and +.Fn callout_drain , +but it +.Em does not +clear it when a callout expires normally via the execution of the +callout function. +.Ss "Avoiding Race Conditions" The callout subsystem invokes callout functions from its own thread context. Without some kind of synchronization, @@ -548,7 +487,7 @@ synchronization concerns. The first approach is preferred as it is the simplest: .Bl -enum -offset indent .It -Callouts can be associated with a specific lock when they are initialised +Callouts can be associated with a specific lock when they are initialized by .Fn callout_init_mtx , .Fn callout_init_rm , @@ -569,7 +508,7 @@ or .Fn callout_schedule functions to provide this safety. .Pp -A callout initialised via +A callout initialized via .Fn callout_init with .Fa mpsafe @@ -592,8 +531,9 @@ function families .Pc indicates whether or not the callout was removed. If it is known that the callout was set and the callout function has -not yet executed, then a return value of zero indicates that the -callout function is about to be called. +not yet executed, then a return value of +.Dv FALSE +indicates that the callout function is about to be called. For example: .Bd -literal -offset indent if (sc->sc_flags & SCFLG_CALLOUT_RUNNING) { @@ -649,14 +589,16 @@ The callout function should first check the .Em pending flag and return without action if .Fn callout_pending -returns non-zero. +returns +.Dv TRUE . This indicates that the callout was rescheduled using .Fn callout_reset just before the callout function was invoked. If .Fn callout_active -returns zero then the callout function should also return without -action. +returns +.Dv FALSE +then the callout function should also return without action. This indicates that the callout has been stopped. 
Finally, the callout function should call .Fn callout_deactivate @@ -726,13 +668,129 @@ a callout should always be drained prior to destroying its associated lock or releasing the storage for the callout structure. .Sh LEGACY API .Bf Sy -The -.Fn timeout -and -.Fn untimeout -functions are a legacy API that will be removed in a future release. +The functions below are a legacy API that will be removed in a future release. New code should not use these routines. .Ef +.Pp +The function +.Fn timeout +schedules a call to the function given by the argument +.Fa func +to take place after +.Fa ticks Ns No /hz +seconds. +Non-positive values of +.Fa ticks +are silently converted to the value +.Sq 1 . +.Fa func +should be a pointer to a function that takes a +.Fa void * +argument. +Upon invocation, +.Fa func +will receive +.Fa arg +as its only argument. +The return value from +.Fn timeout +is a +.Ft struct callout_handle +which can be used in conjunction with the +.Fn untimeout +function to request that a scheduled timeout be canceled. +.Pp +The function +.Fn callout_handle_init +can be used to initialize a handle to a state which will cause +any calls to +.Fn untimeout +with that handle to return with no side +effects. +.Pp +Assigning a callout handle the value of +.Fn CALLOUT_HANDLE_INITIALIZER +performs the same function as +.Fn callout_handle_init +and is provided for use on statically declared or global callout handles. +.Pp +The function +.Fn untimeout +cancels the timeout associated with +.Fa handle +using the +.Fa func +and +.Fa arg +arguments to validate the handle. +If the handle does not correspond to a timeout with +the function +.Fa func +taking the argument +.Fa arg +no action is taken. +.Fa handle +must be initialized by a previous call to +.Fn timeout , +.Fn callout_handle_init , +or assigned the value of +.Fn CALLOUT_HANDLE_INITIALIZER "&handle" +before being passed to +.Fn untimeout . +The behavior of calling +.Fn untimeout +with an uninitialized handle +is undefined. +.Pp +As handles are recycled by the system, it is possible (although unlikely) +that a handle from one invocation of +.Fn timeout +may match the handle of another invocation of +.Fn timeout +if both calls used the same function pointer and argument, and the first +timeout is expired or canceled before the second call. +The timeout facility offers O(1) running time for +.Fn timeout +and +.Fn untimeout . +Timeouts are executed from +.Fn softclock +with the +.Va Giant +lock held. +Thus they are protected from re-entrancy. +.Sh RETURN VALUES +The +.Fn callout_active +macro returns the state of a callout's +.Em active +flag. +.Pp +The +.Fn callout_pending +macro returns the state of a callout's +.Em pending +flag. +.Pp +The +.Fn callout_reset +and +.Fn callout_schedule +function families return non-zero if the callout was pending before the new +function invocation was scheduled. +.Pp +The +.Fn callout_stop +and +.Fn callout_drain +functions return non-zero if the callout was still pending when it was +called or zero otherwise. +The +.Fn timeout +function returns a +.Ft struct callout_handle +that can be passed to +.Fn untimeout . .Sh HISTORY The current timeout and untimeout routines are based on the work of .An Adam M. Costello @@ -757,4 +815,4 @@ The current implementation replaces the long standing .Bx linked list callout mechanism which offered O(n) insertion and removal running time -and did not generate or require handles for untimeout operations. +but did not generate or require handles for untimeout operations. 
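As a usage sketch of the API documented in the restored timeout.9 text above (not itself part of this change), the normal life cycle is: initialize the callout with an associated lock, arm it with callout_reset(), cancel it with callout_stop() while holding that lock, and callout_drain() it before freeing its storage or destroying the lock. The softc, lock, and function names below (mydev_softc, mydev_timer, and so on) are hypothetical and exist only to illustrate the documented interface.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct mydev_softc {
	struct mtx	sc_mtx;		/* protects sc_ticks and the callout */
	struct callout	sc_callout;
	int		sc_ticks;
};

/* Handler; runs with sc_mtx held because of callout_init_mtx() below. */
static void
mydev_timer(void *arg)
{
	struct mydev_softc *sc = arg;

	mtx_assert(&sc->sc_mtx, MA_OWNED);
	sc->sc_ticks++;
	/* Callouts are single-shot; reschedule here for periodic behaviour. */
	callout_reset(&sc->sc_callout, hz, mydev_timer, sc);
}

static void
mydev_start_timer(struct mydev_softc *sc)
{
	mtx_init(&sc->sc_mtx, "mydev", NULL, MTX_DEF);
	/* Associate the callout with sc_mtx; the subsystem does the locking. */
	callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);

	mtx_lock(&sc->sc_mtx);
	callout_reset(&sc->sc_callout, hz, mydev_timer, sc);
	mtx_unlock(&sc->sc_mtx);
}

static void
mydev_stop_timer(struct mydev_softc *sc)
{
	/* Holding the associated lock guarantees the handler is not entered. */
	mtx_lock(&sc->sc_mtx);
	callout_stop(&sc->sc_callout);
	mtx_unlock(&sc->sc_mtx);
	/* Wait out a handler already in progress; no locks may be held here. */
	callout_drain(&sc->sc_callout);
	mtx_destroy(&sc->sc_mtx);
}

The same callout can be armed through the sbintime_t interface described above; for example, a wakeup roughly 50 ms out whose allowed deviation is a quarter of the interval (C_PREL(2)), letting nearby events be coalesced into one timer interrupt:

	callout_reset_sbt(&sc->sc_callout, 50 * SBT_1MS, 0, mydev_timer, sc,
	    C_PREL(2));
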
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 922959f8fa57..beb49bc56962 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -504,8 +504,7 @@ proc0_init(void *dummy __unused) callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); - mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN); - callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0); + callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); /* Create credentials. */ p->p_ucred = crget(); diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c index 8c2691babb3f..2700a25d477c 100644 --- a/sys/kern/kern_condvar.c +++ b/sys/kern/kern_condvar.c @@ -313,13 +313,15 @@ _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); - sleepq_release(cvp); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { + if (class->lc_flags & LC_SLEEPABLE) + sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); + if (class->lc_flags & LC_SLEEPABLE) + sleepq_lock(cvp); } - sleepq_lock(cvp); rval = sleepq_timedwait(cvp, 0); #ifdef KTRACE @@ -381,13 +383,15 @@ _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock, sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); - sleepq_release(cvp); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { + if (class->lc_flags & LC_SLEEPABLE) + sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); + if (class->lc_flags & LC_SLEEPABLE) + sleepq_lock(cvp); } - sleepq_lock(cvp); rval = sleepq_timedwait_sig(cvp, 0); #ifdef KTRACE diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c index 38c870709c31..36a8470d2971 100644 --- a/sys/kern/kern_lock.c +++ b/sys/kern/kern_lock.c @@ -210,11 +210,9 @@ sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk, GIANT_SAVE(); sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ? SLEEPQ_INTERRUPTIBLE : 0), queue); - if ((flags & LK_TIMELOCK) && timo) { - sleepq_release(&lk->lock_object); + if ((flags & LK_TIMELOCK) && timo) sleepq_set_timeout(&lk->lock_object, timo); - sleepq_lock(&lk->lock_object); - } + /* * Decisional switch for real sleeping. */ diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index 61d914944640..d0009b1042f3 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -93,6 +93,8 @@ SCHED_STAT_DEFINE_VAR(turnstile, &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), ""); SCHED_STAT_DEFINE_VAR(sleepq, &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), ""); +SCHED_STAT_DEFINE_VAR(sleepqtimo, + &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), ""); SCHED_STAT_DEFINE_VAR(relinquish, &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), ""); SCHED_STAT_DEFINE_VAR(needresched, diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 19bf4e8a094a..9501ba2ba07d 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -236,16 +236,12 @@ _sleep(void *ident, struct lock_object *lock, int priority, * return from cursig(). 
*/ sleepq_add(ident, lock, wmesg, sleepq_flags, 0); + if (sbt != 0) + sleepq_set_timeout_sbt(ident, sbt, pr, flags); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { sleepq_release(ident); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); - if (sbt != 0) - sleepq_set_timeout_sbt(ident, sbt, pr, flags); - sleepq_lock(ident); - } else if (sbt != 0) { - sleepq_release(ident); - sleepq_set_timeout_sbt(ident, sbt, pr, flags); sleepq_lock(ident); } if (sbt != 0 && catch) @@ -310,11 +306,8 @@ msleep_spin_sbt(void *ident, struct mtx *mtx, const char *wmesg, * We put ourselves on the sleep queue and start our timeout. */ sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); - if (sbt != 0) { - sleepq_release(ident); + if (sbt != 0) sleepq_set_timeout_sbt(ident, sbt, pr, flags); - sleepq_lock(ident); - } /* * Can't call ktrace with any spin locks held so it can lock the diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index b1e1a1263597..2d0b0d278a56 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -149,9 +149,6 @@ thread_ctor(void *mem, int size, void *arg, int flags) audit_thread_alloc(td); #endif umtx_thread_alloc(td); - - mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN); - callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0); return (0); } @@ -165,10 +162,6 @@ thread_dtor(void *mem, int size, void *arg) td = (struct thread *)mem; - /* make sure to drain any use of the "td->td_slpcallout" */ - callout_drain(&td->td_slpcallout); - mtx_destroy(&td->td_slpmutex); - #ifdef INVARIANTS /* Verify that this thread is in a safe state to free. */ switch (td->td_state) { @@ -551,6 +544,7 @@ thread_link(struct thread *td, struct proc *p) LIST_INIT(&td->td_lprof[0]); LIST_INIT(&td->td_lprof[1]); sigqueue_init(&td->td_sigqueue, p); + callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist); p->p_numthreads++; } diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 4336faab3bf7..13822fd99ac6 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -54,8 +54,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include #include #include #include @@ -126,216 +124,37 @@ SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_ */ u_int callwheelsize, callwheelmask; -typedef void callout_mutex_op_t(struct lock_object *); -typedef int callout_owned_op_t(struct lock_object *); - -struct callout_mutex_ops { - callout_mutex_op_t *lock; - callout_mutex_op_t *unlock; - callout_owned_op_t *owned; -}; - -enum { - CALLOUT_LC_UNUSED_0, - CALLOUT_LC_UNUSED_1, - CALLOUT_LC_UNUSED_2, - CALLOUT_LC_UNUSED_3, - CALLOUT_LC_SPIN, - CALLOUT_LC_MUTEX, - CALLOUT_LC_RW, - CALLOUT_LC_RM, -}; - -static void -callout_mutex_op_none(struct lock_object *lock) -{ -} - -static int -callout_owned_op_none(struct lock_object *lock) -{ - return (0); -} - -static void -callout_mutex_lock(struct lock_object *lock) -{ - mtx_lock((struct mtx *)lock); -} - -static void -callout_mutex_unlock(struct lock_object *lock) -{ - mtx_unlock((struct mtx *)lock); -} - -static void -callout_mutex_lock_spin(struct lock_object *lock) -{ - mtx_lock_spin((struct mtx *)lock); -} - -static void -callout_mutex_unlock_spin(struct lock_object *lock) -{ - mtx_unlock_spin((struct mtx *)lock); -} - -static int -callout_mutex_owned(struct lock_object *lock) -{ - return (mtx_owned((struct mtx *)lock)); -} - -static void -callout_rm_wlock(struct lock_object *lock) -{ - rm_wlock((struct rmlock 
*)lock); -} - -static void -callout_rm_wunlock(struct lock_object *lock) -{ - rm_wunlock((struct rmlock *)lock); -} - -static int -callout_rm_owned(struct lock_object *lock) -{ - return (rm_wowned((struct rmlock *)lock)); -} - -static void -callout_rw_wlock(struct lock_object *lock) -{ - rw_wlock((struct rwlock *)lock); -} - -static void -callout_rw_wunlock(struct lock_object *lock) -{ - rw_wunlock((struct rwlock *)lock); -} - -static int -callout_rw_owned(struct lock_object *lock) -{ - return (rw_wowned((struct rwlock *)lock)); -} - -static const struct callout_mutex_ops callout_mutex_ops[8] = { - [CALLOUT_LC_UNUSED_0] = { - .lock = callout_mutex_op_none, - .unlock = callout_mutex_op_none, - .owned = callout_owned_op_none, - }, - [CALLOUT_LC_UNUSED_1] = { - .lock = callout_mutex_op_none, - .unlock = callout_mutex_op_none, - .owned = callout_owned_op_none, - }, - [CALLOUT_LC_UNUSED_2] = { - .lock = callout_mutex_op_none, - .unlock = callout_mutex_op_none, - .owned = callout_owned_op_none, - }, - [CALLOUT_LC_UNUSED_3] = { - .lock = callout_mutex_op_none, - .unlock = callout_mutex_op_none, - .owned = callout_owned_op_none, - }, - [CALLOUT_LC_SPIN] = { - .lock = callout_mutex_lock_spin, - .unlock = callout_mutex_unlock_spin, - .owned = callout_mutex_owned, - }, - [CALLOUT_LC_MUTEX] = { - .lock = callout_mutex_lock, - .unlock = callout_mutex_unlock, - .owned = callout_mutex_owned, - }, - [CALLOUT_LC_RW] = { - .lock = callout_rw_wlock, - .unlock = callout_rw_wunlock, - .owned = callout_rw_owned, - }, - [CALLOUT_LC_RM] = { - .lock = callout_rm_wlock, - .unlock = callout_rm_wunlock, - .owned = callout_rm_owned, - }, -}; - -static void -callout_lock_client(int c_flags, struct lock_object *c_lock) -{ - callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock); -} - -static void -callout_unlock_client(int c_flags, struct lock_object *c_lock) -{ - callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock); -} - -#ifdef SMP -static int -callout_lock_owned_client(int c_flags, struct lock_object *c_lock) -{ - return (callout_mutex_ops[CALLOUT_GET_LC(c_flags)].owned(c_lock)); -} -#endif - /* - * The callout CPU exec structure represent information necessary for - * describing the state of callouts currently running on the CPU and - * for handling deferred callout restarts. - * - * In particular, the first entry of the array cc_exec_entity holds - * information for callouts running from the SWI thread context, while - * the second one holds information for callouts running directly from - * the hardware interrupt context. + * The callout cpu exec entities represent informations necessary for + * describing the state of callouts currently running on the CPU and the ones + * necessary for migrating callouts to the new callout cpu. In particular, + * the first entry of the array cc_exec_entity holds informations for callout + * running in SWI thread context, while the second one holds informations + * for callout running directly from hardware interrupt context. + * The cached informations are very important for deferring migration when + * the migrating callout is already running. */ struct cc_exec { - /* - * The "cc_curr" points to the currently executing callout and - * is protected by the "cc_lock" spinlock. If no callback is - * currently executing it is equal to "NULL". - */ + struct callout *cc_next; struct callout *cc_curr; - /* - * The "cc_restart_args" structure holds the argument for a - * deferred callback restart and is protected by the "cc_lock" - * spinlock. 
The structure is only valid if "cc_restart" is - * "true". If "cc_restart" is "false" the information in the - * "cc_restart_args" structure shall be ignored. - */ - struct callout_args cc_restart_args; - bool cc_restart; - /* - * The "cc_cancel" variable allows the currently pending - * callback to be atomically cancelled. This field is write - * protected by the "cc_lock" spinlock. - */ - bool cc_cancel; - /* - * The "cc_drain_fn" points to a function which shall be - * called with the argument stored in "cc_drain_arg" when an - * asynchronous drain is performed. This field is write - * protected by the "cc_lock" spinlock. - */ - callout_func_t *cc_drain_fn; - void *cc_drain_arg; +#ifdef SMP + void (*ce_migration_func)(void *); + void *ce_migration_arg; + int ce_migration_cpu; + sbintime_t ce_migration_time; + sbintime_t ce_migration_prec; +#endif + bool cc_cancel; + bool cc_waiting; }; /* - * There is one "struct callout_cpu" per CPU, holding all relevant + * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. */ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; - struct callout *cc_exec_next_dir; struct callout *cc_callout; struct callout_list *cc_callwheel; struct callout_tailq cc_expireq; @@ -347,7 +166,27 @@ struct callout_cpu { char cc_ktr_event_name[20]; }; +#define cc_exec_curr cc_exec_entity[0].cc_curr +#define cc_exec_next cc_exec_entity[0].cc_next +#define cc_exec_cancel cc_exec_entity[0].cc_cancel +#define cc_exec_waiting cc_exec_entity[0].cc_waiting +#define cc_exec_curr_dir cc_exec_entity[1].cc_curr +#define cc_exec_next_dir cc_exec_entity[1].cc_next +#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel +#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting + #ifdef SMP +#define cc_migration_func cc_exec_entity[0].ce_migration_func +#define cc_migration_arg cc_exec_entity[0].ce_migration_arg +#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu +#define cc_migration_time cc_exec_entity[0].ce_migration_time +#define cc_migration_prec cc_exec_entity[0].ce_migration_prec +#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func +#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg +#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu +#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time +#define cc_migration_prec_dir cc_exec_entity[1].ce_migration_prec + struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU #define CC_CPU(cpu) (&cc_cpu[(cpu)]) @@ -372,9 +211,60 @@ static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); +/** + * Locked by cc_lock: + * cc_curr - If a callout is in progress, it is cc_curr. + * If cc_curr is non-NULL, threads waiting in + * callout_drain() will be woken up as soon as the + * relevant callout completes. + * cc_cancel - Changing to 1 with both callout_lock and cc_lock held + * guarantees that the current callout will not run. + * The softclock() function sets this to 0 before it + * drops callout_lock to acquire c_lock, and it calls + * the handler only if curr_cancelled is still 0 after + * cc_lock is successfully acquired. + * cc_waiting - If a thread is waiting in callout_drain(), then + * callout_wait is nonzero. Set only when + * cc_curr is non-NULL. 
+ */ + /* - * Kernel low level callwheel initialization called from cpu0 during - * kernel startup: + * Resets the execution entity tied to a specific callout cpu. + */ +static void +cc_cce_cleanup(struct callout_cpu *cc, int direct) +{ + + cc->cc_exec_entity[direct].cc_curr = NULL; + cc->cc_exec_entity[direct].cc_next = NULL; + cc->cc_exec_entity[direct].cc_cancel = false; + cc->cc_exec_entity[direct].cc_waiting = false; +#ifdef SMP + cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; + cc->cc_exec_entity[direct].ce_migration_time = 0; + cc->cc_exec_entity[direct].ce_migration_prec = 0; + cc->cc_exec_entity[direct].ce_migration_func = NULL; + cc->cc_exec_entity[direct].ce_migration_arg = NULL; +#endif +} + +/* + * Checks if migration is requested by a specific callout cpu. + */ +static int +cc_cce_migrating(struct callout_cpu *cc, int direct) +{ + +#ifdef SMP + return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); +#else + return (0); +#endif +} + +/* + * Kernel low level callwheel initialization + * called on cpu0 during kernel startup. */ static void callout_callwheel_init(void *dummy) @@ -434,6 +324,8 @@ callout_cpu_init(struct callout_cpu *cc, int cpu) LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); cc->cc_firstevent = SBT_MAX; + for (i = 0; i < 2; i++) + cc_cce_cleanup(cc, i); snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), "callwheel cpu %d", cpu); if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ @@ -441,11 +333,41 @@ callout_cpu_init(struct callout_cpu *cc, int cpu) for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); - c->c_flags |= CALLOUT_LOCAL_ALLOC; + c->c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } +#ifdef SMP +/* + * Switches the cpu tied to a specific callout. + * The function expects a locked incoming callout cpu and returns with + * locked outcoming callout cpu. + */ +static struct callout_cpu * +callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) +{ + struct callout_cpu *new_cc; + + MPASS(c != NULL && cc != NULL); + CC_LOCK_ASSERT(cc); + + /* + * Avoid interrupts and preemption firing after the callout cpu + * is blocked in order to avoid deadlocks as the new thread + * may be willing to acquire the callout cpu lock. + */ + c->c_cpu = CPUBLOCK; + spinlock_enter(); + CC_UNLOCK(cc); + new_cc = CC_CPU(new_cpu); + CC_LOCK(new_cc); + spinlock_exit(); + c->c_cpu = new_cpu; + return (new_cc); +} +#endif + /* * Start standard softclock thread. */ @@ -522,8 +444,9 @@ callout_process(sbintime_t now) #ifdef CALLOUT_PROFILING int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; #endif + cc = CC_SELF(); - CC_LOCK(cc); + mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); /* Compute the buckets of the last scan and present times. */ firstb = callout_hash(cc->cc_lastscan); @@ -626,7 +549,7 @@ next: avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; #endif - CC_UNLOCK(cc); + mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); /* * swi_sched acquires the thread lock, so we don't want to call it * with cc_lock held; incorrect locking order. 
@@ -639,55 +562,49 @@ static struct callout_cpu * callout_lock(struct callout *c) { struct callout_cpu *cc; - cc = CC_CPU(c->c_cpu); - CC_LOCK(cc); + int cpu; + + for (;;) { + cpu = c->c_cpu; +#ifdef SMP + if (cpu == CPUBLOCK) { + while (c->c_cpu == CPUBLOCK) + cpu_spinwait(); + continue; + } +#endif + cc = CC_CPU(cpu); + CC_LOCK(cc); + if (cpu == c->c_cpu) + break; + CC_UNLOCK(cc); + } return (cc); } -static struct callout_cpu * -callout_cc_add_locked(struct callout *c, struct callout_cpu *cc, - struct callout_args *coa, bool can_swap_cpu) +static void +callout_cc_add(struct callout *c, struct callout_cpu *cc, + sbintime_t sbt, sbintime_t precision, void (*func)(void *), + void *arg, int cpu, int flags) { -#ifndef NO_EVENTTIMERS - sbintime_t sbt; -#endif int bucket; CC_LOCK_ASSERT(cc); - - /* update flags before swapping locks, if any */ - c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART); - if (coa->flags & C_DIRECT_EXEC) - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT); - else - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); - -#ifdef SMP - /* - * Check if we are changing the CPU on which the callback - * should be executed and if we have a lock protecting us: - */ - if (can_swap_cpu != false && coa->cpu != c->c_cpu && - callout_lock_owned_client(c->c_flags, c->c_lock) != 0) { - CC_UNLOCK(cc); - c->c_cpu = coa->cpu; - cc = callout_lock(c); - } -#endif - if (coa->time < cc->cc_lastscan) - coa->time = cc->cc_lastscan; - c->c_arg = coa->arg; - c->c_func = coa->func; - c->c_time = coa->time; - c->c_precision = coa->precision; - + if (sbt < cc->cc_lastscan) + sbt = cc->cc_lastscan; + c->c_arg = arg; + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); + if (flags & C_DIRECT_EXEC) + c->c_flags |= CALLOUT_DIRECT; + c->c_flags &= ~CALLOUT_PROCESSED; + c->c_func = func; + c->c_time = sbt; + c->c_precision = precision; bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); - - /* Ensure we are first to be scanned, if called via a callback */ if (cc->cc_bucket == bucket) cc->cc_exec_next_dir = c; #ifndef NO_EVENTTIMERS @@ -700,16 +617,17 @@ callout_cc_add_locked(struct callout *c, struct callout_cpu *cc, sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; - cpu_new_callout(coa->cpu, sbt, c->c_time); + cpu_new_callout(cpu, sbt, c->c_time); } #endif - return (cc); } static void callout_cc_del(struct callout *c, struct callout_cpu *cc) { + if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) + return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } @@ -721,10 +639,20 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, #endif int direct) { - callout_func_t *c_func; + struct rm_priotracker tracker; + void (*c_func)(void *); void *c_arg; + struct lock_class *class; struct lock_object *c_lock; + uintptr_t lock_status; int c_flags; +#ifdef SMP + struct callout_cpu *new_cc; + void (*new_func)(void *); + void *new_arg; + int flags, new_cpu; + sbintime_t new_prec, new_time; +#endif #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; @@ -735,39 +663,37 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == (CALLOUT_PENDING | CALLOUT_ACTIVE), ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); + class = (c->c_lock != 
NULL) ? LOCK_CLASS(c->c_lock) : NULL; + lock_status = 0; + if (c->c_flags & CALLOUT_SHAREDLOCK) { + if (class == &lock_class_rm) + lock_status = (uintptr_t)&tracker; + else + lock_status = 1; + } c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; c_flags = c->c_flags; - - /* remove pending bit */ - c->c_flags &= ~CALLOUT_PENDING; - - /* reset our local state */ + if (c->c_flags & CALLOUT_LOCAL_ALLOC) + c->c_flags = CALLOUT_LOCAL_ALLOC; + else + c->c_flags &= ~CALLOUT_PENDING; cc->cc_exec_entity[direct].cc_curr = c; - cc->cc_exec_entity[direct].cc_restart = false; - cc->cc_exec_entity[direct].cc_drain_fn = NULL; - cc->cc_exec_entity[direct].cc_drain_arg = NULL; - + cc->cc_exec_entity[direct].cc_cancel = false; + CC_UNLOCK(cc); if (c_lock != NULL) { - cc->cc_exec_entity[direct].cc_cancel = false; - CC_UNLOCK(cc); - - /* unlocked region for switching locks */ - - callout_lock_client(c_flags, c_lock); - + class->lc_lock(c_lock, lock_status); /* - * Check if the callout may have been cancelled while - * we were switching locks. Even though the callout is - * specifying a lock, it might not be certain this - * lock is locked when starting and stopping callouts. + * The callout may have been cancelled + * while we switched locks. */ - CC_LOCK(cc); if (cc->cc_exec_entity[direct].cc_cancel) { - callout_unlock_client(c_flags, c_lock); - goto skip_cc_locked; + class->lc_unlock(c_lock); + goto skip; } + /* The callout cannot be stopped now. */ + cc->cc_exec_entity[direct].cc_cancel = true; if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING (*gcalls)++; @@ -788,11 +714,6 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } - /* The callout cannot be stopped now! */ - cc->cc_exec_entity[direct].cc_cancel = true; - CC_UNLOCK(cc); - - /* unlocked region */ KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) @@ -819,40 +740,85 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, #endif KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); - - /* - * At this point the callback structure might have been freed, - * so we need to check the previously copied value of - * "c->c_flags": - */ if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) - callout_unlock_client(c_flags, c_lock); - + class->lc_unlock(c_lock); +skip: CC_LOCK(cc); - -skip_cc_locked: KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr")); cc->cc_exec_entity[direct].cc_curr = NULL; - - /* Check if there is anything which needs draining */ - if (cc->cc_exec_entity[direct].cc_drain_fn != NULL) { + if (cc->cc_exec_entity[direct].cc_waiting) { /* - * Unlock the CPU callout last, so that any use of - * structures belonging to the callout are complete: + * There is someone waiting for the + * callout to complete. + * If the callout was scheduled for + * migration just cancel it. */ + if (cc_cce_migrating(cc, direct)) { + cc_cce_cleanup(cc, direct); + + /* + * It should be assert here that the callout is not + * destroyed but that is not easy. 
+ */ + c->c_flags &= ~CALLOUT_DFRMIGRATION; + } + cc->cc_exec_entity[direct].cc_waiting = false; CC_UNLOCK(cc); - /* call drain function unlocked */ - cc->cc_exec_entity[direct].cc_drain_fn( - cc->cc_exec_entity[direct].cc_drain_arg); + wakeup(&cc->cc_exec_entity[direct].cc_waiting); CC_LOCK(cc); - } else if (c_flags & CALLOUT_LOCAL_ALLOC) { - /* return callout back to freelist */ - callout_cc_del(c, cc); - } else if (cc->cc_exec_entity[direct].cc_restart) { - /* [re-]schedule callout, if any */ - cc = callout_cc_add_locked(c, cc, - &cc->cc_exec_entity[direct].cc_restart_args, false); + } else if (cc_cce_migrating(cc, direct)) { + KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, + ("Migrating legacy callout %p", c)); +#ifdef SMP + /* + * If the callout was scheduled for + * migration just perform it now. + */ + new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu; + new_time = cc->cc_exec_entity[direct].ce_migration_time; + new_prec = cc->cc_exec_entity[direct].ce_migration_prec; + new_func = cc->cc_exec_entity[direct].ce_migration_func; + new_arg = cc->cc_exec_entity[direct].ce_migration_arg; + cc_cce_cleanup(cc, direct); + + /* + * It should be assert here that the callout is not destroyed + * but that is not easy. + * + * As first thing, handle deferred callout stops. + */ + if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { + CTR3(KTR_CALLOUT, + "deferred cancelled %p func %p arg %p", + c, new_func, new_arg); + callout_cc_del(c, cc); + return; + } + c->c_flags &= ~CALLOUT_DFRMIGRATION; + + new_cc = callout_cpu_switch(c, cc, new_cpu); + flags = (direct) ? C_DIRECT_EXEC : 0; + callout_cc_add(c, new_cc, new_time, new_prec, new_func, + new_arg, new_cpu, flags); + CC_UNLOCK(new_cc); + CC_LOCK(cc); +#else + panic("migration should not happen"); +#endif } + /* + * If the current callout is locally allocated (from + * timeout(9)) then put it on the freelist. + * + * Note: we need to check the cached copy of c_flags because + * if it was not local, then it's not safe to deref the + * callout pointer. 
+ */ + KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || + c->c_flags == CALLOUT_LOCAL_ALLOC, + ("corrupted callout")); + if (c_flags & CALLOUT_LOCAL_ALLOC) + callout_cc_del(c, cc); } /* @@ -933,11 +899,10 @@ timeout(timeout_t *ftn, void *arg, int to_ticks) /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); + callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); - callout_reset(new, to_ticks, ftn, arg); - return (handle); } @@ -945,7 +910,6 @@ void untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; - bool match; /* * Check for a handle that was initialized @@ -956,11 +920,9 @@ untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) return; cc = callout_lock(handle.callout); - match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg); - CC_UNLOCK(cc); - - if (match) + if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) callout_stop(handle.callout); + CC_UNLOCK(cc); } void @@ -969,119 +931,6 @@ callout_handle_init(struct callout_handle *handle) handle->callout = NULL; } -static int -callout_restart_async(struct callout *c, struct callout_args *coa, - callout_func_t *drain_fn, void *drain_arg) -{ - struct callout_cpu *cc; - int cancelled; - int direct; - - cc = callout_lock(c); - - /* Figure out if the callout is direct or not */ - direct = ((c->c_flags & CALLOUT_DIRECT) != 0); - - /* - * Check if the callback is currently scheduled for - * completion: - */ - if (cc->cc_exec_entity[direct].cc_curr == c) { - /* - * Try to prevent the callback from running by setting - * the "cc_cancel" variable to "true". Also check if - * the callout was previously subject to a deferred - * callout restart: - */ - if (cc->cc_exec_entity[direct].cc_cancel == false || - (c->c_flags & CALLOUT_DEFRESTART) != 0) { - cc->cc_exec_entity[direct].cc_cancel = true; - cancelled = 1; - } else { - cancelled = 0; - } - - /* - * Prevent callback restart if "callout_drain_xxx()" - * is being called or we are stopping the callout or - * the callback was preallocated by us: - */ - if (cc->cc_exec_entity[direct].cc_drain_fn != NULL || - coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) { - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? "cancelled and draining" : "draining", - c, c->c_func, c->c_arg); - - /* clear old flags, if any */ - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | - CALLOUT_DEFRESTART | CALLOUT_PROCESSED); - - /* clear restart flag, if any */ - cc->cc_exec_entity[direct].cc_restart = false; - - /* set drain function, if any */ - if (drain_fn != NULL) { - cc->cc_exec_entity[direct].cc_drain_fn = drain_fn; - cc->cc_exec_entity[direct].cc_drain_arg = drain_arg; - cancelled |= 2; /* XXX define the value */ - } - } else { - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? 
"cancelled and restarting" : "restarting", - c, c->c_func, c->c_arg); - - /* get us back into the game */ - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | - CALLOUT_DEFRESTART); - c->c_flags &= ~CALLOUT_PROCESSED; - - /* enable deferred restart */ - cc->cc_exec_entity[direct].cc_restart = true; - - /* store arguments for the deferred restart, if any */ - cc->cc_exec_entity[direct].cc_restart_args = *coa; - } - } else { - /* stop callout */ - if (c->c_flags & CALLOUT_PENDING) { - /* - * The callback has not yet been executed, and - * we simply just need to unlink it: - */ - if ((c->c_flags & CALLOUT_PROCESSED) == 0) { - if (cc->cc_exec_next_dir == c) - cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - } - cancelled = 1; - } else { - cancelled = 0; - } - - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? "rescheduled" : "scheduled", - c, c->c_func, c->c_arg); - - /* [re-]schedule callout, if any */ - if (coa != NULL) { - cc = callout_cc_add_locked(c, cc, coa, true); - } else { - /* clear old flags, if any */ - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | - CALLOUT_DEFRESTART | CALLOUT_PROCESSED); - - /* return callback to pre-allocated list, if any */ - if ((c->c_flags & CALLOUT_LOCAL_ALLOC) && cancelled != 0) { - callout_cc_del(c, cc); - } - } - } - CC_UNLOCK(cc); - return (cancelled); -} - /* * New interface; clients allocate their own callout structures. * @@ -1100,32 +949,25 @@ callout_restart_async(struct callout *c, struct callout_args *coa, */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, - callout_func_t *ftn, void *arg, int cpu, int flags) + void (*ftn)(void *), void *arg, int cpu, int flags) { - struct callout_args coa; + sbintime_t to_sbt, pr; + struct callout_cpu *cc; + int cancelled, direct; - /* store arguments for callout add function */ - coa.func = ftn; - coa.arg = arg; - coa.precision = precision; - coa.flags = flags; - coa.cpu = cpu; - - /* compute the rest of the arguments needed */ - if (coa.flags & C_ABSOLUTE) { - coa.time = sbt; + cancelled = 0; + if (flags & C_ABSOLUTE) { + to_sbt = sbt; } else { - sbintime_t pr; - - if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt)) + if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) sbt = tick_sbt; - if ((coa.flags & C_HARDCLOCK) || + if ((flags & C_HARDCLOCK) || #ifdef NO_EVENTTIMERS sbt >= sbt_timethreshold) { - coa.time = getsbinuptime(); + to_sbt = getsbinuptime(); /* Add safety belt for the case of hz > 1000. */ - coa.time += tc_tick_sbt - tick_sbt; + to_sbt += tc_tick_sbt - tick_sbt; #else sbt >= sbt_tickthreshold) { /* @@ -1135,29 +977,101 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, * active ones. */ #ifdef __LP64__ - coa.time = DPCPU_GET(hardclocktime); + to_sbt = DPCPU_GET(hardclocktime); #else spinlock_enter(); - coa.time = DPCPU_GET(hardclocktime); + to_sbt = DPCPU_GET(hardclocktime); spinlock_exit(); #endif #endif - if ((coa.flags & C_HARDCLOCK) == 0) - coa.time += tick_sbt; + if ((flags & C_HARDCLOCK) == 0) + to_sbt += tick_sbt; } else - coa.time = sbinuptime(); - if (SBT_MAX - coa.time < sbt) - coa.time = SBT_MAX; + to_sbt = sbinuptime(); + if (SBT_MAX - to_sbt < sbt) + to_sbt = SBT_MAX; else - coa.time += sbt; - pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp : - sbt >> C_PRELGET(coa.flags)); - if (pr > coa.precision) - coa.precision = pr; + to_sbt += sbt; + pr = ((C_PRELGET(flags) < 0) ? 
sbt >> tc_precexp : + sbt >> C_PRELGET(flags)); + if (pr > precision) + precision = pr; + } + /* + * Don't allow migration of pre-allocated callouts lest they + * become unbalanced. + */ + if (c->c_flags & CALLOUT_LOCAL_ALLOC) + cpu = c->c_cpu; + direct = (c->c_flags & CALLOUT_DIRECT) != 0; + KASSERT(!direct || c->c_lock == NULL, + ("%s: direct callout %p has lock", __func__, c)); + cc = callout_lock(c); + if (cc->cc_exec_entity[direct].cc_curr == c) { + /* + * We're being asked to reschedule a callout which is + * currently in progress. If there is a lock then we + * can cancel the callout if it has not really started. + */ + if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel) + cancelled = cc->cc_exec_entity[direct].cc_cancel = true; + if (cc->cc_exec_entity[direct].cc_waiting) { + /* + * Someone has called callout_drain to kill this + * callout. Don't reschedule. + */ + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "cancelled" : "failed to cancel", + c, c->c_func, c->c_arg); + CC_UNLOCK(cc); + return (cancelled); + } + } + if (c->c_flags & CALLOUT_PENDING) { + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + if (cc->cc_exec_next_dir == c) + cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); + } else + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + cancelled = 1; + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); } - /* get callback started, if any */ - return (callout_restart_async(c, &coa, NULL, NULL)); +#ifdef SMP + /* + * If the callout must migrate try to perform it immediately. + * If the callout is currently running, just defer the migration + * to a more appropriate moment. + */ + if (c->c_cpu != cpu) { + if (cc->cc_exec_entity[direct].cc_curr == c) { + cc->cc_exec_entity[direct].ce_migration_cpu = cpu; + cc->cc_exec_entity[direct].ce_migration_time + = to_sbt; + cc->cc_exec_entity[direct].ce_migration_prec + = precision; + cc->cc_exec_entity[direct].ce_migration_func = ftn; + cc->cc_exec_entity[direct].ce_migration_arg = arg; + c->c_flags |= CALLOUT_DFRMIGRATION; + CTR6(KTR_CALLOUT, + "migration of %p func %p arg %p in %d.%08x to %u deferred", + c, c->c_func, c->c_arg, (int)(to_sbt >> 32), + (u_int)(to_sbt & 0xffffffff), cpu); + CC_UNLOCK(cc); + return (cancelled); + } + cc = callout_cpu_switch(c, cc, cpu); + } +#endif + + callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); + CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", + cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), + (u_int)(to_sbt & 0xffffffff)); + CC_UNLOCK(cc); + + return (cancelled); } /* @@ -1176,105 +1090,204 @@ callout_schedule(struct callout *c, int to_ticks) } int -callout_stop(struct callout *c) +_callout_stop_safe(struct callout *c, int safe) { - /* get callback stopped, if any */ - return (callout_restart_async(c, NULL, NULL, NULL)); -} + struct callout_cpu *cc, *old_cc; + struct lock_class *class; + int direct, sq_locked, use_lock; -static void -callout_drain_function(void *arg) -{ - wakeup(arg); -} + /* + * Some old subsystems don't hold Giant while running a callout_stop(), + * so just discard this check for the moment. 
+ */ + if (!safe && c->c_lock != NULL) { + if (c->c_lock == &Giant.lock_object) + use_lock = mtx_owned(&Giant); + else { + use_lock = 1; + class = LOCK_CLASS(c->c_lock); + class->lc_assert(c->c_lock, LA_XLOCKED); + } + } else + use_lock = 0; + direct = (c->c_flags & CALLOUT_DIRECT) != 0; + sq_locked = 0; + old_cc = NULL; +again: + cc = callout_lock(c); -int -callout_drain_async(struct callout *c, callout_func_t *fn, void *arg) -{ - /* get callback stopped, if any */ - return (callout_restart_async(c, NULL, fn, arg) & 2); -} + /* + * If the callout was migrating while the callout cpu lock was + * dropped, just drop the sleepqueue lock and check the states + * again. + */ + if (sq_locked != 0 && cc != old_cc) { +#ifdef SMP + CC_UNLOCK(cc); + sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting); + sq_locked = 0; + old_cc = NULL; + goto again; +#else + panic("migration should not happen"); +#endif + } -int -callout_drain(struct callout *c) -{ - int cancelled; - - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, - "Draining callout"); - - callout_lock_client(c->c_flags, c->c_lock); - - /* at this point the "c->c_cpu" field is not changing */ - - cancelled = callout_drain_async(c, &callout_drain_function, c); - - if (cancelled != 0) { - struct callout_cpu *cc; - int direct; - - CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p", - c, c->c_func, c->c_arg); - - cc = callout_lock(c); - direct = ((c->c_flags & CALLOUT_DIRECT) != 0); + /* + * If the callout isn't pending, it's not on the queue, so + * don't attempt to remove it from the queue. We can try to + * stop it by other means however. + */ + if (!(c->c_flags & CALLOUT_PENDING)) { + c->c_flags &= ~CALLOUT_ACTIVE; /* - * We've gotten our callout CPU lock, it is safe to - * drop the initial lock: + * If it wasn't on the queue and it isn't the current + * callout, then we can't stop it, so just bail. */ - callout_unlock_client(c->c_flags, c->c_lock); + if (cc->cc_exec_entity[direct].cc_curr != c) { + CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", + c, c->c_func, c->c_arg); + CC_UNLOCK(cc); + if (sq_locked) + sleepq_release( + &cc->cc_exec_entity[direct].cc_waiting); + return (0); + } - /* Wait for drain to complete */ + if (safe) { + /* + * The current callout is running (or just + * about to run) and blocking is allowed, so + * just wait for the current invocation to + * finish. + */ + while (cc->cc_exec_entity[direct].cc_curr == c) { + /* + * Use direct calls to sleepqueue interface + * instead of cv/msleep in order to avoid + * a LOR between cc_lock and sleepqueue + * chain spinlocks. This piece of code + * emulates a msleep_spin() call actually. + * + * If we already have the sleepqueue chain + * locked, then we can safely block. If we + * don't already have it locked, however, + * we have to drop the cc_lock to lock + * it. This opens several races, so we + * restart at the beginning once we have + * both locks. If nothing has changed, then + * we will end up back here with sq_locked + * set. + */ + if (!sq_locked) { + CC_UNLOCK(cc); + sleepq_lock( + &cc->cc_exec_entity[direct].cc_waiting); + sq_locked = 1; + old_cc = cc; + goto again; + } - while (cc->cc_exec_entity[direct].cc_curr == c) - msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0); + /* + * Migration could be cancelled here, but + * as long as it is still not sure when it + * will be packed up, just let softclock() + * take care of it. 
+ */ + cc->cc_exec_entity[direct].cc_waiting = true; + DROP_GIANT(); + CC_UNLOCK(cc); + sleepq_add( + &cc->cc_exec_entity[direct].cc_waiting, + &cc->cc_lock.lock_object, "codrain", + SLEEPQ_SLEEP, 0); + sleepq_wait( + &cc->cc_exec_entity[direct].cc_waiting, + 0); + sq_locked = 0; + old_cc = NULL; + /* Reacquire locks previously released. */ + PICKUP_GIANT(); + CC_LOCK(cc); + } + } else if (use_lock && + !cc->cc_exec_entity[direct].cc_cancel) { + /* + * The current callout is waiting for its + * lock which we hold. Cancel the callout + * and return. After our caller drops the + * lock, the callout will be skipped in + * softclock(). + */ + cc->cc_exec_entity[direct].cc_cancel = true; + CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", + c, c->c_func, c->c_arg); + KASSERT(!cc_cce_migrating(cc, direct), + ("callout wrongly scheduled for migration")); + CC_UNLOCK(cc); + KASSERT(!sq_locked, ("sleepqueue chain locked")); + return (1); + } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { + c->c_flags &= ~CALLOUT_DFRMIGRATION; + CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", + c, c->c_func, c->c_arg); + CC_UNLOCK(cc); + return (1); + } + CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", + c, c->c_func, c->c_arg); CC_UNLOCK(cc); - } else { - callout_unlock_client(c->c_flags, c->c_lock); + KASSERT(!sq_locked, ("sleepqueue chain still locked")); + return (0); } + if (sq_locked) + sleepq_release(&cc->cc_exec_entity[direct].cc_waiting); + + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + if (cc->cc_exec_next_dir == c) + cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); + } else + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + callout_cc_del(c, cc); - return (cancelled & 1); + CC_UNLOCK(cc); + return (1); } void callout_init(struct callout *c, int mpsafe) { + bzero(c, sizeof *c); if (mpsafe) { - _callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED); + c->c_lock = NULL; + c->c_flags = CALLOUT_RETURNUNLOCKED; } else { - _callout_init_lock(c, &Giant.lock_object, 0); + c->c_lock = &Giant.lock_object; + c->c_flags = 0; } + c->c_cpu = timeout_cpu; } void _callout_init_lock(struct callout *c, struct lock_object *lock, int flags) { bzero(c, sizeof *c); - KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0, - ("callout_init_lock: bad flags 0x%08x", flags)); - flags &= CALLOUT_RETURNUNLOCKED; - if (lock != NULL) { - struct lock_class *class = LOCK_CLASS(lock); - if (class == &lock_class_mtx_sleep) - flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX); - else if (class == &lock_class_mtx_spin) - flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN); - else if (class == &lock_class_rm) - flags |= CALLOUT_SET_LC(CALLOUT_LC_RM); - else if (class == &lock_class_rw) - flags |= CALLOUT_SET_LC(CALLOUT_LC_RW); - else - panic("callout_init_lock: Unsupported lock class '%s'\n", class->lc_name); - } else { - flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0); - } c->c_lock = lock; - c->c_flags = flags; + KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, + ("callout_init_lock: bad flags %d", flags)); + KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, + ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); + KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & + (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", + __func__)); + c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); c->c_cpu = timeout_cpu; } diff --git 
a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c index 18dc2a058f50..bbbec920ef0d 100644 --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -152,8 +152,7 @@ static uma_zone_t sleepq_zone; */ static int sleepq_catch_signals(void *wchan, int pri); static int sleepq_check_signals(void); -static int sleepq_check_timeout(struct thread *); -static void sleepq_stop_timeout(struct thread *); +static int sleepq_check_timeout(void); #ifdef INVARIANTS static void sleepq_dtor(void *mem, int size, void *arg); #endif @@ -374,14 +373,17 @@ void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags) { + struct sleepqueue_chain *sc; struct thread *td; td = curthread; - - mtx_lock_spin(&td->td_slpmutex); + sc = SC_LOOKUP(wchan); + mtx_assert(&sc->sc_lock, MA_OWNED); + MPASS(TD_ON_SLEEPQ(td)); + MPASS(td->td_sleepqueue == NULL); + MPASS(wchan != NULL); callout_reset_sbt_on(&td->td_slpcallout, sbt, pr, sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC); - mtx_unlock_spin(&td->td_slpmutex); } /* @@ -557,8 +559,11 @@ sleepq_switch(void *wchan, int pri) * Check to see if we timed out. */ static int -sleepq_check_timeout(struct thread *td) +sleepq_check_timeout(void) { + struct thread *td; + + td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* @@ -568,18 +573,25 @@ sleepq_check_timeout(struct thread *td) td->td_flags &= ~TDF_TIMEOUT; return (EWOULDBLOCK); } - return (0); -} -/* - * Atomically stop the timeout by using a mutex. - */ -static void -sleepq_stop_timeout(struct thread *td) -{ - mtx_lock_spin(&td->td_slpmutex); - callout_stop(&td->td_slpcallout); - mtx_unlock_spin(&td->td_slpmutex); + /* + * If TDF_TIMOFAIL is set, the timeout ran after we had + * already been woken up. + */ + if (td->td_flags & TDF_TIMOFAIL) + td->td_flags &= ~TDF_TIMOFAIL; + + /* + * If callout_stop() fails, then the timeout is running on + * another CPU, so synchronize with it to avoid having it + * accidentally wake up a subsequent sleep. + */ + else if (callout_stop(&td->td_slpcallout) == 0) { + td->td_flags |= TDF_TIMEOUT; + TD_SET_SLEEPING(td); + mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL); + } + return (0); } /* @@ -652,11 +664,9 @@ sleepq_timedwait(void *wchan, int pri) MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); - rval = sleepq_check_timeout(td); + rval = sleepq_check_timeout(); thread_unlock(td); - sleepq_stop_timeout(td); - return (rval); } @@ -667,18 +677,12 @@ sleepq_timedwait(void *wchan, int pri) int sleepq_timedwait_sig(void *wchan, int pri) { - struct thread *td; int rcatch, rvalt, rvals; - td = curthread; - rcatch = sleepq_catch_signals(wchan, pri); - rvalt = sleepq_check_timeout(td); + rvalt = sleepq_check_timeout(); rvals = sleepq_check_signals(); - thread_unlock(td); - - sleepq_stop_timeout(td); - + thread_unlock(curthread); if (rcatch) return (rcatch); if (rvals) @@ -885,49 +889,64 @@ sleepq_broadcast(void *wchan, int flags, int pri, int queue) static void sleepq_timeout(void *arg) { - struct thread *td = arg; - int wakeup_swapper = 0; + struct sleepqueue_chain *sc; + struct sleepqueue *sq; + struct thread *td; + void *wchan; + int wakeup_swapper; + td = arg; + wakeup_swapper = 0; CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); - /* Handle the three cases which can happen */ - + /* + * First, see if the thread is asleep and get the wait channel if + * it is. 
+ */ thread_lock(td); - if (TD_ON_SLEEPQ(td)) { - if (TD_IS_SLEEPING(td)) { - struct sleepqueue_chain *sc; - struct sleepqueue *sq; - void *wchan; - - /* - * Case I - thread is asleep and needs to be - * awoken: - */ - wchan = td->td_wchan; - sc = SC_LOOKUP(wchan); - THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); - sq = sleepq_lookup(wchan); - MPASS(sq != NULL); - td->td_flags |= TDF_TIMEOUT; - wakeup_swapper = sleepq_resume_thread(sq, td, 0); - } else { - /* - * Case II - cancel going to sleep by setting - * the timeout flag because the target thread - * is not asleep yet. It can be on another CPU - * in between sleepq_add() and one of the - * sleepq_*wait*() routines or it can be in - * sleepq_catch_signals(). - */ - td->td_flags |= TDF_TIMEOUT; - } - } else { - /* - * Case III - thread is already woken up by a wakeup - * call and should not timeout. Nothing to do! - */ + if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { + wchan = td->td_wchan; + sc = SC_LOOKUP(wchan); + THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); + sq = sleepq_lookup(wchan); + MPASS(sq != NULL); + td->td_flags |= TDF_TIMEOUT; + wakeup_swapper = sleepq_resume_thread(sq, td, 0); + thread_unlock(td); + if (wakeup_swapper) + kick_proc0(); + return; } + + /* + * If the thread is on the SLEEPQ but isn't sleeping yet, it + * can either be on another CPU in between sleepq_add() and + * one of the sleepq_*wait*() routines or it can be in + * sleepq_catch_signals(). + */ + if (TD_ON_SLEEPQ(td)) { + td->td_flags |= TDF_TIMEOUT; + thread_unlock(td); + return; + } + + /* + * Now check for the edge cases. First, if TDF_TIMEOUT is set, + * then the other thread has already yielded to us, so clear + * the flag and resume it. If TDF_TIMEOUT is not set, then the + * we know that the other thread is not on a sleep queue, but it + * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL + * to let it know that the timeout has already run and doesn't + * need to be canceled. 
+ */ + if (td->td_flags & TDF_TIMEOUT) { + MPASS(TD_IS_SLEEPING(td)); + td->td_flags &= ~TDF_TIMEOUT; + TD_CLR_SLEEPING(td); + wakeup_swapper = setrunnable(td); + } else + td->td_flags |= TDF_TIMOFAIL; thread_unlock(td); if (wakeup_swapper) kick_proc0(); diff --git a/sys/ofed/include/linux/completion.h b/sys/ofed/include/linux/completion.h index 088828399c78..df4aec3595e4 100644 --- a/sys/ofed/include/linux/completion.h +++ b/sys/ofed/include/linux/completion.h @@ -105,9 +105,7 @@ _wait_for_timeout_common(struct completion *c, long timeout, int flags) if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); - sleepq_release(c); sleepq_set_timeout(c, end - ticks); - sleepq_lock(c); if (flags & SLEEPQ_INTERRUPTIBLE) { if (sleepq_timedwait_sig(c, 0) != 0) return (-ERESTARTSYS); diff --git a/sys/sys/_callout.h b/sys/sys/_callout.h index f58a3835c973..e186aecb8ddf 100644 --- a/sys/sys/_callout.h +++ b/sys/sys/_callout.h @@ -46,30 +46,19 @@ LIST_HEAD(callout_list, callout); SLIST_HEAD(callout_slist, callout); TAILQ_HEAD(callout_tailq, callout); -typedef void callout_func_t(void *); - -struct callout_args { - sbintime_t time; /* absolute time for the event */ - sbintime_t precision; /* delta allowed wrt opt */ - void *arg; /* function argument */ - callout_func_t *func; /* function to call */ - int flags; /* flags passed to callout_reset() */ - int cpu; /* CPU we're scheduled on */ -}; - struct callout { union { LIST_ENTRY(callout) le; SLIST_ENTRY(callout) sle; TAILQ_ENTRY(callout) tqe; } c_links; - sbintime_t c_time; /* absolute time for the event */ + sbintime_t c_time; /* ticks to the event */ sbintime_t c_precision; /* delta allowed wrt opt */ void *c_arg; /* function argument */ - callout_func_t *c_func; /* function to call */ - struct lock_object *c_lock; /* callback lock */ + void (*c_func)(void *); /* function to call */ + struct lock_object *c_lock; /* lock to handle */ int c_flags; /* state of this entry */ - int c_cpu; /* CPU we're scheduled on */ + volatile int c_cpu; /* CPU we're scheduled on */ }; #endif diff --git a/sys/sys/callout.h b/sys/sys/callout.h index 235da99f039a..1096cb26ff92 100644 --- a/sys/sys/callout.h +++ b/sys/sys/callout.h @@ -45,12 +45,10 @@ #define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */ #define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */ #define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */ -#define CALLOUT_UNUSED_5 0x0020 /* --available-- */ -#define CALLOUT_DEFRESTART 0x0040 /* callout restart is deferred */ +#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */ +#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */ #define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? 
*/ #define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */ -#define CALLOUT_SET_LC(x) (((x) & 7) << 16) /* set lock class */ -#define CALLOUT_GET_LC(x) (((x) >> 16) & 7) /* get lock class */ #define C_DIRECT_EXEC 0x0001 /* direct execution of callout */ #define C_PRELBITS 7 @@ -67,8 +65,7 @@ struct callout_handle { #ifdef _KERNEL #define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) #define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) -int callout_drain(struct callout *); -int callout_drain_async(struct callout *, callout_func_t *, void *); +#define callout_drain(c) _callout_stop_safe(c, 1) void callout_init(struct callout *, int); void _callout_init_lock(struct callout *, struct lock_object *, int); #define callout_init_mtx(c, mtx, flags) \ @@ -82,7 +79,7 @@ void _callout_init_lock(struct callout *, struct lock_object *, int); NULL, (flags)) #define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t, - callout_func_t *, void *, int, int); + void (*)(void *), void *, int, int); #define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), (c)->c_cpu, (flags)) #define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \ @@ -106,7 +103,8 @@ int callout_schedule(struct callout *, int); int callout_schedule_on(struct callout *, int, int); #define callout_schedule_curcpu(c, on_tick) \ callout_schedule_on((c), (on_tick), PCPU_GET(cpuid)) -int callout_stop(struct callout *); +#define callout_stop(c) _callout_stop_safe(c, 0) +int _callout_stop_safe(struct callout *, int); void callout_process(sbintime_t now); #endif diff --git a/sys/sys/param.h b/sys/sys/param.h index 0babf847870c..bf59b0bf7226 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1100054 /* Master, propagated to newvers */ +#define __FreeBSD_version 1100055 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/proc.h b/sys/sys/proc.h index e694d918c45e..9b6c695d37fb 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -308,7 +308,6 @@ struct thread { } td_uretoff; /* (k) Syscall aux returns. */ #define td_retval td_uretoff.tdu_retval struct callout td_slpcallout; /* (h) Callout for sleep. */ - struct mtx td_slpmutex; /* (h) Mutex for sleep callout */ struct trapframe *td_frame; /* (k) */ struct vm_object *td_kstack_obj;/* (a) Kstack object. */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ @@ -365,7 +364,7 @@ do { \ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ -#define TDF_UNUSED12 0x00001000 /* --available-- */ +#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ @@ -707,7 +706,7 @@ struct proc { #define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */ #define SWT_TURNSTILE 3 /* Turnstile contention. */ #define SWT_SLEEPQ 4 /* Sleepq wait. */ -#define SWT_UNUSED5 5 /* --available-- */ +#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */ #define SWT_RELINQUISH 6 /* yield call. 
*/ #define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */ #define SWT_IDLE 8 /* Switching from the idle thread. */
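Editor's note: the hunks above restore the pre-change callout KPI, in which callout_stop() maps to _callout_stop_safe(c, 0) and callout_drain() maps to _callout_stop_safe(c, 1), and a callout may be bound to a mutex via callout_init_mtx(). The following sketch is illustrative only and is not part of the patch; the softc layout and the my_* names are hypothetical, while the callout and mutex calls are the interfaces the patch touches.

/*
 * Minimal consumer of the restored callout KPI (illustrative sketch).
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct my_softc {
	struct mtx	mtx;		/* protects the timer state */
	struct callout	timer;		/* single-shot callout */
	int		ticks_left;
};

static void
my_timer_fn(void *arg)
{
	struct my_softc *sc = arg;

	/* Invoked with sc->mtx held because of callout_init_mtx(). */
	mtx_assert(&sc->mtx, MA_OWNED);
	if (--sc->ticks_left > 0)
		callout_reset(&sc->timer, hz, my_timer_fn, sc);	/* re-arm */
}

static void
my_attach(struct my_softc *sc)
{
	mtx_init(&sc->mtx, "my softc", NULL, MTX_DEF);
	callout_init_mtx(&sc->timer, &sc->mtx, 0);
	mtx_lock(&sc->mtx);
	sc->ticks_left = 10;
	callout_reset(&sc->timer, hz, my_timer_fn, sc);
	mtx_unlock(&sc->mtx);
}

static void
my_detach(struct my_softc *sc)
{
	/*
	 * callout_drain() (_callout_stop_safe(c, 1) after this change) may
	 * sleep, so call it without sc->mtx held; it returns only once any
	 * in-progress invocation of my_timer_fn() has completed.
	 */
	callout_drain(&sc->timer);
	mtx_destroy(&sc->mtx);
}

The design point this illustrates is the distinction the revert reinstates: callout_stop() only cancels a pending invocation and never sleeps, whereas callout_drain() additionally waits (via the sleepqueue path shown in _callout_stop_safe above) for a currently running handler to finish before the structure may be freed.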