2013-04-08 19:40:53 +00:00
|
|
|
/*-
|
2017-11-27 15:20:12 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
|
|
*
|
2013-04-08 19:40:53 +00:00
|
|
|
* Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/kernel.h>
|
Fix issues with zeroing and fetching the counters, on x86 and ppc64.
Issues were noted by Bruce Evans and are present on all architectures.
On i386, a counter fetch should use atomic read of 64bit value,
otherwise carry from the increment on other CPU could be lost for the
given fetch, making error of 2^32. If 64bit read (cmpxchg8b) is not
available on the machine, it cannot be SMP and it is enough to disable
preemption around read to avoid the split read.
On x86 the counter increment is not atomic on purpose, which makes it
possible for the store of the incremented result to override just
zeroed per-cpu slot. The effect would be a counter going off by
arbitrary value after zeroing. Perform the counter zeroing on the
same processor which does the increments, making the operations
mutually exclusive. On i386, same as for the fetching, if the
cmpxchg8b is not available, machine is not SMP and we disable
preemption for zeroing.
PowerPC64 is treated the same as amd64.
For other architectures, the changes only allow the compilation to
succeed, without fixing the issues with zeroing or fetching. It
should be possible to handle them by using 64-bit loads and stores that
are atomic with respect to preemption (assuming the architectures are
also converted from using critical sections to proper asm). If an
architecture does not provide the facility, using a global (spin) mutex
would be a non-optimal but working solution.
Noted by: bde
Sponsored by: The FreeBSD Foundation
2013-07-01 02:48:27 +00:00
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/mutex.h>
|
|
|
|
#include <sys/proc.h>
|
|
|
|
#include <sys/sched.h>
|
2013-04-08 19:40:53 +00:00
|
|
|
#include <sys/smp.h>
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <vm/uma.h>
|
Fix issues with zeroing and fetching the counters, on x86 and ppc64.
Issues were noted by Bruce Evans and are present on all architectures.
On i386, a counter fetch should use atomic read of 64bit value,
otherwise carry from the increment on other CPU could be lost for the
given fetch, making error of 2^32. If 64bit read (cmpxchg8b) is not
available on the machine, it cannot be SMP and it is enough to disable
preemption around read to avoid the split read.
On x86 the counter increment is not atomic on purpose, which makes it
possible for the store of the incremented result to override just
zeroed per-cpu slot. The effect would be a counter going off by
arbitrary value after zeroing. Perform the counter zeroing on the
same processor which does the increments, making the operations
mutually exclusive. On i386, same as for the fetching, if the
cmpxchg8b is not available, machine is not SMP and we disable
preemption for zeroing.
PowerPC64 is treated the same as amd64.
For other architectures, the changes only allow the compilation to
succeed, without fixing the issues with zeroing or fetching. It
should be possible to handle them by using 64-bit loads and stores that
are atomic with respect to preemption (assuming the architectures are
also converted from using critical sections to proper asm). If an
architecture does not provide the facility, using a global (spin) mutex
would be a non-optimal but working solution.
Noted by: bde
Sponsored by: The FreeBSD Foundation
2013-07-01 02:48:27 +00:00
|
|
|
|
|
|
|
#define IN_SUBR_COUNTER_C
|
|
|
|
#include <sys/counter.h>
|
2018-07-06 02:06:03 +00:00
|
|
|
|
2013-04-08 19:40:53 +00:00
|
|
|
void
|
|
|
|
counter_u64_zero(counter_u64_t c)
|
|
|
|
{
|
|
|
|
|
Fix issues with zeroing and fetching the counters, on x86 and ppc64.
Issues were noted by Bruce Evans and are present on all architectures.
On i386, a counter fetch should use atomic read of 64bit value,
otherwise carry from the increment on other CPU could be lost for the
given fetch, making error of 2^32. If 64bit read (cmpxchg8b) is not
available on the machine, it cannot be SMP and it is enough to disable
preemption around read to avoid the split read.
On x86 the counter increment is not atomic on purpose, which makes it
possible for the store of the incremented result to override just
zeroed per-cpu slot. The effect would be a counter going off by
arbitrary value after zeroing. Perform the counter zeroing on the
same processor which does the increments, making the operations
mutually exclusive. On i386, same as for the fetching, if the
cmpxchg8b is not available, machine is not SMP and we disable
preemption for zeroing.
PowerPC64 is treated the same as amd64.
For other architectures, the changes made to allow the compilation to
succeed, without fixing the issues with zeroing or fetching. It
should be possible to handle them by using the 64bit loads and stores
atomic WRT preemption (assuming the architectures also converted from
using critical sections to proper asm). If architecture does not
provide the facility, using global (spin) mutex would be non-optimal
but working solution.
Noted by: bde
Sponsored by: The FreeBSD Foundation
2013-07-01 02:48:27 +00:00
|
|
|
counter_u64_zero_inline(c);
|
2013-04-08 19:40:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
counter_u64_fetch(counter_u64_t c)
|
|
|
|
{
|
|
|
|
|
Fix issues with zeroing and fetching the counters, on x86 and ppc64.
Issues were noted by Bruce Evans and are present on all architectures.
On i386, a counter fetch should use atomic read of 64bit value,
otherwise carry from the increment on other CPU could be lost for the
given fetch, making error of 2^32. If 64bit read (cmpxchg8b) is not
available on the machine, it cannot be SMP and it is enough to disable
preemption around read to avoid the split read.
On x86 the counter increment is not atomic on purpose, which makes it
possible for the store of the incremented result to override just
zeroed per-cpu slot. The effect would be a counter going off by
arbitrary value after zeroing. Perform the counter zeroing on the
same processor which does the increments, making the operations
mutually exclusive. On i386, same as for the fetching, if the
cmpxchg8b is not available, machine is not SMP and we disable
preemption for zeroing.
PowerPC64 is treated the same as amd64.
For other architectures, the changes made to allow the compilation to
succeed, without fixing the issues with zeroing or fetching. It
should be possible to handle them by using the 64bit loads and stores
atomic WRT preemption (assuming the architectures also converted from
using critical sections to proper asm). If architecture does not
provide the facility, using global (spin) mutex would be non-optimal
but working solution.
Noted by: bde
Sponsored by: The FreeBSD Foundation
2013-07-01 02:48:27 +00:00
|
|
|
return (counter_u64_fetch_inline(c));
|
2013-04-08 19:40:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
counter_u64_t
|
|
|
|
counter_u64_alloc(int flags)
|
|
|
|
{
|
|
|
|
|
Fix pre-SI_SUB_CPU initialization of per-CPU counters.
r336020 introduced pcpu_page_alloc(), replacing page_alloc() as the
backend allocator for PCPU UMA zones. Unlike page_alloc(), it does
not honour malloc(9) flags such as M_ZERO or M_NODUMP, so fix that.
r336020 also changed counter(9) to initialize each counter using a
CPU_FOREACH() loop instead of an SMP rendezvous. Before SI_SUB_CPU,
smp_rendezvous() will only execute the callback on the current CPU
(i.e., CPU 0), so only one counter gets zeroed. The rest are zeroed
by virtue of the fact that UMA gratuitously zeroes slabs when importing
them into a zone.
Prior to SI_SUB_CPU, all_cpus is clear, so with r336020 we weren't
zeroing vm_cnt counters during boot: the CPU_FOREACH() loop had no
effect, and pcpu_page_alloc() didn't honour M_ZERO. Fix this by
iterating over the full range of CPU IDs when zeroing counters,
ignoring whether the corresponding bits in all_cpus are set.
Reported and tested by: pho (previous version)
Reviewed by: kib (previous version)
Differential Revision: https://reviews.freebsd.org/D16190
2018-07-10 00:18:12 +00:00
|
|
|
return (uma_zalloc_pcpu(pcpu_zone_64, flags | M_ZERO));
|
2013-04-08 19:40:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
counter_u64_free(counter_u64_t c)
|
|
|
|
{
|
|
|
|
|
2018-06-08 22:06:32 +00:00
|
|
|
uma_zfree_pcpu(pcpu_zone_64, c);
|
2013-04-08 19:40:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
sysctl_handle_counter_u64(SYSCTL_HANDLER_ARGS)
|
|
|
|
{
|
|
|
|
uint64_t out;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
out = counter_u64_fetch(*(counter_u64_t *)arg1);
|
|
|
|
|
|
|
|
error = SYSCTL_OUT(req, &out, sizeof(uint64_t));
|
|
|
|
|
|
|
|
if (error || !req->newptr)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Any write attempt to a counter zeroes it.
|
|
|
|
*/
|
|
|
|
counter_u64_zero(*(counter_u64_t *)arg1);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
2016-03-15 00:05:00 +00:00
|
|
|
|
|
|
|
int
|
|
|
|
sysctl_handle_counter_u64_array(SYSCTL_HANDLER_ARGS)
|
|
|
|
{
|
|
|
|
uint64_t *out;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
out = malloc(arg2 * sizeof(uint64_t), M_TEMP, M_WAITOK);
|
|
|
|
for (int i = 0; i < arg2; i++)
|
|
|
|
out[i] = counter_u64_fetch(((counter_u64_t *)arg1)[i]);
|
|
|
|
|
|
|
|
error = SYSCTL_OUT(req, out, arg2 * sizeof(uint64_t));
|
2016-03-15 00:21:32 +00:00
|
|
|
free(out, M_TEMP);
|
2016-03-15 00:05:00 +00:00
|
|
|
|
|
|
|
if (error || !req->newptr)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Any write attempt to a counter zeroes it.
|
|
|
|
*/
|
|
|
|
for (int i = 0; i < arg2; i++)
|
|
|
|
counter_u64_zero(((counter_u64_t *)arg1)[i]);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
2016-12-09 17:58:34 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* MP-friendly version of ppsratecheck().
|
|
|
|
*
|
|
|
|
* Returns non-negative if we are in the rate, negative otherwise.
|
|
|
|
* 0 - rate limit not reached.
|
|
|
|
* -1 - rate limit reached.
|
|
|
|
* >0 - rate limit was reached before, and was just reset. The return value
|
|
|
|
* is number of events since last reset.
|
|
|
|
*/
|
|
|
|
int64_t
|
|
|
|
counter_ratecheck(struct counter_rate *cr, int64_t limit)
|
|
|
|
{
|
|
|
|
int64_t val;
|
|
|
|
int now;
|
|
|
|
|
|
|
|
val = cr->cr_over;
|
|
|
|
now = ticks;
|
|
|
|
|
2018-06-27 22:00:50 +00:00
|
|
|
if ((u_int)(now - cr->cr_ticks) >= hz) {
|
2016-12-09 17:58:34 +00:00
|
|
|
/*
|
|
|
|
* Time to clear the structure, we are in the next second.
|
|
|
|
* First try unlocked read, and then proceed with atomic.
|
|
|
|
*/
|
|
|
|
if ((cr->cr_lock == 0) &&
|
2016-12-09 19:07:31 +00:00
|
|
|
atomic_cmpset_acq_int(&cr->cr_lock, 0, 1)) {
|
2016-12-09 17:58:34 +00:00
|
|
|
/*
|
|
|
|
* Check if other thread has just went through the
|
|
|
|
* reset sequence before us.
|
|
|
|
*/
|
2018-06-27 22:00:50 +00:00
|
|
|
if ((u_int)(now - cr->cr_ticks) >= hz) {
|
2016-12-09 17:58:34 +00:00
|
|
|
val = counter_u64_fetch(cr->cr_rate);
|
|
|
|
counter_u64_zero(cr->cr_rate);
|
|
|
|
cr->cr_over = 0;
|
|
|
|
cr->cr_ticks = now;
|
2016-12-13 20:11:45 +00:00
|
|
|
if (val <= limit)
|
|
|
|
val = 0;
|
2016-12-09 17:58:34 +00:00
|
|
|
}
|
|
|
|
atomic_store_rel_int(&cr->cr_lock, 0);
|
|
|
|
} else
|
|
|
|
/*
|
|
|
|
* We failed to lock, in this case other thread may
|
|
|
|
* be running counter_u64_zero(), so it is not safe
|
|
|
|
* to do an update, we skip it.
|
|
|
|
*/
|
|
|
|
return (val);
|
|
|
|
}
|
|
|
|
|
|
|
|
counter_u64_add(cr->cr_rate, 1);
|
|
|
|
if (cr->cr_over != 0)
|
|
|
|
return (-1);
|
|
|
|
if (counter_u64_fetch(cr->cr_rate) > limit)
|
|
|
|
val = cr->cr_over = -1;
|
|
|
|
|
|
|
|
return (val);
|
|
|
|
}
|