tcp: For hostcache performance, use atomics instead of counters

As accessing the tcp hostcache happens frequently on some
classes of servers, it was recommended to use atomic_add/subtract
rather than (per-CPU distributed) counters, which have to be
summed up at high cost to cache efficiency.

PR: 254333
MFC after: 2 weeks
Sponsored by: NetApp, Inc.
Reviewed By: #transport, tuexen, jtl
Differential Revision: https://reviews.freebsd.org/D29522
This commit is contained in:
Richard Scheffenegger 2021-04-01 10:00:32 +02:00
parent 03efa462b2
commit 529a2a0f27
2 changed files with 12 additions and 14 deletions

View File

@ -147,8 +147,8 @@ SYSCTL_UINT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit,
CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_hostcache.bucket_limit), 0,
"Per-bucket hash limit for hostcache");
SYSCTL_COUNTER_U64(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(tcp_hostcache.cache_count),
SYSCTL_UINT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(tcp_hostcache.cache_count), 0,
"Current number of entries in hostcache");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_VNET | CTLFLAG_RW,
@ -199,8 +199,7 @@ tcp_hc_init(void)
/*
* Initialize hostcache structures.
*/
V_tcp_hostcache.cache_count = counter_u64_alloc(M_WAITOK);
counter_u64_zero(V_tcp_hostcache.cache_count);
atomic_store_int(&V_tcp_hostcache.cache_count, 0);
V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
V_tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT;
V_tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
@ -268,9 +267,6 @@ tcp_hc_destroy(void)
/* Purge all hc entries. */
tcp_hc_purge_internal(1);
/* Release the counter */
counter_u64_free(V_tcp_hostcache.cache_count);
/* Free the uma zone and the allocated hash table. */
uma_zdestroy(V_tcp_hostcache.zone);
@ -378,7 +374,7 @@ tcp_hc_insert(struct in_conninfo *inc)
* If the bucket limit is reached, reuse the least-used element.
*/
if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit ||
counter_u64_fetch(V_tcp_hostcache.cache_count) >= V_tcp_hostcache.cache_limit) {
atomic_load_int(&V_tcp_hostcache.cache_count) >= V_tcp_hostcache.cache_limit) {
hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
/*
* At first we were dropping the last element, just to
@ -395,7 +391,7 @@ tcp_hc_insert(struct in_conninfo *inc)
}
TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
V_tcp_hostcache.hashbase[hash].hch_length--;
counter_u64_add(V_tcp_hostcache.cache_count, -1);
atomic_subtract_int(&V_tcp_hostcache.cache_count, 1);
TCPSTAT_INC(tcps_hc_bucketoverflow);
#if 0
uma_zfree(V_tcp_hostcache.zone, hc_entry);
@ -428,7 +424,7 @@ tcp_hc_insert(struct in_conninfo *inc)
*/
TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
V_tcp_hostcache.hashbase[hash].hch_length++;
counter_u64_add(V_tcp_hostcache.cache_count, 1);
atomic_add_int(&V_tcp_hostcache.cache_count, 1);
TCPSTAT_INC(tcps_hc_added);
return hc_entry;
@ -644,7 +640,8 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
/* Optimize Buffer length query by sbin/sysctl */
if (req->oldptr == NULL) {
len = (counter_u64_fetch(V_tcp_hostcache.cache_count) + 1) * linesize;
len = (atomic_load_int(&V_tcp_hostcache.cache_count) + 1) *
linesize;
return (SYSCTL_OUT(req, NULL, len));
}
@ -654,7 +651,8 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
}
/* Use a buffer sized for one full bucket */
sbuf_new_for_sysctl(&sb, NULL, V_tcp_hostcache.bucket_limit * linesize, req);
sbuf_new_for_sysctl(&sb, NULL, V_tcp_hostcache.bucket_limit *
linesize, req);
sbuf_printf(&sb,
"\nIP address MTU SSTRESH RTT RTTVAR "
@ -716,7 +714,7 @@ tcp_hc_purge_internal(int all)
hc_entry, rmx_q);
uma_zfree(V_tcp_hostcache.zone, hc_entry);
V_tcp_hostcache.hashbase[i].hch_length--;
counter_u64_add(V_tcp_hostcache.cache_count, -1);
atomic_subtract_int(&V_tcp_hostcache.cache_count, 1);
} else
hc_entry->rmx_expire -= V_tcp_hostcache.prune;
}

View File

@ -74,7 +74,7 @@ struct tcp_hostcache {
u_int hashsize;
u_int hashmask;
u_int bucket_limit;
counter_u64_t cache_count;
u_int cache_count;
u_int cache_limit;
int expire;
int prune;