prometheus_sysctl_exporter: fix metric aliasing

When exporting sysctls to Prometheus, the exporter replaces "." with
"_".  This caused several metrics to alias, confusing the Prometheus
server.  Fix it by:

* Renaming the "tcp_log_bucket" UMA zone to "tcp_log_id_bucket".  Also,
  rename "tcp_log_node" to "tcp_log_id_node" for consistency.

* Not exporting sysctls with "(LEGACY)" in the description.  That is
  used by ZFS sysctls that have been replaced by others, many of which
  alias to the same Prometheus metric name (like "vfs.zfs.arc_max" and
  "vfs.zfs.arc.max").

PR:		259607
Reported by:	delphij
MFC after:	2 weeks
Sponsored by:	Axcient
Reviewed by:	delphij,rew,thj
Differential Revision: https://reviews.freebsd.org/D34952
This commit is contained in:
Alan Somers 2022-04-18 15:29:37 -06:00
parent 19447fc488
commit 8c47d8f538
2 changed files with 26 additions and 18 deletions

View File

@ -65,7 +65,7 @@ __FBSDID("$FreeBSD$");
#define TCP_LOG_EXPIRE_INTVL ((sbintime_t)5 * SBT_1S)
bool tcp_log_verbose;
static uma_zone_t tcp_log_bucket_zone, tcp_log_node_zone, tcp_log_zone;
static uma_zone_t tcp_log_id_bucket_zone, tcp_log_id_node_zone, tcp_log_zone;
static int tcp_log_session_limit = TCP_LOG_BUF_DEFAULT_SESSION_LIMIT;
static uint32_t tcp_log_version = TCP_LOG_BUF_VER;
RB_HEAD(tcp_log_id_tree, tcp_log_id_bucket);
@ -99,16 +99,16 @@ SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_global_entries, CTLFLAG_RD,
&tcp_log_zone, "Current number of events maintained for all TCP sessions");
SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_limit, CTLFLAG_RW,
&tcp_log_bucket_zone, "Maximum number of log IDs");
&tcp_log_id_bucket_zone, "Maximum number of log IDs");
SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_entries, CTLFLAG_RD,
&tcp_log_bucket_zone, "Current number of log IDs");
&tcp_log_id_bucket_zone, "Current number of log IDs");
SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_limit, CTLFLAG_RW,
&tcp_log_node_zone, "Maximum number of tcpcbs with log IDs");
&tcp_log_id_node_zone, "Maximum number of tcpcbs with log IDs");
SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_entries, CTLFLAG_RD,
&tcp_log_node_zone, "Current number of tcpcbs with log IDs");
&tcp_log_id_node_zone, "Current number of tcpcbs with log IDs");
SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, &tcp_log_version,
0, "Version of log formats exported");
@ -360,7 +360,7 @@ tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb)
}
TCPID_BUCKET_LOCK_DESTROY(tlb);
counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1);
uma_zfree(tcp_log_bucket_zone, tlb);
uma_zfree(tcp_log_id_bucket_zone, tlb);
}
/*
@ -669,7 +669,7 @@ tcp_log_set_id(struct tcpcb *tp, char *id)
* will unlock the bucket.
*/
if (tln != NULL)
uma_zfree(tcp_log_node_zone, tln);
uma_zfree(tcp_log_id_node_zone, tln);
tln = tp->t_lin;
tlb = NULL;
bucket_locked = false;
@ -702,7 +702,8 @@ tcp_log_set_id(struct tcpcb *tp, char *id)
if (*id) {
/* Get a new tln, if we don't already have one to reuse. */
if (tln == NULL) {
tln = uma_zalloc(tcp_log_node_zone, M_NOWAIT | M_ZERO);
tln = uma_zalloc(tcp_log_id_node_zone,
M_NOWAIT | M_ZERO);
if (tln == NULL) {
rv = ENOBUFS;
goto done;
@ -756,7 +757,7 @@ tcp_log_set_id(struct tcpcb *tp, char *id)
/* If we need to add a new bucket, do it now. */
if (tmp_tlb == NULL) {
/* Allocate new bucket. */
tlb = uma_zalloc(tcp_log_bucket_zone, M_NOWAIT);
tlb = uma_zalloc(tcp_log_id_bucket_zone, M_NOWAIT);
if (tlb == NULL) {
rv = ENOBUFS;
goto done_noinp;
@ -803,7 +804,7 @@ tcp_log_set_id(struct tcpcb *tp, char *id)
#define FREE_NEW_TLB() do { \
TCPID_BUCKET_LOCK_DESTROY(tlb); \
uma_zfree(tcp_log_bucket_zone, tlb); \
uma_zfree(tcp_log_id_bucket_zone, tlb); \
counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); \
counter_u64_add(tcp_log_pcb_ids_tot, (int64_t)-1); \
bucket_locked = false; \
@ -906,7 +907,7 @@ tcp_log_set_id(struct tcpcb *tp, char *id)
} else
TCPID_TREE_UNLOCK_ASSERT();
if (tln != NULL)
uma_zfree(tcp_log_node_zone, tln);
uma_zfree(tcp_log_id_node_zone, tln);
return (rv);
}
@ -1146,10 +1147,10 @@ tcp_log_init(void)
#endif
NULL, UMA_ALIGN_PTR, 0);
(void)uma_zone_set_max(tcp_log_zone, TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT);
tcp_log_bucket_zone = uma_zcreate("tcp_log_bucket",
tcp_log_id_bucket_zone = uma_zcreate("tcp_log_id_bucket",
sizeof(struct tcp_log_id_bucket), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
tcp_log_node_zone = uma_zcreate("tcp_log_node",
tcp_log_id_node_zone = uma_zcreate("tcp_log_id_node",
sizeof(struct tcp_log_id_node), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
#ifdef TCPLOG_DEBUG_COUNTERS
@ -1258,7 +1259,7 @@ tcp_log_expire(void *unused __unused)
tcp_log_free_entries(&tln->tln_entries, &tln->tln_count);
/* Free the node. */
uma_zfree(tcp_log_node_zone, tln);
uma_zfree(tcp_log_id_node_zone, tln);
/* Relock the expiry queue. */
TCPLOG_EXPIREQ_LOCK();
@ -2441,7 +2442,7 @@ tcp_log_dumpbucketlogs(struct tcp_log_id_bucket *tlb, char *reason)
*/
tcp_log_free_entries(&cur_tln->tln_entries,
&cur_tln->tln_count);
uma_zfree(tcp_log_node_zone, cur_tln);
uma_zfree(tcp_log_id_node_zone, cur_tln);
goto done;
}
@ -2463,7 +2464,7 @@ tcp_log_dumpbucketlogs(struct tcp_log_id_bucket *tlb, char *reason)
}
/* No matter what, we are done with the node now. */
uma_zfree(tcp_log_node_zone, cur_tln);
uma_zfree(tcp_log_id_node_zone, cur_tln);
/*
* Because we removed this entry from the list, prev_tln

View File

@ -498,6 +498,7 @@ oid_print(const struct oid *o, struct oidname *on, bool print_description,
struct oidvalue ov;
struct oiddescription od;
char metric[BUFSIZ];
bool has_desc;
if (!oid_get_format(o, &of) || !oid_get_value(o, &of, &ov))
return;
@ -511,14 +512,20 @@ oid_print(const struct oid *o, struct oidname *on, bool print_description,
if (include && regexec(&inc_regex, metric, 0, NULL, 0) != 0)
return;
has_desc = oid_get_description(o, &od);
/*
* Skip metrics with "(LEGACY)" in the description. That marker is used by
* several redundant ZFS sysctls whose names alias with the non-legacy
* versions once "." is replaced with "_".
*/
if (has_desc && strnstr(od.description, "(LEGACY)", BUFSIZ) != NULL)
return;
/*
* Print the line with the description. Prometheus expects a
* single unique description for every metric, which cannot be
* guaranteed by sysctl if labels are present. Omit the
* description if labels are present.
*/
if (print_description && !oidname_has_labels(on) &&
oid_get_description(o, &od)) {
if (print_description && !oidname_has_labels(on) && has_desc) {
fprintf(fp, "# HELP ");
fprintf(fp, "%s", metric);
fputc(' ', fp);