From f4d3aa74908496f1f5815caca94ebd86944b17cb Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Wed, 30 Nov 2022 12:15:23 +0000 Subject: [PATCH] netlink: suppress sending NLMSG_ERROR if NLMSG_DONE is already sent Netlink has a confirmation/error reporting mechanism for the sent messages. Kernel explicitly acks each messages if requested (NLM_F_ACK) or if message processing results in an error. Similarly, for multipart messages - typically dumps, where each message represents a single object like an interface or a route - another message, NLMSG_DONE is used to indicate the end of dump and the resulting status. As a result, successfull dump ends with both NLMSG_DONE and NLMSG_ERROR messages. RFC 3549 does not say anything specific about such case. Linux adopted an optimisation which suppresses NLMSG_ERROR message when NLMSG_DONE is already sent. Certain libraries/applications like libnl depends on such behavior. Suppress sending NLMSG_ERROR if NLMSG_DONE is already sent, by setting newly-added 'suppress_ack' flag in the writer and checking this flag when generating ack. This change restores libnl compatibility. Before: ``` ~ nl-link-list Error: Unable to allocate link cache: Message sequence number mismatch ```` After: ``` ~ nl-link-list vtnet0 ether 52:54:00:14:e3:19 lo0 ieee1394 ``` Reviewed by: bapt,pauamma Tested by: bapt Differential Revision: https://reviews.freebsd.org/D37565 --- share/man/man4/netlink.4 | 9 +++++++-- sys/netlink/netlink_io.c | 13 ++++++++----- sys/netlink/netlink_message_writer.c | 4 ++++ sys/netlink/netlink_message_writer.h | 1 + 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/share/man/man4/netlink.4 b/share/man/man4/netlink.4 index c75366f560f0..bbfa55049e2e 100644 --- a/share/man/man4/netlink.4 +++ b/share/man/man4/netlink.4 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 1, 2022 +.Dd November 30, 2022 .Dt NETLINK 4 .Os .Sh NAME @@ -212,11 +212,16 @@ If the socket option is not set, the remainder of the original message will follow. If the .Dv NETLINK_EXT_ACK -socket option is set, kernel may add a +socket option is set, the kernel may add a .Dv NLMSGERR_ATTR_MSG string TLV with the textual error description, optionally followed by the .Dv NLMSGERR_ATTR_OFFS TLV, indicating the offset from the message start that triggered an error. +If the operation reply is a multipart message, then no +.Dv NLMSG_ERROR +reply is generated, only a +.Dv NLMSG_DONE +message, closing multipart sequence. .Pp .Dv NLMSG_DONE indicates the end of the message group: typically, the end of the dump. diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c index b2a0023a143b..fb8006f689e4 100644 --- a/sys/netlink/netlink_io.c +++ b/sys/netlink/netlink_io.c @@ -405,8 +405,9 @@ nl_receive_message(struct nlmsghdr *hdr, int remaining_length, nl_handler_f handler = nl_handlers[nlp->nl_proto].cb; int error = 0; - NL_LOG(LOG_DEBUG2, "msg len: %d type: %d", hdr->nlmsg_len, - hdr->nlmsg_type); + NLP_LOG(LOG_DEBUG2, nlp, "msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", + hdr->nlmsg_len, hdr->nlmsg_type, hdr->nlmsg_flags, hdr->nlmsg_seq, + hdr->nlmsg_pid); if (__predict_false(hdr->nlmsg_len > remaining_length)) { NLP_LOG(LOG_DEBUG, nlp, "message is not entirely present: want %d got %d", @@ -439,9 +440,10 @@ nl_receive_message(struct nlmsghdr *hdr, int remaining_length, NL_LOG(LOG_DEBUG2, "retcode: %d", error); } if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) { - NL_LOG(LOG_DEBUG3, "ack"); - nlmsg_ack(nlp, error, hdr, npt); - NL_LOG(LOG_DEBUG3, "done"); + if (!npt->nw->suppress_ack) { + NL_LOG(LOG_DEBUG3, "ack"); + nlmsg_ack(nlp, error, hdr, npt); + } } return (0); @@ -455,6 +457,7 @@ npt_clear(struct nl_pstate *npt) npt->err_msg = NULL; npt->err_off = 0; npt->hdr = NULL; + npt->nw->suppress_ack = false; } /* diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c index 1856f2859b01..37414703c6f6 100644 --- a/sys/netlink/netlink_message_writer.c +++ b/sys/netlink/netlink_message_writer.c @@ -600,6 +600,9 @@ nlmsg_end(struct nl_writer *nw) } nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr); + NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", + nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags, + nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid); nw->hdr = NULL; nw->num_messages++; return (true); @@ -681,6 +684,7 @@ nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) nw->offset, perror); *perror = error; nlmsg_end(nw); + nw->suppress_ack = true; return (true); } diff --git a/sys/netlink/netlink_message_writer.h b/sys/netlink/netlink_message_writer.h index 424983282e59..99f50fb94213 100644 --- a/sys/netlink/netlink_message_writer.h +++ b/sys/netlink/netlink_message_writer.h @@ -55,6 +55,7 @@ struct nl_writer { uint8_t writer_target; /* NS_WRITER_TARGET_* */ bool ignore_limit; /* If true, ignores RCVBUF limit */ bool enomem; /* True if ENOMEM occured */ + bool suppress_ack; /* If true, don't send NLMSG_ERR */ }; #define NS_WRITER_TARGET_SOCKET 0 #define NS_WRITER_TARGET_GROUP 1