netlink: suppress sending NLMSG_ERROR if NLMSG_DONE is already sent

Netlink has a confirmation/error reporting mechanism for the sent
messages. Kernel explicitly acks each messages if requested (NLM_F_ACK)
 or if message processing results in an error.
Similarly, for multipart messages - typically dumps, where each message
 represents a single object like an interface or a route - another
 message, NLMSG_DONE is used to indicate the end of dump and the
 resulting status.
As a result, successfull dump ends with both NLMSG_DONE and NLMSG_ERROR
 messages.
RFC 3549 does not say anything specific about such case.
Linux adopted an optimisation which suppresses NLMSG_ERROR message
 when NLMSG_DONE is already sent. Certain libraries/applications like
 libnl depends on such behavior.

Suppress sending NLMSG_ERROR if NLMSG_DONE is already sent, by
 setting newly-added 'suppress_ack' flag in the writer and checking
 this flag when generating ack.

This change restores libnl compatibility.

Before:
```
~ nl-link-list
Error: Unable to allocate link cache: Message sequence number mismatch
````

After:
```
~ nl-link-list
vtnet0 ether 52:54:00:14:e3:19 <broadcast,multicast,up,running>
lo0 ieee1394 <loopback,multicast,up,running>
```

Reviewed by:	bapt,pauamma
Tested by:	bapt
Differential Revision: https://reviews.freebsd.org/D37565
This commit is contained in:
Alexander V. Chernikov 2022-11-30 12:15:23 +00:00
parent 821549a9df
commit f4d3aa7490
4 changed files with 20 additions and 7 deletions

View File

@ -24,7 +24,7 @@
.\" .\"
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
.Dd November 1, 2022 .Dd November 30, 2022
.Dt NETLINK 4 .Dt NETLINK 4
.Os .Os
.Sh NAME .Sh NAME
@ -212,11 +212,16 @@ If the
socket option is not set, the remainder of the original message will follow. socket option is not set, the remainder of the original message will follow.
If the If the
.Dv NETLINK_EXT_ACK .Dv NETLINK_EXT_ACK
socket option is set, kernel may add a socket option is set, the kernel may add a
.Dv NLMSGERR_ATTR_MSG .Dv NLMSGERR_ATTR_MSG
string TLV with the textual error description, optionally followed by the string TLV with the textual error description, optionally followed by the
.Dv NLMSGERR_ATTR_OFFS .Dv NLMSGERR_ATTR_OFFS
TLV, indicating the offset from the message start that triggered an error. TLV, indicating the offset from the message start that triggered an error.
If the operation reply is a multipart message, then no
.Dv NLMSG_ERROR
reply is generated, only a
.Dv NLMSG_DONE
message, closing multipart sequence.
.Pp .Pp
.Dv NLMSG_DONE .Dv NLMSG_DONE
indicates the end of the message group: typically, the end of the dump. indicates the end of the message group: typically, the end of the dump.

View File

@ -405,8 +405,9 @@ nl_receive_message(struct nlmsghdr *hdr, int remaining_length,
nl_handler_f handler = nl_handlers[nlp->nl_proto].cb; nl_handler_f handler = nl_handlers[nlp->nl_proto].cb;
int error = 0; int error = 0;
NL_LOG(LOG_DEBUG2, "msg len: %d type: %d", hdr->nlmsg_len, NLP_LOG(LOG_DEBUG2, nlp, "msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
hdr->nlmsg_type); hdr->nlmsg_len, hdr->nlmsg_type, hdr->nlmsg_flags, hdr->nlmsg_seq,
hdr->nlmsg_pid);
if (__predict_false(hdr->nlmsg_len > remaining_length)) { if (__predict_false(hdr->nlmsg_len > remaining_length)) {
NLP_LOG(LOG_DEBUG, nlp, "message is not entirely present: want %d got %d", NLP_LOG(LOG_DEBUG, nlp, "message is not entirely present: want %d got %d",
@ -439,9 +440,10 @@ nl_receive_message(struct nlmsghdr *hdr, int remaining_length,
NL_LOG(LOG_DEBUG2, "retcode: %d", error); NL_LOG(LOG_DEBUG2, "retcode: %d", error);
} }
if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) { if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) {
NL_LOG(LOG_DEBUG3, "ack"); if (!npt->nw->suppress_ack) {
nlmsg_ack(nlp, error, hdr, npt); NL_LOG(LOG_DEBUG3, "ack");
NL_LOG(LOG_DEBUG3, "done"); nlmsg_ack(nlp, error, hdr, npt);
}
} }
return (0); return (0);
@ -455,6 +457,7 @@ npt_clear(struct nl_pstate *npt)
npt->err_msg = NULL; npt->err_msg = NULL;
npt->err_off = 0; npt->err_off = 0;
npt->hdr = NULL; npt->hdr = NULL;
npt->nw->suppress_ack = false;
} }
/* /*

View File

@ -600,6 +600,9 @@ nlmsg_end(struct nl_writer *nw)
} }
nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr); nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
nw->hdr = NULL; nw->hdr = NULL;
nw->num_messages++; nw->num_messages++;
return (true); return (true);
@ -681,6 +684,7 @@ nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
nw->offset, perror); nw->offset, perror);
*perror = error; *perror = error;
nlmsg_end(nw); nlmsg_end(nw);
nw->suppress_ack = true;
return (true); return (true);
} }

View File

@ -55,6 +55,7 @@ struct nl_writer {
uint8_t writer_target; /* NS_WRITER_TARGET_* */ uint8_t writer_target; /* NS_WRITER_TARGET_* */
bool ignore_limit; /* If true, ignores RCVBUF limit */ bool ignore_limit; /* If true, ignores RCVBUF limit */
bool enomem; /* True if ENOMEM occured */ bool enomem; /* True if ENOMEM occured */
bool suppress_ack; /* If true, don't send NLMSG_ERR */
}; };
#define NS_WRITER_TARGET_SOCKET 0 #define NS_WRITER_TARGET_SOCKET 0
#define NS_WRITER_TARGET_GROUP 1 #define NS_WRITER_TARGET_GROUP 1