netlink: allow netlink sockets in non-vnet jails.

This change allows opening Netlink sockets in non-vnet jails, even for
 unprivileged processes.
The security model largely follows the existing one. To be more specific:
* by default, every `NETLINK_ROUTE` command is **NOT** allowed in non-VNET
 jail UNLESS `RTNL_F_ALLOW_NONVNET_JAIL` flag is specified in the command
 handler.
* All notifications are **disabled** for non-vnet jails (requests to
 subscribe to notifications are ignored). This will change to a more
 fine-grained model once the first netlink provider requiring this gets
 committed.
* Listing interfaces (RTM_GETLINK) is **allowed** w/o limits (**including**
 interfaces w/o any addresses attached to the jail). The value of this is
 questionable, but it follows the existing approach.
* Listing ARP/NDP neighbours is **forbidden**. This is a **change** from the
 current approach - currently we list static ARP/ND entries belonging to the
 addresses attached to the jail.
* Listing interface addresses is **allowed**, but the addresses are filtered
 to match only ones attached to the jail.
* Listing routes is **allowed**, but the routes are filtered to provide only
 host routes matching the addresses attached to the jail.
* By default, every `NETLINK_GENERIC` command is **allowed** in non-VNET jail
 (as sub-families may be unrelated to network at all).
 It is the responsibility of the family author to implement restrictions if
 necessary.

Differential Revision: https://reviews.freebsd.org/D39206
MFC after:	1 month
This commit is contained in:
Alexander V. Chernikov 2023-03-26 08:42:51 +00:00 committed by Oscar Zhao
parent 8f031528db
commit 6888b50fdd
9 changed files with 36 additions and 2 deletions

View File

@ -3440,6 +3440,7 @@ prison_check_af(struct ucred *cred, int af)
#endif
case AF_LOCAL:
case AF_ROUTE:
case AF_NETLINK:
break;
default:
if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF))

View File

@ -81,6 +81,7 @@ bool netlink_unregister_proto(int proto);
bool nl_has_listeners(int netlink_family, uint32_t groups_mask);
bool nlp_has_priv(struct nlpcb *nlp, int priv);
struct ucred *nlp_get_cred(struct nlpcb *nlp);
bool nlp_unconstrained_vnet(const struct nlpcb *nlp);
/* netlink_generic.c */
struct genl_cmd {

View File

@ -36,6 +36,7 @@
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/domain.h>
#include <sys/jail.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/proc.h>
@ -111,6 +112,10 @@ nl_add_group_locked(struct nlpcb *nlp, unsigned int group_id)
MPASS(group_id <= NLP_MAX_GROUPS);
--group_id;
/* TODO: add family handler callback */
if (!nlp_unconstrained_vnet(nlp))
return;
nlp->nl_groups[group_id / 64] |= (uint64_t)1 << (group_id % 64);
}
@ -212,6 +217,12 @@ nlp_has_priv(struct nlpcb *nlp, int priv)
return (priv_check_cred(nlp->nl_cred, priv) == 0);
}
/*
 * Reports whether the netlink socket behind @nlp is unconstrained with
 * respect to VNET, i.e. it returns the nl_unconstrained_vnet flag stored
 * in the control block.  The flag is filled in at attach time from
 * !jailed_without_vnet(so->so_cred), so it is false only for sockets
 * created inside a non-VNET jail.
 */
bool
nlp_unconstrained_vnet(const struct nlpcb *nlp)
{
	const bool unconstrained = nlp->nl_unconstrained_vnet;

	return (unconstrained);
}
struct ucred *
nlp_get_cred(struct nlpcb *nlp)
{
@ -308,6 +319,7 @@ nl_pru_attach(struct socket *so, int proto, struct thread *td)
nlp->nl_process_id = curproc->p_pid;
nlp->nl_linux = is_linux;
nlp->nl_active = true;
nlp->nl_unconstrained_vnet = !jailed_without_vnet(so->so_cred);
NLP_LOCK_INIT(nlp);
refcount_init(&nlp->nl_refcount, 1);
nl_init_io(nlp);

View File

@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/priv.h>

View File

@ -93,6 +93,11 @@ rtnl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt)
} else if (cmd->priv != 0)
NLP_LOG(LOG_DEBUG3, nlp, "priv %d check passed for msg %s", cmd->priv, cmd->name);
if (!nlp_unconstrained_vnet(nlp) && (cmd->flags & RTNL_F_ALLOW_NONVNET_JAIL) == 0) {
NLP_LOG(LOG_DEBUG2, nlp, "jail check failed for msg %s", cmd->name);
return (EPERM);
}
bool need_epoch = !(cmd->flags & RTNL_F_NOEPOCH);
if (need_epoch)

View File

@ -61,6 +61,7 @@ struct nlpcb {
bool nl_task_pending;
bool nl_tx_blocked; /* No new requests accepted */
bool nl_linux; /* true if running under compat */
bool nl_unconstrained_vnet; /* true if running under VNET jail (or without jail) */
struct nl_io_queue rx_queue;
struct nl_io_queue tx_queue;
struct taskqueue *nl_taskqueue;

View File

@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@ -61,6 +62,7 @@ struct netlink_walkargs {
struct nl_writer *nw;
struct nlmsghdr hdr;
struct nlpcb *so;
struct ucred *cred;
uint32_t fibnum;
int family;
int error;
@ -833,6 +835,8 @@ dump_iface_addrs(struct netlink_walkargs *wa, struct ifnet *ifp)
continue;
if (ifa->ifa_addr->sa_family == AF_LINK)
continue;
if (prison_if(wa->cred, ifa->ifa_addr) != 0)
continue;
wa->count++;
if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr))
return (ENOMEM);
@ -856,6 +860,7 @@ rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *n
struct netlink_walkargs wa = {
.so = nlp,
.nw = npt->nw,
.cred = nlp_get_cred(nlp),
.family = attrs.ifa_family,
.hdr.nlmsg_pid = hdr->nlmsg_pid,
.hdr.nlmsg_seq = hdr->nlmsg_seq,
@ -977,7 +982,7 @@ static const struct rtnl_cmd_handler cmd_handlers[] = {
.cmd = NL_RTM_GETLINK,
.name = "RTM_GETLINK",
.cb = &rtnl_handle_getlink,
.flags = RTNL_F_NOEPOCH,
.flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL,
},
{
.cmd = NL_RTM_DELLINK,
@ -997,6 +1002,7 @@ static const struct rtnl_cmd_handler cmd_handlers[] = {
.cmd = NL_RTM_GETADDR,
.name = "RTM_GETADDR",
.cb = &rtnl_handle_getaddr,
.flags = RTNL_F_ALLOW_NONVNET_JAIL,
},
{
.cmd = NL_RTM_NEWADDR,

View File

@ -48,7 +48,8 @@ struct rtnl_cmd_handler {
int flags;
};
#define RTNL_F_NOEPOCH 0x01
#define RTNL_F_NOEPOCH 0x01 /* Do not enter epoch when handling command */
#define RTNL_F_ALLOW_NONVNET_JAIL 0x02 /* Allow command execution inside non-VNET jail */
bool rtnl_register_messages(const struct rtnl_cmd_handler *handlers, int count);

View File

@ -513,6 +513,8 @@ dump_rtentry(struct rtentry *rt, void *_arg)
wa->count++;
if (wa->error != 0)
return (0);
if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp)))
return (0);
wa->dumped++;
rt_get_rnd(rt, &wa->rnd);
@ -606,6 +608,9 @@ handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs,
RIB_RUNLOCK(rnh);
if (!rt_is_exportable(rt, nlp_get_cred(nlp)))
return (ESRCH);
IF_DEBUG_LEVEL(LOG_DEBUG2) {
char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused;
FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s",
@ -1026,6 +1031,7 @@ static const struct rtnl_cmd_handler cmd_handlers[] = {
.cmd = NL_RTM_GETROUTE,
.name = "RTM_GETROUTE",
.cb = &rtnl_handle_getroute,
.flags = RTNL_F_ALLOW_NONVNET_JAIL,
},
{
.cmd = NL_RTM_DELROUTE,