Permit building kernels with options VIMAGE, restricted to only a single

active network stack instance.  Turning on options VIMAGE at compile
time yields the following changes relative to default kernel build:

1) V_ accessor macros for virtualized variables resolve to structure
fields via base pointers, instead of being resolved as fields in global
structs or plain global variables.  As an example, V_ifnet becomes:

    options VIMAGE:          ((struct vnet_net *) vnet_net)->_ifnet
    default build:           vnet_net_0._ifnet
    options VIMAGE_GLOBALS:  ifnet

2) INIT_VNET_* macros will declare and set up base pointers to be used
by V_ accessor macros, instead of resolving to whitespace:

    INIT_VNET_NET(ifp->if_vnet); becomes

    struct vnet_net *vnet_net = (ifp->if_vnet)->mod_data[VNET_MOD_NET];

3) Memory for vnet modules registered via vnet_mod_register() is now
allocated at run time in sys/kern/kern_vimage.c, instead of per vnet
module structs being declared as globals.  If required, vnet modules
can now request the framework to provide them with allocated bzeroed
memory by filling in the vmi_size field in their vmi_modinfo structures.

4) structs socket, ifnet, inpcbinfo, tcpcb and syncache_head are
extended to hold a pointer to the parent vnet.  options VIMAGE builds
will fill in those fields as required.

5) curvnet is introduced as a new global variable in options VIMAGE
builds, always pointing to the default and only struct vnet.

6) struct sysctl_oid has been extended with two additional fields to
store major and minor virtualization module identifiers, oid_v_subs and
oid_v_mod.  SYSCTL_V_* family of macros will fill in those fields
accordingly, and store the offset in the appropriate vnet container
struct in oid_arg1.
In sysctl handlers dealing with virtualized sysctls, the
SYSCTL_RESOLVE_V_ARG1() macro will compute the address of the target
variable and make it available in the arg1 variable for further processing.

Unused fields in structs vnet_inet, vnet_inet6 and vnet_ipfw have
been deleted.

Reviewed by:	bz, rwatson
Approved by:	julian (mentor)
This commit is contained in:
zec 2009-04-30 13:36:26 +00:00
parent e030268f0d
commit 39b6dc8ba2
35 changed files with 319 additions and 63 deletions

View File

@ -22,6 +22,14 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW:
to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
20090430:
The layout of the following structs has changed: sysctl_oid,
socket, ifnet, inpcbinfo, tcpcb, syncache_head, vnet_inet,
vnet_inet6 and vnet_ipfw. Most modules need to be rebuilt or
panics may be experienced. World rebuild is required for
correctly checking networking state from userland.
Bump __FreeBSD_version to 800085.
20090429:
MLDv2 and Source-Specific Multicast (SSM) have been merged
to the IPv6 stack. VIMAGE hooks are in but not yet used.

View File

@ -208,9 +208,8 @@ static char machine_arch[] = MACHINE_ARCH;
SYSCTL_STRING(_hw, HW_MACHINE_ARCH, machine_arch, CTLFLAG_RD,
machine_arch, 0, "System architecture");
#ifndef VIMAGE
/* should become #ifndef VIMAGE */
char hostname[MAXHOSTNAMELEN];
#endif
/*
* This mutex is used to protect the hostname and domainname variables, and
@ -349,9 +348,8 @@ SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW,
0, 0, sysctl_kern_config, "", "Kernel configuration file");
#endif
#ifndef VIMAGE
/* should become #ifndef VIMAGE */
char domainname[MAXHOSTNAMELEN]; /* Protected by hostname_mtx. */
#endif
static int
sysctl_domainname(SYSCTL_HANDLER_ARGS)

View File

@ -934,6 +934,30 @@ sysctl_handle_int(SYSCTL_HANDLER_ARGS)
return (error);
}
#ifdef VIMAGE
int
sysctl_handle_v_int(SYSCTL_HANDLER_ARGS)
{
int tmpout, error = 0;
SYSCTL_RESOLVE_V_ARG1();
/*
* Attempt to get a coherent snapshot by making a copy of the data.
*/
tmpout = *(int *)arg1;
error = SYSCTL_OUT(req, &tmpout, sizeof(int));
if (error || !req->newptr)
return (error);
if (!arg1)
error = EPERM;
else
error = SYSCTL_IN(req, arg1, sizeof(int));
return (error);
}
#endif
/*
* Based on sysctl_handle_int(), convert milliseconds into ticks.
@ -944,7 +968,9 @@ sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
{
int error, s, tt;
tt = *(int *)oidp->oid_arg1;
SYSCTL_RESOLVE_V_ARG1();
tt = *(int *)arg1;
s = (int)((int64_t)tt * 1000 / hz);
error = sysctl_handle_int(oidp, &s, 0, req);
@ -955,7 +981,7 @@ sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
if (tt < 1)
return (EINVAL);
*(int *)oidp->oid_arg1 = tt;
*(int *)arg1 = tt;
return (0);
}
@ -1069,6 +1095,47 @@ sysctl_handle_string(SYSCTL_HANDLER_ARGS)
return (error);
}
#ifdef VIMAGE
int
sysctl_handle_v_string(SYSCTL_HANDLER_ARGS)
{
int error=0;
char *tmparg;
size_t outlen;
SYSCTL_RESOLVE_V_ARG1();
/*
* Attempt to get a coherent snapshot by copying to a
* temporary kernel buffer.
*/
retry:
outlen = strlen((char *)arg1)+1;
tmparg = malloc(outlen, M_SYSCTLTMP, M_WAITOK);
if (strlcpy(tmparg, (char *)arg1, outlen) >= outlen) {
free(tmparg, M_SYSCTLTMP);
goto retry;
}
error = SYSCTL_OUT(req, tmparg, outlen);
free(tmparg, M_SYSCTLTMP);
if (error || !req->newptr)
return (error);
if ((req->newlen - req->newidx) >= arg2) {
error = EINVAL;
} else {
arg2 = (req->newlen - req->newidx);
error = SYSCTL_IN(req, arg1, arg2);
((char *)arg1)[arg2] = '\0';
}
return (error);
}
#endif
/*
* Handle any kind of opaque data.
* arg1 points to it, arg2 is the size.
@ -1106,6 +1173,35 @@ sysctl_handle_opaque(SYSCTL_HANDLER_ARGS)
return (error);
}
#ifdef VIMAGE
int
sysctl_handle_v_opaque(SYSCTL_HANDLER_ARGS)
{
int error, tries;
u_int generation;
struct sysctl_req req2;
SYSCTL_RESOLVE_V_ARG1();
tries = 0;
req2 = *req;
retry:
generation = curthread->td_generation;
error = SYSCTL_OUT(req, arg1, arg2);
if (error)
return (error);
tries++;
if (generation != curthread->td_generation && tries < 3) {
*req = req2;
goto retry;
}
error = SYSCTL_IN(req, arg1, arg2);
return (error);
}
#endif
/*
* Transfer functions to/from kernel space.
* XXX: rather untested at this point

View File

@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#ifndef VIMAGE_GLOBALS
MALLOC_DEFINE(M_VIMAGE, "vimage", "vimage resource container");
MALLOC_DEFINE(M_VNET, "vnet", "network stack control block");
static TAILQ_HEAD(vnet_modlink_head, vnet_modlink) vnet_modlink_head;
static TAILQ_HEAD(vnet_modpending_head, vnet_modlink) vnet_modpending_head;
@ -49,6 +50,12 @@ static void vnet_mod_complete_registration(struct vnet_modlink *);
static int vnet_mod_constructor(struct vnet_modlink *);
static int vnet_mod_destructor(struct vnet_modlink *);
#ifdef VIMAGE
/* curvnet should be thread-local - this is only a temporary step. */
struct vnet *curvnet;
struct vnet_list_head vnet_head;
#endif
void
vnet_mod_register(const struct vnet_modinfo *vmi)
{
@ -263,7 +270,14 @@ vi_symlookup(struct kld_sym_lookup *lookup, char *symstr)
for (mapentry = vml->vml_modinfo->vmi_symmap;
mapentry->name != NULL; mapentry++) {
if (strcmp(symstr, mapentry->name) == 0) {
lookup->symvalue = (u_long) mapentry->base;
#ifdef VIMAGE
lookup->symvalue =
(u_long) curvnet->mod_data[
vml->vml_modinfo->vmi_id];
lookup->symvalue += mapentry->offset;
#else
lookup->symvalue = (u_long) mapentry->offset;
#endif
lookup->symsize = mapentry->size;
return (0);
}
@ -275,9 +289,23 @@ vi_symlookup(struct kld_sym_lookup *lookup, char *symstr)
/*
 * Initialize the vimage framework state: the registered and pending vnet
 * module lists and, in options VIMAGE builds, the list of vnets together
 * with the single default vnet, which becomes curvnet.
 *
 * The argument is unused.  Panics if the default vnet cannot be allocated.
 */
static void
vi_init(void *unused)
{
#ifdef VIMAGE
	struct vnet *vnet;
#endif

	TAILQ_INIT(&vnet_modlink_head);
	TAILQ_INIT(&vnet_modpending_head);

#ifdef VIMAGE
	LIST_INIT(&vnet_head);

	vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO);
	if (vnet == NULL) {
		/* Report the function that actually failed. */
		panic("%s: malloc failed", __func__);
	}
	LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le);

	/* The freshly inserted vnet is the only entry on the list. */
	curvnet = LIST_FIRST(&vnet_head);
#endif
}
static void

View File

@ -130,6 +130,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <sys/vimage.h>
#include <security/mac/mac_framework.h>
@ -284,6 +285,9 @@ soalloc(void)
mtx_lock(&so_global_mtx);
so->so_gencnt = ++so_gencnt;
++numopensockets;
#ifdef VIMAGE
so->so_vnet = curvnet;
#endif
mtx_unlock(&so_global_mtx);
return (so);
}

View File

@ -186,6 +186,7 @@ static struct vnet_symmap vnet_net_symmap[] = {
/*
 * vnet module descriptor for the "net" module.  A non-zero vmi_size asks
 * the vimage framework to provide zeroed storage for struct vnet_net.
 */
static const vnet_modinfo_t vnet_net_modinfo = {
.vmi_id = VNET_MOD_NET,
.vmi_name = "net",
.vmi_size = sizeof(struct vnet_net),
.vmi_symmap = vnet_net_symmap,
.vmi_iattach = vnet_net_iattach
};
@ -545,6 +546,7 @@ if_alloc(u_char type)
static void
if_free_internal(struct ifnet *ifp)
{
INIT_VNET_NET(ifp->if_vnet);
KASSERT((ifp->if_flags & IFF_DYING),
("if_free_internal: interface not dying"));
@ -582,7 +584,6 @@ if_free_internal(struct ifnet *ifp)
void
if_free_type(struct ifnet *ifp, u_char type)
{
INIT_VNET_NET(curvnet); /* ifp->if_vnet can be NULL here ! */
KASSERT(ifp->if_alloctype == type,
("if_free_type: type (%d) != alloctype (%d)", type,
@ -673,6 +674,10 @@ if_attach(struct ifnet *ifp)
panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
ifp->if_xname);
#ifdef VIMAGE
ifp->if_vnet = curvnet;
#endif
if_addgroup(ifp, IFG_ALL);
getmicrotime(&ifp->if_lastchange);
@ -978,6 +983,9 @@ if_detach(struct ifnet *ifp)
}
IF_AFDATA_UNLOCK(ifp);
ifq_detach(&ifp->if_snd);
#ifdef VIMAGE
ifp->if_vnet = NULL;
#endif
splx(s);
}

View File

@ -127,6 +127,7 @@ static int vnet_gif_iattach(const void *);
/*
 * vnet module descriptor for the "gif" module, which depends on the "net"
 * module.  A non-zero vmi_size asks the vimage framework to provide zeroed
 * storage for struct vnet_gif.
 */
static const vnet_modinfo_t vnet_gif_modinfo = {
.vmi_id = VNET_MOD_GIF,
.vmi_name = "gif",
.vmi_size = sizeof(struct vnet_gif),
.vmi_dependson = VNET_MOD_NET,
.vmi_iattach = vnet_gif_iattach
};
@ -303,7 +304,9 @@ gifmodevent(mod, type, data)
if_clone_detach(&gif_cloner);
mtx_destroy(&gif_mtx);
#ifdef INET6
#ifndef VIMAGE
V_ip6_gif_hlim = 0; /* XXX -> vnet_gif_idetach() */
#endif
#endif
break;
default:

View File

@ -77,7 +77,6 @@ SYSCTL_V_INT(V_NET, vnet_net, _net_link_generic_system, IFMIB_IFCOUNT,
static int
sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
{
INIT_VNET_NET(curvnet);
int *name = (int *)arg1;
int error;
u_int namelen = arg2;

View File

@ -117,6 +117,7 @@ struct ifqueue {
struct ifnet {
void *if_softc; /* pointer to driver state */
void *if_l2com; /* pointer to protocol bits */
struct vnet *if_vnet; /* pointer to network stack instance */
TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
char if_xname[IFNAMSIZ]; /* external name (name + unit) */
const char *if_dname; /* driver name */

View File

@ -126,7 +126,9 @@ sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
INIT_VNET_INET(curvnet);
int error;
error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
SYSCTL_RESOLVE_V_ARG1();
error = sysctl_handle_int(oidp, arg1, arg2, req);
if (error == 0) {
RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);

View File

@ -224,6 +224,8 @@ struct inpcb {
#define in6p_icmp6filt inp_depend6.inp6_icmp6filt
#define in6p_cksum inp_depend6.inp6_cksum
#define inp_vnet inp_pcbinfo->ipi_vnet
/*
* The range of the generation count, as used in this implementation, is 9e19.
* We would have to create 300 billion connections per second for this number
@ -301,8 +303,12 @@ struct inpcbinfo {
struct rwlock ipi_lock;
/*
* vimage 1
* general use 1
* Pointer to network stack instance
*/
struct vnet *ipi_vnet;
/*
* general use 2
*/
void *ipi_pspare[2];
};

View File

@ -162,6 +162,9 @@ div_init(void)
INP_INFO_LOCK_INIT(&V_divcbinfo, "div");
LIST_INIT(&V_divcb);
V_divcbinfo.ipi_listhead = &V_divcb;
#ifdef VIMAGE
V_divcbinfo.ipi_vnet = curvnet;
#endif
/*
* XXX We don't use the hash list for divert IP, but it's easier
* to allocate a one entry hash list than it is to check all

View File

@ -695,7 +695,6 @@ struct vnet_ipfw {
int _fw_deny_unknown_exthdrs;
int _fw_verbose;
int _verbose_limit;
int _fw_debug; /* actually unused */
int _autoinc_step;
ipfw_dyn_rule **_ipfw_dyn_v;
uma_zone_t _ipfw_dyn_rule_zone;
@ -740,7 +739,6 @@ extern struct vnet_ipfw vnet_ipfw_0;
#define V_fw_deny_unknown_exthdrs VNET_IPFW(fw_deny_unknown_exthdrs)
#define V_fw_verbose VNET_IPFW(fw_verbose)
#define V_verbose_limit VNET_IPFW(verbose_limit)
#define V_fw_debug VNET_IPFW(fw_debug)
#define V_autoinc_step VNET_IPFW(autoinc_step)
#define V_ipfw_dyn_v VNET_IPFW(ipfw_dyn_v)
#define V_ipfw_dyn_rule_zone VNET_IPFW(ipfw_dyn_rule_zone)

View File

@ -95,6 +95,7 @@ int
ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
INIT_VNET_INET(curvnet);
struct ip_fw_args args;
struct ng_ipfw_tag *ng_tag;
struct m_tag *dn_tag;
@ -224,6 +225,7 @@ int
ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
INIT_VNET_INET(curvnet);
struct ip_fw_args args;
struct ng_ipfw_tag *ng_tag;
struct m_tag *dn_tag;

View File

@ -237,6 +237,7 @@ static void vnet_inet_register(void);
/*
 * vnet module descriptor for the "inet" module.  A non-zero vmi_size asks
 * the vimage framework to provide zeroed storage for struct vnet_inet.
 */
static const vnet_modinfo_t vnet_inet_modinfo = {
.vmi_id = VNET_MOD_INET,
.vmi_name = "inet",
.vmi_size = sizeof(struct vnet_inet)
};
static void vnet_inet_register()

View File

@ -187,6 +187,9 @@ rip_init(void)
INP_INFO_LOCK_INIT(&V_ripcbinfo, "rip");
LIST_INIT(&V_ripcb);
#ifdef VIMAGE
V_ripcbinfo.ipi_vnet = curvnet;
#endif
V_ripcbinfo.ipi_listhead = &V_ripcb;
V_ripcbinfo.ipi_hashbase =
hashinit(INP_PCBHASH_RAW_SIZE, M_PCB, &V_ripcbinfo.ipi_hashmask);

View File

@ -359,6 +359,9 @@ tcp_init(void)
INP_INFO_LOCK_INIT(&V_tcbinfo, "tcp");
LIST_INIT(&V_tcb);
#ifdef VIMAGE
V_tcbinfo.ipi_vnet = curvnet;
#endif
V_tcbinfo.ipi_listhead = &V_tcb;
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
@ -703,6 +706,9 @@ tcp_newtcpcb(struct inpcb *inp)
if (tm == NULL)
return (NULL);
tp = &tm->tcb;
#ifdef VIMAGE
tp->t_vnet = inp->inp_vnet;
#endif
tp->t_timers = &tm->tt;
/* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
tp->t_maxseg = tp->t_maxopd =

View File

@ -259,6 +259,9 @@ syncache_init(void)
/* Initialize the hash buckets. */
for (i = 0; i < V_tcp_syncache.hashsize; i++) {
#ifdef VIMAGE
V_tcp_syncache.hashbase[i].sch_vnet = curvnet;
#endif
TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
NULL, MTX_DEF);

View File

@ -96,6 +96,7 @@ struct syncache {
#define SYNCOOKIE_LIFETIME 16 /* seconds */
struct syncache_head {
struct vnet *sch_vnet;
struct mtx sch_mtx;
TAILQ_HEAD(sch_head, syncache) sch_bucket;
struct callout sch_timer;

View File

@ -35,6 +35,8 @@
#include <netinet/tcp.h>
struct vnet;
/*
* Kernel variables for tcp.
*/
@ -106,6 +108,8 @@ struct tcpcb {
int t_state; /* state of this connection */
u_int t_flags;
struct vnet *t_vnet; /* back pointer to parent vnet */
tcp_seq snd_una; /* send unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
* used to recognize retransmits
@ -186,8 +190,8 @@ struct tcpcb {
int t_rttlow; /* smallest observerved RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */
struct toe_usrreqs *t_tu; /* offload operations vector */
void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / 1 general use */
struct toe_usrreqs *t_tu; /* offload operations vector */
void *t_toe; /* TOE pcb pointer */
int t_bytes_acked; /* # bytes acked during current RTT */
};

View File

@ -179,6 +179,9 @@ udp_init(void)
INP_INFO_LOCK_INIT(&V_udbinfo, "udp");
LIST_INIT(&V_udb);
#ifdef VIMAGE
V_udbinfo.ipi_vnet = curvnet;
#endif
V_udbinfo.ipi_listhead = &V_udb;
V_udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB,
&V_udbinfo.ipi_hashmask);

View File

@ -54,7 +54,6 @@ struct vnet_inet {
struct in_ifaddrhashhead *_in_ifaddrhashtbl;
struct in_ifaddrhead _in_ifaddrhead;
u_long _in_ifaddrhmask;
struct in_multihead _in_multihead; /* XXX unused */
int _arpt_keep;
int _arp_maxtries;
@ -269,7 +268,6 @@ extern struct vnet_inet vnet_inet_0;
#define V_in_ifaddrhashtbl VNET_INET(in_ifaddrhashtbl)
#define V_in_ifaddrhead VNET_INET(in_ifaddrhead)
#define V_in_ifaddrhmask VNET_INET(in_ifaddrhmask)
#define V_in_multihead VNET_INET(in_multihead)
#define V_ip_checkinterface VNET_INET(ip_checkinterface)
#define V_ip_defttl VNET_INET(ip_defttl)
#define V_ip_do_randomid VNET_INET(ip_do_randomid)

View File

@ -888,8 +888,9 @@ in6_get_tmpifid(struct ifnet *ifp, u_int8_t *retbuf,
}
void
in6_tmpaddrtimer(void *ignored_arg)
in6_tmpaddrtimer(void *arg)
{
CURVNET_SET((struct vnet *) arg);
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct nd_ifinfo *ndi;
@ -898,7 +899,7 @@ in6_tmpaddrtimer(void *ignored_arg)
callout_reset(&V_in6_tmpaddrtimer_ch,
(V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, NULL);
V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, curvnet);
bzero(nullbuf, sizeof(nullbuf));
for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
@ -914,12 +915,12 @@ in6_tmpaddrtimer(void *ignored_arg)
}
}
CURVNET_RESTORE();
}
static void
in6_purgemaddrs(struct ifnet *ifp)
{
INIT_VNET_INET6(ifp->if_vnet);
LIST_HEAD(,in6_multi) purgeinms;
struct in6_multi *inm, *tinm;
struct ifmultiaddr *ifma;

View File

@ -1302,7 +1302,6 @@ static int
in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct group_source_req gsr;
sockunion_t *gsa, *ssa;
struct ifnet *ifp;
@ -1463,6 +1462,7 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
static struct ip6_moptions *
in6p_findmoptions(struct inpcb *inp)
{
INIT_VNET_INET6(curvnet);
struct ip6_moptions *imo;
struct in6_multi **immp;
struct in6_mfilter *imfp;
@ -1745,7 +1745,6 @@ static struct ifnet *
in6p_lookup_mcast_ifp(const struct inpcb *in6p __unused,
const struct sockaddr_in6 *gsin6)
{
INIT_VNET_INET6(curvnet);
struct route_in6 ro6;
struct ifnet *ifp;
@ -2032,7 +2031,6 @@ static int
in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET(curvnet);
struct group_source_req gsr;
sockunion_t *gsa, *ssa;
struct ifnet *ifp;
@ -2249,7 +2247,6 @@ static int
in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct ifnet *ifp;
struct ip6_moptions *imo;
u_int ifindex;
@ -2454,6 +2451,7 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
int
ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
{
INIT_VNET_INET6(curvnet);
struct ip6_moptions *im6o;
int error;

View File

@ -447,6 +447,8 @@ sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS)
int error = 0;
int old;
SYSCTL_RESOLVE_V_ARG1();
error = SYSCTL_OUT(req, arg1, sizeof(int));
if (error || !req->newptr)
return (error);
@ -467,6 +469,8 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
int error = 0;
int old;
SYSCTL_RESOLVE_V_ARG1();
error = SYSCTL_OUT(req, arg1, sizeof(int));
if (error || !req->newptr)
return (error);

View File

@ -161,6 +161,7 @@ static void vnet_inet6_register(void);
/*
 * vnet module descriptor for the "inet6" module, declared as depending on
 * the "inet" module.  A non-zero vmi_size asks the vimage framework to
 * provide zeroed storage for struct vnet_inet6.
 */
static const vnet_modinfo_t vnet_inet6_modinfo = {
.vmi_id = VNET_MOD_INET6,
.vmi_name = "inet6",
.vmi_size = sizeof(struct vnet_inet6),
.vmi_dependson = VNET_MOD_INET /* XXX revisit - TCP/UDP needs this? */
};
@ -307,14 +308,14 @@ ip6_init2_vnet(const void *unused __unused)
/* nd6_timer_init */
callout_init(&V_nd6_timer_ch, 0);
callout_reset(&V_nd6_timer_ch, hz, nd6_timer, NULL);
callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
/* timer for regeneranation of temporary addresses randomize ID */
callout_init(&V_in6_tmpaddrtimer_ch, 0);
callout_reset(&V_in6_tmpaddrtimer_ch,
(V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
V_ip6_temp_regen_advance) * hz,
in6_tmpaddrtimer, NULL);
in6_tmpaddrtimer, curvnet);
return (0);
}

View File

@ -435,7 +435,6 @@ mld_dispatch_queue(struct ifqueue *ifq, int limit)
static __inline int
mld_is_addr_reported(const struct in6_addr *addr)
{
INIT_VNET_INET6(curvnet);
KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__));
@ -639,7 +638,6 @@ static int
mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
const struct mld_hdr *mld)
{
INIT_VNET_INET6(ifp->if_vnet);
struct ifmultiaddr *ifma;
struct mld_ifinfo *mli;
struct in6_multi *inm;
@ -1034,7 +1032,6 @@ static int
mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
const struct mld_hdr *mld)
{
INIT_VNET_INET6(curvnet);
struct in6_ifaddr *ia;
struct in6_multi *inm;
#ifdef KTR
@ -1646,7 +1643,6 @@ mld_slowtimo_vnet(void)
static void
mld_v1_process_querier_timers(struct mld_ifinfo *mli)
{
INIT_VNET_INET6(curvnet);
MLD_LOCK_ASSERT();
@ -3009,7 +3005,6 @@ mld_dispatch_packet(struct mbuf *m)
static struct mbuf *
mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
{
INIT_VNET_INET6(curvnet);
struct mbuf *mh;
struct mldv2_report *mld;
struct ip6_hdr *ip6;

View File

@ -191,7 +191,7 @@ nd6_init(void)
/* start timer */
callout_init(&V_nd6_slowtimo_ch, 0);
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
nd6_slowtimo, NULL);
nd6_slowtimo, curvnet);
nd6_init_done = 1;
@ -593,7 +593,7 @@ void
nd6_timer(void *arg)
{
CURVNET_SET_QUIET((struct vnet *) arg);
INIT_VNET_INET6((struct vnet *) arg);
INIT_VNET_INET6(curvnet);
int s;
struct nd_defrouter *dr;
struct nd_prefix *pr;
@ -601,7 +601,7 @@ nd6_timer(void *arg)
struct in6_addrlifetime *lt6;
callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
nd6_timer, NULL);
nd6_timer, curvnet);
/* expire default router list */
s = splnet();
@ -872,7 +872,6 @@ nd6_purge(struct ifnet *ifp)
struct llentry *
nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
{
INIT_VNET_INET6(curvnet);
struct sockaddr_in6 sin6;
struct llentry *ln;
int llflags = 0;
@ -1669,7 +1668,7 @@ nd6_slowtimo(void *arg)
struct ifnet *ifp;
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
nd6_slowtimo, NULL);
nd6_slowtimo, curvnet);
IFNET_RLOCK();
for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
ifp = TAILQ_NEXT(ifp, if_list)) {

View File

@ -651,6 +651,7 @@ rip6_attach(struct socket *so, int proto, struct thread *td)
static void
rip6_detach(struct socket *so)
{
INIT_VNET_INET(so->so_vnet);
INIT_VNET_INET6(so->so_vnet);
struct inpcb *inp;

View File

@ -54,8 +54,6 @@ struct vnet_inet6 {
u_int _frag6_nfrags;
struct ip6q _ip6q;
struct route_in6 _ip6_forward_rt; /* XXX remove */
struct in6_addrpolicy _defaultaddrpolicy;
TAILQ_HEAD(, addrsel_policyent) _addrsel_policytab;
u_int _in6_maxmtu;
@ -122,10 +120,6 @@ struct vnet_inet6 {
int _udp6_recvspace;
int _ip6qmaxlen;
int _ip6_prefer_tempaddr;
int _ip6_forward_srcrt; /* XXX remove */
int _ip6_sourcecheck; /* XXX remove */
int _ip6_sourcecheck_interval; /* XXX remove */
int _ip6_ours_check_algorithm; /* XXX remove */
int _nd6_prune;
int _nd6_delay;

View File

@ -248,6 +248,7 @@ MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy");
/*
 * vnet module descriptor for the "ipsec" module, declared as depending on
 * the "inet" module.  A non-zero vmi_size asks the vimage framework to
 * provide zeroed storage for struct vnet_ipsec.
 */
static const vnet_modinfo_t vnet_ipsec_modinfo = {
.vmi_id = VNET_MOD_IPSEC,
.vmi_name = "ipsec",
.vmi_size = sizeof(struct vnet_ipsec),
.vmi_dependson = VNET_MOD_INET, /* XXX revisit - INET6 ? */
.vmi_iattach = ipsec_iattach
};

View File

@ -57,7 +57,7 @@
* is created, otherwise 1.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 800084 /* Master, propagated to newvers */
#define __FreeBSD_version 800085 /* Master, propagated to newvers */
#ifndef LOCORE
#include <sys/types.h>

View File

@ -45,6 +45,8 @@
#include <sys/sockopt.h>
#endif
struct vnet;
/*
* Kernel structure per socket.
* Contains send and receive buffer queues,
@ -72,6 +74,7 @@ struct socket {
short so_state; /* (b) internal state flags SS_* */
int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
/*
* Variables for connection queuing.

View File

@ -163,6 +163,8 @@ struct sysctl_oid {
const char *oid_fmt;
int oid_refcnt;
const char *oid_descr;
short oid_v_subs;
short oid_v_mod;
};
#define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l)
@ -292,7 +294,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#ifdef VIMAGE
#define SYSCTL_V_INT(subs, mod, parent, nbr, name, access, sym, val, descr) \
SYSCTL_V_OID(subs, mod, parent, nbr, name, CTLTYPE_INT|(access), \
SYSCTL_V_OID(subs, mod, parent, nbr, name, \
CTLTYPE_INT|CTLFLAG_MPSAFE|(access), \
sym, val, sysctl_handle_v_int, "I", descr)
#else
#ifdef VIMAGE_GLOBALS
@ -317,7 +320,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#ifdef VIMAGE
#define SYSCTL_V_UINT(subs, mod, parent, nbr, name, access, sym, val, descr) \
SYSCTL_V_OID(subs, mod, parent, nbr, name, CTLTYPE_UINT|(access), \
SYSCTL_V_OID(subs, mod, parent, nbr, name, \
CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \
sym, val, sysctl_handle_v_int, "IU", descr)
#else
#ifdef VIMAGE_GLOBALS
@ -440,6 +444,29 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define FEATURE(name, desc) \
SYSCTL_INT(_kern_features, OID_AUTO, name, CTLFLAG_RD, 0, 1, desc)
/*
* Resolve void *arg1 in a proper virtualization container.
*/
#ifdef VIMAGE
#define SYSCTL_RESOLVE_V_ARG1() do {					\
	/*								\
	 * For V_NET oids arg1 holds an offset: rebase it onto the	\
	 * current vnet's storage for module oid_v_mod.  V_GLOBAL oids	\
	 * are NOT virtualized, so arg1 is left untouched.  Any other	\
	 * major id is a programming error.				\
	 */								\
	if (oidp->oid_v_subs == V_NET) {				\
		char *vbase = (char *)					\
		    TD_TO_VNET(curthread)->mod_data[oidp->oid_v_mod];	\
		arg1 = vbase + (size_t) arg1;				\
	} else if (oidp->oid_v_subs != V_GLOBAL) {			\
		panic("unsupported module id %d", oidp->oid_v_subs);	\
	}								\
} while (0)
#else
#define SYSCTL_RESOLVE_V_ARG1()
#endif
#endif /* _KERNEL */
/*

View File

@ -39,6 +39,10 @@
#error "You cannot have both option VIMAGE and option VIMAGE_GLOBALS!"
#endif
#ifdef INVARIANTS
#define VNET_DEBUG
#endif
typedef int vnet_attach_fn(const void *);
typedef int vnet_detach_fn(const void *);
@ -48,8 +52,8 @@ struct kld_sym_lookup;
struct vnet_symmap {
char *name;
void *base;
size_t size;
size_t offset;
size_t size;
};
typedef struct vnet_symmap vnet_symmap_t;
@ -59,7 +63,7 @@ struct vnet_modinfo {
char *vmi_name;
vnet_attach_fn *vmi_iattach;
vnet_detach_fn *vmi_idetach;
size_t vmi_struct_size;
size_t vmi_size;
struct vnet_symmap *vmi_symmap;
};
typedef struct vnet_modinfo vnet_modinfo_t;
@ -71,13 +75,7 @@ struct vnet_modlink {
const char *vml_iname;
};
#define VNET_SYMMAP(mod, name) \
{ #name, &(vnet_ ## mod ## _0._ ## name), \
sizeof(vnet_ ## mod ## _0._ ## name) }
#define VNET_SYMMAP_END { NULL, 0 }
/* stateful modules */
/* Stateful modules. */
#define VNET_MOD_NET 0 /* MUST be 0 - implicit dependency */
#define VNET_MOD_NETGRAPH 1
#define VNET_MOD_INET 2
@ -93,7 +91,7 @@ struct vnet_modlink {
#define VNET_MOD_IGMP 12
#define VNET_MOD_MLD 13
/* stateless modules */
/* Stateless modules. */
#define VNET_MOD_NG_ETHER 20
#define VNET_MOD_NG_IFACE 21
#define VNET_MOD_NG_EIFACE 22
@ -109,7 +107,11 @@ struct vnet_modlink {
#define VNET_MOD_DYNAMIC_START 32
#define VNET_MOD_MAX 64
/* Sysctl virtualization macros need these name mappings bellow */
/* Major module IDs for vimage sysctl virtualization. */
#define V_GLOBAL 0 /* global variable - no indirection */
#define V_NET 1
/* Name mappings for minor module IDs in vimage sysctl virtualization. */
#define V_MOD_vnet_net VNET_MOD_NET
#define V_MOD_vnet_netgraph VNET_MOD_NETGRAPH
#define V_MOD_vnet_inet VNET_MOD_INET
@ -131,27 +133,82 @@ void vnet_mod_deregister_multi(const struct vnet_modinfo *, void *, char *);
#define VSYM(base, sym) (sym)
#else
#ifdef VIMAGE
#error "No option VIMAGE yet!"
#define VSYM(base, sym) ((base)->_ ## sym)
#else
#define VSYM(base, sym) (base ## _0._ ## sym)
#endif
#endif
#ifndef VIMAGE_GLOBALS
#ifdef VIMAGE
/*
* Casted NULL hack is needed for harvesting sizeofs() of fields inside
* struct vnet_* containers at compile time.
*/
#define VNET_SYMMAP(mod, name) \
{ #name, offsetof(struct vnet_ ## mod, _ ## name), \
sizeof(((struct vnet_ ## mod *) NULL)->_ ## name) }
#else
#define VNET_SYMMAP(mod, name) \
{ #name, (size_t) &(vnet_ ## mod ## _0._ ## name), \
sizeof(vnet_ ## mod ## _0._ ## name) }
#endif
#define VNET_SYMMAP_END { NULL, 0 }
#endif /* !VIMAGE_GLOBALS */
#ifdef VIMAGE
/*
 * A network stack instance.  Only defined in options VIMAGE builds, where
 * curvnet points to the single default instance.
 */
struct vnet {
void *mod_data[VNET_MOD_MAX]; /* per-module storage, indexed by module id (VNET_MOD_*) */
LIST_ENTRY(vnet) vnet_le; /* all vnets list */
u_int vnet_magic_n; /* NOTE(review): presumably a sanity-check magic number; not set or read in the visible code - confirm */
};
#endif
#ifdef VIMAGE
extern struct vnet *curvnet; /* XXX will become thread-local soon */
#else
#define curvnet NULL
#endif
#ifdef VIMAGE
#ifdef VNET_DEBUG
#define INIT_FROM_VNET(vnet, modindex, modtype, sym) \
if (vnet == NULL || vnet != curvnet) \
panic("in %s:%d %s()\n vnet=%p curvnet=%p", \
__FILE__, __LINE__, __FUNCTION__, \
vnet, curvnet); \
modtype *sym = (vnet)->mod_data[modindex];
#else /* !VNET_DEBUG */
#define INIT_FROM_VNET(vnet, modindex, modtype, sym) \
modtype *sym = (vnet)->mod_data[modindex];
#endif /* !VNET_DEBUG */
#else /* !VIMAGE */
#define INIT_FROM_VNET(vnet, modindex, modtype, sym)
#endif
#ifdef VIMAGE
LIST_HEAD(vnet_list_head, vnet);
extern struct vnet_list_head vnet_head;
#define VNET_ITERATOR_DECL(arg) struct vnet *arg;
#define VNET_FOREACH(arg) LIST_FOREACH(arg, &vnet_head, vnet_le)
#else
#define VNET_ITERATOR_DECL(arg)
#define VNET_FOREACH(arg)
#endif
#define TD_TO_VNET(td) curvnet
/* Non-VIMAGE null-macros */
#define IS_DEFAULT_VNET(arg) 1
#define CURVNET_SET(arg)
#define CURVNET_SET_QUIET(arg)
#define CURVNET_RESTORE()
#define VNET_ASSERT(condition)
#define INIT_FROM_VNET(vnet, modindex, modtype, sym)
#define VNET_ITERATOR_DECL(arg)
#define VNET_FOREACH(arg)
#define VNET_LIST_RLOCK()
#define VNET_LIST_RUNLOCK()
#define INIT_VPROCG(arg)
#define INIT_VCPU(arg)
#define TD_TO_VIMAGE(td)
#define TD_TO_VNET(td)
#define TD_TO_VPROCG(td)
#define TD_TO_VCPU(td)
#define P_TO_VIMAGE(p)