MFp4: @179066
Add page which describes VNET network stack virtualization infrastructure. Submitted by: bz Sponsored by: The FreeBSD Foundation Sponsored by: CK Software GmbH
This commit is contained in:
parent
3cdc37eb52
commit
cf0bd1042f
@ -345,6 +345,7 @@ MAN= accept_filter.9 \
|
||||
vmem.9 \
|
||||
vn_fullpath.9 \
|
||||
vn_isdisk.9 \
|
||||
vnet.9 \
|
||||
vnode.9 \
|
||||
VOP_ACCESS.9 \
|
||||
VOP_ACLCHECK.9 \
|
||||
|
502
share/man/man9/vnet.9
Normal file
502
share/man/man9/vnet.9
Normal file
@ -0,0 +1,502 @@
|
||||
.\"-
|
||||
.\" Copyright (c) 2010 The FreeBSD Foundation
|
||||
.\" All rights reserved.
|
||||
.\"
|
||||
.\" This documentation was written by CK Software GmbH under sponsorship from
|
||||
.\" the FreeBSD Foundation.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd November 20, 2014
|
||||
.Dt VNET 9
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm VNET
|
||||
.Nd "network subsystem virtualization infrastructure"
|
||||
.Sh SYNOPSIS
|
||||
.Cd "options VIMAGE"
|
||||
.Cd "options VNET_DEBUG"
|
||||
.Pp
|
||||
.In net/vnet.h
|
||||
.Pp
|
||||
.\"------------------------------------------------------------
|
||||
.Ss "Constants and Global Variables"
|
||||
.\"
|
||||
.Dv VNET_SETNAME
|
||||
.\" "set_vnet"
|
||||
.Dv VNET_SYMPREFIX
|
||||
.\" "vnet_entry_"
|
||||
.Vt extern struct vnet *vnet0;
|
||||
.\"------------------------------------------------------------
|
||||
.Ss "Variable Declaration"
|
||||
.Fo VNET
|
||||
.Fa "name"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo VNET_NAME
|
||||
.Fa "name"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo VNET_DECLARE
|
||||
.Fa "type" "name"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo VNET_DEFINE
|
||||
.Fa "type" "name"
|
||||
.Fc
|
||||
.\"
|
||||
.Bd -literal
|
||||
#define V_name VNET(name)
|
||||
.Ed
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Virtual Instance Selection"
|
||||
.\"
|
||||
.Fo CRED_TO_VNET
|
||||
.Fa "struct ucred *"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo TD_TO_VNET
|
||||
.Fa "struct thread *"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo P_TO_VNET
|
||||
.Fa "struct proc *"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo IS_DEFAULT_VNET
|
||||
.Fa "struct vnet *"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo VNET_ASSERT
|
||||
.Fa exp msg
|
||||
.Fc
|
||||
.\"
|
||||
.Fo CURVNET_SET
|
||||
.Fa "struct vnet *"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo CURVNET_SET_QUIET
|
||||
.Fa "struct vnet *"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo CURVNET_RESTORE
|
||||
.Fc
|
||||
.\"
|
||||
.Fo VNET_ITERATOR_DECL
|
||||
.Fa "struct vnet *"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo VNET_FOREACH
|
||||
.Fa "struct vnet *"
|
||||
.Fc
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Locking"
|
||||
.\"
|
||||
.Fo VNET_LIST_RLOCK
|
||||
.Fc
|
||||
.Fo VNET_LIST_RUNLOCK
|
||||
.Fc
|
||||
.Fo VNET_LIST_RLOCK_NOSLEEP
|
||||
.Fc
|
||||
.Fo VNET_LIST_RUNLOCK_NOSLEEP
|
||||
.Fc
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Startup and Teardown Functions"
|
||||
.\"
|
||||
.Ft "struct vnet *"
|
||||
.Fo vnet_alloc
|
||||
.Fa void
|
||||
.Fc
|
||||
.\"
|
||||
.Ft void
|
||||
.Fo vnet_destroy
|
||||
.Fa "struct vnet *"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo VNET_SYSINIT
|
||||
.Fa ident
|
||||
.Fa "enum sysinit_sub_id subsystem"
|
||||
.Fa "enum sysinit_elem_order order"
|
||||
.Fa "sysinit_cfunc_t func"
|
||||
.Fa "const void *arg"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo VNET_SYSUNINIT
|
||||
.Fa ident
|
||||
.Fa "enum sysinit_sub_id subsystem"
|
||||
.Fa "enum sysinit_elem_order order"
|
||||
.Fa "sysinit_cfunc_t func"
|
||||
.Fa "const void *arg"
|
||||
.Fc
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Eventhandlers"
|
||||
.\"
|
||||
.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER
|
||||
.Fa "const char *name"
|
||||
.Fa "void *func"
|
||||
.Fa "void *arg"
|
||||
.Fa "int priority"
|
||||
.Fc
|
||||
.\"
|
||||
.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
|
||||
.Fa "eventhandler_tag tag"
|
||||
.Fa "const char *name"
|
||||
.Fa "void *func"
|
||||
.Fa "void *arg"
|
||||
.Fa "int priority"
|
||||
.Fc
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Sysctl Handling"
|
||||
.Fo SYSCTL_VNET_INT
|
||||
.Fa parent nbr name access ptr val descr
|
||||
.Fc
|
||||
.Fo SYSCTL_VNET_PROC
|
||||
.Fa parent nbr name access ptr arg handler fmt descr
|
||||
.Fc
|
||||
.Fo SYSCTL_VNET_STRING
|
||||
.Fa parent nbr name access arg len descr
|
||||
.Fc
|
||||
.Fo SYSCTL_VNET_STRUCT
|
||||
.Fa parent nbr name access ptr type descr
|
||||
.Fc
|
||||
.Fo SYSCTL_VNET_UINT
|
||||
.Fa parent nbr name access ptr val descr
|
||||
.Fc
|
||||
.Fo VNET_SYSCTL_ARG
|
||||
.Fa req arg1
|
||||
.Fc
|
||||
.\" ------------------------------------------------------------
|
||||
.Sh DESCRIPTION
|
||||
.Nm
|
||||
is the name of a technique to virtualize the network stack.
|
||||
The basic idea is to change global resources, most notably variables, into
|
||||
per network stack resources and have functions, sysctls, eventhandlers,
|
||||
etc. access and handle them in the context of the correct instance.
|
||||
Each (virtual) network stack is attached to a
|
||||
.Em prison ,
|
||||
with
|
||||
.Vt vnet0
|
||||
being the unrestricted default network stack of the base system.
|
||||
.Pp
|
||||
The global defines for
|
||||
.Dv VNET_SETNAME
|
||||
and
|
||||
.Dv VNET_SYMPREFIX
|
||||
are shared with
|
||||
.Xr kvm 3
|
||||
to access internals for debugging reasons.
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Variable Declaration"
|
||||
.\"
|
||||
Variables are virtualized by using the
|
||||
.Fn VNET_DEFINE
|
||||
macro rather than writing them out as
|
||||
.Em type name .
|
||||
One can still use static initialization or storage class specifiers, e.g.,
|
||||
.Pp
|
||||
.Dl Li static VNET_DEFINE(int, foo) = 1;
|
||||
or
|
||||
.Dl Li static VNET_DEFINE(SLIST_HEAD(, bar), bars);
|
||||
.Pp
|
||||
Static initialization is not possible when the virtualized variable
|
||||
would need to be referenced, e.g., with
|
||||
.Dq TAILQ_HEAD_INITIALIZER() .
|
||||
In that case a
|
||||
.Fn VNET_SYSINIT
|
||||
based initialization function must be used.
|
||||
.Pp
|
||||
External variables have to be declared using the
|
||||
.Fn VNET_DECLARE
|
||||
macro.
|
||||
In either case the convention is to define another macro
|
||||
that is then used throughout the implementation to access that variable.
|
||||
The variable name is usually prefixed by
|
||||
.Em V_
|
||||
to express that it is virtualized.
|
||||
The
|
||||
.Fn VNET
|
||||
macro will then translate accesses to that variable to the copy of the
|
||||
currently selected instance (see the
|
||||
.Sx "Virtual Instance Selection"
|
||||
section):
|
||||
.Pp
|
||||
.Dl Li #define V_name VNET(name)
|
||||
.Pp
|
||||
.Em NOTE:
|
||||
Do not confuse this with the convention used by
|
||||
.Xr VFS 9 .
|
||||
.Pp
|
||||
The
|
||||
.Fn VNET_NAME
|
||||
macro returns the offset within the memory region of the virtual network
|
||||
stack instance.
|
||||
It is usually only used with
|
||||
.Fn SYSCTL_VNET_*
|
||||
macros.
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Virtual Instance Selection"
|
||||
.\"
|
||||
There are three different places where the current virtual
|
||||
network stack pointer is stored and can be taken from:
|
||||
.Bl -enum -offset indent
|
||||
.It
|
||||
a
|
||||
.Em prison :
|
||||
.Dl "(struct prison *)->pr_vnet"
|
||||
.Pp
|
||||
For convenience the following macros are provided:
|
||||
.Bd -literal -compact -offset indent
|
||||
.Fn CRED_TO_VNET "struct ucred *"
|
||||
.Fn TD_TO_VNET "struct thread *"
|
||||
.Fn P_TO_VNET "struct proc *"
|
||||
.Ed
|
||||
.It
|
||||
a
|
||||
.Em socket :
|
||||
.Dl "(struct socket *)->so_vnet"
|
||||
.It
|
||||
an
|
||||
.Em interface :
|
||||
.Dl "(struct ifnet *)->if_vnet"
|
||||
.El
|
||||
.Pp
|
||||
.\"
|
||||
In addition the currently active instance is cached in
|
||||
.Dq "curthread->td_vnet"
|
||||
which is usually only accessed through the
|
||||
.Dv curvnet
|
||||
macro.
|
||||
.Pp
|
||||
.\"
|
||||
To set the correct context of the current virtual network instance, use the
|
||||
.Fn CURVNET_SET
|
||||
or
|
||||
.Fn CURVNET_SET_QUIET
|
||||
macros.
|
||||
The
|
||||
.Fn CURVNET_SET_QUIET
|
||||
version will not record vnet recursions in case the kernel was compiled
|
||||
with
|
||||
.Cd "options VNET_DEBUG"
|
||||
and should thus only be used in well known cases, where recursion is
|
||||
unavoidable.
|
||||
Both macros will save the previous state on the stack and it must be restored
|
||||
with the
|
||||
.Fn CURVNET_RESTORE
|
||||
macro.
|
||||
.Pp
|
||||
.Em NOTE:
|
||||
As the previous state is saved on the stack, you cannot have multiple
|
||||
.Fn CURVNET_SET
|
||||
calls in the same block.
|
||||
.Pp
|
||||
.Em NOTE:
|
||||
As the previous state is saved on the stack, a
|
||||
.Fn CURVNET_RESTORE
|
||||
call has to be in the same block as the
|
||||
.Fn CURVNET_SET
|
||||
call or in a subblock with the same idea of the saved instances as the
|
||||
outer block.
|
||||
.Pp
|
||||
.Em NOTE:
|
||||
As each macro is a set of operations and, as previously explained, cannot
|
||||
be put into its own block when defined, one cannot conditionally set
|
||||
the current vnet context.
|
||||
The following will
|
||||
.Em not
|
||||
work:
|
||||
.Bd -literal -offset indent
|
||||
if (condition)
|
||||
CURVNET_SET(vnet);
|
||||
.Ed
|
||||
.Pp
|
||||
nor would this work:
|
||||
.Bd -literal -offset indent
|
||||
if (condition) {
|
||||
CURVNET_SET(vnet);
|
||||
}
|
||||
CURVNET_RESTORE();
|
||||
.Ed
|
||||
.Pp
|
||||
.\"
|
||||
Sometimes one needs to loop over all virtual instances, for example to update
|
||||
virtual from global state, to run a function from a
|
||||
.Xr callout 9
|
||||
for each instance, etc.
|
||||
For those cases the
|
||||
.Fn VNET_ITERATOR_DECL
|
||||
and
|
||||
.Fn VNET_FOREACH
|
||||
macros are provided.
|
||||
The former macro defines the variable that iterates over the loop,
|
||||
and the latter loops over all of the virtual network stack instances.
|
||||
See
|
||||
.Sx "Locking"
|
||||
for how to safely traverse the list of all virtual instances.
|
||||
.Pp
|
||||
.\"
|
||||
The
|
||||
.Fn IS_DEFAULT_VNET
|
||||
macro provides a safe way to check whether the currently active instance is the
|
||||
unrestricted default network stack of the base system
|
||||
.Pq Vt vnet0 .
|
||||
.Pp
|
||||
.\"
|
||||
The
|
||||
.Fn VNET_ASSERT
|
||||
macro provides a way to conditionally add assertions that are only active with
|
||||
.Cd "options VIMAGE"
|
||||
compiled in and either
|
||||
.Cd "options VNET_DEBUG"
|
||||
or
|
||||
.Cd "options INVARIANTS"
|
||||
enabled as well.
|
||||
It uses the same semantics as
|
||||
.Xr KASSERT 9 .
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Locking"
|
||||
.\"
|
||||
For public access to the list of virtual network stack instances,
|
||||
e.g., by the
|
||||
.Fn VNET_FOREACH
|
||||
macro, read locks are provided.
|
||||
Macros are used to abstract from the actual type of the locks.
|
||||
If a caller may sleep while traversing the list, it must use the
|
||||
.Fn VNET_LIST_RLOCK
|
||||
and
|
||||
.Fn VNET_LIST_RUNLOCK
|
||||
macros.
|
||||
Otherwise, the caller can use
|
||||
.Fn VNET_LIST_RLOCK_NOSLEEP
|
||||
and
|
||||
.Fn VNET_LIST_RUNLOCK_NOSLEEP .
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Startup and Teardown Functions"
|
||||
.\"
|
||||
To start or tear down a virtual network stack instance the internal
|
||||
functions
|
||||
.Fn vnet_alloc
|
||||
and
|
||||
.Fn vnet_destroy
|
||||
are provided and called from the jail framework.
|
||||
They run the publicly provided methods to handle network stack
|
||||
startup and teardown.
|
||||
.Pp
|
||||
For public control, the system startup interface has been enhanced
|
||||
to not only handle a system boot but to also handle a virtual
|
||||
network stack startup and teardown.
|
||||
To the base system the
|
||||
.Fn VNET_SYSINIT
|
||||
and
|
||||
.Fn VNET_SYSUNINIT
|
||||
macros look exactly as if there were no virtual network stack.
|
||||
In fact, if
|
||||
.Cd "options VIMAGE"
|
||||
is not compiled in they are compiled to the standard
|
||||
.Fn SYSINIT
|
||||
macros.
|
||||
In addition to that they are run for each virtual network stack
|
||||
when starting or, in reverse order, when shutting down.
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Eventhandlers"
|
||||
.\"
|
||||
Eventhandlers can be handled in two ways:
|
||||
.Pp
|
||||
.Bl -enum -offset indent -compact
|
||||
.It
|
||||
save the
|
||||
.Em tags
|
||||
returned in each virtual instance and properly free the eventhandlers
|
||||
on teardown using those, or
|
||||
.It
|
||||
use one eventhandler that will iterate over all virtual network
|
||||
stack instances.
|
||||
.El
|
||||
.Pp
|
||||
For the first case one can just use the normal
|
||||
.Xr EVENTHANDLER 9
|
||||
functions, while for the second case the
|
||||
.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER
|
||||
and
|
||||
.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
|
||||
macros are provided.
|
||||
These differ in that
|
||||
.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
|
||||
takes an extra first argument that will carry the
|
||||
.Fa "tag"
|
||||
upon return.
|
||||
Eventhandlers registered with either of these will not run
|
||||
.Fa func
|
||||
directly but
|
||||
.Fa func
|
||||
will be called from an internal iterator function for each vnet.
|
||||
Both macros can only be used for eventhandlers that do not take
|
||||
additional arguments, as the variadic arguments from an
|
||||
.Xr EVENTHANDLER_INVOKE 9
|
||||
call will be ignored.
|
||||
.\" ------------------------------------------------------------
|
||||
.Ss "Sysctl Handling"
|
||||
.\"
|
||||
A
|
||||
.Xr sysctl 9
|
||||
can be virtualized by using one of the
|
||||
.Fn SYSCTL_VNET_*
|
||||
macros.
|
||||
.Pp
|
||||
They take the same arguments as the standard
|
||||
.Xr sysctl 9
|
||||
functions, with the only difference that the
|
||||
.Fa ptr
|
||||
argument has to be passed as
|
||||
.Ql &VNET_NAME(foo)
|
||||
instead of
|
||||
.Ql &foo
|
||||
so that the variable can be selected from the correct memory
|
||||
region of the virtual network stack instance of the caller.
|
||||
.Pp
|
||||
For the very rare case a sysctl handler function would want to
|
||||
handle
|
||||
.Fa arg1
|
||||
itself the
|
||||
.Fn VNET_SYSCTL_ARG req arg1
|
||||
macro is provided that will translate the
|
||||
.Fa arg1
|
||||
argument to the correct memory address in the virtual network stack
|
||||
context of the caller.
|
||||
.\" ------------------------------------------------------------
|
||||
.Sh SEE ALSO
|
||||
.Xr jail 2 ,
|
||||
.Xr kvm 3 ,
|
||||
.Xr EVENTHANDLER 9 ,
|
||||
.\" .Xr pcpu 9 ,
|
||||
.Xr KASSERT 9 ,
|
||||
.Xr sysctl 9
|
||||
.\" .Xr SYSINIT 9
|
||||
.Sh HISTORY
|
||||
The virtual network stack implementation first appeared in
|
||||
.Fx 8.0 .
|
||||
.Sh AUTHORS
|
||||
This manual page was written by
|
||||
.An Bjoern A. Zeeb, CK Software GmbH,
|
||||
under sponsorship from the FreeBSD Foundation.
|
Loading…
Reference in New Issue
Block a user