freebsd-dev/etc/rc.d/jail

#!/bin/sh
#
# $FreeBSD$
#

# PROVIDE: jail
# REQUIRE: LOGIN cleanvar
# BEFORE: securelevel
# KEYWORD: nojail shutdown

# WARNING: This script deals with untrusted data (the data and
# processes inside the jails), and care must be taken when changing the
# code related to this!  If you have any doubt about whether a change is
# correct or whether it has a security impact, please get the patch
# reviewed by the FreeBSD Security Team prior to commit.

# Source /etc/rc.subr.  Helpers used throughout this script but not
# defined in it (warn, debug, info, err, checkyesno, set_rcvar, ...)
# presumably come from there.
. /etc/rc.subr

# Service name; rcvar is whatever set_rcvar prints for it.
name="jail"
rcvar=`set_rcvar`

# Names of the functions in this file that implement the start
# pre-command, the start command and the stop command.
start_precmd="jail_prestart"
start_cmd="jail_start"
stop_cmd="jail_stop"

# _jail_load_hooks _j kind first
#	Copy the numbered hook settings jail_<_j>_exec_<kind><N> into the
#	globals _exec_<kind><N>, starting at N=first.  When the per-jail
#	setting is empty the jail-independent jail_exec_<kind><N> is used.
#	Loading stops at the first N that resolves to an empty string; that
#	slot is still assigned (empty) so that consumers, which walk the
#	slots upwards until the first empty one, always find a terminator.
#
_jail_load_hooks()
{
	local _jn _kind _n _val

	_jn="$1"
	_kind="$2"
	_n="$3"

	while : ; do
		eval _val=\"\${jail_${_jn}_exec_${_kind}${_n}:-\${jail_exec_${_kind}${_n}}}\"
		eval _exec_${_kind}${_n}=\"\${_val}\"
		# Test the value directly; piping it through echo would
		# mangle values such as "-n".
		[ -n "${_val}" ] || break
		_n=$((_n + 1))
	done
}

# _jail_debug_hooks _j kind label first
#	Emit one debug line per non-empty _exec_<kind><N>, starting at
#	N=first and stopping at the first empty slot.
#
_jail_debug_hooks()
{
	local _jn _kind _label _n _val

	_jn="$1"
	_kind="$2"
	_label="$3"
	_n="$4"

	while : ; do
		# A plain expansion is enough here: an unset slot already
		# expands to the empty string.
		eval _val=\"\${_exec_${_kind}${_n}}\"
		[ -n "${_val}" ] || break
		debug "$_jn exec ${_label} #${_n}: ${_val}"
		_n=$((_n + 1))
	done
}

# init_variables _j
#	Initialize the various jail variables for jail _j.
#
#	Reads the jail_<_j>_* settings (falling back to the global jail_*
#	settings where one exists) and stores the result in the global
#	working variables _rootdir, _devdir, _fdescdir, _procdir, _hostname,
#	_ip, _interface, _exec, _exec_start, _exec_stop, _exec_<hook><N>,
#	_ruleset, _devfs, _fdescfs, _procfs, _mount, _fstab, _flags,
#	_consolelog and _fib.  Without an argument it only warns and returns.
#	Calls err (exit code 3) when the jail has no hostname or no root
#	directory configured.
#
init_variables()
{
	_j="$1"

	if [ -z "$_j" ]; then
		warn "init_variables: you must specify a jail"
		return
	fi

	# In every eval below both the per-jail and the global variable
	# reference are escaped, so values are expanded exactly once and
	# never re-parsed as shell text.
	eval _rootdir=\"\$jail_${_j}_rootdir\"
	_devdir="${_rootdir}/dev"
	_fdescdir="${_devdir}/fd"
	_procdir="${_rootdir}/proc"
	eval _hostname=\"\$jail_${_j}_hostname\"
	eval _ip=\"\$jail_${_j}_ip\"
	eval _interface=\"\${jail_${_j}_interface:-\${jail_interface}}\"
	eval _exec=\"\$jail_${_j}_exec\"

	# Numbered hooks; afterstart is the only family counted from 1.
	_jail_load_hooks "$_j" prestart 0
	eval _exec_start=\"\${jail_${_j}_exec_start:-\${jail_exec_start}}\"
	_jail_load_hooks "$_j" afterstart 1
	_jail_load_hooks "$_j" poststart 0
	_jail_load_hooks "$_j" prestop 0
	eval _exec_stop=\"\${jail_${_j}_exec_stop:-\${jail_exec_stop}}\"
	_jail_load_hooks "$_j" poststop 0

	if [ -n "${_exec}" ]; then
		#   simple/backward-compatible execution
		_exec_start="${_exec}"
		_exec_stop=""
	elif [ -z "${_exec_start}" ]; then
		#   flexible execution: the stop default is applied only
		#   together with the start default.
		_exec_start="/bin/sh /etc/rc"
		if [ -z "${_exec_stop}" ]; then
			_exec_stop="/bin/sh /etc/rc.shutdown"
		fi
	fi

	# The default jail ruleset will be used by rc.subr if none is specified.
	eval _ruleset=\"\${jail_${_j}_devfs_ruleset:-\${jail_devfs_ruleset}}\"
	eval _devfs=\"\${jail_${_j}_devfs_enable:-\${jail_devfs_enable}}\"
	_devfs="${_devfs:-NO}"
	eval _fdescfs=\"\${jail_${_j}_fdescfs_enable:-\${jail_fdescfs_enable}}\"
	_fdescfs="${_fdescfs:-NO}"
	eval _procfs=\"\${jail_${_j}_procfs_enable:-\${jail_procfs_enable}}\"
	_procfs="${_procfs:-NO}"

	eval _mount=\"\${jail_${_j}_mount_enable:-\${jail_mount_enable}}\"
	_mount="${_mount:-NO}"
	# "/etc/fstab.${_j}" will be used for {,u}mount(8) if none is specified.
	eval _fstab=\"\${jail_${_j}_fstab:-\${jail_fstab}}\"
	_fstab="${_fstab:-/etc/fstab.${_j}}"
	eval _flags=\"\${jail_${_j}_flags:-\${jail_flags}}\"
	_flags="${_flags:--l -U root}"
	eval _consolelog=\"\${jail_${_j}_consolelog:-\${jail_consolelog}}\"
	_consolelog="${_consolelog:-/var/log/jail_${_j}_console.log}"
	eval _fib=\"\${jail_${_j}_fib:-\${jail_fib}}\"

	# Debugging aid
	#
	debug "$_j devfs enable: $_devfs"
	debug "$_j fdescfs enable: $_fdescfs"
	debug "$_j procfs enable: $_procfs"
	debug "$_j mount enable: $_mount"
	debug "$_j hostname: $_hostname"
	debug "$_j ip: $_ip"
	jail_show_addresses ${_j}
	debug "$_j interface: $_interface"
	debug "$_j fib: $_fib"
	debug "$_j root: $_rootdir"
	debug "$_j devdir: $_devdir"
	debug "$_j fdescdir: $_fdescdir"
	debug "$_j procdir: $_procdir"
	debug "$_j ruleset: $_ruleset"
	debug "$_j fstab: $_fstab"

	_jail_debug_hooks "$_j" prestart "pre-start" 0
	debug "$_j exec start: $_exec_start"
	_jail_debug_hooks "$_j" afterstart "after start" 1
	_jail_debug_hooks "$_j" poststart "post-start" 0
	_jail_debug_hooks "$_j" prestop "pre-stop" 0
	debug "$_j exec stop: $_exec_stop"
	_jail_debug_hooks "$_j" poststop "post-stop" 0

	debug "$_j flags: $_flags"
	debug "$_j consolelog: $_consolelog"

	# Both settings are mandatory; err terminates the script.
	if [ -z "${_hostname}" ]; then
		err 3 "$name: No hostname has been defined for ${_j}"
	fi
	if [ -z "${_rootdir}" ]; then
		err 3 "$name: No root directory has been defined for ${_j}"
	fi
}

# set_sysctl rc_knob mib msg
#	If the mib sysctl is set according to what rc_knob
#	specifies, this function does nothing. However if
#	rc_knob is set differently than mib, then the mib
#	is set accordingly and msg is displayed followed by
#	an '=' sign and the word 'YES' or 'NO'.
#
#	rc_knob is the name of a variable handed to checkyesno.  When
#	the current value of mib cannot be read, a warning is issued,
#	nothing is changed and 1 is returned.
#
set_sysctl()
{
	local _knob _mib _msg _current _want _word

	_knob="$1"
	_mib="$2"
	_msg="$3"

	_current=$(${SYSCTL} -n "${_mib}" 2>/dev/null)

	if checkyesno "${_knob}"; then
		_want=1
		_word="YES"
	else
		_want=0
		_word="NO"
	fi

	# An empty value means the sysctl could not be read; comparing it
	# numerically would only produce a test(1) error.
	if [ -z "${_current}" ]; then
		warn "set_sysctl: cannot read ${_mib}, leaving it unchanged"
		return 1
	fi

	if [ "${_current}" -ne "${_want}" ]; then
		echo -n " ${_msg}=${_word}"
		# SYSCTL_W is left unquoted on purpose: it may carry options.
		${SYSCTL_W} "${_mib}=${_want}" 1>/dev/null
	fi
}

# is_current_mountpoint dir
#	Is the directory the mount point of a currently mounted file
#	system?
#
#	Returns 0 when, after squeezing repeated slashes and dropping one
#	trailing slash, dir is an existing directory and equals the
#	mount point df(1) reports for it (sixth column of its output);
#	returns non-zero otherwise.  Because the trailing slash is
#	stripped, "/" itself normalizes to the empty string and is
#	reported as not being a mount point.
#
is_current_mountpoint()
{
	local _dir _dir2

	_dir=$1

	# Quote the path so embedded whitespace survives normalization.
	_dir=$(printf '%s\n' "${_dir}" | sed -Ee 's#//+#/#g' -e 's#/$##')
	[ ! -d "${_dir}" ] && return 1
	# Skip df's header line inside awk rather than with the obsolete
	# "tail +2" syntax.
	_dir2=$(df "${_dir}" | awk 'NR > 1 { print $6 }')
	[ "${_dir}" = "${_dir2}" ]
}

# is_symlinked_mountpoint dir
#	Is a mount point, or any of its parent directories, a symlink?
#
#	Walks from dir up through its parents with dirname(1).  Returns 0
#	as soon as a component is a symbolic link, and 1 once the walk
#	reaches "/" (absolute paths) or "." (relative paths such as the
#	"none" mount point of a swap entry) without finding one.  An
#	empty path also yields 1.
#
is_symlinked_mountpoint()
{
	local _dir

	_dir=$1

	while : ; do
		[ -L "${_dir}" ] && return 0
		# dirname(1) settles on "/" or "."; stopping on both (and
		# on an empty result) guarantees termination.
		case "${_dir}" in
		/ | . | "")
			return 1
			;;
		esac
		_dir=$(dirname "${_dir}")
	done
}

# secure_umount dir
#	Try to unmount a mount point without being vulnerable to
#	symlink attacks.
#
#	dir is force-unmounted only when is_current_mountpoint accepts
#	it; otherwise a debug message is logged and nothing is unmounted.
#	Output of umount(8) is discarded.
#
secure_umount()
{
	local _dir

	_dir=$1

	# The path is quoted so that the very same string is checked and
	# unmounted, even if it contains whitespace or glob characters.
	if is_current_mountpoint "${_dir}"; then
		umount -f "${_dir}" >/dev/null 2>&1
	else
		debug "Nothing mounted on ${_dir} - not unmounting"
	fi
}


# jail_umount_fs
#	This function unmounts certain special filesystems in the
#	currently selected jail. The caller must call the init_variables()
#	routine before calling this one.
#
#	fdescfs, devfs and procfs are unmounted (in that order) when the
#	matching _fdescfs/_devfs/_procfs knob is enabled and the directory
#	exists.  When _mount is enabled, the mount points listed in
#	${_fstab} are unmounted in reverse file order; a missing fstab
#	only produces a warning.
#
jail_umount_fs()
{
	local _device _mountpt _rest

	if checkyesno _fdescfs && [ -d "${_fdescdir}" ]; then
		secure_umount "${_fdescdir}"
	fi
	if checkyesno _devfs && [ -d "${_devdir}" ]; then
		secure_umount "${_devdir}"
	fi
	if checkyesno _procfs && [ -d "${_procdir}" ]; then
		secure_umount "${_procdir}"
	fi
	if checkyesno _mount; then
		# Without an fstab there is nothing to walk; do not feed a
		# missing file to tail(1).
		if [ ! -f "${_fstab}" ]; then
			warn "${_fstab} does not exist"
			return
		fi
		# tail -r reverses the file so that nested mount points are
		# released before their parents.
		tail -r "${_fstab}" | while read -r _device _mountpt _rest; do
			case ":${_device}" in
			:#* | :)
				# comment or blank line
				continue
				;;
			esac
			secure_umount "${_mountpt}"
		done
	fi
}

# jail_mount_fstab()
#	Mount file systems from a per jail fstab while trying to
#	secure against symlink attacks at the mount points.
#
#	If we are certain we cannot secure against symlink attacks we
#	do not mount any of the file systems (since we cannot just skip
#	the one file system with the problematic mount point).
#
#	The caller must call the init_variables() routine before
#	calling this one.
#
#	Returns 1 without mounting anything when a mount point in
#	${_fstab} is, or lives below, a symlink; otherwise returns the
#	status of "mount -a -F".
#
jail_mount_fstab()
{
	local _device _mountpt _rest

	while read -r _device _mountpt _rest; do
		case ":${_device}" in
		:#* | :)
			# comment or blank line
			continue
			;;
		esac
		if is_symlinked_mountpoint "${_mountpt}"; then
			warn "${_mountpt} has symlink as parent - not mounting from ${_fstab}"
			return 1
		fi
	done <"${_fstab}"
	mount -a -F "${_fstab}"
}

# jail_show_addresses jail
#	Debug print the input for the given _multi aliases
#	for a jail for init_variables().
#
#	Walks jail_<jail>_ip_multi0, _ip_multi1, ... and emits one debug
#	line per entry, stopping at the first one that is empty.  Only
#	warns when called without a jail name.  No caller-visible
#	variable is modified.
#
jail_show_addresses()
{
	local _j _addr alias

	_j="$1"

	if [ -z "${_j}" ]; then
		warn "jail_show_addresses: you must specify a jail"
		return
	fi

	alias=0
	while : ; do
		eval _addr=\"\$jail_${_j}_ip_multi${alias}\"
		[ -n "${_addr}" ] || break
		debug "${_j} ip_multi${alias}: $_addr"
		alias=$((alias + 1))
	done
}

# jail_extract_address argument
#	The argument is the string from one of the _ip
#	or the _multi variables. In case of a comma separated list
#	only one element must be passed in at a time.
#	The function alters the _type, _iface, _addr and _mask variables.
#
#	Accepted form: [iface|]address[/prefixlen | /netmask | " netmask x"].
#	_iface falls back to the global ${_interface} when no "iface|"
#	prefix is present.  _type becomes "inet" or "inet6", or is
#	cleared (with a warning) when the family cannot be identified.
#	When called without input only a warning is issued and no
#	variable is touched.
#
jail_extract_address()
{
	local _i _r
	_i=$1

	if [ -z "${_i}" ]; then
		warn "jail_extract_address: called without input"
		return
	fi

	# Check if we have an interface prefix given and split into
	# iface and rest.
	case "${_i}" in
	*\|*)	# ifN|.. prefix there
		_iface=${_i%%|*}
		_r=${_i##*|}
		;;
	*)	_iface=""
		_r=${_i}
		;;
	esac

	# In case the IP has no interface given, check if we have a global one.
	_iface=${_iface:-${_interface}}

	# Set address, cut off any prefix/netmask/prefixlen.
	_addr=${_r%%[/ ]*}

	# Theoretically we can return here if interface is not set,
	# as we only care about the _mask if we call ifconfig.
	# This is not done because we may want to sanitize IP addresses
	# based on _type later, and optionally change the type as well.

	# Extract the prefix/netmask/prefixlen part by cutting off the
	# address.  The address is stripped as a literal prefix; it is not
	# interpreted as a pattern.
	_mask=${_r#"${_addr}"}

	# Identify type {inet,inet6}.
	case "${_addr}" in
	*\.*\.*\.*)	_type="inet" ;;
	*:*)		_type="inet6" ;;
	*)		warn "jail_extract_address: type not identified"
			# Do not leave a stale family behind.
			_type=""
			;;
	esac

	# Handle the special /netmask instead of /prefix or
	# "netmask xxx" case for legacy IP.
	# We do NOT support shortened class-full netmasks.
	if [ "${_type}" = "inet" ]; then
		case "${_mask}" in
		/*\.*\.*\.*)	_mask=" netmask ${_mask#/}" ;;
		*)		;;
		esac

		# In case _mask is still not set use /32.
		_mask=${_mask:-/32}

	elif [ "${_type}" = "inet6" ]; then
		# In case _mask is not set for IPv6, use /128.
		_mask=${_mask:-/128}
	fi
}

# jail_handle_ips_option {add,del} input
#	Handle a single input argument which can be a comma separated
#	list of addresses (theoretically with an optional interface and
#	prefix/netmask/prefixlen).
#
#	Every address that jail_extract_address can parse is appended to
#	the global comma separated list ${_addrl}.  When an interface is
#	known for the address and its family was identified, the address
#	is additionally added to ("add") or removed from ("del") that
#	interface with ifconfig(8).  Empty list elements are skipped.
#
jail_handle_ips_option()
{
	local _x _action _type _i _iface _addr _mask
	_action=$1
	_x=$2

	if [ -z "${_x}" ]; then
		# No IP given. This can happen for the primary address
		# of each address family.
		return
	fi

	# Loop, in case we find a comma separated list, we need to handle
	# each argument on its own.
	while [ ${#_x} -gt 0 ]; do
		case "${_x}" in
		*,*)	# Extract the first argument and strip it off the list.
			_i=${_x%%,*}
			_x=${_x#*,}
			;;
		*)	_i=${_x}
			_x=""
			;;
		esac

		# jail_extract_address fills these in; start from a clean
		# slate so a failed parse is detectable.
		_type=""
		_iface=""
		_addr=""
		_mask=""
		jail_extract_address "${_i}"

		# make sure we got an address.
		[ -n "${_addr}" ] || continue

		# Append address to list of addresses for the jail command.
		case "${_addrl}" in
		"")	_addrl="${_addr}" ;;
		*)	_addrl="${_addrl},${_addr}" ;;
		esac

		# Configure interface alias if requested by a given interface
		# and if we could correctly parse everything.
		[ -n "${_iface}" ] || continue
		case "${_type}" in
		inet | inet6)
			;;
		*)	warn "Could not determine address family.  Not going" \
			    "to ${_action} address '${_addr}' for ${_jail}."
			continue
			;;
		esac
		case "${_action}" in
		add)	# ${_addr}${_mask} stays unquoted on purpose: _mask
			# may be " netmask x.x.x.x" and must split into
			# separate ifconfig arguments.
			ifconfig "${_iface}" "${_type}" ${_addr}${_mask} alias
			;;
		del)	# When removing the IP, ignore the _mask.
			ifconfig "${_iface}" "${_type}" ${_addr} -alias
			;;
		esac
	done
}

# jail_ips {add,del}
#	Collect the addresses of the current jail (${_jail}) for the
#	jail command: first the primary ${_ip} setting, then every
#	jail_<jail>_ip_multi<N> entry, counted from 0 up to the first
#	empty one.  Each setting is passed on to jail_handle_ips_option,
#	which also adds or removes interface aliases where an interface
#	is given.  Any other action is rejected with a warning.
#
jail_ips()
{
	local _action
	_action=$1

	if [ "${_action}" != "add" ] && [ "${_action}" != "del" ]; then
		warn "jail_ips: invalid action '${_action}'"
		return
	fi

	# Primary address setting first.
	jail_handle_ips_option ${_action} "${_ip}"

	# Then the numbered jail_xxx_ip_multi<N> settings.
	alias=0
	while eval _x=\"\$jail_${_jail}_ip_multi${alias}\"; [ -n "${_x}" ]; do
		jail_handle_ips_option ${_action} "${_x}"
		alias=$((alias + 1))
	done
}

# jail_prestart
#	Start pre-command: when the jail_parallel_start knob is enabled,
#	set command_args to '&'; otherwise leave it untouched.
#
jail_prestart()
{
	checkyesno jail_parallel_start || return 0
	command_args='&'
}

# jail_start
#	Start every jail named in ${jail_list}.
#
#	First aligns three security.jail.* sysctls with their rc.conf
#	knobs, then, per jail: loads its settings, skips it when
#	/var/run/jail_<name>.id already exists, sets up addresses and
#	file systems, runs the pre-start hooks, starts the jail and, on
#	success, runs the after-start/post-start hooks and records the
#	jail id in /var/run/jail_<name>.id.  On failure the special file
#	systems are unmounted and the addresses removed again.
#
jail_start()
{
	echo -n 'Configuring jails:'
	set_sysctl jail_set_hostname_allow security.jail.set_hostname_allowed \
	    set_hostname_allow
	set_sysctl jail_socket_unixiproute_only \
	    security.jail.socket_unixiproute_only unixiproute_only
	set_sysctl jail_sysvipc_allow security.jail.sysvipc_allowed \
	    sysvipc_allow
	echo '.'

	echo -n 'Starting jails:'
	# Private scratch directory for the captured jail(8) output.
	_tmp_dir=`mktemp -d /tmp/jail.XXXXXXXX` || \
	    err 3 "$name: Can't create temp dir, exiting..."
	for _jail in ${jail_list}
	do
		init_variables $_jail
		# An existing id file is taken to mean the jail is running.
		if [ -f /var/run/jail_${_jail}.id ]; then
			echo -n " [${_hostname} already running (/var/run/jail_${_jail}.id exists)]"
			continue;
		fi
		# jail_ips (via jail_handle_ips_option) appends to _addrl,
		# so it has to start out empty for each jail.
		_addrl=""
		jail_ips "add"
		# Optional setfib(1) prefix for the jail command below.
		if [ -n "${_fib}" ]; then
			_setfib="setfib -F '${_fib}'"
		else
			_setfib=""
		fi
		if checkyesno _mount; then
			info "Mounting fstab for jail ${_jail} (${_fstab})"
			if [ ! -f "${_fstab}" ]; then
				err 3 "$name: ${_fstab} does not exist"
			fi
			jail_mount_fstab
		fi
		if checkyesno _devfs; then
			# If devfs is already mounted here, skip it.
			df -t devfs "${_devdir}" >/dev/null
			if [ $? -ne 0 ]; then
				# NOTE(review): this "continue" happens after
				# jail_ips "add" and the fstab mount above and
				# does not undo them - confirm that is intended.
				if is_symlinked_mountpoint ${_devdir}; then
					warn "${_devdir} has symlink as parent - not starting jail ${_jail}"
					continue
				fi
				info "Mounting devfs on ${_devdir}"
				devfs_mount_jail "${_devdir}" ${_ruleset}
				# Transitional symlink for old binaries
				if [ ! -L "${_devdir}/log" ]; then
					__pwd="`pwd`"
					cd "${_devdir}"
					ln -sf ../var/run/log log
					cd "$__pwd"
				fi
			fi

			# XXX - It seems symlinks don't work when there
			#	is a devfs(5) device of the same name.
			# Jail console output
			#	__pwd="`pwd`"
			#	cd "${_devdir}"
			#	ln -sf ../var/log/console console
			#	cd "$__pwd"
		fi
		# Unlike devfs, a symlinked fdescfs/procfs mount point only
		# skips that mount; the jail is still started.
		if checkyesno _fdescfs; then
			if is_symlinked_mountpoint ${_fdescdir}; then
				warn "${_fdescdir} has symlink as parent, not mounting"
			else
				info "Mounting fdescfs on ${_fdescdir}"
				mount -t fdescfs fdesc "${_fdescdir}"
			fi
		fi
		if checkyesno _procfs; then
			if is_symlinked_mountpoint ${_procdir}; then
				warn "${_procdir} has symlink as parent, not mounting"
			else
				info "Mounting procfs onto ${_procdir}"
				if [ -d "${_procdir}" ] ; then
					mount -t procfs proc "${_procdir}"
				fi
			fi
		fi
		# $$ is this script's PID, so every jail reuses the same
		# capture file; it is removed at the end of each iteration.
		_tmp_jail=${_tmp_dir}/jail.$$

		# Pre-start hooks: run in this shell (i.e. outside the jail),
		# word-split, in index order until the first empty slot.
		# NOTE(review): the loops below rely on "${var:-''}" expanding
		# to an empty string; shells that keep the single quotes
		# literal inside double quotes (e.g. bash) would never see
		# an empty $out - confirm against the target /bin/sh.
		i=0
		while : ; do
			eval out=\"\${_exec_prestart${i}:-''}\"
			[ -z "$out" ] && break
			${out}
			i=$((i + 1))
		done

		# Start the jail.  stdout and stderr are captured together;
		# the code below treats the first captured line as the jail
		# id (presumably printed because of "-i" - confirm with
		# jail(8)) and the remaining lines as console output.
		eval ${_setfib} jail ${_flags} -i ${_rootdir} ${_hostname} \
			\"${_addrl}\" ${_exec_start} > ${_tmp_jail} 2>&1 \
			</dev/null

		if [ "$?" -eq 0 ] ; then
			_jail_id=$(head -1 ${_tmp_jail})
			# After-start hooks run inside the jail through jexec;
			# their numbering starts at 1, not 0.
			i=1
			while : ; do
				eval out=\"\${_exec_afterstart${i}:-''}\"

				if [ -z "$out" ]; then
					break;
				fi

				jexec "${_jail_id}" ${out}
				i=$((i + 1))
			done

			echo -n " $_hostname"
			# Everything after the id line becomes the console log;
			# the id itself is what jail_stop reads back later.
			tail +2 ${_tmp_jail} >${_consolelog}
			echo ${_jail_id} > /var/run/jail_${_jail}.id

			# Post-start hooks run in this shell again.
			i=0
			while : ; do
				eval out=\"\${_exec_poststart${i}:-''}\"
				[ -z "$out" ] && break
				${out}
				i=$((i + 1))
			done
		else
			# Start failed: undo the mounts and addresses and show
			# what the jail command printed.
			jail_umount_fs
			jail_ips "del"
			echo " cannot start jail \"${_jail}\": "
			tail +2 ${_tmp_jail}
		fi
		rm -f ${_tmp_jail}
	done
	rmdir ${_tmp_dir}
	echo '.'
}

# jail_stop
#	Stop every jail named in ${jail_list} that has an id file
#	/var/run/jail_<name>.id.
#
#	For a jail with a non-empty id: run the pre-stop hooks, run
#	${_exec_stop} inside the jail (output appended to the console
#	log), send TERM and, one second later, KILL to the remaining
#	processes, unmount the special file systems and run the
#	post-stop hooks.  In every case the jail's addresses are removed
#	and the id file is deleted.  Jails without an id file are only
#	reported.
#
jail_stop()
{
	local _idfile

	echo -n 'Stopping jails:'
	for _jail in ${jail_list}
	do
		_idfile="/var/run/jail_${_jail}.id"
		if [ ! -f "${_idfile}" ]; then
			echo " cannot stop jail ${_jail}. No jail id in /var/run"
			continue
		fi

		_jail_id=$(cat "${_idfile}")

		# Load this jail's settings unconditionally: jail_ips "del"
		# below needs _ip and _interface of THIS jail even when the
		# id file turns out to be empty, not whatever a previous
		# loop iteration left behind.
		init_variables "${_jail}"

		if [ -n "${_jail_id}" ]; then
			# Pre-stop hooks: run in this shell, word-split, in
			# index order until the first empty slot.
			i=0
			while : ; do
				eval out=\"\${_exec_prestop${i}}\"
				[ -z "$out" ] && break
				${out}
				i=$((i + 1))
			done

			if [ -n "${_exec_stop}" ]; then
				# eval lets _exec_stop carry its own shell quoting.
				eval env -i /usr/sbin/jexec ${_jail_id} ${_exec_stop} \
					>> ${_consolelog} 2>&1
			fi
			# Ask politely first, then force.
			killall -j "${_jail_id}" -TERM > /dev/null 2>&1
			sleep 1
			killall -j "${_jail_id}" -KILL > /dev/null 2>&1
			jail_umount_fs
			echo -n " $_hostname"

			# Post-stop hooks run in this shell as well.
			i=0
			while : ; do
				eval out=\"\${_exec_poststop${i}}\"
				[ -z "$out" ] && break
				${out}
				i=$((i + 1))
			done
		fi
		jail_ips "del"
		rm "${_idfile}"
	done
	echo '.'
}

# Hand the service name to load_rc_config (not defined in this file;
# presumably it loads the rc.conf settings for the service).
load_rc_config $name
# The first argument is the rc command.  Any further arguments replace
# jail_list, so that only the jails named on the command line are
# acted upon.
cmd="$1"
if [ $# -gt 0 ]; then
	shift
fi
if [ -n "$*" ]; then
	jail_list="$*"
fi

# Dispatch the command through run_rc_command (not defined in this file).
run_rc_command "${cmd}"