[arch-projects] [INITSCRIPTS][PATCH 6/6] Introduce cgroups

Dave Reisner d at falconindy.com
Mon Jun 11 18:01:41 EDT 2012


On Mon, Jun 11, 2012 at 11:10:40PM +0200, Sébastien Luttringer wrote:
> This patch use Linux kernel cgroups to manage daemons. No controler is used.
> Cgroups are only used to track launched process and prevent escape.
> 
> Notable changes:
> ----------------
> 
> - /run/daemons have switched to /sys/fs/cgroups/initscripts/daemons
>   We now embded running dameon information into cgroup filesystem directly. No
>   need to maintain both hierarchies.
> 
> - DAEMONS array in rc.conf understand a new modifier: -
>   Some daemons can be launched out of its cgroup container. This will avoid
>   stopping it remove all its children (e.g: sshd)
> 
> - rc scripts must be launched with rc.d tool.
>   Launching daemon directly from /etc/rc.d/x inherits current environment and
>   it may differ from environment at startup and leads to errors.
>   Here is one stone hits two. We clean environment and we use this to auto cgroup
>   launched daemons without need to update each rc script.
> 
> - hooks calling directly rc scripts will fail (e.g: sshd). They must be fixed to
>   use proper initscripts functions (run_daemon ssh stop).
> 
> - rc.d tools now handle same syntax as DAEMONS array. By example,
>   rc.d start @sshd or rc.d start -- -sshd
> 
> This patch should be fully backward compatible with existant rc.d scripts but
> not with hooks.

So what do people who compile their own kernels without cgroups do?

Unless I'm missing something, you get a huge NACK from me based on the
way you're shutting down processes. Comments inline.

> ---
>  Makefile        |    2 -
>  bash-completion |    4 +-
>  functions       |  123 +++++++++++++++++++++++++++++++++++++++++++------------
>  rc.conf.5.txt   |    1 +
>  rc.d            |   49 +++++++++++-----------
>  rc.d.8.txt      |    6 +++
>  rc.multi        |    6 +--
>  rc.single       |    2 +-
>  rc.sysinit      |    7 +++-
>  tmpfiles.conf   |    5 ---
>  zsh-completion  |    4 +-
>  11 files changed, 139 insertions(+), 70 deletions(-)
>  delete mode 100644 tmpfiles.conf
> 
> diff --git a/Makefile b/Makefile
> index 825eb11..cbde5b2 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -6,7 +6,6 @@ DIRS := \
>  	/etc/rc.d/functions.d \
>  	/etc/logrotate.d \
>  	/etc/profile.d \
> -	/usr/lib/tmpfiles.d \
>  	/usr/sbin \
>  	/usr/share/bash-completion/completions \
>  	/usr/share/zsh/site-functions \
> @@ -28,7 +27,6 @@ install: installdirs doc
>  	install -m755 -t $(DESTDIR)/usr/sbin rc.d
>  	install -m644 -t $(DESTDIR)/usr/share/man/man5 rc.conf.5
>  	install -m644 -t $(DESTDIR)/usr/share/man/man8 rc.d.8
> -	install -m644 tmpfiles.conf $(DESTDIR)/usr/lib/tmpfiles.d/initscripts.conf
>  	install -m644 -T bash-completion $(DESTDIR)/usr/share/bash-completion/completions/rc.d
>  	install -m644 -T zsh-completion $(DESTDIR)/usr/share/zsh/site-functions/_rc.d
>  
> diff --git a/bash-completion b/bash-completion
> index 4b4593b..dd8b006 100644
> --- a/bash-completion
> +++ b/bash-completion
> @@ -12,9 +12,9 @@ _rc_d()
>  	elif [[ "$arg" == help ]]; then
>  		COMPREPLY=()
>  	elif [[ "$arg" == start ]]; then
> -		COMPREPLY=($(comm -23 <(cd /etc/rc.d && compgen -f -X 'functions*' "$cur"|sort) <(cd /run/daemons/ && compgen -f "$cur"|sort)))
> +		COMPREPLY=($(comm -23 <(cd /etc/rc.d && compgen -f -X 'functions*' "$cur"|sort) <(cd /sys/fs/cgroup/initscripts/daemons/ && compgen -d "$cur"|sort)))
>  	elif [[ "$arg" =~ stop|restart|reload ]]; then
> -		COMPREPLY=($(cd /run/daemons/ && compgen -f "$cur"|sort))
> +		COMPREPLY=($(cd /sys/fs/cgroup/initscripts/daemons/ && compgen -d "$cur"|sort))
>  	else
>  		COMPREPLY=($(compgen -W "${options} $(cd /etc/rc.d && compgen -f -X 'functions*')" -- "$cur"))
>  	fi
> diff --git a/functions b/functions
> index fd52317..0669810 100644
> --- a/functions
> +++ b/functions
> @@ -11,14 +11,6 @@ localevars=(LANG LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY
>  
>  vconsolevars=(KEYMAP KEYMAP_TOGGLE FONT FONT_MAP FONT_UNIMAP)
>  
> -if [[ $1 == "start" ]]; then
> -	if [[ $STARTING ]]; then
> -		echo "A daemon is starting another daemon, this is unlikely to work as intended."
> -	else
> -		export STARTING=1
> -	fi
> -fi
> -
>  # width:
>  calc_columns () {
>  	STAT_COL=80
> @@ -198,44 +190,116 @@ in_array() {
>  
>  # daemons:
>  
> +# Deprecated function used by daemon scripts to tell a daemon is running

"to tell if a daemon is running"

>  add_daemon() {
> -	[[ -d /run/daemons ]] || mkdir -p /run/daemons
> -	>| /run/daemons/"$1"
> +	true
>  }

>  
> +# Deprecated function used by daemon scripts to tell a daemon is no more running

"to tell if a daemon is not running"

>  rm_daemon() {
> -	rm -f /run/daemons/"$1"
> +	true
> +}
> +
> +# Return array with daemon name and prefix in fixed order
> +# $1 daemon name with prefix
> +# return ('daemon' '@' '-' '!')
> +strip_daemon() {
> +	local name=$1
> +	while (( ${#name} > 0 )); do
> +		case ${name:0:1} in
> +			'!'|'@'|'-') name=${name:1};;

No need for a while loop here just to trim a string...

  name=${name##+(!|@|-)}

> +			*) break;;
> +		esac
> +	done
> +	echo $name

Quote, please.

>  }
>  
> +# Return 1 if daemon $1 is running, else 0
> +# Daemons started without running process return 1
> +# Keep check on /run/daemons/$1 (useful until first user reboot)
>  ck_daemon() {
> -	[[ ! -f /run/daemons/$1 ]]
> +	[[ ! -d /sys/fs/cgroup/initscripts/daemons/$1 && ! -f /run/daemons/$1 ]]
>  }
>  
> -# Check if $1 is a valid daemon name
> +# Return 0 if $1 is a valid daemon name
>  have_daemon() {
>  	[[ -f /etc/rc.d/$1 && -x /etc/rc.d/$1 ]]
>  }
>  
>  # Check if $1 is started at boot
> +# Return 1 if autostarted, otherwise 0
>  ck_autostart() {
>  	local daemon
>  	for daemon in "${DAEMONS[@]}"; do
> -		[[ $1 = "${daemon#@}" ]] && return 1
> +		[[ $daemon =~ ^[@-]*${1}$ ]] && return 1

No need for a regex.

  [[ $daemon = *([@-])$1 ]] && return 1

>  	done
>  	return 0
>  }
>  
> -start_daemon() {
> -	have_daemon "$1" && /etc/rc.d/"$1" start
> +# Run action $2 for daemon $1
> +# This function understand prefixes: ! @ -

understands

> +run_daemon() {
> +	local background=0
> +	local cgroup=1
> +	local name=$1
> +	while (( ${#name} > 0 )); do
> +		case ${name:0:1} in
> +			'!') return 1;;
> +			'@') background=1;;
> +			'-') cgroup=0;;
> +			*) break;;
> +		esac
> +		name=${name:1}
> +	done
> +	have_daemon "$name" || return 1
> +	# start action register daemon
> +	if [[ $2 == start ]]; then
> +		if [[ -d /sys/fs/cgroup/initscripts/daemons ]]; then
> +			mkdir -p -m755 /sys/fs/cgroup/initscripts/daemons/"$name"
> +		elif [[ -d /run/daemons ]]; then
> +			>| /run/daemons/"$name"
> +		fi
> +	fi
> +	# daemon script stuff
> +	(
> +		# use cgroup jail
> +		(( $cgroup )) && [[ -w "/sys/fs/cgroup/initscripts/daemons/$name/tasks" ]] &&

Don't force this expansion (( cgroup )) is fine. The quoting isn't
needed either.

> +			echo $BASHPID > "/sys/fs/cgroup/initscripts/daemons/$name/tasks"
> +		# execute script
> +		if (( $background )); then

Same here.

> +			stat_bkgd "${2^} $name"

This is the only place you sentence case a daemon name in the output.
Intentional?

> +			exec_daemon /etc/rc.d/"$name" "$2" >/dev/null &
> +		else
> +			exec_daemon /etc/rc.d/"$name" "$2"
> +		fi
> +	)
> +	# stop action unregister and cleanup
> +	if [[ $2 == stop ]]; then
> +		kill_daemon "$1"

So that's it? We just call kill daemon here and don't even honor the
script's own stop function anymore?

> +		rmdir "/sys/fs/cgroup/initscripts/daemons/$1" 2>/dev/null
> +		# remove legacy file (useful until first user reboot)
> +		rm -f "/run/daemons/$1" 2>/dev/null
> +	fi
>  }
>  
> -start_daemon_bkgd() {
> -	stat_bkgd "Starting $1"
> -	(start_daemon "$1") >/dev/null &
> +# Execute a daemon script code properly

'script code' is rather redundant.

  "Execute a script with a clean environment"

> +# This means with a clean environment and under the root directory
> +exec_daemon() {
> +	ENV=('PATH=/bin:/usr/bin:/sbin:/usr/sbin'
> +		'CONSOLE=/dev/console'
> +		'TERM=linux'
> +		'INITSCRIPTS_LAUNCHER=1')
> +	cd /
> +	exec env -i "${ENV[@]}" bash -l "$@"
>  }
>  
> -stop_daemon() {
> -	have_daemon "$1" && /etc/rc.d/"$1" stop
> +# use cgroup to kill all process runned by dameon $1

"run by daemon"

> +kill_daemon() {
> +	[[ -r /sys/fs/cgroup/initscripts/daemons/$1/tasks ]] || return
> +	d=$(< "/sys/fs/cgroup/initscripts/daemons/$1/tasks")
> +	[[ -n $d ]] && { kill -15 $d &>/dev/null; sleep .25; } || return

OCD: Is it really too much to ask to write this as a proper if/then?

After only 1/4 of a second you send the death touch onto all tasks? Go
take a look at a service like squid or mysql that can survive at _least_
5-10s after receiving a TERM. What about services that don't respond to
a TERM, but would exit on a HUP? They're going to be invariably death
touched. This is way too simple of a heuristic to be healthy.

> +	d=$(< "/sys/fs/cgroup/initscripts/daemons/$1/tasks")
> +	[[ -n $d ]] && kill -9 $d &>/dev/null
>  }
>  
>  # Status functions
> @@ -277,15 +341,15 @@ add_omit_pids() {
>  }
>  
>  # Stop all daemons
> -# This function should *never* ever perform any other actions beside calling stop_daemon()!
> +# This function should *never* ever perform any other actions beside stoping daemons

stopping

>  # It might be used by a splash system etc. to get a list of daemons to be stopped.
>  stop_all_daemons() {
>  	# Find daemons NOT in the DAEMONS array. Shut these down first
>  	local daemon
> -	for daemon in /run/daemons/*; do
> -		[[ -f $daemon ]] || continue
> +	for daemon in /sys/fs/cgroup/initscripts/daemons/*; do
> +		[[ -d $daemon ]] || continue
>  		daemon=${daemon##*/}
> -		ck_autostart "$daemon" && stop_daemon "$daemon"
> +		ck_autostart "$daemon" && run_daemon "$daemon" stop
>  	done
>  
>  	# Shutdown daemons in reverse order
> @@ -293,7 +357,7 @@ stop_all_daemons() {
>  	for (( i=${#DAEMONS[@]}-1; i>=0; i-- )); do
>  		[[ ${DAEMONS[i]} = '!'* ]] && continue
>  		daemon=${DAEMONS[i]#@}
> -		ck_daemon "$daemon" || stop_daemon "$daemon"
> +		ck_daemon "$daemon" || run_daemon "$daemon" stop
>  	done
>  }
>  
> @@ -696,5 +760,12 @@ for f in /etc/rc.d/functions.d/*; do
>  	[[ -e $f ]] && . "$f"
>  done
>  
> +# initscripts must not be called directly
> +if [[ $0 =~ /etc/rc.d/* && -z $INITSCRIPTS_LAUNCHER ]]; then

This doesn't match what you want, nor is $0 a reliable way to check the
path of a script. The regex as you have it will match "/etc/rcdd". You
really just wanted a glob here.

> +	printf "${C_FAIL}You have to run your initscript with rc.d tools${C_CLEAR}\n" >&2

"Daemons must be controlled via rc.d"

> +	printf "e.g: ${C_MAIN}rc.d ${1:-start} ${0##*/}${C_CLEAR}\n" >&2

Magic help messages which change text? Can't we just generalize it?

> +	exit 42
> +fi
> +
>  # End of file
>  # vim: set ts=2 sw=2 noet:
> diff --git a/rc.conf.5.txt b/rc.conf.5.txt
> index 726ba23..a102f4c 100644
> --- a/rc.conf.5.txt
> +++ b/rc.conf.5.txt
> @@ -210,6 +210,7 @@ DAEMONS[[D]]
>  Daemons to start at boot-up (in this order)
>  	- prefix a daemon with a ! to disable it
>  	- prefix a daemon with a @ to start it up in the background
> +	- prefix a daemon with a - to start it outside a cgroup
>  
>  If you are sure nothing else touches your hardware clock (such as ntpd or
>  a dual-boot), you might want to enable 'hwclock'. Note that this will only
> diff --git a/rc.d b/rc.d
> index 77852d7..fad1e18 100755
> --- a/rc.d
> +++ b/rc.d
> @@ -23,6 +23,8 @@ e.g: $name list
>       $name list sshd gpm
>       $name list --started gpm
>       $name start sshd gpm
> +     $name start -- -sshd
> +     $name start -- @acpid -sshd gpm
>       $name help
>  EOF
>  	exit ${1:-1}
> @@ -30,8 +32,7 @@ EOF
>  
>  # filter list of daemons
>  filter_daemons() {
> -	local -a new_daemons=()
> -	for daemon in "${daemons[@]}"; do
> +	for daemon in "${!daemons[@]}"; do
>  		# check if daemons is valid
>  		if ! have_daemon "$daemon"; then
>  			printf "${C_FAIL}:: ${C_DONE}Daemon script ${C_FAIL}${daemon}${C_DONE} does \
> @@ -39,13 +40,11 @@ not exist or is not executable.${C_CLEAR}\n" >&2
>  			exit 2
>  		fi
>  		# check filter
> -		(( ${filter[started]} )) && ck_daemon "$daemon" && continue
> -		(( ${filter[stopped]} )) && ! ck_daemon "$daemon" && continue
> -		(( ${filter[auto]} )) && ck_autostart "$daemon" && continue
> -		(( ${filter[noauto]} )) && ! ck_autostart "$daemon" && continue
> -		new_daemons+=("$daemon")
> +		(( ${filter[started]} )) && ck_daemon "$daemon" && unset daemons["$daemon"] && continue
> +		(( ${filter[stopped]} )) && ! ck_daemon "$daemon" && unset daemons["$daemon"] && continue
> +		(( ${filter[auto]} )) && ck_autostart "$daemon" && unset daemons["$daemon"] && continue
> +		(( ${filter[noauto]} )) && ! ck_autostart "$daemon" && unset daemons["$daemon"] && continue
>  	done
> -	daemons=("${new_daemons[@]}")
>  }
>  
>  (( $# < 1 )) && usage
> @@ -53,7 +52,7 @@ not exist or is not executable.${C_CLEAR}\n" >&2
>  # ret store the return code of rc.d
>  declare -i ret=0
>  # daemons store daemons on which action will be executed
> -declare -a daemons=()
> +declare -A daemons=()
>  # filter store current filter mode
>  declare -A filter=([started]=0 [stopped]=0 [auto]=0 [noauto]=0)
>  
> @@ -80,7 +79,7 @@ shift
>  
>  # get initial daemons list
>  for daemon; do
> -	daemons+=("$daemon")
> +	daemons[$(strip_daemon "$daemon")]=$daemon
>  done
>  
>  # going into script directory
> @@ -91,10 +90,15 @@ case $action in
>  		usage 0 2>&1
>  	;;
>  	list)
> -		# list take all daemons by default
> -		[[ -z $daemons ]] && for d in *; do have_daemon "$d" && daemons+=("$d"); done
> -		filter_daemons
> -		for daemon in "${daemons[@]}"; do
> +		# list take all daemons if not specified
> +		if (( ${#daemons[*]} == 0)); then
> +			for d in *; do
> +				have_daemon "$d" && daemons[$(strip_daemon "$d")]=$daemon
> +			done
> +		fi
> +		# use filters if needed
> +		(( ${#filter[*]} > 0 )) && filter_daemons
> +		for daemon in "${!daemons[@]}"; do
>  			# print running / stopped satus
>  			if ! ck_daemon "$daemon"; then
>  				s_status="${C_OTHER}[${C_DONE}STARTED${C_OTHER}]"
> @@ -112,18 +116,11 @@ case $action in
>  	;;
>  	*)
>  		# other actions need an explicit daemons list
> -		[[ -z $daemons ]] && usage
> -		filter_daemons
> -		# set same environment variables as init
> -		runlevel=$(/sbin/runlevel)
> -		ENV=('PATH=/bin:/usr/bin:/sbin:/usr/sbin'
> -			"PREVLEVEL=${runlevel%% *}"
> -			"RUNLEVEL=${runlevel##* }"
> -			"CONSOLE=${CONSOLE:-/dev/console}"
> -			"TERM=$TERM")
> -		cd /
> -		for daemon in "${daemons[@]}"; do
> -			env -i "${ENV[@]}" "/etc/rc.d/$daemon" "$action"
> +		(( ${#daemons[*]} == 0 )) && usage
> +		# use filters if needed
> +		(( ${#filter[*]} > 0 )) && filter_daemons
> +		for daemon in "${!daemons[@]}"; do
> +			run_daemon "${daemons[$daemon]}" "$action"
>  			(( ret += !! $? ))  # clamp exit value to 0/1
>  		done
>  	;;
> diff --git a/rc.d.8.txt b/rc.d.8.txt
> index 0f35884..3f7e41f 100644
> --- a/rc.d.8.txt
> +++ b/rc.d.8.txt
> @@ -72,6 +72,12 @@ Examples[[E]]
>  *rc.d start sshd gpm*::
>  	Starts *sshd* and *gpm* scripts.
>  
> +*rc.d start -- -sshd*::
> +	Starts *sshd* without cgroup jaling.
> +
> +*rc.d start -- @-sshd @acpid @gpm*::
> +	Starts *sshd*, *acpid* and *gpm* in background.
> +
>  *rc.d stop crond*::
>  	Stops the *crond* script.
>  
> diff --git a/rc.multi b/rc.multi
> index d558753..d747113 100755
> --- a/rc.multi
> +++ b/rc.multi
> @@ -16,11 +16,7 @@ run_hook multi_start
>  
>  # Start daemons
>  for daemon in "${DAEMONS[@]}"; do
> -	case ${daemon:0:1} in
> -		'!') continue;;     # Skip this daemon.
> -		'@') start_daemon_bkgd "${daemon#@}";;
> -		*)   start_daemon "$daemon";;
> -	esac
> +	run_daemon "$daemon" start
>  done
>  
>  [[ -x /etc/rc.local ]] && /etc/rc.local
> diff --git a/rc.single b/rc.single
> index aec026c..ec343fe 100755
> --- a/rc.single
> +++ b/rc.single
> @@ -20,7 +20,7 @@ if [[ $PREVLEVEL != N ]]; then
>  
>  	# Start/trigger UDev, load MODULES and settle UDev
>  	udevd_modprobe single
> -	
> +
>  	# Removing leftover files
>  	remove_leftover
>  fi
> diff --git a/rc.sysinit b/rc.sysinit
> index 12339b6..bba57b5 100755
> --- a/rc.sysinit
> +++ b/rc.sysinit
> @@ -12,7 +12,7 @@ printhl "${C_H2}http://www.archlinux.org"
>  printsep
>  
>  # mount the api filesystems
> -# /proc, /sys, /run, /dev, /run/lock, /dev/pts, /dev/shm
> +# /proc, /sys, /run, /dev, /run/lock, /dev/pts, /dev/shm, /sys/fs/cgroup/initscripts
>  mountpoint -q /proc    || mount -t proc proc /proc -o nosuid,noexec,nodev
>  mountpoint -q /sys     || mount -t sysfs sys /sys -o nosuid,noexec,nodev
>  mountpoint -q /run     || mount -t tmpfs run /run -o mode=0755,nosuid,nodev
> @@ -25,6 +25,11 @@ mountpoint -q /dev/shm || mount /dev/shm &>/dev/null ||
>  	mount -t tmpfs shm /dev/shm -o mode=1777,nosuid,nodev
>  mountpoint -q /proc/sys/fs/binfmt_misc || mount /proc/sys/fs/binfmt_misc &>/dev/null ||
>  	mount -t binfmt_misc binfmt /proc/sys/fs/binfmt_misc
> +mountpoint -q /sys/fs/cgroup ||
> +	mount -n -t tmpfs cgroup /sys/fs/cgroup -o mode=0755,nosuid,noexec,nodev
> +mkdir -p /sys/fs/cgroup/initscripts
> +mount -t cgroup -o none,name=initscripts cgroup /sys/fs/cgroup/initscripts
> +mkdir -p /sys/fs/cgroup/initscripts/daemons
>  
>  if [[ ! -e /run/initramfs/fsck-root ]]; then
>  	# remount root ro to allow for fsck later on, we remount now to
> diff --git a/tmpfiles.conf b/tmpfiles.conf
> deleted file mode 100644
> index 8f99a99..0000000
> --- a/tmpfiles.conf
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#
> -# /usr/lib/tmpfiles.d/arch.conf
> -#
> -
> -d /run/daemons      0755 root root -
> diff --git a/zsh-completion b/zsh-completion
> index d860e51..64cbc2a 100644
> --- a/zsh-completion
> +++ b/zsh-completion
> @@ -18,10 +18,10 @@ _rc.d () {
>  					_arguments "*: :"
>  					;;
>  				start)
> -					_arguments "*: :($(comm -23 <(echo /etc/rc.d/*(N-*:t)|tr ' ' '\n') <(echo /run/daemons/*(N:t)|tr ' ' '\n')))"
> +					_arguments "*: :($(comm -23 <(echo /etc/rc.d/*(N-*:t)|tr ' ' '\n') <(echo /sys/fs/cgroup/initscripts/daemons/*(N/:t)|tr ' ' '\n')))"
>  					;;
>  				stop|restart|reload)
> -					_arguments "*: :(/run/daemons/*(N:t))"
> +					_arguments "*: :(/sys/fs/cgroup/initscripts/daemons/*(N/:t))"
>  					;;
>  				*)
>  					_arguments "*: :(/etc/rc.d/*(N-*:t))"
> -- 
> Sebastien "Seblu" Luttringer
> 


More information about the arch-projects mailing list