From: Tim Starling Date: Tue, 5 Feb 2013 04:19:53 +0000 (+1100) Subject: Improve the shell cgroup feature X-Git-Tag: 1.31.0-rc.0~20769^2 X-Git-Url: http://git.cyclocoop.org/%22%20.%20generer_url_ecrire%28%22statistiques_visites%22%2C%22%22%29%20.%20%22?a=commitdiff_plain;h=674962b339795029e0bf404563288a919605bfc6;p=lhc%2Fweb%2Fwiklou.git Improve the shell cgroup feature * Make the cgroup name configurable, so that it can be switched on and off in configuration, or so a different cgroup can be used for different wikis. The DefaultSettings.php entry also makes the feature more discoverable and so more useful to non-Wikimedia users. * Send named parameters to the shell script instead of numbered parameters. Send the command as argument 1, for easier ps -f interpretation. * Move the wrapper script to includes/, since it doesn't need to be executable anymore, so it doesn't make sense for it to be in bin/. * Remove the requirement for a release script to be used. Clean up in the wrapper script instead. The release script had to be configured globally, which might not be suitable for all MediaWiki installations. Change-Id: Ic8362cab63df162852d5df56cf695ef163cd6232 --- diff --git a/RELEASE-NOTES-1.21 b/RELEASE-NOTES-1.21 index 9852162c93..d7666c0e9f 100644 --- a/RELEASE-NOTES-1.21 +++ b/RELEASE-NOTES-1.21 @@ -85,6 +85,9 @@ production. a security fix (bug 42202). * Added the ability to limit the wall clock time used by shell processes, as well as the CPU time. Configurable with $wgMaxShellWallClockTime. +* Allow memory of shell subprocesses to be limited using Linux cgroups + instead of ulimit -v, which tends to cause deadlocks in recent versions + of ImageMagick. Configurable with $wgShellCgroup. * Added $wgWhitelistReadRegexp for regex whitelisting. * (bug 5346) Categories that are redirects will be displayed italic in the category links section at the bottom of a page. diff --git a/bin/ulimit5.sh b/bin/ulimit5.sh deleted file mode 100644 index 36c317650a..0000000000 --- a/bin/ulimit5.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -if [ "$1" -gt 0 ]; then - ulimit -t "$1" -fi -if [ "$2" -gt 0 ]; then - if [ -e /sys/fs/cgroup/memory/mediawiki/job/ ]; then - mkdir -m 0700 /sys/fs/cgroup/memory/mediawiki/job/$$ - echo $$ > /sys/fs/cgroup/memory/mediawiki/job/$$/tasks - echo "1" > /sys/fs/cgroup/memory/mediawiki/job/$$/notify_on_release - #memory - echo $(($2*1024)) > /sys/fs/cgroup/memory/mediawiki/job/$$/memory.limit_in_bytes - #memory+swap - echo $(($2*1024)) > /sys/fs/cgroup/memory/mediawiki/job/$$/memory.memsw.limit_in_bytes - fi - ulimit -v "$2" -fi -if [ "$3" -gt 0 ]; then - ulimit -f "$3" -fi -if [ "$4" -gt 0 -a -x "/usr/bin/timeout" ]; then - /usr/bin/timeout $4 /bin/bash -c "$5" - STATUS="$?" - if [ "$STATUS" == 124 ]; then - echo "ulimit5.sh: timed out." 1>&2 - fi - exit "$STATUS" -else - eval "$5" -fi diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 68c4542ce8..04a90047e5 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -6186,6 +6186,31 @@ $wgMaxShellTime = 180; */ $wgMaxShellWallClockTime = 180; +/** + * Under Linux: a cgroup directory used to constrain memory usage of shell + * commands. The directory must be writable by the user which runs MediaWiki. + * + * If specified, this is used instead of ulimit, which is inaccurate, and + * causes malloc() to return NULL, which exposes bugs in C applications, making + * them segfault or deadlock. + * + * A wrapper script will create a cgroup for each shell command that runs, as + * a subgroup of the specified cgroup. If the memory limit is exceeded, the + * kernel will send a SIGKILL signal to a process in the subgroup. + * + * @par Example: + * @code + * mkdir -p /sys/fs/cgroup/memory/mediawiki + * mkdir -m 0777 /sys/fs/cgroup/memory/mediawiki/job + * echo '$wgShellCgroup = "/sys/fs/cgroup/memory/mediawiki/job";' >> LocalSettings.php + * @endcode + * + * The reliability of cgroup cleanup can be improved by installing a + * notify_on_release script in the root cgroup, see e.g. + * https://gerrit.wikimedia.org/r/#/c/40784 + */ +$wgShellCgroup = false; + /** * Executable path of the PHP cli binary (php/php5). Should be set up on install. */ diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index c57018ef98..ce52c74c20 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -2776,7 +2776,7 @@ function wfEscapeShellArg( ) { */ function wfShellExec( $cmd, &$retval = null, $environ = array(), $limits = array() ) { global $IP, $wgMaxShellMemory, $wgMaxShellFileSize, $wgMaxShellTime, - $wgMaxShellWallClockTime; + $wgMaxShellWallClockTime, $wgShellCgroup; static $disabled; if ( is_null( $disabled ) ) { @@ -2835,8 +2835,15 @@ function wfShellExec( $cmd, &$retval = null, $environ = array(), $limits = array $filesize = intval ( isset( $limits['filesize'] ) ? $limits['filesize'] : $wgMaxShellFileSize ); if ( $time > 0 || $mem > 0 || $filesize > 0 || $wallTime > 0 ) { - $cmd = '/bin/bash ' . escapeshellarg( "$IP/bin/ulimit5.sh" ) . - " $time $mem $filesize $wallTime " . escapeshellarg( $cmd ); + $cmd = '/bin/bash ' . escapeshellarg( "$IP/includes/limit.sh" ) . ' ' . + escapeshellarg( $cmd ) . ' ' . + escapeshellarg( + "MW_CPU_LIMIT=$time; " . + 'MW_CGROUP=' . escapeshellarg( $wgShellCgroup ) . '; ' . + "MW_MEM_LIMIT=$mem; " . + "MW_FILE_SIZE_LIMIT=$filesize; " . + "MW_WALL_CLOCK_LIMIT=$wallTime" + ); } } wfDebug( "wfShellExec: $cmd\n" ); diff --git a/includes/limit.sh b/includes/limit.sh new file mode 100644 index 0000000000..44b9edc272 --- /dev/null +++ b/includes/limit.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# +# Resource limiting wrapper for command execution +# +# Why is this in shell script? Because bash has a setrlimit() wrapper +# and is available on most Linux systems. If Perl was distributed with +# BSD::Resource included, we would happily use that instead, but it isn't. + +MW_CPU_LIMIT=0 +MW_CGROUP= +MW_MEM_LIMIT=0 +MW_FILE_SIZE_LIMIT=0 +MW_WALL_CLOCK_LIMIT=0 + +# Override settings +eval "$2" + +if [ "$MW_CPU_LIMIT" -gt 0 ]; then + ulimit -t "$MW_CPU_LIMIT" +fi +if [ "$MW_MEM_LIMIT" -gt 0 ]; then + if [ -n "$MW_CGROUP" ]; then + # Create cgroup + if ! mkdir -m 0700 "$MW_CGROUP"/$$; then + echo "limit.sh: failed to create the cgroup." 1>&2 + exit 1 + fi + echo $$ > "$MW_CGROUP"/$$/tasks + if [ -n "$MW_CGROUP_NOTIFY" ]; then + echo "1" > "$MW_CGROUP"/$$/notify_on_release + fi + # Memory + echo $(($MW_MEM_LIMIT*1024)) > "$MW_CGROUP"/$$/memory.limit_in_bytes + # Memory+swap + echo $(($MW_MEM_LIMIT*1024)) > "$MW_CGROUP"/$$/memory.memsw.limit_in_bytes + else + ulimit -v "$MW_MEM_LIMIT" + fi +fi +if [ "$MW_FILE_SIZE_LIMIT" -gt 0 ]; then + ulimit -f "$MW_FILE_SIZE_LIMIT" +fi +if [ "$MW_WALL_CLOCK_LIMIT" -gt 0 -a -x "/usr/bin/timeout" ]; then + /usr/bin/timeout $MW_WALL_CLOCK_LIMIT /bin/bash -c "$1" + STATUS="$?" + if [ "$STATUS" == 124 ]; then + echo "limit.sh: timed out." 1>&2 + fi +else + eval "$1" + STATUS="$?" +fi + +# Clean up cgroup +cleanup() { + # First we have to move the current task into a "garbage" group, otherwise + # the cgroup will not be empty, and attempting to remove it will fail with + # "Device or resource busy" + if [ -w "$MW_CGROUP"/tasks ]; then + GARBAGE="$MW_CGROUP" + else + GARBAGE="$MW_CGROUP"/garbage-"$USER" + if [ ! -e "$GARBAGE" ]; then + mkdir -m 0700 "$GARBAGE" + fi + fi + echo $BASHPID > "$GARBAGE"/tasks + + # Suppress errors in case the cgroup has disappeared due to a release script + rmdir "$MW_CGROUP"/$$ 2>/dev/null +} + +updateTaskCount() { + # There are lots of ways to count lines in a file in shell script, but this + # is one of the few that doesn't create another process, which would + # increase the returned number of tasks. + readarray < "$MW_CGROUP"/$$/tasks + NUM_TASKS=${#MAPFILE[*]} +} + +if [ -n "$MW_CGROUP" ]; then + updateTaskCount + + if [ $NUM_TASKS -gt 1 ]; then + # Spawn a monitor process which will continue to poll for completion + # of all processes in the cgroup after termination of the parent shell + ( + while [ $NUM_TASKS -gt 1 ]; do + sleep 10 + updateTaskCount + done + cleanup + ) >&/dev/null < /dev/null & + disown -a + else + cleanup + fi +fi +exit "$STATUS" +