From 0eeabce5917d5197c01acac2f8c8070918d2e53b Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Wed, 24 Oct 2012 15:00:20 -0700 Subject: [PATCH] sched: add sysctl for controlling task migrations on wake The PF_WAKE_UP_IDLE per-task flag made it impossible to enable the old behavior of SD_SHARE_PKG_RESOURCES, where every task migrates to an idle CPU on wakeup. The sched_wake_to_idle sysctl value, when made nonzero, will cause all tasks to migrate to an idle CPU if one is available when the task is woken up. This is regardless of how PF_WAKE_UP_IDLE is configured for tasks in the system. Similar to PF_WAKE_UP_IDLE, the SD_SHARE_PKG_RESOURCES scheduler domain flag must be enabled for the sysctl value to have an effect. Change-Id: I23bed846d26502c7aed600bfcf1c13053a7e5f61 Signed-off-by: Steve Muckle (cherry picked from commit 9d5b38dc0025d19df5b756b16024b4269e73f282) Conflicts: kernel/sched/fair.c --- include/linux/sched.h | 1 + kernel/sched/fair.c | 11 ++++++++++- kernel/sysctl.c | 7 +++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 095f5e62cbe..fe0d9d92c70 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2038,6 +2038,7 @@ extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; +extern unsigned int sysctl_sched_wake_to_idle; enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9885a9ea54a..b1750730306 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -76,6 +76,14 @@ static unsigned int sched_nr_latency = 8; */ unsigned int sysctl_sched_child_runs_first __read_mostly; +/* + * Controls whether, when SD_SHARE_PKG_RESOURCES is on, all + * tasks go to idle CPUs when woken. 
If this is off, note that the + * per-task flag PF_WAKE_UP_IDLE can still cause a task to go to an + * idle CPU upon being woken. + */ +unsigned int __read_mostly sysctl_sched_wake_to_idle; + /* * SCHED_OTHER wake-up granularity. * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) @@ -2654,7 +2662,8 @@ static int select_idle_sibling(struct task_struct *p, int target) if (target == prev_cpu && idle_cpu(prev_cpu)) return prev_cpu; - if (!(current->flags & PF_WAKE_UP_IDLE) && + if (!sysctl_sched_wake_to_idle && + !(current->flags & PF_WAKE_UP_IDLE) && !(p->flags & PF_WAKE_UP_IDLE)) return target; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ab11879aeb..0b2180b5338 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -263,6 +263,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sched_wake_to_idle", + .data = &sysctl_sched_wake_to_idle, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_SCHED_DEBUG { .procname = "sched_min_granularity_ns",