-
-
Save mitake/9fdbc7ebfbf64b4b6602 to your computer and use it in GitHub Desktop.
Borrowed Virtual Time patch against Linux 3.5.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/include/linux/sched.h b/include/linux/sched.h | |
index 4a1f493..ef258ef 100644 | |
--- a/include/linux/sched.h | |
+++ b/include/linux/sched.h | |
@@ -1179,6 +1179,10 @@ struct sched_entity { | |
u64 exec_start; | |
u64 sum_exec_runtime; | |
u64 vruntime; | |
+#ifdef CONFIG_CFS_BVT | |
+ u64 effective_vruntime; | |
+ unsigned int is_warped; | |
+#endif | |
u64 prev_sum_exec_runtime; | |
u64 nr_migrations; | |
@@ -2061,6 +2065,10 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { } | |
extern unsigned int sysctl_sched_cfs_bandwidth_slice; | |
#endif | |
+#ifdef CONFIG_CFS_BVT | |
+extern unsigned int sysctl_sched_bvt_place_epsilon; | |
+#endif | |
+ | |
#ifdef CONFIG_RT_MUTEXES | |
extern int rt_mutex_getprio(struct task_struct *p); | |
extern void rt_mutex_setprio(struct task_struct *p, int prio); | |
diff --git a/init/Kconfig b/init/Kconfig | |
index d07dcf9..b0ac6b3 100644 | |
--- a/init/Kconfig | |
+++ b/init/Kconfig | |
@@ -787,6 +787,20 @@ config CFS_BANDWIDTH | |
restriction. | |
See tip/Documentation/scheduler/sched-bwc.txt for more information. | |
+config CFS_BVT | |
+ bool "Borrowed Virtual Time (BVT) support for CFS" | |
+ depends on EXPERIMENTAL | |
+ depends on FAIR_GROUP_SCHED | |
+ default n | |
+ help | |
+ This feature enables BVT extensions to CFS. This feature | |
+ allows you to bias the wakeup preemption decisions that the | |
+ CFS scheduler makes. For instance, you can guarantee that | |
+ latency-sensitive task groups are not preempted by waking | |
+ batch-oriented task groups, independent of how much CPU share | |
+ you allocate to each group. | |
+ FIXME: More documentation. | |
+ | |
config RT_GROUP_SCHED | |
bool "Group scheduling for SCHED_RR/FIFO" | |
depends on EXPERIMENTAL | |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c | |
index 468bdd4..497d946 100644 | |
--- a/kernel/sched/core.c | |
+++ b/kernel/sched/core.c | |
@@ -1711,6 +1711,10 @@ static void __sched_fork(struct task_struct *p) | |
p->se.prev_sum_exec_runtime = 0; | |
p->se.nr_migrations = 0; | |
p->se.vruntime = 0; | |
+#ifdef CONFIG_CFS_BVT | |
+ p->se.effective_vruntime = 0; | |
+ p->se.is_warped = 0; | |
+#endif | |
INIT_LIST_HEAD(&p->se.group_node); | |
#ifdef CONFIG_SCHEDSTATS | |
@@ -7284,6 +7288,9 @@ void __init sched_init(void) | |
*/ | |
init_cfs_bandwidth(&root_task_group.cfs_bandwidth); | |
init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); | |
+#ifdef CONFIG_CFS_BVT | |
+ root_task_group.bvt_warp_ns = 0; | |
+#endif /* CONFIG_CFS_BVT */ | |
#endif /* CONFIG_FAIR_GROUP_SCHED */ | |
rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; | |
@@ -8217,6 +8224,23 @@ static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft, | |
return 0; | |
} | |
#endif /* CONFIG_CFS_BANDWIDTH */ | |
+ | |
+#ifdef CONFIG_CFS_BVT | |
+static int cpu_bvt_warp_write_s64(struct cgroup *cgrp, | |
+ struct cftype *cftype, s64 warp_ns) | |
+{ | |
+ struct task_group *tg = cgroup_tg(cgrp); | |
+ tg->bvt_warp_ns = warp_ns; | |
+ return 0; | |
+} | |
+ | |
+static s64 cpu_bvt_warp_read_s64(struct cgroup *cgrp, | |
+ struct cftype *cftype) | |
+{ | |
+ struct task_group *tg = cgroup_tg(cgrp); | |
+ return tg->bvt_warp_ns; | |
+} | |
+#endif /* CONFIG_CFS_BVT */ | |
#endif /* CONFIG_FAIR_GROUP_SCHED */ | |
#ifdef CONFIG_RT_GROUP_SCHED | |
@@ -8267,6 +8291,13 @@ static struct cftype cpu_files[] = { | |
.read_map = cpu_stats_show, | |
}, | |
#endif | |
+#ifdef CONFIG_CFS_BVT | |
+ { | |
+ .name = "bvt_warp_ns", | |
+ .read_s64 = cpu_bvt_warp_read_s64, | |
+ .write_s64 = cpu_bvt_warp_write_s64, | |
+ }, | |
+#endif | |
#ifdef CONFIG_RT_GROUP_SCHED | |
{ | |
.name = "rt_runtime_us", | |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | |
index c099cc6..6bfe63e 100644 | |
--- a/kernel/sched/fair.c | |
+++ b/kernel/sched/fair.c | |
@@ -110,6 +110,22 @@ unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; | |
unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; | |
#endif | |
+#ifdef CONFIG_CFS_BVT | |
+/* | |
+ * If the BVT_PLACEMENT scheduler feature is enabled, waking BVT tasks | |
+ * are placed differently from CFS tasks when they wakeup. Rather | |
+ * than being placed some large factor (i.e. sched_latency >> 1) | |
+ * before min_vruntime (which gives waking tasks an unfair advantage | |
+ * in preempting currently running tasks), they are placed | |
+ * sched_bvt_place_epsilon nanoseconds relative to min_vruntime. If | |
+ * you really want a BVT task to preempt currently running tasks, it | |
+ * should have a greater "warp" value than the current running task. | |
+ * | |
+ * Default: 1us in the future, units: nanoseconds | |
+ */ | |
+unsigned int sysctl_sched_bvt_place_epsilon = 1000UL; | |
+#endif | |
+ | |
/* | |
* Increase the granularity value when there are more CPUs, | |
* because with more CPUs the 'effective latency' as visible | |
@@ -416,6 +432,26 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) | |
#endif /* CONFIG_FAIR_GROUP_SCHED */ | |
+#ifdef CONFIG_CFS_BVT | |
+static inline void update_effective_vruntime(struct sched_entity *se) | |
+{ | |
+ s64 warp; | |
+ struct task_group *tg; | |
+ | |
+ if (entity_is_task(se)) { | |
+ se->effective_vruntime = se->vruntime; | |
+ return; | |
+ } | |
+ | |
+ tg = se->my_q->tg; | |
+ warp = tg->bvt_warp_ns; | |
+ | |
+ /* FIXME: Should we calc_delta_fair on warp_ns? */ | |
+ se->effective_vruntime = se->vruntime - warp; | |
+ se->is_warped = warp ? 1 : 0; | |
+} | |
+#endif /* CONFIG_CFS_BVT */ | |
+ | |
static __always_inline | |
void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec); | |
@@ -444,7 +480,11 @@ static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime) | |
static inline int entity_before(struct sched_entity *a, | |
struct sched_entity *b) | |
{ | |
+#ifdef CONFIG_CFS_BVT | |
+ return (s64)(a->effective_vruntime - b->effective_vruntime) < 0; | |
+#else | |
return (s64)(a->vruntime - b->vruntime) < 0; | |
+#endif | |
} | |
static void update_min_vruntime(struct cfs_rq *cfs_rq) | |
@@ -674,6 +714,9 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |
delta_exec_weighted = calc_delta_fair(delta_exec, curr); | |
curr->vruntime += delta_exec_weighted; | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(curr); | |
+#endif | |
update_min_vruntime(cfs_rq); | |
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | |
@@ -1074,10 +1117,19 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |
vruntime -= thresh; | |
} | |
+#ifdef CONFIG_CFS_BVT | |
+ if (sched_feat(BVT_PLACEMENT) && !entity_is_task(se) && se->is_warped) { | |
+ vruntime = cfs_rq->min_vruntime + sysctl_sched_bvt_place_epsilon; | |
+ } | |
+#endif | |
+ | |
/* ensure we never gain time by being placed backwards. */ | |
vruntime = max_vruntime(se->vruntime, vruntime); | |
se->vruntime = vruntime; | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(se); | |
+#endif | |
} | |
static void check_enqueue_throttle(struct cfs_rq *cfs_rq); | |
@@ -1089,8 +1141,13 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |
* Update the normalized vruntime before updating min_vruntime | |
* through callig update_curr(). | |
*/ | |
- if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING)) | |
+ if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING)) { | |
se->vruntime += cfs_rq->min_vruntime; | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(se); | |
+#endif | |
+ } | |
+ | |
/* | |
* Update run-time statistics of the 'current'. | |
@@ -1199,8 +1256,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |
* update can refer to the ->curr item and we need to reflect this | |
* movement in our normalized position. | |
*/ | |
- if (!(flags & DEQUEUE_SLEEP)) | |
+ if (!(flags & DEQUEUE_SLEEP)) { | |
se->vruntime -= cfs_rq->min_vruntime; | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(se); | |
+#endif | |
+ } | |
/* return excess runtime on last dequeue */ | |
return_cfs_rq_runtime(cfs_rq); | |
@@ -1240,7 +1301,11 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |
return; | |
se = __pick_first_entity(cfs_rq); | |
+#ifdef CONFIG_CFS_BVT | |
+ delta = curr->effective_vruntime - se->effective_vruntime; | |
+#else | |
delta = curr->vruntime - se->vruntime; | |
+#endif | |
if (delta < 0) | |
return; | |
@@ -2351,6 +2416,9 @@ static void task_waking_fair(struct task_struct *p) | |
#endif | |
se->vruntime -= min_vruntime; | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(se); | |
+#endif | |
} | |
#ifdef CONFIG_FAIR_GROUP_SCHED | |
@@ -2849,7 +2917,11 @@ wakeup_gran(struct sched_entity *curr, struct sched_entity *se) | |
static int | |
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) | |
{ | |
+#ifdef CONFIG_CFS_BVT | |
+ s64 gran, vdiff = curr->effective_vruntime - se->effective_vruntime; | |
+#else | |
s64 gran, vdiff = curr->vruntime - se->vruntime; | |
+#endif | |
if (vdiff <= 0) | |
return -1; | |
@@ -4937,8 +5009,12 @@ static void task_fork_fair(struct task_struct *p) | |
update_curr(cfs_rq); | |
- if (curr) | |
+ if (curr) { | |
se->vruntime = curr->vruntime; | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(se); | |
+#endif | |
+ } | |
place_entity(cfs_rq, se, 1); | |
if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) { | |
@@ -4947,10 +5023,17 @@ static void task_fork_fair(struct task_struct *p) | |
* 'current' within the tree based on its new key value. | |
*/ | |
swap(curr->vruntime, se->vruntime); | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(curr); | |
+ update_effective_vruntime(se); | |
+#endif | |
resched_task(rq->curr); | |
} | |
se->vruntime -= cfs_rq->min_vruntime; | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(se); | |
+#endif | |
raw_spin_unlock_irqrestore(&rq->lock, flags); | |
} | |
@@ -4998,6 +5081,9 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p) | |
*/ | |
place_entity(cfs_rq, se, 0); | |
se->vruntime -= cfs_rq->min_vruntime; | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(se); | |
+#endif | |
} | |
} | |
@@ -5083,6 +5169,9 @@ static void task_move_group_fair(struct task_struct *p, int on_rq) | |
set_task_rq(p, task_cpu(p)); | |
if (!on_rq) | |
p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime; | |
+#ifdef CONFIG_CFS_BVT | |
+ update_effective_vruntime(&p->se); | |
+#endif | |
} | |
void free_fair_sched_group(struct task_group *tg) | |
diff --git a/kernel/sched/features.h b/kernel/sched/features.h | |
index de00a48..256b29c 100644 | |
--- a/kernel/sched/features.h | |
+++ b/kernel/sched/features.h | |
@@ -69,3 +69,5 @@ SCHED_FEAT(TTWU_QUEUE, true) | |
SCHED_FEAT(FORCE_SD_OVERLAP, false) | |
SCHED_FEAT(RT_RUNTIME_SHARE, true) | |
SCHED_FEAT(LB_MIN, false) | |
+ | |
+SCHED_FEAT(BVT_PLACEMENT, true) | |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h | |
index 55844f2..e2173b3 100644 | |
--- a/kernel/sched/sched.h | |
+++ b/kernel/sched/sched.h | |
@@ -114,6 +114,10 @@ struct task_group { | |
atomic_t load_weight; | |
#endif | |
+#ifdef CONFIG_CFS_BVT | |
+ s64 bvt_warp_ns; | |
+#endif | |
+ | |
#ifdef CONFIG_RT_GROUP_SCHED | |
struct sched_rt_entity **rt_se; | |
struct rt_rq **rt_rq; | |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c | |
index 4ab1187..fd86401 100644 | |
--- a/kernel/sysctl.c | |
+++ b/kernel/sysctl.c | |
@@ -373,6 +373,15 @@ static struct ctl_table kern_table[] = { | |
.extra1 = &one, | |
}, | |
#endif | |
+#ifdef CONFIG_CFS_BVT | |
+ { | |
+ .procname = "sched_bvt_place_epsilon", | |
+ .data = &sysctl_sched_bvt_place_epsilon, | |
+ .maxlen = sizeof(unsigned int), | |
+ .mode = 0644, | |
+ .proc_handler = proc_dointvec, | |
+ }, | |
+#endif | |
#ifdef CONFIG_PROVE_LOCKING | |
{ | |
.procname = "prove_locking", |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment