path: root/kernel/rcu/tree.c
diff options
authorPaul E. McKenney <>2015-10-01 10:26:24 -0700
committerPaul E. McKenney <>2015-12-04 12:26:52 -0800
commitdf5bd5144a80a9f6c3807383b11f735dae9caf9d (patch)
treefce44d0970a70446c5fd6b50f2e764db0efb8e56 /kernel/rcu/tree.c
parent1307f2148719cc9e9d12f5fa7d5b3b61ec5aef72 (diff)
rcu: Reduce expedited GP memory contention via per-CPU variables
Currently, the piggybacked-work checks carried out by sync_exp_work_done() atomically increment a small set of variables (the ->expedited_workdone0, ->expedited_workdone1, ->expedited_workdone2, ->expedited_workdone3 fields in the rcu_state structure), which will form a memory-contention bottleneck given a sufficiently large number of CPUs concurrently invoking either synchronize_rcu_expedited() or synchronize_sched_expedited(). This commit therefore moves these for fields to the per-CPU rcu_data structure, eliminating the memory contention. The show_rcuexp() function also changes to sum up each field in the rcu_data structures. Signed-off-by: Paul E. McKenney <>
Diffstat (limited to 'kernel/rcu/tree.c')
1 files changed, 5 insertions, 6 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 00f07d6436ce..33d7e2551165 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3585,7 +3585,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
- struct rcu_data *rdp;
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
struct rcu_node *rnp0;
struct rcu_node *rnp1 = NULL;
@@ -3599,7 +3599,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
if (sync_exp_work_done(rsp, rnp0, NULL,
- &rsp->expedited_workdone0, s))
+ &rdp->expedited_workdone0, s))
return NULL;
return rnp0;
@@ -3613,14 +3613,13 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
* can be inexact, as it is just promoting locality and is not
* strictly needed for correctness.
- rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
- if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+ if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
return NULL;
rnp0 = rdp->mynode;
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
if (sync_exp_work_done(rsp, rnp1, rdp,
- &rsp->expedited_workdone2, s))
+ &rdp->expedited_workdone2, s))
return NULL;
if (rnp1)
@@ -3630,7 +3629,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
rnp1 = rnp0;
if (sync_exp_work_done(rsp, rnp1, rdp,
- &rsp->expedited_workdone3, s))
+ &rdp->expedited_workdone3, s))
return NULL;
return rnp1;