Skip to content

Instantly share code, notes, and snippets.

@frma71
Created June 16, 2015 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save frma71/5af5a2a4b264d5cdd265 to your computer and use it in GitHub Desktop.
Save frma71/5af5a2a4b264d5cdd265 to your computer and use it in GitHub Desktop.
/*
* This file contains a copy of cputime_adjust() and related functions from
* linux-next(next-20150610) in addition to stubs and a driver to verify
* and demonstrate some problems in the scaling mechanism in addition to
* some concurrency problems.
*
* The functions copied are instrumented with a call to emulate_preemption()
* in order to be able to easly demonstrate the concurrency problem, other
* then that they are identical to the ones in the kernel.
*
* Compile with: gcc -o testadjust testadjust.c
* Run with: ./testadjust
* And expect:
* 0.0 sum_exec=100000000000 utime=0 stime=1 => 0.0 tot=10000 user=0 sys=10000 =====> OK
* 0.1 sum_exec=101000000000 utime=100 stime=1 => 0.1 tot=20000 user=10000 sys=10000 =====> FAIL
*
* 1.0 sum_exec=100000000000 utime=1 stime=0 => 1.0 tot=10000 user=10000 sys=0 =====> OK
* 1.1 sum_exec=101000000000 utime=1 stime=100 => 1.1 tot=20000 user=10000 sys=10000 =====> FAIL
*
* 2.0 sum_exec=100000000000 utime=1 stime=1 => 2.0 tot=10000 user=5000 sys=5000 =====> OK
* 2.1 sum_exec=101000000000 utime=100 stime=1 => 2.1 tot=15000 user=10000 sys=5000 =====> FAIL
*
* 3.0 sum_exec=100000000000 utime=1 stime=1 => <<PREEMPT>>
* 3.1 sum_exec=101000000000 utime=100 stime=1 => 3.1 tot=10100 user=10000 sys=100 =====> OK
* <<SWITCH BACK>> 3.0 tot=15000 user=10000 sys=5000 =====> FAIL
*
*/
#include <stdio.h>
#include <stdint.h>
/* Use (or don't use) the fix proposed by peterz on lkml
*/
#define PETERZ_FIX 0
/*****************************************************************************
*
* The code below is stubs and implementations of macros and functions used
* by cputime_adjust() and friends.
*
*****************************************************************************
*/
typedef uint32_t u32;
typedef uint64_t u64;
typedef uint64_t cputime_t;
/* Some macro definitions used by cputime_adjust() and friends copied or stubbed
*/
#define __force
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
/* No real concurrency, so we can use a trivial implementation.
*/
#define READ_ONCE(x) x
/* Structs copied from kernel code
*/
struct cputime {
cputime_t utime;
cputime_t stime;
};
struct task_cputime {
cputime_t utime;
cputime_t stime;
unsigned long long sum_exec_runtime;
};
/* We are running in a standard glibc execution environment, so just rely on
* the standard 64 bit arithmetics.
*/
u64 div_u64(u64 a, u64 b) {
return a/b;
}
/* We have no real concurrency in the test so we can make a
* simple version of cmpxchg
*/
int cmpxchg_cputime(cputime_t *counter, cputime_t old, cputime_t new) {
cputime_t ret = *counter;
(void)old;
*counter = new;
return ret;
}
/* Convert nanoseconds to jiffies, this assumes HZ=100
*/
cputime_t nsecs_to_cputime(u64 ns) {
return (cputime_t)(ns/(1000000000/100));
}
void emulate_preemption(int preemption_position);
/*****************************************************************************
*
* The code below is copied verbatim from the kernel, except:
*
* - The additional call to emulate_preemption() from cputime_adjust()
* - The code within #if PETERZ_FIX in cputime_adjust() is a fix proposed by peterz
*
*****************************************************************************
*/
/*
* Perform (stime * rtime) / total, but avoid multiplication overflow by
* loosing precision when the numbers are big.
*/
static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
{
u64 scaled;
for (;;) {
/* Make sure "rtime" is the bigger of stime/rtime */
if (stime > rtime)
swap(rtime, stime);
/* Make sure 'total' fits in 32 bits */
if (total >> 32)
goto drop_precision;
/* Does rtime (and thus stime) fit in 32 bits? */
if (!(rtime >> 32))
break;
/* Can we just balance rtime/stime rather than dropping bits? */
if (stime >> 31)
goto drop_precision;
/* We can grow stime and shrink rtime and try to make them both fit */
stime <<= 1;
rtime >>= 1;
continue;
drop_precision:
/* We drop from rtime, it has more bits than stime */
rtime >>= 1;
total >>= 1;
}
/*
* Make sure gcc understands that this is a 32x32->64 multiply,
* followed by a 64/32->64 divide.
*/
scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
return (__force cputime_t) scaled;
}
/*
* Atomically advance counter to the new value. Interrupts, vcpu
* scheduling, and scaling inaccuracies can cause cputime_advance
* to be occasionally called with a new value smaller than counter.
* Let's enforce atomicity.
*
* Normally a caller will only go through this loop once, or not
* at all in case a previous caller updated counter the same jiffy.
*/
static void cputime_advance(cputime_t *counter, cputime_t new)
{
cputime_t old;
while (new > (old = READ_ONCE(*counter)))
cmpxchg_cputime(counter, old, new);
}
/*
* Adjust tick based cputime random precision against scheduler
* runtime accounting.
*/
static void cputime_adjust(struct task_cputime *curr,
struct cputime *prev,
cputime_t *ut, cputime_t *st)
{
cputime_t rtime, stime, utime;
/*
* Tick based cputime accounting depend on random scheduling
* timeslices of a task to be interrupted or not by the timer.
* Depending on these circumstances, the number of these interrupts
* may be over or under-optimistic, matching the real user and system
* cputime with a variable precision.
*
* Fix this by scaling these tick based values against the total
* runtime accounted by the CFS scheduler.
*/
rtime = nsecs_to_cputime(curr->sum_exec_runtime);
/*
* Update userspace visible utime/stime values only if actual execution
* time is bigger than already exported. Note that can happen, that we
* provided bigger values due to scaling inaccuracy on big numbers.
*/
if (prev->stime + prev->utime >= rtime)
goto out;
stime = curr->stime;
utime = curr->utime;
if (utime == 0) {
stime = rtime;
} else if (stime == 0) {
utime = rtime;
} else {
cputime_t total = stime + utime;
stime = scale_stime((__force u64)stime,
(__force u64)rtime, (__force u64)total);
#if PETERZ_FIX // PETERZ:s proposed fix
if (stime < prev->stime)
stime = prev->stime;
#endif
emulate_preemption(0);
utime = rtime - stime;
}
cputime_advance(&prev->stime, stime);
cputime_advance(&prev->utime, utime);
out:
*ut = prev->utime;
*st = prev->stime;
}
/*****************************************************************************
*
* The code below is the actual test driver. The tests struct has a number of
* testcases each with a sequence of inputs to cputime_adjust() after each step
* we verify that the reported system plus user time is equal to the provided
* sum_exec_runtime.
*
*****************************************************************************
*/
#define NSEC_PER_SEC 1000000000ULL
static const struct {
unsigned long long sum_exec_runtime;
cputime_t utime;
cputime_t stime;
int preempt;
} tests[][10] = {
{{ .sum_exec_runtime = 100*NSEC_PER_SEC, .utime = 0, .stime = 1 },
{ .sum_exec_runtime = 101*NSEC_PER_SEC, .utime = 100, .stime = 1 },
{ .sum_exec_runtime = 0},
},
{{ .sum_exec_runtime = 100*NSEC_PER_SEC, .utime = 1, .stime = 0 },
{ .sum_exec_runtime = 101*NSEC_PER_SEC, .utime = 1, .stime = 100 },
{ .sum_exec_runtime = 0},
},
{{ .sum_exec_runtime = 100*NSEC_PER_SEC, .utime = 1, .stime = 1 },
{ .sum_exec_runtime = 101*NSEC_PER_SEC, .utime = 100, .stime = 1 },
{ .sum_exec_runtime = 0},
},
{{ .sum_exec_runtime = 100*NSEC_PER_SEC, .utime = 1, .stime = 1, .preempt = 1},
{ .sum_exec_runtime = 101*NSEC_PER_SEC, .utime = 100, .stime = 1 },
{ .sum_exec_runtime = 0},
}
};
/* The global state of the test machinery
*/
static int testno;
static int step;
static struct cputime prev;
/* Execute the current testno/step and validate the results
*/
void do_step(void) {
struct task_cputime t;
cputime_t ut,st;
int tno,s;
tno = testno;
s = step;
t.sum_exec_runtime = tests[tno][s].sum_exec_runtime;
t.utime = tests[tno][s].utime;
t.stime = tests[tno][s].stime;
printf("%d.%d sum_exec=%-10lld utime=%-5lld stime=%-5lld => ",
tno, s,
(unsigned long long)t.sum_exec_runtime,
(unsigned long long)t.utime,
(unsigned long long)t.stime);
cputime_adjust(&t, &prev, &ut, &st);
printf(" %d.%d tot=%-10llu user=%-5llu sys=%-5llu",
tno, s, (unsigned long long)(ut+st),
(unsigned long long)ut, (unsigned long long)st);
if((ut + st) != nsecs_to_cputime(t.sum_exec_runtime))
printf(" =====> FAIL\n");
else
printf(" =====> OK\n");
}
/* Called from the preemption point in cputime_adjust()
*/
void emulate_preemption(int pos) {
if((1 << pos) && tests[testno][step].preempt) {
printf("<<PREEMPT>>\n");
step++;
do_step();
printf("<<SWITCH BACK>>%38s","");
}
}
int main(void) {
for(testno = 0; testno < (int)(sizeof(tests)/sizeof(tests[0])); testno++) {
prev.utime = 0;
prev.stime = 0;
step = 0;
printf("\n");
while(tests[testno][step].sum_exec_runtime != 0) {
do_step();
step++;
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment