Add initial patches for CPU Controller on Control Group v2
Parent: d378f5fe11
Commit: 2b1fa9da8b
pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch (new file, 407 lines)
@@ -0,0 +1,407 @@
commit e7cae741f6d645ac68fe8823ca6ef45dbbf6891b
Author: Tejun Heo <tj@kernel.org>
Date: Fri Mar 11 07:31:23 2016 -0500

sched: Misc preps for cgroup unified hierarchy interface

Make the following changes in preparation for the cpu controller
interface implementation for the unified hierarchy. This patch
doesn't cause any functional differences.

* s/cpu_stats_show()/cpu_cfs_stats_show()/

* s/cpu_files/cpu_legacy_files/

* Separate out cpuacct_stats_read() from cpuacct_stats_show(). While
at it, remove pointless cpuacct_stat_desc[] array.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 732e993..77f3ddd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8512,7 +8512,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
return ret;
}

-static int cpu_stats_show(struct seq_file *sf, void *v)
+static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
{
struct task_group *tg = css_tg(seq_css(sf));
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
@@ -8552,7 +8552,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
}
#endif /* CONFIG_RT_GROUP_SCHED */

-static struct cftype cpu_files[] = {
+static struct cftype cpu_legacy_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
{
.name = "shares",
@@ -8573,7 +8573,7 @@ static struct cftype cpu_files[] = {
},
{
.name = "stat",
- .seq_show = cpu_stats_show,
+ .seq_show = cpu_cfs_stats_show,
},
#endif
#ifdef CONFIG_RT_GROUP_SCHED
@@ -8599,7 +8599,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
.fork = cpu_cgroup_fork,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
- .legacy_cftypes = cpu_files,
+ .legacy_cftypes = cpu_legacy_files,
.early_init = 1,
};

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index dd7cbb5..42b2dd5 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -177,36 +177,33 @@ static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
return 0;
}

-static const char * const cpuacct_stat_desc[] = {
- [CPUACCT_STAT_USER] = "user",
- [CPUACCT_STAT_SYSTEM] = "system",
-};
-
-static int cpuacct_stats_show(struct seq_file *sf, void *v)
+static void cpuacct_stats_read(struct cpuacct *ca, u64 *userp, u64 *sysp)
{
- struct cpuacct *ca = css_ca(seq_css(sf));
int cpu;
- s64 val = 0;

+ *userp = 0;
for_each_online_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
- val += kcpustat->cpustat[CPUTIME_USER];
- val += kcpustat->cpustat[CPUTIME_NICE];
+ *userp += kcpustat->cpustat[CPUTIME_USER];
+ *userp += kcpustat->cpustat[CPUTIME_NICE];
}
- val = cputime64_to_clock_t(val);
- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);

- val = 0;
+ *sysp = 0;
for_each_online_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
- val += kcpustat->cpustat[CPUTIME_SYSTEM];
- val += kcpustat->cpustat[CPUTIME_IRQ];
- val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
+ *sysp += kcpustat->cpustat[CPUTIME_SYSTEM];
+ *sysp += kcpustat->cpustat[CPUTIME_IRQ];
+ *sysp += kcpustat->cpustat[CPUTIME_SOFTIRQ];
}
+}

- val = cputime64_to_clock_t(val);
- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
+static int cpuacct_stats_show(struct seq_file *sf, void *v)
+{
+ cputime64_t user, sys;

+ cpuacct_stats_read(css_ca(seq_css(sf)), &user, &sys);
+ seq_printf(sf, "user %lld\n", cputime64_to_clock_t(user));
+ seq_printf(sf, "system %lld\n", cputime64_to_clock_t(sys));
return 0;
}


commit 1bb33e8a69f089f2d3f58a0e681d4ff352e11c97
Author: Tejun Heo <tj@kernel.org>
Date: Fri Mar 11 07:31:23 2016 -0500

sched: Implement interface for cgroup unified hierarchy

While the cpu controller doesn't have any functional problems, there
are a couple interface issues which can be addressed in the v2
interface.

* cpuacct being a separate controller. This separation is artificial
and rather pointless as demonstrated by most use cases co-mounting
the two controllers. It also forces certain information to be
accounted twice.

* Use of different time units. Writable control knobs use
microseconds, some stat fields use nanoseconds while other cpuacct
stat fields use centiseconds.

* Control knobs which can't be used in the root cgroup still show up
in the root.

* Control knob names and semantics aren't consistent with other
controllers.

This patchset implements cpu controller's interface on the unified
hierarchy which adheres to the controller file conventions described
in Documentation/cgroups/unified-hierarchy.txt. Overall, the
following changes are made.

* cpuacct is implictly enabled and disabled by cpu and its information
is reported through "cpu.stat" which now uses microseconds for all
time durations. All time duration fields now have "_usec" appended
to them for clarity. While this doesn't solve the double accounting
immediately, once majority of users switch to v2, cpu can directly
account and report the relevant stats and cpuacct can be disabled on
the unified hierarchy.

Note that cpuacct.usage_percpu is currently not included in
"cpu.stat". If this information is actually called for, it can be
added later.

* "cpu.shares" is replaced with "cpu.weight" and operates on the
standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
The weight is scaled to scheduler weight so that 100 maps to 1024
and the ratio relationship is preserved - if weight is W and its
scaled value is S, W / 100 == S / 1024. While the mapped range is a
bit smaller than the orignal scheduler weight range, the dead zones
on both sides are relatively small and covers wider range than the
nice value mappings. This file doesn't make sense in the root
cgroup and isn't create on root.

* "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
which contains both quota and period.

* "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
"cpu.rt.max" which contains both runtime and period.

v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
CFS bandwidth stats and also using raw division for u64. Use
CONFIG_CFS_BANDWITH and do_div() instead.

The semantics of "cpu.rt.max" is not fully decided yet. Dropped
for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 77f3ddd..7aafe63 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8591,6 +8591,139 @@ static struct cftype cpu_legacy_files[] = {
{ } /* terminate */
};

+static int cpu_stats_show(struct seq_file *sf, void *v)
+{
+ cpuacct_cpu_stats_show(sf);
+
+#ifdef CONFIG_CFS_BANDWIDTH
+ {
+ struct task_group *tg = css_tg(seq_css(sf));
+ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+ u64 throttled_usec;
+
+ throttled_usec = cfs_b->throttled_time;
+ do_div(throttled_usec, NSEC_PER_USEC);
+
+ seq_printf(sf, "nr_periods %d\n"
+ "nr_throttled %d\n"
+ "throttled_usec %llu\n",
+ cfs_b->nr_periods, cfs_b->nr_throttled,
+ throttled_usec);
+ }
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ struct task_group *tg = css_tg(css);
+ u64 weight = scale_load_down(tg->shares);
+
+ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
+}
+
+static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 weight)
+{
+ /*
+ * cgroup weight knobs should use the common MIN, DFL and MAX
+ * values which are 1, 100 and 10000 respectively. While it loses
+ * a bit of range on both ends, it maps pretty well onto the shares
+ * value used by scheduler and the round-trip conversions preserve
+ * the original value over the entire range.
+ */
+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
+ return -ERANGE;
+
+ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
+
+ return sched_group_set_shares(css_tg(css), scale_load(weight));
+}
+#endif
+
+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
+ long period, long quota)
+{
+ if (quota < 0)
+ seq_puts(sf, "max");
+ else
+ seq_printf(sf, "%ld", quota);
+
+ seq_printf(sf, " %ld\n", period);
+}
+
+/* caller should put the current value in *@periodp before calling */
+static int __maybe_unused cpu_period_quota_parse(char *buf,
+ u64 *periodp, u64 *quotap)
+{
+ char tok[21]; /* U64_MAX */
+
+ if (!sscanf(buf, "%s %llu", tok, periodp))
+ return -EINVAL;
+
+ *periodp *= NSEC_PER_USEC;
+
+ if (sscanf(tok, "%llu", quotap))
+ *quotap *= NSEC_PER_USEC;
+ else if (!strcmp(tok, "max"))
+ *quotap = RUNTIME_INF;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+static int cpu_max_show(struct seq_file *sf, void *v)
+{
+ struct task_group *tg = css_tg(seq_css(sf));
+
+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
+ return 0;
+}
+
+static ssize_t cpu_max_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct task_group *tg = css_tg(of_css(of));
+ u64 period = tg_get_cfs_period(tg);
+ u64 quota;
+ int ret;
+
+ ret = cpu_period_quota_parse(buf, &period, &quota);
+ if (!ret)
+ ret = tg_set_cfs_bandwidth(tg, period, quota);
+ return ret ?: nbytes;
+}
+#endif
+
+static struct cftype cpu_files[] = {
+ {
+ .name = "stat",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cpu_stats_show,
+ },
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ {
+ .name = "weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = cpu_weight_read_u64,
+ .write_u64 = cpu_weight_write_u64,
+ },
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+ {
+ .name = "max",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cpu_max_show,
+ .write = cpu_max_write,
+ },
+#endif
+ { } /* terminate */
+};
+
struct cgroup_subsys cpu_cgrp_subsys = {
.css_alloc = cpu_cgroup_css_alloc,
.css_free = cpu_cgroup_css_free,
@@ -8600,7 +8733,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
.legacy_cftypes = cpu_legacy_files,
+ .dfl_cftypes = cpu_files,
.early_init = 1,
+#ifdef CONFIG_CGROUP_CPUACCT
+ /*
+ * cpuacct is enabled together with cpu on the unified hierarchy
+ * and its stats are reported through "cpu.stat".
+ */
+ .depends_on = 1 << cpuacct_cgrp_id,
+#endif
};

#endif /* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 42b2dd5..b4d32a6 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -224,6 +224,30 @@ static struct cftype files[] = {
{ } /* terminate */
};

+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
+void cpuacct_cpu_stats_show(struct seq_file *sf)
+{
+ struct cgroup_subsys_state *css;
+ u64 usage, user, sys;
+
+ css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
+
+ usage = cpuusage_read(css, seq_cft(sf));
+ cpuacct_stats_read(css_ca(css), &user, &sys);
+
+ user *= TICK_NSEC;
+ sys *= TICK_NSEC;
+ do_div(usage, NSEC_PER_USEC);
+ do_div(user, NSEC_PER_USEC);
+ do_div(sys, NSEC_PER_USEC);
+
+ seq_printf(sf, "usage_usec %llu\n"
+ "user_usec %llu\n"
+ "system_usec %llu\n", usage, user, sys);
+
+ css_put(css);
+}
+
/*
* charge this task's execution time to its accounting group.
*
diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
index ed60562..44eace9 100644
--- a/kernel/sched/cpuacct.h
+++ b/kernel/sched/cpuacct.h
@@ -2,6 +2,7 @@

extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
extern void cpuacct_account_field(struct task_struct *p, int index, u64 val);
+extern void cpuacct_cpu_stats_show(struct seq_file *sf);

#else

@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *p, int index, u64 val)
{
}

+static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
+{
+}
+
#endif
pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.6.patch (new file, 407 lines)
@@ -0,0 +1,407 @@
commit 6426c5b02d4aab620219b08a5d97ad8851b56b0d
Author: Tejun Heo <tj@kernel.org>
Date: Fri Mar 11 07:31:23 2016 -0500

sched: Misc preps for cgroup unified hierarchy interface

Make the following changes in preparation for the cpu controller
interface implementation for the unified hierarchy. This patch
doesn't cause any functional differences.

* s/cpu_stats_show()/cpu_cfs_stats_show()/

* s/cpu_files/cpu_legacy_files/

* Separate out cpuacct_stats_read() from cpuacct_stats_show(). While
at it, remove pointless cpuacct_stat_desc[] array.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d1f7149..0d34f35 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8371,7 +8371,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
return ret;
}

-static int cpu_stats_show(struct seq_file *sf, void *v)
+static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
{
struct task_group *tg = css_tg(seq_css(sf));
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
@@ -8411,7 +8411,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
}
#endif /* CONFIG_RT_GROUP_SCHED */

-static struct cftype cpu_files[] = {
+static struct cftype cpu_legacy_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
{
.name = "shares",
@@ -8432,7 +8432,7 @@ static struct cftype cpu_files[] = {
},
{
.name = "stat",
- .seq_show = cpu_stats_show,
+ .seq_show = cpu_cfs_stats_show,
},
#endif
#ifdef CONFIG_RT_GROUP_SCHED
@@ -8457,7 +8457,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
.fork = cpu_cgroup_fork,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
- .legacy_cftypes = cpu_files,
+ .legacy_cftypes = cpu_legacy_files,
.early_init = true,
};

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 4a81120..b99030a 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -180,36 +180,33 @@ static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
return 0;
}

-static const char * const cpuacct_stat_desc[] = {
- [CPUACCT_STAT_USER] = "user",
- [CPUACCT_STAT_SYSTEM] = "system",
-};
-
-static int cpuacct_stats_show(struct seq_file *sf, void *v)
+static void cpuacct_stats_read(struct cpuacct *ca, u64 *userp, u64 *sysp)
{
- struct cpuacct *ca = css_ca(seq_css(sf));
int cpu;
- s64 val = 0;

+ *userp = 0;
for_each_online_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
- val += kcpustat->cpustat[CPUTIME_USER];
- val += kcpustat->cpustat[CPUTIME_NICE];
+ *userp += kcpustat->cpustat[CPUTIME_USER];
+ *userp += kcpustat->cpustat[CPUTIME_NICE];
}
- val = cputime64_to_clock_t(val);
- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);

- val = 0;
+ *sysp = 0;
for_each_online_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
- val += kcpustat->cpustat[CPUTIME_SYSTEM];
- val += kcpustat->cpustat[CPUTIME_IRQ];
- val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
+ *sysp += kcpustat->cpustat[CPUTIME_SYSTEM];
+ *sysp += kcpustat->cpustat[CPUTIME_IRQ];
+ *sysp += kcpustat->cpustat[CPUTIME_SOFTIRQ];
}
+}

- val = cputime64_to_clock_t(val);
- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
+static int cpuacct_stats_show(struct seq_file *sf, void *v)
+{
+ cputime64_t user, sys;

+ cpuacct_stats_read(css_ca(seq_css(sf)), &user, &sys);
+ seq_printf(sf, "user %lld\n", cputime64_to_clock_t(user));
+ seq_printf(sf, "system %lld\n", cputime64_to_clock_t(sys));
return 0;
}


commit d2a799f795a5d5a69c9dc365c34f926e0649f840
Author: Tejun Heo <tj@kernel.org>
Date: Fri Mar 11 07:31:23 2016 -0500

sched: Implement interface for cgroup unified hierarchy

While the cpu controller doesn't have any functional problems, there
are a couple interface issues which can be addressed in the v2
interface.

* cpuacct being a separate controller. This separation is artificial
and rather pointless as demonstrated by most use cases co-mounting
the two controllers. It also forces certain information to be
accounted twice.

* Use of different time units. Writable control knobs use
microseconds, some stat fields use nanoseconds while other cpuacct
stat fields use centiseconds.

* Control knobs which can't be used in the root cgroup still show up
in the root.

* Control knob names and semantics aren't consistent with other
controllers.

This patchset implements cpu controller's interface on the unified
hierarchy which adheres to the controller file conventions described
in Documentation/cgroups/unified-hierarchy.txt. Overall, the
following changes are made.

* cpuacct is implictly enabled and disabled by cpu and its information
is reported through "cpu.stat" which now uses microseconds for all
time durations. All time duration fields now have "_usec" appended
to them for clarity. While this doesn't solve the double accounting
immediately, once majority of users switch to v2, cpu can directly
account and report the relevant stats and cpuacct can be disabled on
the unified hierarchy.

Note that cpuacct.usage_percpu is currently not included in
"cpu.stat". If this information is actually called for, it can be
added later.

* "cpu.shares" is replaced with "cpu.weight" and operates on the
standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
The weight is scaled to scheduler weight so that 100 maps to 1024
and the ratio relationship is preserved - if weight is W and its
scaled value is S, W / 100 == S / 1024. While the mapped range is a
bit smaller than the orignal scheduler weight range, the dead zones
on both sides are relatively small and covers wider range than the
nice value mappings. This file doesn't make sense in the root
cgroup and isn't create on root.

* "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
which contains both quota and period.

* "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
"cpu.rt.max" which contains both runtime and period.

v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
CFS bandwidth stats and also using raw division for u64. Use
CONFIG_CFS_BANDWITH and do_div() instead.

The semantics of "cpu.rt.max" is not fully decided yet. Dropped
for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0d34f35..5990efc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8450,6 +8450,139 @@ static struct cftype cpu_legacy_files[] = {
{ } /* terminate */
};

+static int cpu_stats_show(struct seq_file *sf, void *v)
+{
+ cpuacct_cpu_stats_show(sf);
+
+#ifdef CONFIG_CFS_BANDWIDTH
+ {
+ struct task_group *tg = css_tg(seq_css(sf));
+ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+ u64 throttled_usec;
+
+ throttled_usec = cfs_b->throttled_time;
+ do_div(throttled_usec, NSEC_PER_USEC);
+
+ seq_printf(sf, "nr_periods %d\n"
+ "nr_throttled %d\n"
+ "throttled_usec %llu\n",
+ cfs_b->nr_periods, cfs_b->nr_throttled,
+ throttled_usec);
+ }
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ struct task_group *tg = css_tg(css);
+ u64 weight = scale_load_down(tg->shares);
+
+ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
+}
+
+static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 weight)
+{
+ /*
+ * cgroup weight knobs should use the common MIN, DFL and MAX
+ * values which are 1, 100 and 10000 respectively. While it loses
+ * a bit of range on both ends, it maps pretty well onto the shares
+ * value used by scheduler and the round-trip conversions preserve
+ * the original value over the entire range.
+ */
+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
+ return -ERANGE;
+
+ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
+
+ return sched_group_set_shares(css_tg(css), scale_load(weight));
+}
+#endif
+
+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
+ long period, long quota)
+{
+ if (quota < 0)
+ seq_puts(sf, "max");
+ else
+ seq_printf(sf, "%ld", quota);
+
+ seq_printf(sf, " %ld\n", period);
+}
+
+/* caller should put the current value in *@periodp before calling */
+static int __maybe_unused cpu_period_quota_parse(char *buf,
+ u64 *periodp, u64 *quotap)
+{
+ char tok[21]; /* U64_MAX */
+
+ if (!sscanf(buf, "%s %llu", tok, periodp))
+ return -EINVAL;
+
+ *periodp *= NSEC_PER_USEC;
+
+ if (sscanf(tok, "%llu", quotap))
+ *quotap *= NSEC_PER_USEC;
+ else if (!strcmp(tok, "max"))
+ *quotap = RUNTIME_INF;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+static int cpu_max_show(struct seq_file *sf, void *v)
+{
+ struct task_group *tg = css_tg(seq_css(sf));
+
+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
+ return 0;
+}
+
+static ssize_t cpu_max_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct task_group *tg = css_tg(of_css(of));
+ u64 period = tg_get_cfs_period(tg);
+ u64 quota;
+ int ret;
+
+ ret = cpu_period_quota_parse(buf, &period, &quota);
+ if (!ret)
+ ret = tg_set_cfs_bandwidth(tg, period, quota);
+ return ret ?: nbytes;
+}
+#endif
+
+static struct cftype cpu_files[] = {
+ {
+ .name = "stat",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cpu_stats_show,
+ },
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ {
+ .name = "weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = cpu_weight_read_u64,
+ .write_u64 = cpu_weight_write_u64,
+ },
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+ {
+ .name = "max",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cpu_max_show,
+ .write = cpu_max_write,
+ },
+#endif
+ { } /* terminate */
+};
+
struct cgroup_subsys cpu_cgrp_subsys = {
.css_alloc = cpu_cgroup_css_alloc,
.css_released = cpu_cgroup_css_released,
@@ -8458,7 +8591,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
.legacy_cftypes = cpu_legacy_files,
+ .dfl_cftypes = cpu_files,
.early_init = true,
+#ifdef CONFIG_CGROUP_CPUACCT
+ /*
+ * cpuacct is enabled together with cpu on the unified hierarchy
+ * and its stats are reported through "cpu.stat".
+ */
+ .depends_on = 1 << cpuacct_cgrp_id,
+#endif
};

#endif /* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index b99030a..a1a5a4b 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -227,6 +227,30 @@ static struct cftype files[] = {
{ } /* terminate */
};

+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
+void cpuacct_cpu_stats_show(struct seq_file *sf)
+{
+ struct cgroup_subsys_state *css;
+ u64 usage, user, sys;
+
+ css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
+
+ usage = cpuusage_read(css, seq_cft(sf));
+ cpuacct_stats_read(css_ca(css), &user, &sys);
+
+ user *= TICK_NSEC;
+ sys *= TICK_NSEC;
+ do_div(usage, NSEC_PER_USEC);
+ do_div(user, NSEC_PER_USEC);
+ do_div(sys, NSEC_PER_USEC);
+
+ seq_printf(sf, "usage_usec %llu\n"
+ "user_usec %llu\n"
+ "system_usec %llu\n", usage, user, sys);
+
+ css_put(css);
+}
+
/*
* charge this task's execution time to its accounting group.
*
diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
index ba72807..ddf7af4 100644
--- a/kernel/sched/cpuacct.h
+++ b/kernel/sched/cpuacct.h
@@ -2,6 +2,7 @@

extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
+extern void cpuacct_cpu_stats_show(struct seq_file *sf);

#else

@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
}

+static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
+{
+}
+
#endif
pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.7.patch (new file, 407 lines)
@@ -0,0 +1,407 @@
commit 0d966df508ef4d6c0b1baae9e369f4fb0d3e10af
Author: Tejun Heo <tj@kernel.org>
Date: Fri Mar 11 07:31:23 2016 -0500

sched: Misc preps for cgroup unified hierarchy interface

Make the following changes in preparation for the cpu controller
interface implementation for the unified hierarchy. This patch
doesn't cause any functional differences.

* s/cpu_stats_show()/cpu_cfs_stats_show()/

* s/cpu_files/cpu_legacy_files/

* Separate out cpuacct_stats_read() from cpuacct_stats_show(). While
at it, remove pointless cpuacct_stat_desc[] array.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 97ee9ac..c148dfe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8482,7 +8482,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
return ret;
}

-static int cpu_stats_show(struct seq_file *sf, void *v)
+static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
{
struct task_group *tg = css_tg(seq_css(sf));
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
@@ -8522,7 +8522,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
}
#endif /* CONFIG_RT_GROUP_SCHED */

-static struct cftype cpu_files[] = {
+static struct cftype cpu_legacy_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
{
.name = "shares",
@@ -8543,7 +8543,7 @@ static struct cftype cpu_files[] = {
},
{
.name = "stat",
- .seq_show = cpu_stats_show,
+ .seq_show = cpu_cfs_stats_show,
},
#endif
#ifdef CONFIG_RT_GROUP_SCHED
@@ -8568,7 +8568,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
.fork = cpu_cgroup_fork,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
- .legacy_cftypes = cpu_files,
+ .legacy_cftypes = cpu_legacy_files,
.early_init = true,
};

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 41f85c4..3eb9eda 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -242,36 +242,33 @@ static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
}

-static const char * const cpuacct_stat_desc[] = {
- [CPUACCT_STAT_USER] = "user",
- [CPUACCT_STAT_SYSTEM] = "system",
-};
-
-static int cpuacct_stats_show(struct seq_file *sf, void *v)
+static void cpuacct_stats_read(struct cpuacct *ca, u64 *userp, u64 *sysp)
{
- struct cpuacct *ca = css_ca(seq_css(sf));
int cpu;
- s64 val = 0;

+ *userp = 0;
for_each_possible_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
- val += kcpustat->cpustat[CPUTIME_USER];
- val += kcpustat->cpustat[CPUTIME_NICE];
+ *userp += kcpustat->cpustat[CPUTIME_USER];
+ *userp += kcpustat->cpustat[CPUTIME_NICE];
}
- val = cputime64_to_clock_t(val);
- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);

- val = 0;
+ *sysp = 0;
for_each_possible_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
- val += kcpustat->cpustat[CPUTIME_SYSTEM];
- val += kcpustat->cpustat[CPUTIME_IRQ];
- val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
+ *sysp += kcpustat->cpustat[CPUTIME_SYSTEM];
+ *sysp += kcpustat->cpustat[CPUTIME_IRQ];
+ *sysp += kcpustat->cpustat[CPUTIME_SOFTIRQ];
}
+}

- val = cputime64_to_clock_t(val);
- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
+static int cpuacct_stats_show(struct seq_file *sf, void *v)
+{
+ cputime64_t user, sys;

+ cpuacct_stats_read(css_ca(seq_css(sf)), &user, &sys);
+ seq_printf(sf, "user %lld\n", cputime64_to_clock_t(user));
+ seq_printf(sf, "system %lld\n", cputime64_to_clock_t(sys));
return 0;
}


commit ed6d93036ec930cb774da10b7c87f67905ce71f1
Author: Tejun Heo <tj@kernel.org>
Date: Fri Mar 11 07:31:23 2016 -0500

sched: Implement interface for cgroup unified hierarchy

While the cpu controller doesn't have any functional problems, there
are a couple interface issues which can be addressed in the v2
interface.

* cpuacct being a separate controller. This separation is artificial
and rather pointless as demonstrated by most use cases co-mounting
the two controllers. It also forces certain information to be
accounted twice.

* Use of different time units. Writable control knobs use
microseconds, some stat fields use nanoseconds while other cpuacct
stat fields use centiseconds.

* Control knobs which can't be used in the root cgroup still show up
in the root.

* Control knob names and semantics aren't consistent with other
controllers.

This patchset implements cpu controller's interface on the unified
hierarchy which adheres to the controller file conventions described
in Documentation/cgroups/unified-hierarchy.txt. Overall, the
following changes are made.

* cpuacct is implictly enabled and disabled by cpu and its information
is reported through "cpu.stat" which now uses microseconds for all
time durations. All time duration fields now have "_usec" appended
to them for clarity. While this doesn't solve the double accounting
immediately, once majority of users switch to v2, cpu can directly
account and report the relevant stats and cpuacct can be disabled on
the unified hierarchy.

Note that cpuacct.usage_percpu is currently not included in
"cpu.stat". If this information is actually called for, it can be
added later.

* "cpu.shares" is replaced with "cpu.weight" and operates on the
standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
The weight is scaled to scheduler weight so that 100 maps to 1024
and the ratio relationship is preserved - if weight is W and its
scaled value is S, W / 100 == S / 1024. While the mapped range is a
bit smaller than the orignal scheduler weight range, the dead zones
on both sides are relatively small and covers wider range than the
nice value mappings. This file doesn't make sense in the root
cgroup and isn't create on root.

* "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
which contains both quota and period.

* "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
"cpu.rt.max" which contains both runtime and period.

v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
CFS bandwidth stats and also using raw division for u64. Use
CONFIG_CFS_BANDWITH and do_div() instead.

The semantics of "cpu.rt.max" is not fully decided yet. Dropped
for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c148dfe..7bba2c5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8561,6 +8561,139 @@ static struct cftype cpu_legacy_files[] = {
{ } /* terminate */
};

+static int cpu_stats_show(struct seq_file *sf, void *v)
+{
+ cpuacct_cpu_stats_show(sf);
+
+#ifdef CONFIG_CFS_BANDWIDTH
+ {
+ struct task_group *tg = css_tg(seq_css(sf));
+ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+ u64 throttled_usec;
+
+ throttled_usec = cfs_b->throttled_time;
+ do_div(throttled_usec, NSEC_PER_USEC);
+
+ seq_printf(sf, "nr_periods %d\n"
+ "nr_throttled %d\n"
+ "throttled_usec %llu\n",
+ cfs_b->nr_periods, cfs_b->nr_throttled,
+ throttled_usec);
+ }
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ struct task_group *tg = css_tg(css);
+ u64 weight = scale_load_down(tg->shares);
+
+ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
+}
+
+static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 weight)
+{
+ /*
+ * cgroup weight knobs should use the common MIN, DFL and MAX
+ * values which are 1, 100 and 10000 respectively. While it loses
+ * a bit of range on both ends, it maps pretty well onto the shares
+ * value used by scheduler and the round-trip conversions preserve
+ * the original value over the entire range.
+ */
+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
+ return -ERANGE;
+
+ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
+
+ return sched_group_set_shares(css_tg(css), scale_load(weight));
+}
+#endif
+
+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
+ long period, long quota)
+{
+ if (quota < 0)
+ seq_puts(sf, "max");
+ else
+ seq_printf(sf, "%ld", quota);
+
+ seq_printf(sf, " %ld\n", period);
+}
+
+/* caller should put the current value in *@periodp before calling */
+static int __maybe_unused cpu_period_quota_parse(char *buf,
+ u64 *periodp, u64 *quotap)
+{
+ char tok[21]; /* U64_MAX */
+
+ if (!sscanf(buf, "%s %llu", tok, periodp))
+ return -EINVAL;
+
+ *periodp *= NSEC_PER_USEC;
+
+ if (sscanf(tok, "%llu", quotap))
+ *quotap *= NSEC_PER_USEC;
+ else if (!strcmp(tok, "max"))
+ *quotap = RUNTIME_INF;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+static int cpu_max_show(struct seq_file *sf, void *v)
+{
+ struct task_group *tg = css_tg(seq_css(sf));
+
+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
+ return 0;
+}
+
+static ssize_t cpu_max_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct task_group *tg = css_tg(of_css(of));
+ u64 period = tg_get_cfs_period(tg);
+ u64 quota;
+ int ret;
+
+ ret = cpu_period_quota_parse(buf, &period, &quota);
+ if (!ret)
+ ret = tg_set_cfs_bandwidth(tg, period, quota);
+ return ret ?: nbytes;
+}
+#endif
+
+static struct cftype cpu_files[] = {
+ {
+ .name = "stat",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cpu_stats_show,
+ },
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ {
+ .name = "weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = cpu_weight_read_u64,
+ .write_u64 = cpu_weight_write_u64,
+ },
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+ {
+ .name = "max",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cpu_max_show,
+ .write = cpu_max_write,
+ },
+#endif
+ { } /* terminate */
+};
+
struct cgroup_subsys cpu_cgrp_subsys = {
.css_alloc = cpu_cgroup_css_alloc,
.css_released = cpu_cgroup_css_released,
@@ -8569,7 +8702,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
.legacy_cftypes = cpu_legacy_files,
+ .dfl_cftypes = cpu_files,
.early_init = true,
+#ifdef CONFIG_CGROUP_CPUACCT
+ /*
+ * cpuacct is enabled together with cpu on the unified hierarchy
+ * and its stats are reported through "cpu.stat".
+ */
+ .depends_on = 1 << cpuacct_cgrp_id,
+#endif
};

#endif /* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 3eb9eda..7a02d26 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -305,6 +305,30 @@ static struct cftype files[] = {
{ } /* terminate */
};

+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
+void cpuacct_cpu_stats_show(struct seq_file *sf)
+{
+ struct cgroup_subsys_state *css;
+ u64 usage, user, sys;
+
+ css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
+
+ usage = cpuusage_read(css, seq_cft(sf));
+ cpuacct_stats_read(css_ca(css), &user, &sys);
+
+ user *= TICK_NSEC;
+ sys *= TICK_NSEC;
+ do_div(usage, NSEC_PER_USEC);
+ do_div(user, NSEC_PER_USEC);
+ do_div(sys, NSEC_PER_USEC);
+
+ seq_printf(sf, "usage_usec %llu\n"
+ "user_usec %llu\n"
+ "system_usec %llu\n", usage, user, sys);
+
+ css_put(css);
+}
+
/*
* charge this task's execution time to its accounting group.
*
diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
index ba72807..ddf7af4 100644
--- a/kernel/sched/cpuacct.h
+++ b/kernel/sched/cpuacct.h
@@ -2,6 +2,7 @@

extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
+extern void cpuacct_cpu_stats_show(struct seq_file *sf);

#else

@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
}

+static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
+{
+}
+
#endif
pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/README.md (new file)
@@ -0,0 +1,21 @@
Patches for CPU Controller on Control Group v2
===============================================

See Tejun Heo's [explanation][1] for why these patches are currently
out-of-tree.

Generating the patches
-----------------------

In a linux checkout, with remote tc-cgroup pointing to
git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git, your
nixpkgs checkout in the same directory as your linux checkout (or
modify the command accordingly), and setting `ver` to the appropriate
version:

```shell
$ ver=4.7
$ git log --reverse --patch v$ver..remotes/tc-cgroup/cgroup-v2-cpu-v$ver > ../nixpkgs/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/$ver.patch
```

[1]: https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu
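The sibling Nix expression in the next file exposes every `*.patch` file in this directory as an attribute keyed by kernel version, so a freshly generated patch only needs to be referenced from the kernel package set. A minimal sketch of that wiring, following the all-packages.nix hunk at the end of this commit (everything unrelated is omitted):

```nix
# Sketch: how a kernel expression picks up the version-matched patch.
# The cpu-cgroup-v2."4.7" attribute is provided by the directory
# expression shown in the next file of this commit.
linux_4_7 = callPackage ../os-specific/linux/kernel/linux-4.7.nix {
  kernelPatches = [
    kernelPatches.bridge_stp_helper
    kernelPatches.cpu-cgroup-v2."4.7"
  ];
};
```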
@@ -0,0 +1,11 @@
let
  ents = builtins.readDir ./.;
in builtins.listToAttrs (builtins.filter (x: x != null) (map (name: let
  match = builtins.match "(.*)\\.patch" name;
in if match == null then null else {
  name = builtins.head match;
  value = {
    name = "cpu-cgroup-v2-${name}";
    patch = ./. + "/${name}";
  };
}) (builtins.attrNames ents)))
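To make the expression above easier to follow: assuming the directory contains exactly 4.4.patch, 4.6.patch and 4.7.patch alongside the non-matching README.md and default.nix (which the regex filter drops), it should evaluate to an attribute set shaped roughly like this sketch:

```nix
# Approximate evaluation result of the directory expression above.
# Keys come from the captured "(.*)\.patch" group; values are the
# name/patch pairs that kernelPatches entries expect.
{
  "4.4" = { name = "cpu-cgroup-v2-4.4.patch"; patch = ./4.4.patch; };
  "4.6" = { name = "cpu-cgroup-v2-4.6.patch"; patch = ./4.6.patch; };
  "4.7" = { name = "cpu-cgroup-v2-4.7.patch"; patch = ./4.7.patch; };
}
```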
@@ -144,4 +144,6 @@ rec {
      sha256 = "14rm1qr87p7a5prz8g5fwbpxzdp3ighj095x8rvhm8csm20wspyy";
    };
  };
+
+  cpu-cgroup-v2 = import ./cpu-cgroup-v2-patches;
}
@@ -11165,6 +11165,7 @@ in
  linux_4_4 = callPackage ../os-specific/linux/kernel/linux-4.4.nix {
    kernelPatches =
      [ kernelPatches.bridge_stp_helper
+        kernelPatches.cpu-cgroup-v2."4.4"
      ]
      ++ lib.optionals ((platform.kernelArch or null) == "mips")
      [ kernelPatches.mips_fpureg_emu
@@ -11176,6 +11177,7 @@ in
  linux_4_6 = callPackage ../os-specific/linux/kernel/linux-4.6.nix {
    kernelPatches =
      [ kernelPatches.bridge_stp_helper
+        kernelPatches.cpu-cgroup-v2."4.6"
      ]
      ++ lib.optionals ((platform.kernelArch or null) == "mips")
      [ kernelPatches.mips_fpureg_emu
@@ -11187,6 +11189,9 @@ in
  linux_4_7 = callPackage ../os-specific/linux/kernel/linux-4.7.nix {
    kernelPatches =
      [ kernelPatches.bridge_stp_helper
+        # See pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/README.md
+        # when adding a new linux version
+        kernelPatches.cpu-cgroup-v2."4.7"
      ]
      ++ lib.optionals ((platform.kernelArch or null) == "mips")
      [ kernelPatches.mips_fpureg_emu
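Beyond the three package sets updated above, the same patch attributes can in principle be applied to a custom kernel through the usual `override` mechanism; the following is only an illustrative sketch (the exact patch list for a given kernel may differ):

```nix
# Illustrative override: build linux_4_7 with the cgroup-v2 cpu
# controller patch explicitly listed alongside the default bridge patch.
{ pkgs }:

pkgs.linux_4_7.override {
  kernelPatches = [
    pkgs.kernelPatches.bridge_stp_helper
    pkgs.kernelPatches.cpu-cgroup-v2."4.7"
  ];
}
```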