Skip to content

Instantly share code, notes, and snippets.

@pfactum
Created May 9, 2019 12:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pfactum/b323f9b93eabd8a6c15dc7a31f2dacaf to your computer and use it in GitHub Desktop.
Save pfactum/b323f9b93eabd8a6c15dc7a31f2dacaf to your computer and use it in GitHub Desktop.
KSM "always" mode preparations
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2b8ee90bb644..510766a3fa05 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2008,6 +2008,13 @@
0: force disabled
1: force enabled
+ ksm_mode=
+ [KNL]
+ Format: [madvise|always]
+ Default: madvise
+ Can be used to control the default behavior of the system
+ with respect to merging anonymous memory.
+
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
Default is 0 (don't ignore, but inject #GP)
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
index 9303786632d1..9af730640da7 100644
--- a/Documentation/admin-guide/mm/ksm.rst
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -78,6 +78,13 @@ KSM daemon sysfs interface
The KSM daemon is controlled by sysfs files in ``/sys/kernel/mm/ksm/``,
readable by all but writable only by root:
+mode
+ * set madvise to deduplicate only madvised memory
+ * set always to allow deduplicating all the anonymous memory
+ (applies to newly allocated memory only)
+
+ Default: madvise (maintains old behaviour)
+
pages_to_scan
how many pages to scan before ksmd goes to sleep
e.g. ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan``.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 95ca1fe7283c..19cc246000e8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -648,6 +648,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_MIXEDMAP)] = "mm",
[ilog2(VM_HUGEPAGE)] = "hg",
[ilog2(VM_NOHUGEPAGE)] = "nh",
+#ifdef VM_UNMERGEABLE
+ [ilog2(VM_UNMERGEABLE)] = "ug",
+#endif
[ilog2(VM_MERGEABLE)] = "mg",
[ilog2(VM_UFFD_MISSING)]= "um",
[ilog2(VM_UFFD_WP)] = "uw",
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index e48b1e453ff5..3c076b35259c 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -21,6 +21,11 @@ struct mem_cgroup;
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
+#ifdef VM_UNMERGEABLE
+bool ksm_mode_always(void);
+#endif
+int ksm_enter(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *vm_flags);
int __ksm_enter(struct mm_struct *mm);
void __ksm_exit(struct mm_struct *mm);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6b10c21630f5..114cdb882cdd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -252,11 +252,13 @@ extern unsigned int kobjsize(const void *objp);
#define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0)
#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1)
#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2)
#define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3)
#define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4)
+#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5)
#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -272,6 +274,10 @@ extern unsigned int kobjsize(const void *objp);
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
+#ifdef VM_HIGH_ARCH_5
+#define VM_UNMERGEABLE VM_HIGH_ARCH_5 /* Opt-out for KSM "always" mode */
+#endif /* VM_HIGH_ARCH_5 */
+
#if defined(CONFIG_X86)
# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
#elif defined(CONFIG_PPC)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a1675d43777e..717e0fd9d2ef 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -130,6 +130,12 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" )
#define IF_HAVE_VM_SOFTDIRTY(flag,name)
#endif
+#ifdef VM_UNMERGEABLE
+#define IF_HAVE_VM_UNMERGEABLE(flag,name) {flag, name },
+#else
+#define IF_HAVE_VM_UNMERGEABLE(flag,name)
+#endif
+
#define __def_vmaflag_names \
{VM_READ, "read" }, \
{VM_WRITE, "write" }, \
@@ -161,6 +167,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
{VM_MIXEDMAP, "mixedmap" }, \
{VM_HUGEPAGE, "hugepage" }, \
{VM_NOHUGEPAGE, "nohugepage" }, \
+IF_HAVE_VM_UNMERGEABLE(VM_UNMERGEABLE, "unmergeable" ) \
{VM_MERGEABLE, "mergeable" } \
#define show_vma_flags(flags) \
diff --git a/mm/ksm.c b/mm/ksm.c
index fc64874dc6f4..6a2280b875cc 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -295,6 +295,12 @@ static int ksm_nr_node_ids = 1;
static unsigned long ksm_run = KSM_RUN_STOP;
static void wait_while_offlining(void);
+#ifdef VM_UNMERGEABLE
+#define KSM_MODE_MADVISE 0
+#define KSM_MODE_ALWAYS 1
+static unsigned long ksm_mode = KSM_MODE_MADVISE;
+#endif
+
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DECLARE_WAIT_QUEUE_HEAD(ksm_iter_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
@@ -2450,36 +2456,18 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
switch (advice) {
case MADV_MERGEABLE:
- /*
- * Be somewhat over-protective for now!
- */
- if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
- VM_PFNMAP | VM_IO | VM_DONTEXPAND |
- VM_HUGETLB | VM_MIXEDMAP))
- return 0; /* just ignore the advice */
-
- if (vma_is_dax(vma))
- return 0;
-
-#ifdef VM_SAO
- if (*vm_flags & VM_SAO)
- return 0;
-#endif
-#ifdef VM_SPARC_ADI
- if (*vm_flags & VM_SPARC_ADI)
- return 0;
+#ifdef VM_UNMERGEABLE
+ *vm_flags &= ~VM_UNMERGEABLE;
#endif
-
- if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
- err = __ksm_enter(mm);
- if (err)
- return err;
- }
-
- *vm_flags |= VM_MERGEABLE;
+ err = ksm_enter(mm, vma, vm_flags);
+ if (err)
+ return err;
break;
case MADV_UNMERGEABLE:
+#ifdef VM_UNMERGEABLE
+ *vm_flags |= VM_UNMERGEABLE;
+#endif
if (!(*vm_flags & VM_MERGEABLE))
return 0; /* just ignore the advice */
@@ -2496,6 +2484,76 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
return 0;
}
+#ifdef VM_UNMERGEABLE
+bool ksm_mode_always(void)
+{
+ return ksm_mode == KSM_MODE_ALWAYS;
+}
+
+static int __init setup_ksm_mode(char *str)
+{
+ int ret = 0;
+
+ if (!str)
+ goto out;
+
+ if (!strcmp(str, "madvise")) {
+ ksm_mode = KSM_MODE_MADVISE;
+ ret = 1;
+ } else if (!strcmp(str, "always")) {
+ ksm_mode = KSM_MODE_ALWAYS;
+ ret = 1;
+ }
+
+out:
+ if (!ret)
+ pr_warn("ksm_mode= cannot parse, ignored\n");
+
+ return ret;
+}
+__setup("ksm_mode=", setup_ksm_mode);
+#endif
+
+int ksm_enter(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *vm_flags)
+{
+ int err;
+
+ /*
+ * Be somewhat over-protective for now!
+ */
+ if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_HUGETLB | VM_MIXEDMAP))
+ return 0; /* just ignore the advice */
+
+ if (vma_is_dax(vma))
+ return 0;
+
+#ifdef VM_SAO
+ if (*vm_flags & VM_SAO)
+ return 0;
+#endif
+#ifdef VM_SPARC_ADI
+ if (*vm_flags & VM_SPARC_ADI)
+ return 0;
+#endif
+#ifdef VM_UNMERGEABLE
+ if (*vm_flags & VM_UNMERGEABLE)
+ return 0;
+#endif
+
+ if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
+ err = __ksm_enter(mm);
+ if (err)
+ return err;
+ }
+
+ *vm_flags |= VM_MERGEABLE;
+
+ return 0;
+}
+
int __ksm_enter(struct mm_struct *mm)
{
struct mm_slot *mm_slot;
@@ -2859,6 +2917,35 @@ static void wait_while_offlining(void)
static struct kobj_attribute _name##_attr = \
__ATTR(_name, 0644, _name##_show, _name##_store)
+#ifdef VM_UNMERGEABLE
+static ssize_t mode_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ switch (ksm_mode) {
+ case KSM_MODE_MADVISE:
+ return sprintf(buf, "always [madvise]\n");
+ case KSM_MODE_ALWAYS:
+ return sprintf(buf, "[always] madvise\n");
+ }
+
+ return sprintf(buf, "always [madvise]\n");
+}
+
+static ssize_t mode_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (!memcmp("madvise", buf, min(sizeof("madvise")-1, count)))
+ ksm_mode = KSM_MODE_MADVISE;
+ else if (!memcmp("always", buf, min(sizeof("always")-1, count)))
+ ksm_mode = KSM_MODE_ALWAYS;
+ else
+ return -EINVAL;
+
+ return count;
+}
+KSM_ATTR(mode);
+#endif
+
static ssize_t sleep_millisecs_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -3161,6 +3248,9 @@ static ssize_t full_scans_show(struct kobject *kobj,
KSM_ATTR_RO(full_scans);
static struct attribute *ksm_attrs[] = {
+#ifdef VM_UNMERGEABLE
+ &mode_attr.attr,
+#endif
&sleep_millisecs_attr.attr,
&pages_to_scan_attr.attr,
&run_attr.attr,
diff --git a/mm/memory.c b/mm/memory.c
index ab650c21bccd..08f3f92de310 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2994,6 +2994,12 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
update_mmu_cache(vma, vmf->address, vmf->pte);
unlock:
pte_unmap_unlock(vmf->pte, vmf->ptl);
+
+#if defined(CONFIG_KSM) && defined(VM_UNMERGEABLE)
+ if (ksm_mode_always())
+ ksm_enter(vma->vm_mm, vma, &vma->vm_flags);
+#endif
+
return ret;
release:
mem_cgroup_cancel_charge(page, memcg, false);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment