changeset 865:91a6a5244cce

slab: implement a custom allocator

This is a very simple allocator based on the ideas from kmem cache allocator
in Solaris.  While functional, there are plenty of performance and
debuggability improvements that could (and probably should) be made.

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
date Sun, 15 Mar 2020 15:49:39 +0200
parents 241c40daa948
children 1c4c64f0ef21
files slab.c
diffstat 1 files changed, 241 insertions(+), 13 deletions(-)
line diff
--- a/slab.c	Sun Jan 28 19:16:22 2024 -0500
+++ b/slab.c	Sun Mar 15 15:49:39 2020 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
+ * Copyright (c) 2015-2020,2023-2024 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -22,35 +22,197 @@
 
 #include <errno.h>
 #include <string.h>
+#include <unistd.h>
 
 #include <jeffpc/error.h>
 #include <jeffpc/mem.h>
+#include <jeffpc/mmap.h>
+#include <jeffpc/cstr.h>
+#include <jeffpc/list.h>
+#include <jeffpc/synch.h>
 
 /*
- * A slab allocator - well, not really... just use malloc and free directly.
+ * A slab allocator - custom implementation
+ *
+ * This is a simplified (and independent) implementation of Solaris's kmem
+ * cache allocator.
+ *
+ * Cache
+ * -----
+ *
+ * The top-level structure (struct mem_cache) maintains several lists of
+ * slabs, each of which is made up of allocatable objects.
+ *
+ * Slabs
+ * -----
+ *
+ * We allocate one page per slab.  The beginning of the page consists of an
+ * array of objects, each satisfying the alignment requirement.  The end of
+ * the page contains the slab tracking structure (struct slab).  There may
+ * be some space wasted between the last object and the struct slab.
+ *
+ * Visually, a page has the following layout:
+ *
+ * +-------+-------+-----+-------+-------+-------------+
+ * | <obj> | <obj> | ... | <obj> | <pad> | struct slab |
+ * +-------+-------+-----+-------+-------+-------------+
+ *
+ * Using slab size == page size simplifies the conversion between object
+ * pointers and slab structs.  We can simply round down the object address
+ * to the nearest page boundary and then read the struct slab at the end of
+ * the page.
+ *
+ * We don't track which objects are in use, but we keep a per-slab list of
+ * all free objects in that slab.  As a result, we can compute which
+ * objects are in use.
  */
 
+static LOCK_CLASS(mem_cache_lc);
+
+#define MEM_CACHE_NAME_SIZE	32
 struct mem_cache {
-	size_t size;
-	size_t align;
+	char name[MEM_CACHE_NAME_SIZE];
+	size_t objsize;
+	size_t objalign;
+	size_t pagesize;
+	size_t objs_per_slab;
+
+	struct lock lock;
+
+	/* slab lists */
+	struct list empty_slabs;
+	struct list partial_slabs;
+};
+
+#define SLAB_STRUCT_ALIGNMENT	16
+#define SLAB_MAGIC		0x534c4142
+struct slab {
+	struct list_node slab_list;
+	struct list objs;
+	size_t free;
+	size_t magic;
+};
+
+STATIC_ASSERT(sizeof(struct slab) % SLAB_STRUCT_ALIGNMENT == 0);
+
+struct objctl {
+	struct list_node list;
 };
 
+static inline void *page_start(struct mem_cache *cache, void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~(cache->pagesize - 1));
+}
+
+static inline struct slab *page_to_slab(struct mem_cache *cache, void *page)
+{
+	struct slab *slab;
+	uintptr_t pgoff;
+
+	pgoff = cache->pagesize - sizeof(struct slab);
+
+	slab = (struct slab *)(((uintptr_t) page) + pgoff);
+
+	/* make sure we're using proper alignment */
+	ASSERT0(((uintptr_t) slab) % SLAB_STRUCT_ALIGNMENT);
+
+	return slab;
+}
+
+static inline struct slab *obj_to_slab(struct mem_cache *cache, void *obj)
+{
+	return page_to_slab(cache, page_start(cache, obj));
+}
+
+static struct slab *alloc_slab(struct mem_cache *cache)
+{
+	struct slab *slab;
+	uintptr_t ptr;
+	void *page;
+	size_t i;
+
+	page = xmmap(NULL, cache->pagesize, PROT_READ | PROT_WRITE,
+		     MAP_ANON | MAP_PRIVATE, -1, 0);
+	if (IS_ERR(page))
+		return page;
+
+	/*
+	 * sanity check that we have correct page alignment to work with
+	 * obj_to_slab's assumptions
+	 */
+	ASSERT0(((uintptr_t) page) & (cache->pagesize - 1));
+
+	slab = page_to_slab(cache, page);
+
+	slab->free = cache->objs_per_slab;
+	slab->magic = SLAB_MAGIC;
+	list_create(&slab->objs, sizeof(struct objctl),
+		    offsetof(struct objctl, list));
+
+	for (ptr = (uintptr_t) page, i = 0;
+	     i < cache->objs_per_slab;
+	     ptr += cache->objsize, i++) {
+		ASSERT3P((void *) ptr, <, slab);
+		list_insert_tail(&slab->objs, (struct objctl *) ptr);
+	}
+
+	list_insert_tail(&cache->partial_slabs, slab);
+
+	return slab;
+}
+
+static void free_slab(struct mem_cache *cache, struct slab *slab)
+{
+	ASSERT3U(slab->free, ==, cache->objs_per_slab);
+	ASSERT3U(slab->magic, ==, SLAB_MAGIC);
+
+	ASSERT0(xmunmap(page_start(cache, slab), cache->pagesize));
+}
+
 #pragma weak mem_cache_create
 struct mem_cache *mem_cache_create(char *name, size_t size, size_t align)
 {
 	struct mem_cache *cache;
+	size_t objs_per_slab;
+	size_t pagesize;
 
-	if (!size)
+	if (!name || !size)
+		return ERR_PTR(-EINVAL);
+
+	if (align && !is_p2(align))
 		return ERR_PTR(-EINVAL);
 
-	/* FIXME: check that the value is 0, or a power of 2 */
+	pagesize = getpagesize();
+
+	if (align > pagesize)
+		return ERR_PTR(-EINVAL);
+
+	/* tweak the alignment - LP64 ABIs commonly use 16B */
+	align = MAX(sizeof(void *) * 2, align);
+	ASSERT(is_p2(align));
+
+	/* tweak the size */
+	size = MAX(size, sizeof(struct objctl)); /* need enough for objctl */
+	size = p2roundup(size, align); /* respect alignment */
+
+	objs_per_slab = (pagesize - sizeof(struct slab)) / size;
+	if (!objs_per_slab)
+		return ERR_PTR(-EINVAL);
 
 	cache = malloc(sizeof(struct mem_cache));
 	if (!cache)
 		return ERR_PTR(-ENOMEM);
 
-	cache->size = size;
-	cache->align = (align < sizeof(void *)) ? sizeof(void *) : align;
+	strcpy_safe(cache->name, name, sizeof(cache->name)); /* truncates */
+	cache->objsize = size;
+	cache->objalign = align;
+	cache->pagesize = pagesize;
+	cache->objs_per_slab = objs_per_slab;
+	MXINIT(&cache->lock, &mem_cache_lc);
+	list_create(&cache->empty_slabs, sizeof(struct slab),
+		    offsetof(struct slab, slab_list));
+	list_create(&cache->partial_slabs, sizeof(struct slab),
+		    offsetof(struct slab, slab_list));
 
 	return cache;
 }
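
The object-to-slab conversion described in the layout comment above is plain
pointer arithmetic.  A minimal illustration, assuming a 4096-byte page (so the
low 12 bits of an address are the offset within the page) and a purely
hypothetical object address:

    obj                     = 0x7f64a13f62d0
    page_start(cache, obj)  = 0x7f64a13f62d0 & ~0xfff  = 0x7f64a13f6000
    obj_to_slab(cache, obj) = 0x7f64a13f6000 + 4096 - sizeof(struct slab)

Every object on a page therefore maps to the same struct slab footer at the
end of that page, which is why no per-object back-pointer is needed.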
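
To make the sizing logic in mem_cache_create() concrete, here is a
hypothetical example; the numbers assume a 4096-byte page and a 64-byte
struct slab, both of which depend on the platform and on the layout of the
list structures:

    requested:    size = 40, align = 8
    align         = MAX(2 * sizeof(void *), 8)  = 16
    size          = p2roundup(40, 16)           = 48   (40 already covers struct objctl)
    objs_per_slab = (4096 - 64) / 48            = 84

Each slab would then hold 84 objects occupying bytes 0-4031, with the
struct slab footer starting at byte 4032 and, in this particular case, no
padding in between.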
@@ -61,22 +223,88 @@
 	if (!cache)
 		return;
 
+	/* sanity check first */
+	ASSERT(list_is_empty(&cache->partial_slabs));
+	ASSERT(list_is_empty(&cache->empty_slabs));
+
+	list_destroy(&cache->partial_slabs);
+	list_destroy(&cache->empty_slabs);
+	MXDESTROY(&cache->lock);
+
+	/* poison */
+	memset(cache, 0xaf, sizeof(struct mem_cache));
+
 	free(cache);
 }
 
 #pragma weak mem_cache_alloc
 void *mem_cache_alloc(struct mem_cache *cache)
 {
+	struct slab *slab;
 	void *obj;
-	int ret;
+
+	MXLOCK(&cache->lock);
+
+	slab = list_head(&cache->partial_slabs);
+	if (!slab) {
+		slab = alloc_slab(cache);
+		if (IS_ERR(slab)) {
+			MXUNLOCK(&cache->lock);
+			return NULL;
+		}
+	}
 
-	ret = posix_memalign(&obj, cache->align, cache->size);
+	obj = list_remove_head(&slab->objs);
+	ASSERT3P(obj, !=, NULL);
+	ASSERT0(((uintptr_t) obj) % cache->objalign);
+
+	slab->free--;
 
-	return ret ? NULL : obj;
+	if (!slab->free) {
+		/* move slab from partial to empty */
+		list_remove(&cache->partial_slabs, slab);
+		list_insert_tail(&cache->empty_slabs, slab);
+	}
+
+	MXUNLOCK(&cache->lock);
+
+	return obj;
 }
 
 #pragma weak mem_cache_free
-void mem_cache_free(struct mem_cache *cache, void *buf)
+void mem_cache_free(struct mem_cache *cache, void *obj)
 {
-	free(buf);
+	struct slab *slab;
+
+	if (!obj)
+		return;
+
+	/* sanity checks to catch bad frees */
+	ASSERT(!IS_ERR(obj));
+
+	/* check that object starts at object boundary */
+	ASSERT0(((uintptr_t) obj) % cache->objalign);
+
+	slab = obj_to_slab(cache, obj);
+	ASSERT3U(slab->magic, ==, SLAB_MAGIC);
+
+	MXLOCK(&cache->lock);
+
+	list_insert_tail(&slab->objs, obj);
+	slab->free++;
+
+	if (slab->free == 1) {
+		/* move slab from empty to partial */
+		list_remove(&cache->empty_slabs, slab);
+		list_insert_tail(&cache->partial_slabs, slab);
+	}
+
+	if (slab->free == cache->objs_per_slab) {
+		/* completely unused slab -> free it */
+		list_remove(&cache->partial_slabs, slab);
+
+		free_slab(cache, slab);
+	}
+
+	MXUNLOCK(&cache->lock);
 }