author     Rich Felker <dalias@aerifal.cx>  2020-06-30 00:59:48 -0400
committer  Rich Felker <dalias@aerifal.cx>  2020-06-30 00:59:48 -0400
commit     503bd3976623493a10b0f32c617feb51f9ba04c8 (patch)
tree       6311c4ce29fb02d847c9c75d2a61090817feb2d5
parent     785752a595ddd382396363f8aa237d1246ed5764 (diff)
download   musl-503bd3976623493a10b0f32c617feb51f9ba04c8.tar.gz
import mallocng
the files added come from the mallocng development repo, commit 2ed58817cca5bc055974e5a0e43c280d106e696b. they comprise a new malloc implementation, developed over the past 9 months, to replace the old allocator (since dubbed "oldmalloc") with one that retains low code size and minimal baseline memory overhead while avoiding fundamental flaws in oldmalloc and making significant enhancements. these include highly controlled fragmentation, fine-grained ability to return memory to the system when freed, and strong hardening against dynamic memory usage errors by the caller.

internally, mallocng derives most of these properties from tightly structuring memory, creating space for allocations as uniform-sized slots within individually mmapped (and individually freeable) allocation groups. smaller-than-pagesize groups are created within slots of larger ones. the minimal group size is very small, and larger sizes (in geometric progression) only come into play when usage is high. all data necessary for maintaining consistency of the allocator state is tracked in out-of-band metadata, reachable via a validated path from minimal in-band metadata. all pointers passed (to free, etc.) are validated before any stores to memory take place. early reuse of freed slots is avoided via approximate LRU ordering of freed slots. further hardening against use-after-free and double-free, even in the case where the freed slot has been reused, is achieved by cycling the offset within the slot at which the allocation is placed; this is possible whenever the slot size is larger than the requested allocation.
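as a rough standalone illustration (not part of this commit) of the in-band/out-of-band metadata split described above, the sketch below mimics the recovery path implemented by get_meta() in meta.h: a 16-bit offset stored just before the user pointer leads back to the group header, whose meta pointer is then cross-checked against the slot. the group header layout and the UNIT constant are taken from the imported sources; the meta struct here is stripped down to the fields the sketch needs, and the real code additionally validates a per-process secret in the containing meta area and the slot's avail/freed bits.

// standalone sketch; compile with any C99 compiler and run directly
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define UNIT 16   // slot granularity; also the size of the group header

struct meta;

struct group {                  // in-band header at the start of a group
    struct meta *meta;          // pointer out to the out-of-band metadata
    unsigned char active_idx:5;
    char pad[UNIT - sizeof(struct meta *) - 1];
    unsigned char storage[];
};

struct meta {                   // out-of-band metadata (heavily simplified)
    struct group *mem;
    int last_idx;
};

// recover the meta from a user pointer p: the 16-bit field at p-2 holds
// the offset of p from storage in UNITs, and the low 5 bits of p[-3]
// hold the slot index. the meta->mem == group round trip is what makes
// a forged or corrupted header detectable.
static struct meta *sketch_get_meta(unsigned char *p)
{
    uint16_t offset;
    memcpy(&offset, p-2, sizeof offset);
    int index = p[-3] & 31;
    struct group *base = (struct group *)(p - UNIT*offset - UNIT);
    struct meta *m = base->meta;
    assert(m->mem == base);
    assert(index <= m->last_idx);
    return m;
}

int main(void)
{
    // build one fake two-slot group by hand, stride of 3 UNITs per slot
    size_t stride = 3*UNIT;
    unsigned char *buf = calloc(1, sizeof(struct group) + 2*stride);
    if (!buf) return 1;
    struct group *g = (struct group *)buf;
    struct meta m = { .mem = g, .last_idx = 1 };
    g->meta = &m;

    // "enframe" slot 1 at zero offset within its slot
    unsigned char *p = g->storage + stride*1;
    uint16_t off = (uint16_t)((p - g->storage)/UNIT);
    memcpy(p-2, &off, sizeof off);
    p[-3] = 1;                  // slot index in the low 5 bits

    printf("recovered meta %p, expected %p\n",
           (void *)sketch_get_meta(p), (void *)&m);
    free(buf);
    return 0;
}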
-rw-r--r--  src/malloc/mallocng/README.mallocng         13
-rw-r--r--  src/malloc/mallocng/aligned_alloc.c         57
-rw-r--r--  src/malloc/mallocng/free.c                 143
-rw-r--r--  src/malloc/mallocng/malloc.c               387
-rw-r--r--  src/malloc/mallocng/malloc_usable_size.c    12
-rw-r--r--  src/malloc/mallocng/meta.h                 288
-rw-r--r--  src/malloc/mallocng/realloc.c               51
7 files changed, 938 insertions, 13 deletions
diff --git a/src/malloc/mallocng/README.mallocng b/src/malloc/mallocng/README.mallocng
deleted file mode 100644
index da32bf06..00000000
--- a/src/malloc/mallocng/README.mallocng
+++ /dev/null
@@ -1,13 +0,0 @@
-This directory is a skeleton for upcoming merge of musl's new malloc
-implementation, mallocng. To use it, drop in copies of or symlinks to
-the following files from mallocng:
-
-- meta.h
-- malloc.c
-- realloc.c
-- free.c
-- aligned_alloc.c
-- malloc_usable_size.c
-
-and build with make variable MALLOC_DIR=mallocng in config.mak or on
-make command line.
diff --git a/src/malloc/mallocng/aligned_alloc.c b/src/malloc/mallocng/aligned_alloc.c
new file mode 100644
index 00000000..34116896
--- /dev/null
+++ b/src/malloc/mallocng/aligned_alloc.c
@@ -0,0 +1,57 @@
+#include <stdlib.h>
+#include <errno.h>
+#include "meta.h"
+
+void *aligned_alloc(size_t align, size_t len)
+{
+ if ((align & -align) != align) {
+ errno = EINVAL;
+ return 0;
+ }
+
+ if (len > SIZE_MAX - align || align >= (1ULL<<31)*UNIT) {
+ errno = ENOMEM;
+ return 0;
+ }
+
+ if (DISABLE_ALIGNED_ALLOC) {
+ errno = ENOMEM;
+ return 0;
+ }
+
+ if (align <= UNIT) align = UNIT;
+
+ unsigned char *p = malloc(len + align - UNIT);
+ if (!p) return 0;
+ struct meta *g = get_meta(p);
+ int idx = get_slot_index(p);
+ size_t stride = get_stride(g);
+ unsigned char *start = g->mem->storage + stride*idx;
+ unsigned char *end = g->mem->storage + stride*(idx+1) - IB;
+ size_t adj = -(uintptr_t)p & (align-1);
+
+ if (!adj) {
+ set_size(p, end, len);
+ return p;
+ }
+ p += adj;
+ uint32_t offset = (size_t)(p-g->mem->storage)/UNIT;
+ if (offset <= 0xffff) {
+ *(uint16_t *)(p-2) = offset;
+ p[-4] = 0;
+ } else {
+ // use a 32-bit offset if 16-bit doesn't fit. for this,
+ // 16-bit field must be zero, [-4] byte nonzero.
+ *(uint16_t *)(p-2) = 0;
+ *(uint32_t *)(p-8) = offset;
+ p[-4] = 1;
+ }
+ p[-3] = idx;
+ set_size(p, end, len);
+ // store offset to aligned enframing. this facilitates cycling
+ // offset and also iteration of heap for debugging/measurement.
+ // for extreme overalignment it won't fit but these are classless
+ // allocations anyway.
+ *(uint16_t *)(start - 2) = (size_t)(p-start)/UNIT;
+ start[-3] = 7<<5;
+ return p;
+}
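a minimal standalone sketch (not part of the commit) of the adjustment arithmetic above: adj = -(uintptr_t)p & (align-1) is the number of bytes p must advance to reach the next align boundary, and is zero when p is already aligned. the starting address is made up but, like a real mallocng pointer, UNIT-aligned.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uintptr_t p = 0x10002030;   // hypothetical 16-aligned malloc() result
    for (uintptr_t align = 16; align <= 256; align *= 2) {
        uintptr_t adj = -p & (align-1);   // bytes to next align boundary
        printf("align %3lu: adj %3lu -> %#lx\n",
               (unsigned long)align, (unsigned long)adj,
               (unsigned long)(p+adj));
    }
    return 0;
}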
diff --git a/src/malloc/mallocng/free.c b/src/malloc/mallocng/free.c
new file mode 100644
index 00000000..40745f97
--- /dev/null
+++ b/src/malloc/mallocng/free.c
@@ -0,0 +1,143 @@
+#define _BSD_SOURCE
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "meta.h"
+
+struct mapinfo {
+ void *base;
+ size_t len;
+};
+
+static struct mapinfo nontrivial_free(struct meta *, int);
+
+static struct mapinfo free_group(struct meta *g)
+{
+ struct mapinfo mi = { 0 };
+ int sc = g->sizeclass;
+ if (sc < 48) {
+ ctx.usage_by_class[sc] -= g->last_idx+1;
+ }
+ if (g->maplen) {
+ step_seq();
+ record_seq(sc);
+ mi.base = g->mem;
+ mi.len = g->maplen*4096UL;
+ } else {
+ void *p = g->mem;
+ struct meta *m = get_meta(p);
+ int idx = get_slot_index(p);
+ g->mem->meta = 0;
+ // not checking size/reserved here; it's intentionally invalid
+ mi = nontrivial_free(m, idx);
+ }
+ free_meta(g);
+ return mi;
+}
+
+static int okay_to_free(struct meta *g)
+{
+ int sc = g->sizeclass;
+
+ if (!g->freeable) return 0;
+
+ // always free individual mmaps not suitable for reuse
+ if (sc >= 48 || get_stride(g) < UNIT*size_classes[sc])
+ return 1;
+
+ // always free groups allocated inside another group's slot
+ // since recreating them should not be expensive and they
+ // might be blocking freeing of a much larger group.
+ if (!g->maplen) return 1;
+
+ // if there is another non-full group, free this one to
+ // consolidate future allocations, reduce fragmentation.
+ if (g->next != g) return 1;
+
+ // free any group in a size class that's not bouncing
+ if (!is_bouncing(sc)) return 1;
+
+ size_t cnt = g->last_idx+1;
+ size_t usage = ctx.usage_by_class[sc];
+
+ // if usage is high enough that a larger count should be
+ // used, free the low-count group so a new one will be made.
+ if (9*cnt <= usage && cnt < 20)
+ return 1;
+
+ // otherwise, keep the last group in a bouncing class.
+ return 0;
+}
+
+static struct mapinfo nontrivial_free(struct meta *g, int i)
+{
+ uint32_t self = 1u<<i;
+ int sc = g->sizeclass;
+ uint32_t mask = g->freed_mask | g->avail_mask;
+
+ if (mask+self == (2u<<g->last_idx)-1 && okay_to_free(g)) {
+ // any multi-slot group is necessarily on an active list
+ // here, but single-slot groups might or might not be.
+ if (g->next) {
+ assert(sc < 48);
+ int activate_new = (ctx.active[sc]==g);
+ dequeue(&ctx.active[sc], g);
+ if (activate_new && ctx.active[sc])
+ activate_group(ctx.active[sc]);
+ }
+ return free_group(g);
+ } else if (!mask) {
+ assert(sc < 48);
+ // might still be active if there were no allocations
+ // after last available slot was taken.
+ if (ctx.active[sc] != g) {
+ queue(&ctx.active[sc], g);
+ }
+ }
+ a_or(&g->freed_mask, self);
+ return (struct mapinfo){ 0 };
+}
+
+void free(void *p)
+{
+ if (!p) return;
+
+ struct meta *g = get_meta(p);
+ int idx = get_slot_index(p);
+ size_t stride = get_stride(g);
+ unsigned char *start = g->mem->storage + stride*idx;
+ unsigned char *end = start + stride - IB;
+ get_nominal_size(p, end);
+ uint32_t self = 1u<<idx, all = (2u<<g->last_idx)-1;
+ ((unsigned char *)p)[-3] = 255;
+ // invalidate offset to group header, and cycle offset of
+ // used region within slot if current offset is zero.
+ *(uint16_t *)((char *)p-2) = 0;
+
+ // release any whole pages contained in the slot to be freed
+ // unless it's a single-slot group that will be unmapped.
+ if (((uintptr_t)(start-1) ^ (uintptr_t)end) >= 2*PGSZ && g->last_idx) {
+ unsigned char *base = start + (-(uintptr_t)start & (PGSZ-1));
+ size_t len = (end-base) & -PGSZ;
+ if (len) madvise(base, len, MADV_FREE);
+ }
+
+ // atomic free without locking if this is neither first or last slot
+ for (;;) {
+ uint32_t freed = g->freed_mask;
+ uint32_t avail = g->avail_mask;
+ uint32_t mask = freed | avail;
+ assert(!(mask&self));
+ if (!freed || mask+self==all) break;
+ if (!MT)
+ g->freed_mask = freed+self;
+ else if (a_cas(&g->freed_mask, freed, freed+self)!=freed)
+ continue;
+ return;
+ }
+
+ wrlock();
+ struct mapinfo mi = nontrivial_free(g, idx);
+ unlock();
+ if (mi.len) munmap(mi.base, mi.len);
+}
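a small standalone sketch (not part of the commit) of the page-release arithmetic in free() above: when a slot spans enough address space to contain a whole page and the group is not about to be unmapped anyway, the start is rounded up and the end rounded down to page boundaries, and the whole pages in between are returned with MADV_FREE. the addresses are made up and PGSZ is fixed at 4096 for the example.

#include <stdint.h>
#include <stdio.h>

#define PGSZ 4096UL

int main(void)
{
    // hypothetical slot: usable region [start,end) of a large slot
    uintptr_t start = 0x10001040;
    uintptr_t end   = 0x1000502c;   // start + stride - IB

    // same test as free(): the slot straddles enough address space to
    // possibly contain a whole page
    if (((start-1) ^ end) >= 2*PGSZ) {
        uintptr_t base = start + (-start & (PGSZ-1));   // round up
        size_t len = (end - base) & -PGSZ;              // whole pages only
        if (len) printf("madvise(%#lx, %zu, MADV_FREE)\n",
                        (unsigned long)base, len);
    } else {
        printf("slot cannot contain a whole page\n");
    }
    return 0;
}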
diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c
new file mode 100644
index 00000000..d695ab8e
--- /dev/null
+++ b/src/malloc/mallocng/malloc.c
@@ -0,0 +1,387 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include "meta.h"
+
+LOCK_OBJ_DEF;
+
+const uint16_t size_classes[] = {
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 12, 15,
+ 18, 20, 25, 31,
+ 36, 42, 50, 63,
+ 72, 84, 102, 127,
+ 146, 170, 204, 255,
+ 292, 340, 409, 511,
+ 584, 682, 818, 1023,
+ 1169, 1364, 1637, 2047,
+ 2340, 2730, 3276, 4095,
+ 4680, 5460, 6552, 8191,
+};
+
+static const uint8_t small_cnt_tab[][3] = {
+ { 30, 30, 30 },
+ { 31, 15, 15 },
+ { 20, 10, 10 },
+ { 31, 15, 7 },
+ { 25, 12, 6 },
+ { 21, 10, 5 },
+ { 18, 8, 4 },
+ { 31, 15, 7 },
+ { 28, 14, 6 },
+};
+
+static const uint8_t med_cnt_tab[4] = { 28, 24, 20, 32 };
+
+struct malloc_context ctx = { 0 };
+
+struct meta *alloc_meta(void)
+{
+ struct meta *m;
+ unsigned char *p;
+ if (!ctx.init_done) {
+#ifndef PAGESIZE
+ ctx.pagesize = get_page_size();
+#endif
+ ctx.secret = get_random_secret();
+ ctx.init_done = 1;
+ }
+ size_t pagesize = PGSZ;
+ if (pagesize < 4096) pagesize = 4096;
+ if ((m = dequeue_head(&ctx.free_meta_head))) return m;
+ if (!ctx.avail_meta_count) {
+ int need_unprotect = 1;
+ if (!ctx.avail_meta_area_count && ctx.brk!=-1) {
+ uintptr_t new = ctx.brk + pagesize;
+ int need_guard = 0;
+ if (!ctx.brk) {
+ need_guard = 1;
+ ctx.brk = brk(0);
+ // some ancient kernels returned _ebss
+ // instead of next page as initial brk.
+ ctx.brk += -ctx.brk & (pagesize-1);
+ new = ctx.brk + 2*pagesize;
+ }
+ if (brk(new) != new) {
+ ctx.brk = -1;
+ } else {
+ if (need_guard) mmap((void *)ctx.brk, pagesize,
+ PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+ ctx.brk = new;
+ ctx.avail_meta_areas = (void *)(new - pagesize);
+ ctx.avail_meta_area_count = pagesize>>12;
+ need_unprotect = 0;
+ }
+ }
+ if (!ctx.avail_meta_area_count) {
+ size_t n = 2UL << ctx.meta_alloc_shift;
+ p = mmap(0, n*pagesize, PROT_NONE,
+ MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (p==MAP_FAILED) return 0;
+ ctx.avail_meta_areas = p + pagesize;
+ ctx.avail_meta_area_count = (n-1)*(pagesize>>12);
+ ctx.meta_alloc_shift++;
+ }
+ p = ctx.avail_meta_areas;
+ if ((uintptr_t)p & (pagesize-1)) need_unprotect = 0;
+ if (need_unprotect)
+ if (mprotect(p, pagesize, PROT_READ|PROT_WRITE)
+ && errno != ENOSYS)
+ return 0;
+ ctx.avail_meta_area_count--;
+ ctx.avail_meta_areas = p + 4096;
+ if (ctx.meta_area_tail) {
+ ctx.meta_area_tail->next = (void *)p;
+ } else {
+ ctx.meta_area_head = (void *)p;
+ }
+ ctx.meta_area_tail = (void *)p;
+ ctx.meta_area_tail->check = ctx.secret;
+ ctx.avail_meta_count = ctx.meta_area_tail->nslots
+ = (4096-sizeof(struct meta_area))/sizeof *m;
+ ctx.avail_meta = ctx.meta_area_tail->slots;
+ }
+ ctx.avail_meta_count--;
+ m = ctx.avail_meta++;
+ m->prev = m->next = 0;
+ return m;
+}
+
+static uint32_t try_avail(struct meta **pm)
+{
+ struct meta *m = *pm;
+ uint32_t first;
+ if (!m) return 0;
+ uint32_t mask = m->avail_mask;
+ if (!mask) {
+ if (!m) return 0;
+ if (!m->freed_mask) {
+ dequeue(pm, m);
+ m = *pm;
+ if (!m) return 0;
+ } else {
+ m = m->next;
+ *pm = m;
+ }
+
+ mask = m->freed_mask;
+
+ // skip fully-free group unless it's the only one
+ // or it's a permanently non-freeable group
+ if (mask == (2u<<m->last_idx)-1 && m->freeable) {
+ m = m->next;
+ *pm = m;
+ mask = m->freed_mask;
+ }
+
+ // activate more slots in a not-fully-active group
+ // if needed, but only as a last resort. prefer using
+ // any other group with free slots. this avoids
+ // touching & dirtying as-yet-unused pages.
+ if (!(mask & ((2u<<m->mem->active_idx)-1))) {
+ if (m->next != m) {
+ m = m->next;
+ *pm = m;
+ } else {
+ int cnt = m->mem->active_idx + 2;
+ int size = size_classes[m->sizeclass]*UNIT;
+ int span = UNIT + size*cnt;
+ // activate up to next 4k boundary
+ while ((span^(span+size-1)) < 4096) {
+ cnt++;
+ span += size;
+ }
+ if (cnt > m->last_idx+1)
+ cnt = m->last_idx+1;
+ m->mem->active_idx = cnt-1;
+ }
+ }
+ mask = activate_group(m);
+ assert(mask);
+ decay_bounces(m->sizeclass);
+ }
+ first = mask&-mask;
+ m->avail_mask = mask-first;
+ return first;
+}
+
+static int alloc_slot(int, size_t);
+
+static struct meta *alloc_group(int sc, size_t req)
+{
+ size_t size = UNIT*size_classes[sc];
+ int i = 0, cnt;
+ unsigned char *p;
+ struct meta *m = alloc_meta();
+ if (!m) return 0;
+ size_t usage = ctx.usage_by_class[sc];
+ size_t pagesize = PGSZ;
+ int active_idx;
+ if (sc < 9) {
+ while (i<2 && 4*small_cnt_tab[sc][i] > usage)
+ i++;
+ cnt = small_cnt_tab[sc][i];
+ } else {
+ // lookup max number of slots fitting in power-of-two size
+ // from a table, along with number of factors of two we
+ // can divide out without a remainder or reaching 1.
+ cnt = med_cnt_tab[sc&3];
+
+ // reduce cnt to avoid excessive eager allocation.
+ while (!(cnt&1) && 4*cnt > usage)
+ cnt >>= 1;
+
+ // data structures don't support groups whose slot offsets
+ // in units don't fit in 16 bits.
+ while (size*cnt >= 65536*UNIT)
+ cnt >>= 1;
+ }
+
+ // If we selected a count of 1 above but it's not sufficient to use
+ // mmap, increase to 2. Then it might be; if not it will nest.
+ if (cnt==1 && size*cnt+UNIT <= pagesize/2) cnt = 2;
+
+ // All choices of size*cnt are "just below" a power of two, so anything
+ // larger than half the page size should be allocated as whole pages.
+ if (size*cnt+UNIT > pagesize/2) {
+ // check/update bounce counter to start/increase retention
+ // of freed maps, and inhibit use of low-count, odd-size
+ // small mappings and single-slot groups if activated.
+ int nosmall = is_bouncing(sc);
+ account_bounce(sc);
+ step_seq();
+
+ // since the following count reduction opportunities have
+ // an absolute memory usage cost, don't overdo them. count
+ // coarse usage as part of usage.
+ if (!(sc&1) && sc<32) usage += ctx.usage_by_class[sc+1];
+
+ // try to drop to a lower count if the one found above
+ // increases usage by more than 25%. these reduced counts
+ // roughly fill an integral number of pages, just not a
+ // power of two, limiting amount of unusable space.
+ if (4*cnt > usage && !nosmall) {
+ if (0);
+ else if ((sc&3)==1 && size*cnt>8*pagesize) cnt = 2;
+ else if ((sc&3)==2 && size*cnt>4*pagesize) cnt = 3;
+ else if ((sc&3)==0 && size*cnt>8*pagesize) cnt = 3;
+ else if ((sc&3)==0 && size*cnt>2*pagesize) cnt = 5;
+ }
+ size_t needed = size*cnt + UNIT;
+ needed += -needed & (pagesize-1);
+
+ // produce an individually-mmapped allocation if usage is low,
+ // bounce counter hasn't triggered, and either it saves memory
+ // or it avoids eager slot allocation without wasting too much.
+ if (!nosmall && cnt<=7) {
+ req += IB + UNIT;
+ req += -req & (pagesize-1);
+ if (req<size+UNIT || (req>=4*pagesize && 2*cnt>usage)) {
+ cnt = 1;
+ needed = req;
+ }
+ }
+
+ p = mmap(0, needed, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (p==MAP_FAILED) {
+ free_meta(m);
+ return 0;
+ }
+ m->maplen = needed>>12;
+ ctx.mmap_counter++;
+ active_idx = (4096-UNIT)/size-1;
+ if (active_idx > cnt-1) active_idx = cnt-1;
+ if (active_idx < 0) active_idx = 0;
+ } else {
+ int j = size_to_class(UNIT+cnt*size-IB);
+ int idx = alloc_slot(j, UNIT+cnt*size-IB);
+ if (idx < 0) {
+ free_meta(m);
+ return 0;
+ }
+ struct meta *g = ctx.active[j];
+ p = enframe(g, idx, UNIT*size_classes[j]-IB, ctx.mmap_counter);
+ m->maplen = 0;
+ p[-3] = (p[-3]&31) | (6<<5);
+ for (int i=0; i<=cnt; i++)
+ p[UNIT+i*size-4] = 0;
+ active_idx = cnt-1;
+ }
+ ctx.usage_by_class[sc] += cnt;
+ m->avail_mask = (2u<<active_idx)-1;
+ m->freed_mask = (2u<<(cnt-1))-1 - m->avail_mask;
+ m->mem = (void *)p;
+ m->mem->meta = m;
+ m->mem->active_idx = active_idx;
+ m->last_idx = cnt-1;
+ m->freeable = 1;
+ m->sizeclass = sc;
+ return m;
+}
+
+static int alloc_slot(int sc, size_t req)
+{
+ uint32_t first = try_avail(&ctx.active[sc]);
+ if (first) return a_ctz_32(first);
+
+ struct meta *g = alloc_group(sc, req);
+ if (!g) return -1;
+
+ g->avail_mask--;
+ queue(&ctx.active[sc], g);
+ return 0;
+}
+
+void *malloc(size_t n)
+{
+ if (size_overflows(n)) return 0;
+ struct meta *g;
+ uint32_t mask, first;
+ int sc;
+ int idx;
+ int ctr;
+
+ if (n >= MMAP_THRESHOLD) {
+ size_t needed = n + IB + UNIT;
+ void *p = mmap(0, needed, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (p==MAP_FAILED) return 0;
+ wrlock();
+ step_seq();
+ g = alloc_meta();
+ if (!g) {
+ unlock();
+ munmap(p, needed);
+ return 0;
+ }
+ g->mem = p;
+ g->mem->meta = g;
+ g->last_idx = 0;
+ g->freeable = 1;
+ g->sizeclass = 63;
+ g->maplen = (needed+4095)/4096;
+ g->avail_mask = g->freed_mask = 0;
+ // use a global counter to cycle offset in
+ // individually-mmapped allocations.
+ ctx.mmap_counter++;
+ idx = 0;
+ goto success;
+ }
+
+ sc = size_to_class(n);
+
+ rdlock();
+ g = ctx.active[sc];
+
+ // use coarse size classes initially when there are not yet
+ // any groups of desired size. this allows counts of 2 or 3
+ // to be allocated at first rather than having to start with
+ // 7 or 5, the min counts for even size classes.
+ if (!g && sc>=4 && sc<32 && sc!=6 && !(sc&1) && !ctx.usage_by_class[sc]) {
+ size_t usage = ctx.usage_by_class[sc|1];
+ // if a new group may be allocated, count it toward
+ // usage in deciding if we can use coarse class.
+ if (!ctx.active[sc|1] || (!ctx.active[sc|1]->avail_mask
+ && !ctx.active[sc|1]->freed_mask))
+ usage += 3;
+ if (usage <= 12)
+ sc |= 1;
+ g = ctx.active[sc];
+ }
+
+ for (;;) {
+ mask = g ? g->avail_mask : 0;
+ first = mask&-mask;
+ if (!first) break;
+ if (RDLOCK_IS_EXCLUSIVE || !MT)
+ g->avail_mask = mask-first;
+ else if (a_cas(&g->avail_mask, mask, mask-first)!=mask)
+ continue;
+ idx = a_ctz_32(first);
+ goto success;
+ }
+ upgradelock();
+
+ idx = alloc_slot(sc, n);
+ if (idx < 0) {
+ unlock();
+ return 0;
+ }
+ g = ctx.active[sc];
+
+success:
+ ctr = ctx.mmap_counter;
+ unlock();
+ return enframe(g, idx, n, ctr);
+}
+
+int is_allzero(void *p)
+{
+ struct meta *g = get_meta(p);
+ return g->sizeclass >= 48 ||
+ get_stride(g) < UNIT*size_classes[g->sizeclass];
+}
diff --git a/src/malloc/mallocng/malloc_usable_size.c b/src/malloc/mallocng/malloc_usable_size.c
new file mode 100644
index 00000000..a440a4ea
--- /dev/null
+++ b/src/malloc/mallocng/malloc_usable_size.c
@@ -0,0 +1,12 @@
+#include <stdlib.h>
+#include "meta.h"
+
+size_t malloc_usable_size(void *p)
+{
+ struct meta *g = get_meta(p);
+ int idx = get_slot_index(p);
+ size_t stride = get_stride(g);
+ unsigned char *start = g->mem->storage + stride*idx;
+ unsigned char *end = start + stride - IB;
+ return get_nominal_size(p, end);
+}
diff --git a/src/malloc/mallocng/meta.h b/src/malloc/mallocng/meta.h
new file mode 100644
index 00000000..61ec53f9
--- /dev/null
+++ b/src/malloc/mallocng/meta.h
@@ -0,0 +1,288 @@
+#ifndef MALLOC_META_H
+#define MALLOC_META_H
+
+#include <stdint.h>
+#include <errno.h>
+#include <limits.h>
+#include "glue.h"
+
+__attribute__((__visibility__("hidden")))
+extern const uint16_t size_classes[];
+
+#define MMAP_THRESHOLD 131052
+
+#define UNIT 16
+#define IB 4
+
+struct group {
+ struct meta *meta;
+ unsigned char active_idx:5;
+ char pad[UNIT - sizeof(struct meta *) - 1];
+ unsigned char storage[];
+};
+
+struct meta {
+ struct meta *prev, *next;
+ struct group *mem;
+ volatile int avail_mask, freed_mask;
+ uintptr_t last_idx:5;
+ uintptr_t freeable:1;
+ uintptr_t sizeclass:6;
+ uintptr_t maplen:8*sizeof(uintptr_t)-12;
+};
+
+struct meta_area {
+ uint64_t check;
+ struct meta_area *next;
+ int nslots;
+ struct meta slots[];
+};
+
+struct malloc_context {
+ uint64_t secret;
+#ifndef PAGESIZE
+ size_t pagesize;
+#endif
+ int init_done;
+ unsigned mmap_counter;
+ struct meta *free_meta_head;
+ struct meta *avail_meta;
+ size_t avail_meta_count, avail_meta_area_count, meta_alloc_shift;
+ struct meta_area *meta_area_head, *meta_area_tail;
+ unsigned char *avail_meta_areas;
+ struct meta *active[48];
+ size_t usage_by_class[48];
+ uint8_t unmap_seq[32], bounces[32];
+ uint8_t seq;
+ uintptr_t brk;
+};
+
+__attribute__((__visibility__("hidden")))
+extern struct malloc_context ctx;
+
+#ifdef PAGESIZE
+#define PGSZ PAGESIZE
+#else
+#define PGSZ ctx.pagesize
+#endif
+
+__attribute__((__visibility__("hidden")))
+struct meta *alloc_meta(void);
+
+__attribute__((__visibility__("hidden")))
+int is_allzero(void *);
+
+static inline void queue(struct meta **phead, struct meta *m)
+{
+ assert(!m->next);
+ assert(!m->prev);
+ if (*phead) {
+ struct meta *head = *phead;
+ m->next = head;
+ m->prev = head->prev;
+ m->next->prev = m->prev->next = m;
+ } else {
+ m->prev = m->next = m;
+ *phead = m;
+ }
+}
+
+static inline void dequeue(struct meta **phead, struct meta *m)
+{
+ if (m->next != m) {
+ m->prev->next = m->next;
+ m->next->prev = m->prev;
+ if (*phead == m) *phead = m->next;
+ } else {
+ *phead = 0;
+ }
+ m->prev = m->next = 0;
+}
+
+static inline struct meta *dequeue_head(struct meta **phead)
+{
+ struct meta *m = *phead;
+ if (m) dequeue(phead, m);
+ return m;
+}
+
+static inline void free_meta(struct meta *m)
+{
+ *m = (struct meta){0};
+ queue(&ctx.free_meta_head, m);
+}
+
+static inline uint32_t activate_group(struct meta *m)
+{
+ assert(!m->avail_mask);
+ uint32_t mask, act = (2u<<m->mem->active_idx)-1;
+ do mask = m->freed_mask;
+ while (a_cas(&m->freed_mask, mask, mask&~act)!=mask);
+ return m->avail_mask = mask & act;
+}
+
+static inline int get_slot_index(const unsigned char *p)
+{
+ return p[-3] & 31;
+}
+
+static inline struct meta *get_meta(const unsigned char *p)
+{
+ assert(!((uintptr_t)p & 15));
+ int offset = *(const uint16_t *)(p - 2);
+ int index = get_slot_index(p);
+ if (p[-4]) {
+ assert(!offset);
+ offset = *(uint32_t *)(p - 8);
+ assert(offset > 0xffff);
+ }
+ const struct group *base = (const void *)(p - UNIT*offset - UNIT);
+ const struct meta *meta = base->meta;
+ assert(meta->mem == base);
+ assert(index <= meta->last_idx);
+ assert(!(meta->avail_mask & (1u<<index)));
+ assert(!(meta->freed_mask & (1u<<index)));
+ const struct meta_area *area = (void *)((uintptr_t)meta & -4096);
+ assert(area->check == ctx.secret);
+ if (meta->sizeclass < 48) {
+ assert(offset >= size_classes[meta->sizeclass]*index);
+ assert(offset < size_classes[meta->sizeclass]*(index+1));
+ } else {
+ assert(meta->sizeclass == 63);
+ }
+ if (meta->maplen) {
+ assert(offset <= meta->maplen*4096UL/UNIT - 1);
+ }
+ return (struct meta *)meta;
+}
+
+static inline size_t get_nominal_size(const unsigned char *p, const unsigned char *end)
+{
+ size_t reserved = p[-3] >> 5;
+ if (reserved >= 5) {
+ assert(reserved == 5);
+ reserved = *(const uint32_t *)(end-4);
+ assert(reserved >= 5);
+ assert(!end[-5]);
+ }
+ assert(reserved <= end-p);
+ assert(!*(end-reserved));
+ // also check the slot's overflow byte
+ assert(!*end);
+ return end-reserved-p;
+}
+
+static inline size_t get_stride(const struct meta *g)
+{
+ if (!g->last_idx && g->maplen) {
+ return g->maplen*4096UL - UNIT;
+ } else {
+ return UNIT*size_classes[g->sizeclass];
+ }
+}
+
+static inline void set_size(unsigned char *p, unsigned char *end, size_t n)
+{
+ int reserved = end-p-n;
+ if (reserved) end[-reserved] = 0;
+ if (reserved >= 5) {
+ *(uint32_t *)(end-4) = reserved;
+ end[-5] = 0;
+ reserved = 5;
+ }
+ p[-3] = (p[-3]&31) + (reserved<<5);
+}
+
+static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
+{
+ size_t stride = get_stride(g);
+ size_t slack = (stride-IB-n)/UNIT;
+ unsigned char *p = g->mem->storage + stride*idx;
+ unsigned char *end = p+stride-IB;
+ // cycle offset within slot to increase interval to address
+ // reuse, facilitate trapping double-free.
+ int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255;
+ assert(!p[-4]);
+ if (off > slack) {
+ size_t m = slack;
+ m |= m>>1; m |= m>>2; m |= m>>4;
+ off &= m;
+ if (off > slack) off -= slack+1;
+ assert(off <= slack);
+ }
+ if (off) {
+ // store offset in unused header at offset zero
+ // if enframing at non-zero offset.
+ *(uint16_t *)(p-2) = off;
+ p[-3] = 7<<5;
+ p += UNIT*off;
+ // for nonzero offset there is no permanent check
+ // byte, so make one.
+ p[-4] = 0;
+ }
+ *(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT;
+ p[-3] = idx;
+ set_size(p, end, n);
+ return p;
+}
+
+static inline int size_to_class(size_t n)
+{
+ n = (n+IB-1)>>4;
+ if (n<10) return n;
+ n++;
+ int i = (28-a_clz_32(n))*4 + 8;
+ if (n>size_classes[i+1]) i+=2;
+ if (n>size_classes[i]) i++;
+ return i;
+}
+
+static inline int size_overflows(size_t n)
+{
+ if (n >= SIZE_MAX/2 - 4096) {
+ errno = ENOMEM;
+ return 1;
+ }
+ return 0;
+}
+
+static inline void step_seq(void)
+{
+ if (ctx.seq==255) {
+ for (int i=0; i<32; i++) ctx.unmap_seq[i] = 0;
+ ctx.seq = 1;
+ } else {
+ ctx.seq++;
+ }
+}
+
+static inline void record_seq(int sc)
+{
+ if (sc-7U < 32) ctx.unmap_seq[sc-7] = ctx.seq;
+}
+
+static inline void account_bounce(int sc)
+{
+ if (sc-7U < 32) {
+ int seq = ctx.unmap_seq[sc-7];
+ if (seq && ctx.seq-seq < 10) {
+ if (ctx.bounces[sc-7]+1 < 100)
+ ctx.bounces[sc-7]++;
+ else
+ ctx.bounces[sc-7] = 150;
+ }
+ }
+}
+
+static inline void decay_bounces(int sc)
+{
+ if (sc-7U < 32 && ctx.bounces[sc-7])
+ ctx.bounces[sc-7]--;
+}
+
+static inline int is_bouncing(int sc)
+{
+ return (sc-7U < 32 && ctx.bounces[sc-7] >= 100);
+}
+
+#endif
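for reference, a standalone sketch (not part of the commit) of the size-class mapping implemented by size_to_class() above, using __builtin_clz (a gcc/clang builtin) as a stand-in for musl's internal a_clz_32 and the size_classes table copied from malloc.c. slots of class sc are UNIT*size_classes[sc] bytes, and the largest request such a slot can hold is IB bytes less; requests of MMAP_THRESHOLD (131052) bytes or more bypass the classes entirely.

#include <stdint.h>
#include <stdio.h>

#define UNIT 16
#define IB 4

// table copied from malloc.c above
static const uint16_t size_classes[] = {
    1, 2, 3, 4, 5, 6, 7, 8,
    9, 10, 12, 15,
    18, 20, 25, 31,
    36, 42, 50, 63,
    72, 84, 102, 127,
    146, 170, 204, 255,
    292, 340, 409, 511,
    584, 682, 818, 1023,
    1169, 1364, 1637, 2047,
    2340, 2730, 3276, 4095,
    4680, 5460, 6552, 8191,
};

// same logic as size_to_class() in meta.h, with __builtin_clz standing
// in for musl's internal a_clz_32
static int size_to_class(size_t n)
{
    n = (n+IB-1)>>4;
    if (n<10) return n;
    n++;
    int i = (28-__builtin_clz((unsigned)n))*4 + 8;
    if (n>size_classes[i+1]) i+=2;
    if (n>size_classes[i]) i++;
    return i;
}

int main(void)
{
    size_t reqs[] = { 1, 24, 160, 500, 4000, 100000 };
    for (size_t k = 0; k < sizeof reqs/sizeof reqs[0]; k++) {
        int sc = size_to_class(reqs[k]);
        printf("request %6zu -> class %2d, slot %6d bytes, max usable %6d\n",
               reqs[k], sc, UNIT*size_classes[sc], UNIT*size_classes[sc]-IB);
    }
    return 0;
}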
diff --git a/src/malloc/mallocng/realloc.c b/src/malloc/mallocng/realloc.c
new file mode 100644
index 00000000..18769f42
--- /dev/null
+++ b/src/malloc/mallocng/realloc.c
@@ -0,0 +1,51 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+#include "meta.h"
+
+void *realloc(void *p, size_t n)
+{
+ if (!p) return malloc(n);
+ if (size_overflows(n)) return 0;
+
+ struct meta *g = get_meta(p);
+ int idx = get_slot_index(p);
+ size_t stride = get_stride(g);
+ unsigned char *start = g->mem->storage + stride*idx;
+ unsigned char *end = start + stride - IB;
+ size_t old_size = get_nominal_size(p, end);
+ size_t avail_size = end-(unsigned char *)p;
+ void *new;
+
+ // only resize in-place if size class matches
+ if (n <= avail_size && n<MMAP_THRESHOLD
+ && size_to_class(n)+1 >= g->sizeclass) {
+ set_size(p, end, n);
+ return p;
+ }
+
+ // use mremap if old and new size are both mmap-worthy
+ if (g->sizeclass>=48 && n>=MMAP_THRESHOLD) {
+ assert(g->sizeclass==63);
+ size_t base = (unsigned char *)p-start;
+ size_t needed = (n + base + UNIT + IB + 4095) & -4096;
+ new = g->maplen*4096UL == needed ? g->mem :
+ mremap(g->mem, g->maplen*4096UL, needed, MREMAP_MAYMOVE);
+ if (new!=MAP_FAILED) {
+ g->mem = new;
+ g->maplen = needed/4096;
+ p = g->mem->storage + base;
+ end = g->mem->storage + (needed - UNIT) - IB;
+ *end = 0;
+ set_size(p, end, n);
+ return p;
+ }
+ }
+
+ new = malloc(n);
+ if (!new) return 0;
+ memcpy(new, p, n < old_size ? n : old_size);
+ free(p);
+ return new;
+}