Commits

cheery committed 4cad36b

memory allocator

Comments (0)

Files changed (6)

 FLAGS="--std=c99 -shared -fPIC -lrt -O2"
 
-gcc src/obj.c src/vm.c -o vm.so $FLAGS
-gcc src/obj.c src/vm.c -o vm32.so $FLAGS -m32
+SOURCE="src/obj.c src/vm.c src/mem.c"
+
+gcc $SOURCE -o vm.so $FLAGS
+gcc $SOURCE -o vm32.so $FLAGS -m32
+/*
+    copyright: Henri Tuhola <henri.tuhola@gmail.com>
+
+    you are free to use this piece of code for educational purposes.
+    if you test yourself that it works reliably, you can also use it for practical purposes.
+
+    this is a reimplementation of Doug Lea's malloc. I did this based on those short
+    descriptions I read about dlmalloc.
+
+    mem.h contains the following structure and three extern -procedures:
+    typedef struct Mem {
+        size_t prev_size, size;
+        int flags;
+    } Mem;
+
+    each structure allocated by using this library requires the Mem -header.
+    struct Example {
+        Mem mem;
+        int my_data;
+    };
+    inside procedure() {
+        struct Example* example = mem_new(sizeof(struct Example));
+        example = mem_resize(example, sizeof(struct Example) + 32);
+        mem_free(example);
+    }
+*/
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <string.h>
+#include "mem.h"
+
+/*
+    returns numbers of leading zero bits on 32bit-range.
+*/
+static inline int nlz(unsigned int x) {
+   unsigned int y;
+   int n = 32;
+   y = x >>16;  if (y != 0) {n = n -16;  x = y;}
+   y = x >> 8;  if (y != 0) {n = n - 8;  x = y;}
+   y = x >> 4;  if (y != 0) {n = n - 4;  x = y;}
+   y = x >> 2;  if (y != 0) {n = n - 2;  x = y;}
+   y = x >> 1;  if (y != 0) return n - 2;
+   return n - x;
+}
+
+/*
+    doubly-linked list easens unlinking of the memory region.
+    this data structure belongs solely for retrieving
+    best fitting free memory region from bins.
+*/
+typedef struct Free {
+    Mem mem;
+    struct Free* prev;
+    struct Free* next;
+} Free;
+
+/*
+    free regions are ordered into these bins according
+    to their size. nlz(mem->size) gives the index where
+    a freed region belongs into.
+    bins assume free region does not change in size
+    once it is reachable through them.
+*/
+static Free* bins[32] = {NULL};
+
+/*
+    there's no fitting free region, it must be created.
+    this procedure comes to help you. yay!
+    requested size is aligned into smallest memory region
+    we dare to request from an operating system. It's quite
+    large, a megabyte.
+    additional Mem -stub is positioned into the end of
+    new memory region to easen handling of the structure.
+    the stub is recognized from it's 0-size and reserved-flag.
+    it helps in memory-unmapping and merging regions.
+*/
+static inline Mem* mmap_new_region(size_t size) {
+    const size_t minsize = 0x1000000;
+    const size_t minsize_mask = minsize - 1;
+    size += sizeof(Mem);
+    size += (minsize - size & minsize_mask);
+    char* data = mmap(
+        NULL,
+        size,
+        PROT_READ | PROT_WRITE,
+        MAP_PRIVATE | MAP_ANONYMOUS,
+        -1,
+        0
+    );
+    if (data == NULL) {
+        fprintf(stderr, "error on mem_take\n");
+        exit(1);
+        return NULL;
+    }
+    Mem* this = (Mem*)data;
+    Mem* stub = (Mem*)(data + size - sizeof(Mem));
+    size -= sizeof(Mem);
+    *this = (Mem){prev_size:0,      size:size,  flags:0x0};
+    *stub = (Mem){prev_size:size,   size:0,     flags:0x1};
+    return this;
+}
+
+/*
+    lmerge -procedure needs this one.
+    memory region needs to know the size of it's preceding neighbour
+    when they are merged.
+    this procedure needs to be called when
+    memory region size and boundaries change.
+*/
+static inline void update_mem_neighbour(Mem* mem) {
+    size_t size = mem->size;
+    Mem* next = (Mem*)((char*)mem + size);
+    next->prev_size = size;
+}
+
+/*
+    puts a memory region into a bin so it can be found later.
+    goes through all the possible cases what might happen when
+    connecting the the region into bin.
+    aside ordering memory regions into bins by their size,
+    the bin chains are also ordered to accelerates allocation.
+*/
+static void free_link(Free* free) {
+    size_t size = free->mem.size;
+    Free** bin = bins+nlz(size);
+    Free* junction = *bin;
+    if (junction == NULL) {
+        *bin = free;
+        free->next = NULL;
+        free->prev = NULL;
+    } else
+    if (size < junction->mem.size) {
+        *bin = free;
+        free->next = junction;
+        free->prev = NULL;
+    } else {
+        while (junction->next) {
+            if (size < junction->next->mem.size) break;
+            junction = junction->next;
+        }
+        if (junction->next) junction->next->prev = free;
+        free->next = junction->next;
+        free->prev = junction;
+        junction->next = free;
+    }
+}
+
+/*
+    memory region is taken into use by disconnecting
+    it from a bin chain.
+*/
+static void free_unlink(Free* free) {
+    if(free->prev) free->prev->next = free->next;
+    if(free->next) free->next->prev = free->prev;
+    Free** bin = bins+nlz(free->mem.size);
+    if (*bin == free) *bin = free->next;
+}
+
+/*
+    unlinks the memory region that best fits the requested size.
+    picks one terribly large if such is found.
+    otherwise a new memory region gets mapped.
+*/
+static inline Mem* free_best_fit(size_t size) {
+    int i = nlz(size);
+    Free* candinate = bins[i];
+    while (candinate) {
+        if (candinate->mem.size >= size) {
+            free_unlink(candinate);
+            return (Mem*)candinate;
+        }
+        candinate = candinate->next;
+    }
+    for (int k = 1; k <= i; ++k) {
+        candinate = bins[i-k];
+        if (candinate) {
+            free_unlink(candinate);
+            return (Mem*)candinate;
+        }
+    }
+    return mmap_new_region(size);
+}
+
+/*
+    cuts the memory region into the size.
+    the excess part is marked free and linked back in.
+    shit hits the fan if there isn't sufficient excess portion.
+    callee beware.
+*/
+static inline void mem_splice(Mem* mem, size_t size) {
+    size_t old_size = mem->size;
+    mem->size = size;
+    char* data = (char*)mem;
+    Mem* free = (Mem*)(data+size);
+    *free = (Mem){prev_size:size, size:old_size-size, flags:0x0};
+    update_mem_neighbour(free);
+    free_link((Free*)free);
+}
+
+/*
+    user structure allocated this way must have
+    Mem -header. it'd be also good for the structure
+    to be aligned into machine word boundaries.
+    picks up the best fitting free region and splices
+    it into shape when necessary to reduce internal fragmentation.
+*/
+extern void* mem_new(size_t size) {
+    if (size < sizeof(Free)) size = sizeof(Free);
+    // go, locate best fitting piece, splice it.
+    Mem* free = free_best_fit(size);
+    if (size + sizeof(Free) <= free->size) mem_splice(free, size);
+    free->flags = 0x1;
+    return free;
+}
+
+/*
+    the easier merging operation. merges right free neighbour.
+    stub memory regions created by mmap_new_region are flagged
+    reserved. therefore this procedure never merges them.
+*/
+static inline Mem* rmerge(Mem* mem) {
+    Mem* n = (Mem*)((char*)mem + mem->size);
+    if (n->flags==0) {
+        free_unlink((Free*)n);
+        mem->size += n->size;
+    }
+    return mem;
+}
+
+/*
+    harder of the two merging operations. merges left free neighbour.
+    prev_size gets zeroed when memory mapping a new region. it's
+    applied here to prevent merging below that point. 
+
+    in generic for merging procedures, when there's a free neighbour
+    for the given memory region, the neighbour is getting unlinked
+    from a bin chain.
+    these operations change memory region size so update_mem_neighbour
+    should be called after them. mem_slice is sufficient too because
+    it calls the update_mem_neighbour for correct portion.
+*/
+static inline Mem* lmerge(Mem* mem) {
+    if (mem->prev_size == 0) return mem;
+    Mem* n = (Mem*)((char*)mem - mem->prev_size);
+    if (n->flags==0) {
+        free_unlink((Free*)n);
+        n->size += mem->size;
+        return n;
+    }
+    return mem;
+}
+
+/*
+    common ingredient for mem_free and mem_resize.
+    mem_free describes what are the minimum requirements
+    for using this procedure.
+*/
+static inline void unmap_or_link_back(Mem* mem) {
+    mem->flags = 0x0;
+    if (mem->prev_size == 0) {
+        Mem* n = (Mem*)((char*)mem + mem->size);
+        if (n->size == 0) {
+            int err = munmap(mem, mem->size + sizeof(Mem));
+            if (err) {
+                fprintf(stderr, "error on mem_free\n");
+                exit(1);
+            }
+            return;
+        }
+    }
+    free_link((Free*)mem);
+}
+
+/*
+    merges neighbours and unmaps the mapped region if it is entirely free.
+    fragmented regions are put into bins for reallocation.
+*/
+extern void mem_free(void* data) {
+    Mem* m = rmerge(lmerge(data));
+    update_mem_neighbour(m);
+    unmap_or_link_back(m);
+}
+
+/*
+    avoids memcpy with clever merging if possible.
+    is equivalent to the mem_free if 
+*/
+extern void* mem_resize(void* data, size_t size) {
+    if (size < sizeof(Free)) size = sizeof(Free);
+    Mem* m = rmerge(data);
+    if(size <= m->size) {
+        if (size + sizeof(Free) <= m->size) mem_splice(m, size);
+        else update_mem_neighbour(m);
+        return m;
+    }
+    Mem* n = lmerge(m);
+    update_mem_neighbour(n);
+    // requested size is larger than merged chunk, so it's okay to use m->size
+    Mem* r = mem_new(size);
+    memcpy(r+1, m+1, m->size-sizeof(Mem));
+    unmap_or_link_back(n);
+    return r;
+}
+typedef struct Mem {
+    size_t prev_size, size;
+    int flags;
+} Mem;
+
+extern void mem_free(void* data);
+extern void* mem_new(size_t size);
+extern void* mem_resize(void* data, size_t size);
+
+/*
+struct Example {
+    Mem mem;
+    int my_data;
+};
+
+inside procedure() {
+    struct Example* example = mem_new(sizeof(struct Example));
+    example = mem_resize(example, sizeof(struct Example) + 32);
+    mem_free(example);
+}
+*/

test/build_mem.sh

+gcc --std=c99 mem_activity.c ../src/mem.c

test/mem_activity.c

+// generates memory activity.
+#include <stdlib.h>
+#include <stdio.h>
+#include "../src/mem.h"
+#include <string.h>
+
+struct Data {
+    Mem header;
+    char testdata[50];
+};
+
+int main(void) {
+    struct Data* data = mem_new(sizeof(struct Data));
+    strncpy(data->testdata, "hello foobar", 50);
+    printf("data: %s\n", data->testdata);
+    mem_free(data);
+    return 0;
+}
+//gcc --std=c99 -shared -fPIC libmem.c ../src/mem.c -o libmem.so
+#include <stdlib.h>
+#include <stdio.h>
+#include "../src/mem.h"
+
+static inline size_t size_align(size_t size) {
+    return size + sizeof(size) - size % sizeof(size);
+}
+
+extern void* calloc(size_t nmemb, size_t size) {
+    if (nmemb==0) return NULL;
+    return malloc(nmemb*size);
+}
+
+extern void* malloc(size_t size) {
+    Mem* mem = mem_new(size_align(size + sizeof(*mem)));
+    return mem+1;
+}
+
+extern void free(void* ptr) {
+    if (ptr==NULL) return;
+    mem_free((Mem*)ptr - 1);
+}
+
+extern void* realloc(void* ptr, size_t size) {
+    if (ptr==NULL) return malloc(size);
+    Mem* mem = mem_resize((Mem*)ptr - 1, size_align(size + sizeof(*mem)));
+    return mem+1;
+}