Commits

Armin Rigo committed 91b7fc7

Tweak to largemalloc.c: like glibc's malloc, organize two chained
lists instead of only one, to guarantee progress in malloc() even
in the face of very large numbers of free slots that all have a
sightly-too-small, identical size.

  • Participants
  • Parent commits 58cdbd6

Comments (0)

Files changed (7)

File c7/demo/Makefile

 H_FILES = ../stmgc.h ../stm/*.h
 C_FILES = ../stmgc.c ../stm/*.c
 
-COMMON = -I.. -pthread -lrt -g -Wall -Werror
+COMMON = -I.. -pthread -lrt -g -Wall -Werror -DSTM_LARGEMALLOC_TEST
 
 
 # note that 'build' is partially optimized but still contains all asserts

File c7/demo/demo_largemalloc.c

+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <time.h>
+
+#include "stmgc.h"
+#include "../stm/largemalloc.h"
+
+static inline double get_stm_time(void)
+{
+    struct timespec tp;
+    clock_gettime(CLOCK_MONOTONIC, &tp);
+    return tp.tv_sec + tp.tv_nsec * 0.000000001;
+}
+
+ssize_t stmcb_size_rounded_up(struct object_s *ob)
+{
+    abort();
+}
+
+void stmcb_trace(struct object_s *obj, void visit(object_t **))
+{
+    abort();
+}
+
+/************************************************************/
+
+#define ARENA_SIZE  (1024*1024*1024)
+
+static char *arena_data;
+extern bool (*_stm_largemalloc_keep)(char *data);   /* a hook for tests */
+void _stm_mutex_pages_lock(void);
+
+
+static bool keep_me(char *data) {
+    static bool last_answer = false;
+    last_answer = !last_answer;
+    return last_answer;
+}
+
+void timing(int scale)
+{
+    long limit = 1L << scale;
+    _stm_largemalloc_init_arena(arena_data, ARENA_SIZE);
+    double start = get_stm_time();
+
+    long i;
+    for (i = 0; i < limit; i++) {
+        _stm_large_malloc(16 + 8 * (i % 4));  /* may return NULL */
+    }
+    _stm_largemalloc_keep = keep_me;
+    _stm_largemalloc_sweep();
+    for (i = 0; i < limit; i++) {
+        _stm_large_malloc(16 + 8 * (i % 4));  /* may return NULL */
+    }
+
+    double stop = get_stm_time();
+    printf("scale %2d: %.9f\n", scale, stop - start);
+}
+
+
+
+int main(void)
+{
+    int i;
+    arena_data = malloc(ARENA_SIZE);
+    assert(arena_data != NULL);
+    _stm_mutex_pages_lock();
+    for (i = 0; i < 25; i++)
+        timing(i);
+    return 0;
+}

File c7/stm/largemalloc.c

 #define LAST_BIN_INDEX(sz) ((sz) >= (3 << 18))
 
 typedef struct dlist_s {
-    struct dlist_s *next;   /* a doubly-linked list */
+    struct dlist_s *next;   /* a circular doubly-linked list */
     struct dlist_s *prev;
 } dlist_t;
 
+typedef struct ulist_s {
+    struct ulist_s *up;     /* a non-circular doubly-linked list */
+    struct ulist_s *down;
+} ulist_t;
+
 typedef struct malloc_chunk {
     size_t prev_size;     /* - if the previous chunk is free: size of its data
                              - otherwise, if this chunk is free: 1
                              - otherwise, 0. */
-    size_t size;          /* size of the data in this chunk,
-                             plus optionally the FLAG_SORTED */
+    size_t size;          /* size of the data in this chunk */
 
-    dlist_t d;            /* if free: a doubly-linked list */
+    dlist_t d;            /* if free: a doubly-linked list 'largebins' */
                           /* if not free: the user data starts here */
+    ulist_t u;            /* if free, if unsorted: up==UU_UNSORTED
+                             if free, if sorted: a doubly-linked list */
 
     /* The chunk has a total size of 'size'.  It is immediately followed
        in memory by another chunk.  This list ends with the last "chunk"
        one are considered "not free". */
 } mchunk_t;
 
-#define FLAG_SORTED          1
+#define UU_UNSORTED          ((ulist_t *) 1)
 #define THIS_CHUNK_FREE      1
 #define BOTH_CHUNKS_USED     0
 #define CHUNK_HEADER_SIZE    offsetof(struct malloc_chunk, d)
 
 #define chunk_at_offset(p, ofs)  ((mchunk_t *)(((char *)(p)) + (ofs)))
 #define data2chunk(p)            chunk_at_offset(p, -CHUNK_HEADER_SIZE)
+#define updown2chunk(p)          chunk_at_offset(p,                     \
+                                     -(CHUNK_HEADER_SIZE + sizeof(dlist_t)))
 
-static mchunk_t *next_chunk_s(mchunk_t *p)
+static mchunk_t *next_chunk(mchunk_t *p)
 {
-    assert(p->size & FLAG_SORTED);
-    return chunk_at_offset(p, CHUNK_HEADER_SIZE + p->size - FLAG_SORTED);
-}
-static mchunk_t *next_chunk_u(mchunk_t *p)
-{
-    assert(!(p->size & FLAG_SORTED));
     return chunk_at_offset(p, CHUNK_HEADER_SIZE + p->size);
 }
-static mchunk_t *next_chunk_a(mchunk_t *p)
-{
-    return chunk_at_offset(p, CHUNK_HEADER_SIZE + (p->size & ~FLAG_SORTED));
-}
 
 
 /* The free chunks are stored in "bins".  Each bin is a doubly-linked
    neighbors to ensure this.
 
    In each bin's doubly-linked list, chunks are sorted by their size in
-   decreasing order (if you start from 'd.next').  At the end of this
-   list are some unsorted chunks.  All unsorted chunks are after all
-   sorted chunks.  The flag 'FLAG_SORTED' distinguishes them.
+   decreasing order (if you follow 'largebins[n].next',
+   'largebins[n].next->next', etc.).  At the end of this list are some
+   unsorted chunks.  All unsorted chunks are after all sorted chunks.
+   Unsorted chunks are distinguished by having 'u.up == UU_UNSORTED'.
 
    Note that if the user always calls large_malloc() with a large
    enough argument, then the few bins corresponding to smaller values
    will never be sorted at all.  They are still populated with the
    fragments of space between bigger allocations.
+
+   Following the 'd' linked list, we get only one chunk of every size.
+   The additional chunks of a given size are linked "vertically" in
+   the secondary 'u' doubly-linked list.
+
+   
+                            +-----+
+                            | 296 |
+                            +-----+
+                              ^ |
+                              | v
+                            +-----+     +-----+
+                            | 296 |     | 288 |
+                            +-----+     +-----+
+                              ^ |         ^ |     UU_UNSORTED
+                              | v         | v          |
+   largebins    +-----+     +-----+     +-----+     +-----+     largebins
+   [4].next <-> | 304 | <-> | 296 | <-> | 288 | <-> | 296 | <-> [4].prev
+                +-----+     +-----+     +-----+     +-----+
+
 */
 
 static dlist_t largebins[N_BINS];
     new->d.next = &largebins[index];
     new->d.prev = largebins[index].prev;
     new->d.prev->next = &new->d;
+    new->u.up = UU_UNSORTED;
+    new->u.down = NULL;
     largebins[index].prev = &new->d;
-    assert(!(new->size & FLAG_SORTED));
 }
 
 static int compare_chunks(const void *vchunk1, const void *vchunk2)
     dlist_t *end = &largebins[index];
     dlist_t *scan = unsorted->prev;
     size_t count = 1;
-    while (scan != end && !(data2chunk(scan)->size & FLAG_SORTED)) {
+    while (scan != end && data2chunk(scan)->u.up == UU_UNSORTED) {
         scan = scan->prev;
         ++count;
     }
 
         chunk1 = chunks[--count];
     }
-    chunk1->size |= FLAG_SORTED;
     size_t search_size = chunk1->size;
     dlist_t *head = largebins[index].next;
 
     while (1) {
-        if (head == end || search_size >= data2chunk(head)->size) {
+        if (head == end || data2chunk(head)->size < search_size) {
             /* insert 'chunk1' here, before the current head */
             head->prev->next = &chunk1->d;
             chunk1->d.prev = head->prev;
             head->prev = &chunk1->d;
             chunk1->d.next = head;
-            if (count == 0)
-                break;    /* all done */
-            chunk1 = chunks[--count];
-            chunk1->size |= FLAG_SORTED;
-            search_size = chunk1->size;
+            chunk1->u.up = NULL;
+            chunk1->u.down = NULL;
+            head = &chunk1->d;
+        }
+        else if (data2chunk(head)->size == search_size) {
+            /* insert 'chunk1' vertically in the 'u' list */
+            ulist_t *uhead = &data2chunk(head)->u;
+            chunk1->u.up = uhead->up;
+            chunk1->u.down = uhead;
+            if (uhead->up != NULL)
+                uhead->up->down = &chunk1->u;
+            uhead->up = &chunk1->u;
+#ifndef NDEBUG
+            chunk1->d.next = (dlist_t *)0x42;   /* not used */
+            chunk1->d.prev = (dlist_t *)0x42;
+#endif
         }
         else {
             head = head->next;
+            continue;
         }
+        if (count == 0)
+            break;    /* all done */
+        chunk1 = chunks[--count];
+        search_size = chunk1->size;
     }
 }
 
 static void sort_bin(size_t index)
 {
     dlist_t *last = largebins[index].prev;
-    if (last != &largebins[index] && !(data2chunk(last)->size & FLAG_SORTED))
+    if (last != &largebins[index] && data2chunk(last)->u.up == UU_UNSORTED)
         really_sort_bin(index);
 }
 
+static void unlink_chunk(mchunk_t *mscan)
+{
+    if (mscan->u.down != NULL) {
+        /* unlink mscan from the vertical list 'u' */
+        ulist_t *up   = mscan->u.up;
+        ulist_t *down = mscan->u.down;
+        down->up = up;
+        if (up != NULL) up->down = down;
+    }
+    else {
+        dlist_t *prev = mscan->d.prev;
+        dlist_t *next = mscan->d.next;
+        if (mscan->u.up == NULL || mscan->u.up == UU_UNSORTED) {
+            /* unlink mscan from the doubly-linked list 'd' */
+            next->prev = prev;
+            prev->next = next;
+        }
+        else {
+            /* relink in the 'd' list the item above me */
+            mchunk_t *above = updown2chunk(mscan->u.up);
+            next->prev = &above->d;
+            prev->next = &above->d;
+            above->d.next = next;
+            above->d.prev = prev;
+            above->u.down = NULL;
+        }
+    }
+}
+
 char *_stm_large_malloc(size_t request_size)
 {
     /* 'request_size' should already be a multiple of the word size here */
     assert((request_size & (sizeof(char *)-1)) == 0);
 
+    /* it can be very small, but we need to ensure a minimal size
+       (currently 32 bytes) */
+    if (request_size < sizeof(struct malloc_chunk) - CHUNK_HEADER_SIZE)
+        request_size = sizeof(struct malloc_chunk) - CHUNK_HEADER_SIZE;
+
     size_t index = largebin_index(request_size);
     sort_bin(index);
 
     while (scan != end) {
         mscan = data2chunk(scan);
         assert(mscan->prev_size == THIS_CHUNK_FREE);
-        assert(next_chunk_s(mscan)->prev_size == mscan->size - FLAG_SORTED);
+        assert(next_chunk(mscan)->prev_size == mscan->size);
+        assert(IMPLY(mscan->d.prev != end,
+                     data2chunk(mscan->d.prev)->size > mscan->size));
 
-        if (mscan->size > request_size)
+        if (mscan->size >= request_size)
             goto found;
         scan = mscan->d.prev;
     }
             /* non-empty bin. */
             sort_bin(index);
             scan = largebins[index].prev;
-            end = &largebins[index];
             mscan = data2chunk(scan);
             goto found;
         }
     return NULL;
 
  found:
-    assert(mscan->size & FLAG_SORTED);
-    assert(mscan->size > request_size);
+    assert(mscan->size >= request_size);
+    assert(mscan->u.up != UU_UNSORTED);
 
-    /* unlink mscan from the doubly-linked list */
-    mscan->d.next->prev = mscan->d.prev;
-    mscan->d.prev->next = mscan->d.next;
+    if (mscan->u.up != NULL) {
+        /* fast path: grab the item that is just above, to avoid needing
+           to rearrange the 'd' list */
+        mchunk_t *above = updown2chunk(mscan->u.up);
+        ulist_t *two_above = above->u.up;
+        mscan->u.up = two_above;
+        if (two_above != NULL) two_above->down = &mscan->u;
+        mscan = above;
+    }
+    else {
+        unlink_chunk(mscan);
+    }
 
-    size_t remaining_size_plus_1 = mscan->size - request_size;
-    if (remaining_size_plus_1 <= sizeof(struct malloc_chunk)) {
-        next_chunk_s(mscan)->prev_size = BOTH_CHUNKS_USED;
-        request_size = mscan->size & ~FLAG_SORTED;
+    size_t remaining_size = mscan->size - request_size;
+    if (remaining_size < sizeof(struct malloc_chunk)) {
+        next_chunk(mscan)->prev_size = BOTH_CHUNKS_USED;
+        request_size = mscan->size;
     }
     else {
         /* only part of the chunk is being used; reduce the size
         mchunk_t *new = chunk_at_offset(mscan, CHUNK_HEADER_SIZE +
                                                request_size);
         new->prev_size = THIS_CHUNK_FREE;
-        size_t remaining_size = remaining_size_plus_1 - 1 - CHUNK_HEADER_SIZE;
-        new->size = remaining_size;
-        next_chunk_u(new)->prev_size = remaining_size;
+        size_t remaining_data_size = remaining_size - CHUNK_HEADER_SIZE;
+        new->size = remaining_data_size;
+        next_chunk(new)->prev_size = remaining_data_size;
         insert_unsorted(new);
     }
     mscan->size = request_size;
     mchunk_t *mscan = chunk_at_offset(chunk, msize);
 
     if (mscan->prev_size == BOTH_CHUNKS_USED) {
-        assert((mscan->size & ((sizeof(char *) - 1) & ~FLAG_SORTED)) == 0);
+        assert((mscan->size & (sizeof(char *) - 1)) == 0);
         mscan->prev_size = chunk->size;
     }
     else {
-        mscan->size &= ~FLAG_SORTED;
         size_t fsize = mscan->size;
         mchunk_t *fscan = chunk_at_offset(mscan, fsize + CHUNK_HEADER_SIZE);
 
         /* unlink the following chunk */
-        mscan->d.next->prev = mscan->d.prev;
-        mscan->d.prev->next = mscan->d.next;
+        unlink_chunk(mscan);
 #ifndef NDEBUG
         mscan->prev_size = (size_t)-258;  /* 0xfffffffffffffefe */
         mscan->size = (size_t)-515;       /* 0xfffffffffffffdfd */
         msize = chunk->prev_size + CHUNK_HEADER_SIZE;
         mscan = chunk_at_offset(chunk, -msize);
         assert(mscan->prev_size == THIS_CHUNK_FREE);
-        assert((mscan->size & ~FLAG_SORTED) == chunk->prev_size);
+        assert(mscan->size == chunk->prev_size);
 
         /* unlink the previous chunk */
-        mscan->d.next->prev = mscan->d.prev;
-        mscan->d.prev->next = mscan->d.next;
+        unlink_chunk(mscan);
 
         /* merge the two chunks */
         mscan->size = msize + chunk->size;
-        next_chunk_u(mscan)->prev_size = mscan->size;
+        next_chunk(mscan)->prev_size = mscan->size;
 
         assert(chunk->prev_size = (size_t)-1);
         assert(chunk->size = (size_t)-1);
 {
     char *data = ((char *)first_chunk) + 16;
     size_t prev_size_if_free = 0;
+    fprintf(stderr, "\n");
     while (1) {
-        fprintf(stderr, "[ %p: %zu\n", data - 16, *(size_t*)(data - 16));
+        assert((((uintptr_t)data) & 7) == 0);   /* alignment */
+        fprintf(stderr, "[ %p: %zu", data - 16, *(size_t*)(data - 16));
         if (prev_size_if_free == 0) {
             assert(*(size_t*)(data - 16) == THIS_CHUNK_FREE ||
                    *(size_t*)(data - 16) == BOTH_CHUNKS_USED);
             if (*(size_t*)(data - 16) == THIS_CHUNK_FREE)
-                prev_size_if_free = (*(size_t*)(data - 8)) & ~FLAG_SORTED;
+                prev_size_if_free = (*(size_t*)(data - 8));
         }
         else {
             assert(*(size_t*)(data - 16) == prev_size_if_free);
         }
         if (*(size_t*)(data - 8) == END_MARKER)
             break;
-        fprintf(stderr, "  %p: %zu ]", data - 8, *(size_t*)(data - 8));
         if (prev_size_if_free) {
-            fprintf(stderr, " (free %p / %p)\n",
-                    *(void **)data, *(void **)(data + 8));
+            fprintf(stderr, "        \t(up %p / down %p)",
+                    *(void **)(data + 16), *(void **)(data + 24));
+        }
+        fprintf(stderr, "\n  %p: %zu ]", data - 8, *(size_t*)(data - 8));
+        if (prev_size_if_free) {
+            fprintf(stderr, "\t(prev %p <-> next %p)\n",
+                    *(void **)(data + 8), *(void **)data);
         }
         else {
             fprintf(stderr, "\n");
         }
-        if (!prev_size_if_free)
-            assert(!((*(size_t*)(data - 8)) & FLAG_SORTED));
         assert(*(ssize_t*)(data - 8) >= 16);
-        data += (*(size_t*)(data - 8)) & ~FLAG_SORTED;
+        data += *(size_t*)(data - 8);
         data += 16;
     }
-    fprintf(stderr, "  %p: end. ]\n\n", data - 8);
+    fprintf(stderr, "\n  %p: end. ]\n\n", data - 8);
     assert(data - 16 == (char *)last_chunk);
 }
 
     return (char *)first_chunk;
 }
 
-#ifdef STM_TESTS
+#ifdef STM_LARGEMALLOC_TEST
 bool (*_stm_largemalloc_keep)(char *data);   /* a hook for tests */
 #endif
 
     last_chunk = chunk_at_offset(first_chunk, data_size - CHUNK_HEADER_SIZE);
     last_chunk->prev_size = first_chunk->size;
     last_chunk->size = END_MARKER;
-    assert(last_chunk == next_chunk_u(first_chunk));
+    assert(last_chunk == next_chunk(first_chunk));
 
     insert_unsorted(first_chunk);
 
-#ifdef STM_TESTS
+#ifdef STM_LARGEMALLOC_TEST
     _stm_largemalloc_keep = NULL;
 #endif
 }
             return 0;
 
         /* unlink the prev_chunk from the doubly-linked list */
-        prev_chunk->d.next->prev = prev_chunk->d.prev;
-        prev_chunk->d.prev->next = prev_chunk->d.next;
+        unlink_chunk(prev_chunk);
 
         /* reduce the prev_chunk */
-        assert((prev_chunk->size & ~FLAG_SORTED) == last_chunk->prev_size);
+        assert(prev_chunk->size == last_chunk->prev_size);
         prev_chunk->size = ((char*)new_last_chunk) - (char *)prev_chunk
                            - CHUNK_HEADER_SIZE;
 
         new_last_chunk->prev_size = prev_chunk->size;
         new_last_chunk->size = END_MARKER;
         last_chunk = new_last_chunk;
-        assert(last_chunk == next_chunk_u(prev_chunk));
+        assert(last_chunk == next_chunk(prev_chunk));
 
         insert_unsorted(prev_chunk);
     }
         new_last_chunk->prev_size = BOTH_CHUNKS_USED;
         new_last_chunk->size = END_MARKER;
         last_chunk = new_last_chunk;
-        assert(last_chunk == next_chunk_u(old_last_chunk));
+        assert(last_chunk == next_chunk(old_last_chunk));
 
         /* then free the last_chunk (turn it from "used" to "free) */
         _stm_large_free((char *)&old_last_chunk->d);
 
 static inline bool _largemalloc_sweep_keep(mchunk_t *chunk)
 {
-#ifdef STM_TESTS
+#ifdef STM_LARGEMALLOC_TEST
     if (_stm_largemalloc_keep != NULL)
         return _stm_largemalloc_keep((char *)&chunk->d);
 #endif
     mchunk_t *mnext, *chunk = first_chunk;
 
     if (chunk->prev_size == THIS_CHUNK_FREE)
-        chunk = next_chunk_a(chunk);   /* go to the first non-free chunk */
+        chunk = next_chunk(chunk);   /* go to the first non-free chunk */
 
     while (chunk != last_chunk) {
 
         assert(chunk->prev_size != THIS_CHUNK_FREE);
 
         /* first figure out the next non-free chunk */
-        mnext = next_chunk_u(chunk);
+        mnext = next_chunk(chunk);
         if (mnext->prev_size == THIS_CHUNK_FREE)
-            mnext = next_chunk_a(mnext);
+            mnext = next_chunk(mnext);
 
         /* use the callback to know if 'chunk' contains an object that
            survives or dies */

File c7/stm/misc.c

     mutex_pages_unlock();
     return result;
 }
+#endif
 
+#ifdef STM_LARGEMALLOC_TEST
 void _stm_mutex_pages_lock(void)
 {
     mutex_pages_lock();

File c7/stm/pages.h

     if (pages_privatized[pagenum - PAGE_FLAG_START].by_segment != 0)
         page_reshare(pagenum);
 }
+
+void _stm_mutex_pages_lock(void);

File c7/test/support.py

 
 ''', sources=source_files,
      define_macros=[('STM_TESTS', '1'),
+                    ('STM_LARGEMALLOC_TEST', '1'),
                     ('STM_NO_COND_WAIT', '1'),
                     ('STM_DEBUGPRINT', '1'),
                     ('GC_N_SMALL_REQUESTS', str(GC_N_SMALL_REQUESTS)), #check

File c7/test/test_largemalloc.py

         lib._stm_mutex_pages_lock()   # for this file
 
     def test_simple(self):
+        #
+        lib._stm_large_dump()
         d1 = lib._stm_large_malloc(7000)
+        lib._stm_large_dump()
         d2 = lib._stm_large_malloc(8000)
         print d1
         print d2
         lib._stm_large_dump()
 
     def test_resize_arena_reduce_2(self):
-        lib._stm_large_malloc(self.size // 2 - 64)
+        lib._stm_large_malloc(self.size // 2 - 80)
         r = lib._stm_largemalloc_resize_arena(self.size // 2)
         assert r == 1
         lib._stm_large_dump()
                 p.append((d, sz, content1, content2))
         lib._stm_large_dump()
 
-    def test_random_largemalloc_sweep(self):
+    def test_random_largemalloc_sweep(self, constrained_size_range=False):
         @ffi.callback("bool(char *)")
         def keep(data):
             try:
 
         r = random.Random(1000)
         for j in range(500):
-            sizes = [random.choice(range(104, 500, 8)) for i in range(20)]
+            if constrained_size_range:
+                max = 120
+            else:
+                max = 500
+            sizes = [random.choice(range(104, max, 8)) for i in range(20)]
             all = [lib._stm_large_malloc(size) for size in sizes]
             print all
 
                     assert all[i][50] == chr(65 + i)
                 else:
                     assert all_orig[i][50] == '\xDE'
+
+    def test_random_largemalloc_sweep_constrained_size_range(self):
+        self.test_random_largemalloc_sweep(constrained_size_range=True)