
Commits

Armin Rigo committed 7b571cf

Copy the C files from arigo/hack/stm/c2/.

  • Parent commits 1c60f24
  • Branches stm-thread-2


Files changed (5)

File pypy/translator/stm/src_stm/atomic_ops.h

    non-volatiles */
 #define CFENCE          asm volatile ("":::"memory")
 
+#if defined(__amd64__) || defined(__i386__)
+#  define smp_wmb()       CFENCE
+#  define smp_spinloop()  asm volatile ("pause")
+#elif defined(__powerpc__)
+#  define smp_wmb()       asm volatile ("lwsync":::"memory")
+#  define smp_spinloop()  /* fill me? */
+#else
+#  error "Define smp_wmb() for your architecture"
+#endif
+
 
 #ifdef __llvm__
 #  define HAS_SYNC_BOOL_COMPARE_AND_SWAP
 #else
 /* x86 (32 bits and 64 bits) */
 static inline _Bool
-bool_cas(volatile unsigned long* ptr, unsigned long old, unsigned long _new)
+bool_cas(volatile revision_t *ptr, revision_t old, revision_t _new)
 {
-    unsigned long prev;
+    revision_t prev;
+#if defined(__amd64__)
+    assert(sizeof(revision_t) == 8);
+#elif defined(__i386__)
+    assert(sizeof(revision_t) == 4);
+#else
+#   error "the custom version of bool_cas() is only for x86 or x86-64"
+#endif
     asm volatile("lock;"
 #if defined(__amd64__)
                  "cmpxchgq %1, %2;"
 
 static inline void spinloop(void)
 {
+  smp_spinloop();
   /* use "memory" here to make sure that gcc will reload the
      relevant data from memory after the spinloop */
-  asm volatile ("pause":::"memory");
+  CFENCE;
 }
 
 

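Aside: a minimal sketch of how a CAS primitive of this shape is typically used, assuming a GCC-style compiler (the __sync_bool_compare_and_swap builtin below is the same one the header falls back to under __llvm__). This is not part of the commit:

    #include <stdint.h>

    typedef uintptr_t revision_t;              /* matches et.h below */
    static volatile revision_t counter = 2;

    /* Atomically advance 'counter' by 2 -- the same read/CAS/retry
       shape used on global_cur_time in et.c.  smp_spinloop() would
       relax the CPU if the loop were expected to spin for a while. */
    static revision_t bump_by_two(void)
    {
        revision_t old;
        do {
            old = counter;
        } while (!__sync_bool_compare_and_swap(&counter, old, old + 2));
        return old + 2;
    }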
File pypy/translator/stm/src_stm/core.c

-/* -*- c-basic-offset: 2 -*- */
-
-/************************************************************/
-
-#define ABORT_REASONS 8
-#define SPINLOOP_REASONS 10
-
-struct tx_descriptor {
-  jmp_buf *setjmp_buf;
-  owner_version_t start_time;
-  /*unsigned long last_known_global_timestamp;*/
-  owner_version_t my_lock_word;
-  struct OrecList reads;
-  long atomic;   /* 0 = not atomic, > 0 atomic */
-  long reads_size_limit, reads_size_limit_nonatomic; /* see should_break_tr. */
-  int active;    /* 0 = inactive, 1 = regular, 2 = inevitable */
-  unsigned num_commits;
-  unsigned num_aborts[ABORT_REASONS];
-  unsigned num_spinloops[SPINLOOP_REASONS];
-  /*unsigned int spinloop_counter;*/
-  struct RedoLog redolog;   /* last item, because it's the biggest one */
-};
-
-/* global_timestamp contains in its lowest bit a flag equal to 1
-   if there is an inevitable transaction running */
-static volatile unsigned long global_timestamp = 2;
-static __thread struct tx_descriptor *thread_descriptor = NULL;
-
-/************************************************************/
-
-#define GETVERSION(o)     ((owner_version_t)((o)->h_version))
-#define GETVERSIONREF(o)  ((volatile owner_version_t *)(&(o)->h_version))
-#define SETVERSION(o, v)  (o)->h_version = (void *)(v)
-#define GETTID(o)         ((o)->h_tid)
-
-static unsigned long get_global_timestamp(struct tx_descriptor *d)
-{
-  return (/*d->last_known_global_timestamp =*/ global_timestamp);
-}
-
-static _Bool change_global_timestamp(struct tx_descriptor *d,
-                                     unsigned long old,
-                                     unsigned long new)
-{
-  if (bool_cas(&global_timestamp, old, new))
-    {
-      /*d->last_known_global_timestamp = new;*/
-      return 1;
-    }
-  return 0;
-}
-
-static void set_global_timestamp(struct tx_descriptor *d, unsigned long new)
-{
-  global_timestamp = new;
-  /*d->last_known_global_timestamp = new;*/
-}
-
-static void tx_abort(int);
-
-static void tx_spinloop(int num)
-{
-  unsigned int c;
-  int i;
-  struct tx_descriptor *d = thread_descriptor;
-  assert(d->active);
-  d->num_spinloops[num]++;
-
-  //printf("tx_spinloop(%d)\n", num);
-
-#if 0
-  c = d->spinloop_counter;
-  d->spinloop_counter = c * 9;
-  i = c & 0xff0000;
-  while (i >= 0) {
-    spinloop();
-    i -= 0x10000;
-  }
-#else
-  spinloop();
-#endif
-}
-
-static _Bool is_inevitable(struct tx_descriptor *d)
-{
-  /* Assert that we are running a transaction.
-     Returns True if this transaction is inevitable. */
-  assert(d->active == 1 + !d->setjmp_buf);
-  return d->active == 2;
-}
-
-/*** run the redo log to commit a transaction, and release the locks.
-     Cannot abort any more. */
-static void tx_redo(struct tx_descriptor *d, owner_version_t newver)
-{
-  wlog_t *item;
-  REDOLOG_LOOP_FORWARD(d->redolog, item)
-    {
-      void *globalobj = item->addr;
-      void *localobj = item->val;
-      long size = pypy_g__stm_getsize(localobj);
-      memcpy(((char *)globalobj) + sizeof(orec_t),
-             ((char *)localobj) + sizeof(orec_t),
-             size - sizeof(orec_t));
-      /* unlock the orec */
-      volatile orec_t* o = get_orec(globalobj);
-      CFENCE;
-      SETVERSION(o, newver);
-    } REDOLOG_LOOP_END;
-}
-
-/*** on abort, release locks and restore the old version number. */
-static void releaseAndRevertLocks(struct tx_descriptor *d)
-{
-  wlog_t *item;
-  REDOLOG_LOOP_FORWARD(d->redolog, item)
-    {
-      if (item->p != -1)
-        {
-          volatile orec_t* o = get_orec(item->addr);
-          SETVERSION(o, item->p);
-        }
-    } REDOLOG_LOOP_END;
-}
-
-/*** release locks and restore the old version number, ready to retry later */
-static void releaseLocksForRetry(struct tx_descriptor *d)
-{
-  wlog_t *item;
-  REDOLOG_LOOP_FORWARD(d->redolog, item)
-    {
-      volatile orec_t* o = get_orec(item->addr);
-      assert(item->p != -1);
-      SETVERSION(o, item->p);
-      item->p = -1;
-    } REDOLOG_LOOP_END;
-}
-
-/*** lock all locations */
-static void acquireLocks(struct tx_descriptor *d)
-{
-  wlog_t *item;
-  // try to lock every location in the write set
-  REDOLOG_LOOP_BACKWARD(d->redolog, item)
-    {
-      // get orec, read its version#
-      volatile orec_t* o = get_orec(item->addr);
-      owner_version_t ovt;
-
-    retry:
-      ovt = GETVERSION(o);
-
-      // if orec not locked, lock it
-      //
-      // NB: if ovt > start time, we may introduce inconsistent
-      // reads.  Since most writes are also reads, we'll just abort under this
-      // condition.  This can introduce false conflicts
-      if (!IS_LOCKED_OR_NEWER(ovt, d->start_time)) {
-        if (!bool_cas(GETVERSIONREF(o), ovt, d->my_lock_word))
-          goto retry;
-        // save old version to item->p.  Now we hold the lock.
-        item->p = ovt;
-      }
-      // else if the location is too recent...
-      else if (!IS_LOCKED(ovt))
-        tx_abort(0);
-      // else it is locked: check it's not by me
-      else {
-        assert(ovt != d->my_lock_word);
-        // we can either abort or spinloop.  Because we are at the end of
-        // the transaction we might try to spinloop, even though after the
-        // lock is released the ovt will be very recent, possibly
-        // > d->start_time.  It is necessary to spinloop in case we are
-        // inevitable, so use that as a criteria.  Another solution to avoid
-        // deadlocks would be to sort the order in which we take the locks.
-        if (is_inevitable(d))
-          tx_spinloop(8);
-        else
-          tx_abort(6);
-        goto retry;
-      }
-    } REDOLOG_LOOP_END;
-}
-
-static void common_cleanup(struct tx_descriptor *d)
-{
-  d->reads.size = 0;
-  redolog_clear(&d->redolog);
-  d->active = 0;
-}
-
-static void tx_restart(struct tx_descriptor *d)
-{
-  // release the locks and restore version numbers
-  releaseAndRevertLocks(d);
-  // notifies the CPU that we're potentially in a spin loop
-  tx_spinloop(0);
-  // reset all lists
-  common_cleanup(d);
-  // jump back to the setjmp_buf (this call does not return)
-  longjmp(*d->setjmp_buf, 1);
-}
-
-/*** increase the abort count and restart the transaction */
-static void tx_abort(int reason)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  assert(d->active == 1);
-  d->num_aborts[reason]++;
-#ifdef RPY_STM_DEBUG_PRINT
-  PYPY_DEBUG_START("stm-abort");
-  if (PYPY_HAVE_DEBUG_PRINTS)
-      fprintf(PYPY_DEBUG_FILE, "thread %lx aborting %d\n",
-                               (long)pthread_self(), reason);
-  PYPY_DEBUG_STOP("stm-abort");
-#endif
-  tx_restart(d);
-}
-
-/**
- * fast-path validation, assuming that I don't hold locks.
- */
-static void validate_fast(struct tx_descriptor *d, int lognum)
-{
-  int i;
-  owner_version_t ovt;
-  assert(d->active == 1);
-  for (i=0; i<d->reads.size; i++)
-    {
-    retry:
-      ovt = GETVERSION(d->reads.items[i]);
-      if (IS_LOCKED_OR_NEWER(ovt, d->start_time))
-        {
-          // If locked, we wait until it becomes unlocked.  The chances are
-          // that it will then have a very recent start_time, likely
-          // > d->start_time, but it might still be better than always aborting
-          if (IS_LOCKED(ovt))
-            {
-              tx_spinloop(lognum);  /* tx_spinloop(1), tx_spinloop(2),
-                                       tx_spinloop(3) */
-              goto retry;
-            }
-          else
-            // abort if the timestamp is newer than my start time.  
-            tx_abort(lognum);  /* tx_abort(1), tx_abort(2), tx_abort(3) */
-        }
-    }
-}
-
-/**
- * validate the read set by making sure that all orecs that we've read have
- * timestamps at least as old as our start time, unless we locked those orecs.
- */
-static void validate(struct tx_descriptor *d)
-{
-  int i;
-  owner_version_t ovt;
-  assert(d->active == 1);
-  for (i=0; i<d->reads.size; i++)
-    {
-      ovt = GETVERSION(d->reads.items[i]);      // read this orec
-      if (IS_LOCKED_OR_NEWER(ovt, d->start_time))
-        {
-          if (!IS_LOCKED(ovt))
-            // if unlocked and newer than start time, abort
-            tx_abort(4);
-          else
-            {
-              // if locked and not by me, abort
-              if (ovt != d->my_lock_word)
-                tx_abort(5);
-            }
-        }
-    }
-}
-
-#ifdef USE_PTHREAD_MUTEX
-/* mutex: only to avoid busy-looping too much in tx_spinloop() below */
-
-# if defined(RPY_STM_ASSERT) && defined(PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP)
-static pthread_mutex_t mutex_inevitable = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
-# else
-static pthread_mutex_t mutex_inevitable = PTHREAD_MUTEX_INITIALIZER;
-# endif
-
-# ifndef RPY_STM_ASSERT
-#  define mutex_lock()    pthread_mutex_lock(&mutex_inevitable)
-#  define mutex_unlock()  pthread_mutex_unlock(&mutex_inevitable)
-# else
-static unsigned long locked_by = 0;
-static void mutex_lock(void)
-{
-  unsigned long pself = (unsigned long)pthread_self();
-#ifdef RPY_STM_DEBUG_PRINT
-  //fprintf(stderr, "%lx: mutex inev locking...\n", pself);
-#endif
-  assert(locked_by != pself);
-  pthread_mutex_lock(&mutex_inevitable);
-  locked_by = pself;
-#ifdef RPY_STM_DEBUG_PRINT
-  //fprintf(stderr, "%lx: mutex inev locked\n", pself);
-#endif
-}
-static void mutex_unlock(void)
-{
-  unsigned long pself = (unsigned long)pthread_self();
-  assert(locked_by == pself);
-  locked_by = 0;
-#ifdef RPY_STM_DEBUG_PRINT
-  //fprintf(stderr, "%lx: mutex inev unlocked\n", pself);
-#endif
-  pthread_mutex_unlock(&mutex_inevitable);
-}
-# endif
-#else
-# define mutex_lock()     /* nothing */
-# define mutex_unlock()   /* nothing */
-#endif
-
-static void wait_end_inevitability(struct tx_descriptor *d)
-{
-  unsigned long curts;
-  releaseLocksForRetry(d);
-
-  // We are going to wait until the other inevitable transaction
-  // finishes.  XXX We could do better here: we could check if
-  // committing 'd' would create a conflict for the other inevitable
-  // thread 'd_inev' or not.  It requires peeking in 'd_inev' from this
-  // thread (which we never do so far) in order to do something like
-  // 'validate_fast(d_inev); d_inev->start_time = updated;'
-
-  while ((curts = get_global_timestamp(d)) & 1)
-    {
-      // while we're about to wait anyway, we can do a validate_fast
-      if (d->start_time < curts - 1)
-        {
-          validate_fast(d, 3);
-          d->start_time = curts - 1;
-        }
-      tx_spinloop(4);
-      mutex_lock();
-      mutex_unlock();
-    }
-  acquireLocks(d);
-}
-
-static owner_version_t commitInevitableTransaction(struct tx_descriptor *d)
-{
-  unsigned long ts;
-  _Bool ok;
-
-  // no-one else can modify global_timestamp if I'm inevitable
-  // and d_inev_checking is 0
-  ts = get_global_timestamp(d);
-  assert(ts & 1);
-  ts += 1;
-  set_global_timestamp(d, ts);
-  assert(ts == (d->start_time + 2));
-
-  /* we still have the locks acquired, but we changed the global timestamp
-   * and we can release the mutex here.  The locked orecs will be updated
-   * immediately afterwards by tx_redo(). */
-  mutex_unlock();
-
-  return ts;
-}
-
-/* lazy/lazy read instrumentation */
-#define STM_DO_READ(READ_OPERATION)                                     \
- retry:                                                                 \
-  /* read the orec BEFORE we read anything else */                      \
-  ovt = GETVERSION(o);                                                  \
-  CFENCE;                                                               \
-                                                                        \
-  /* this tx doesn't hold any locks, so if the lock for this addr is */ \
-  /* held, there is contention.  A lock is never held for too long,  */ \
-  /* so spinloop until it is released.                               */ \
-  if (IS_LOCKED_OR_NEWER(ovt, d->start_time))                           \
-    {                                                                   \
-      if (IS_LOCKED(ovt)) {                                             \
-        tx_spinloop(7);                                                 \
-        goto retry;                                                     \
-      }                                                                 \
-      /* else this location is too new, scale forward */                \
-      owner_version_t newts = get_global_timestamp(d) & ~1;             \
-      validate_fast(d, 1);                                              \
-      d->start_time = newts;                                            \
-    }                                                                   \
-                                                                        \
-  /* orec is unlocked, with ts <= start_time.  read the location */     \
-  READ_OPERATION;                                                       \
-                                                                        \
-  if (!is_inevitable(d)) {                                              \
-    /* if is_inevitable(), then we don't need to do the checking of  */ \
-    /* o->version done below --- but more importantly, we don't need */ \
-    /* to insert o in the OrecList.  We *do* need to do the above    */ \
-    /* check for locked-ness, though.                                */ \
-                                                                        \
-    /* postvalidate AFTER reading addr: */                              \
-    CFENCE;                                                             \
-    if (__builtin_expect(GETVERSION(o) != ovt, 0))                      \
-      goto retry;       /* oops, try again */                           \
-                                                                        \
-    oreclist_insert(&d->reads, (orec_t*)o);                             \
-  }
-
-
-#define STM_READ_WORD(SIZE, SUFFIX, TYPE)                               \
-TYPE stm_read_int##SIZE##SUFFIX(void* addr, long offset)                \
-{                                                                       \
-  struct tx_descriptor *d = thread_descriptor;                          \
-  volatile orec_t *o = get_orec(addr);                                  \
-  owner_version_t ovt;                                                  \
-                                                                        \
-  assert(sizeof(TYPE) == SIZE);                                         \
-                                                                        \
-  if ((GETTID(o) & GCFLAG_WAS_COPIED) != 0)                             \
-    {                                                                   \
-      /* Look up in the thread-local dictionary. */                     \
-      wlog_t *found;                                                    \
-      REDOLOG_FIND(d->redolog, addr, found, goto not_found);            \
-      orec_t *localobj = (orec_t *)found->val;                          \
-      assert((GETTID(localobj) & GCFLAG_GLOBAL) == 0);                  \
-      return *(TYPE *)(((char *)localobj) + offset);                    \
-                                                                        \
-    not_found:;                                                         \
-    }                                                                   \
-                                                                        \
-  TYPE tmp;                                                             \
-  STM_DO_READ(tmp = *(TYPE *)(((char *)addr) + offset));                \
-  return tmp;                                                           \
-}
-
-STM_READ_WORD(1, , char)
-STM_READ_WORD(2, , short)
-STM_READ_WORD(4, , int)
-STM_READ_WORD(8, , long long)
-STM_READ_WORD(8,f, double)
-STM_READ_WORD(4,f, float)
-
-void stm_copy_transactional_to_raw(void *src, void *dst, long size)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  volatile orec_t *o = get_orec(src);
-  owner_version_t ovt;
-
-  /* don't copy the header */
-  src = ((char *)src) + sizeof(orec_t);
-  dst = ((char *)dst) + sizeof(orec_t);
-  size -= sizeof(orec_t);
-
-  STM_DO_READ(memcpy(dst, src, size));
-}
-
-long stm_descriptor_init(void)
-{
-  if (thread_descriptor == NULL)
-    {
-      struct tx_descriptor *d = malloc(sizeof(struct tx_descriptor));
-      memset(d, 0, sizeof(struct tx_descriptor));
-
-#ifdef RPY_STM_DEBUG_PRINT
-      PYPY_DEBUG_START("stm-init");
-#endif
-
-      /* initialize 'my_lock_word' to be a unique negative number */
-      d->my_lock_word = (owner_version_t)d;
-      if (!IS_LOCKED(d->my_lock_word))
-        d->my_lock_word = ~d->my_lock_word;
-      assert(IS_LOCKED(d->my_lock_word));
-
-      thread_descriptor = d;
-
-#ifdef RPY_STM_DEBUG_PRINT
-      if (PYPY_HAVE_DEBUG_PRINTS)
-        fprintf(PYPY_DEBUG_FILE, "thread %lx starting with id %lx\n",
-                (long)pthread_self(), (long)d->my_lock_word);
-      PYPY_DEBUG_STOP("stm-init");
-#endif
-      return 1;
-    }
-  else
-    return 0;   /* already initialized */
-}
-
-void stm_descriptor_done(void)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  assert(d != NULL);
-  assert(d->active == 0);
-
-  thread_descriptor = NULL;
-
-#ifdef RPY_STM_DEBUG_PRINT
-  PYPY_DEBUG_START("stm-done");
-  if (PYPY_HAVE_DEBUG_PRINTS) {
-    int num_aborts = 0, num_spinloops = 0;
-    int i, prevchar;
-    char line[256], *p = line;
-
-    for (i=0; i<ABORT_REASONS; i++)
-      num_aborts += d->num_aborts[i];
-    for (i=0; i<SPINLOOP_REASONS; i++)
-      num_spinloops += d->num_spinloops[i];
-
-    p += sprintf(p, "thread %lx: %d commits, %d aborts\n",
-                 (long)pthread_self(),
-                 d->num_commits,
-                 num_aborts);
-
-    for (i=0; i<ABORT_REASONS; i++)
-      p += sprintf(p, "%c%d", i == 0 ? '[' : ',',
-                   d->num_aborts[i]);
-
-    for (i=1; i<SPINLOOP_REASONS; i++)  /* num_spinloops[0] == num_aborts */
-      p += sprintf(p, "%c%d", i == 1 ? '|' : ',',
-                   d->num_spinloops[i]);
-
-    p += sprintf(p, "]\n");
-    fwrite(line, 1, p - line, PYPY_DEBUG_FILE);
-  }
-  PYPY_DEBUG_STOP("stm-done");
-#endif
-
-  free(d);
-}
-
-static void update_reads_size_limit(struct tx_descriptor *d)
-{
-  /* 'reads_size_limit' is set to LONG_MAX if we are atomic; else
-     we copy the value from reads_size_limit_nonatomic. */
-  d->reads_size_limit = d->atomic ? LONG_MAX : d->reads_size_limit_nonatomic;
-}
-
-static void begin_transaction(jmp_buf* buf)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  assert(d->active == 0);
-  d->active = 1;
-  d->setjmp_buf = buf;
-  d->start_time = (/*d->last_known_global_timestamp*/ global_timestamp) & ~1;
-  update_reads_size_limit(d);
-}
-
-static void make_inevitable(struct tx_descriptor *d)
-{
-  d->setjmp_buf = NULL;
-  d->active = 2;
-  d->reads_size_limit_nonatomic = 0;
-  update_reads_size_limit(d);
-}
-
-void stm_begin_inevitable_transaction(void)
-{
-  /* Equivalent to begin_transaction(); stm_try_inevitable();
-     except more efficient */
-  struct tx_descriptor *d = thread_descriptor;
-  assert(d->active == 0);
-  make_inevitable(d);
-
-  mutex_lock();
-  while (1)
-    {
-      unsigned long curtime = get_global_timestamp(d);
-      if (!(curtime & 1) && change_global_timestamp(d, curtime, curtime + 1))
-        {
-          d->start_time = curtime;
-          break;
-        }
-      tx_spinloop(6);
-    }
-}
-
-void stm_commit_transaction(void)
-{
-  owner_version_t end_time;
-  struct tx_descriptor *d = thread_descriptor;
-  assert(d->active != 0);
-
-  // if I don't have writes, I'm committed
-  if (!redolog_any_entry(&d->redolog))
-    {
-      if (is_inevitable(d))
-        {
-          unsigned long ts = get_global_timestamp(d);
-          assert(ts & 1);
-          set_global_timestamp(d, ts - 1);
-          mutex_unlock();
-        }
-      d->num_commits++;
-      common_cleanup(d);
-      return;
-    }
-
-  // bring that variable over to this CPU core (optimization, maybe)
-  /* global_timestamp; */
-
-  // acquire locks
-  acquireLocks(d);
-
-  if (is_inevitable(d))
-    {
-      end_time = commitInevitableTransaction(d);
-    }
-  else
-    {
-      while (1)
-        {
-          unsigned long expected = get_global_timestamp(d);
-          if (expected & 1)
-            {
-              // wait until it is done.  hopefully we can then proceed
-              // without conflicts.
-              wait_end_inevitability(d);
-              continue;
-            }
-          if (change_global_timestamp(d, expected, expected + 2))
-            {
-              end_time = expected + 2;
-              break;
-            }
-        }
-
-      // validate (but skip validation if nobody else committed)
-      if (end_time != (d->start_time + 2))
-        validate(d);
-    }
-
-  // run the redo log, and release the locks
-  tx_redo(d, end_time);
-
-  // remember that this was a commit
-  d->num_commits++;
-
-  // reset all lists
-  common_cleanup(d);
-}
-
-void stm_try_inevitable(STM_CCHARP1(why))
-{
-  /* when a transaction is inevitable, its start_time is equal to
-     global_timestamp and global_timestamp cannot be incremented
-     by another thread.  We set the lowest bit in global_timestamp
-     to 1. */
-  struct tx_descriptor *d = thread_descriptor;
-  if (d == NULL || d->active != 1)
-    return;  /* I am already inevitable, or not in a transaction at all
-                (XXX statically we should know when we're outside
-                a transaction) */
-
-#ifdef RPY_STM_DEBUG_PRINT
-  PYPY_DEBUG_START("stm-inevitable");
-  if (PYPY_HAVE_DEBUG_PRINTS)
-    {
-      fprintf(PYPY_DEBUG_FILE, "%s\n", why);
-    }
-#endif
-
-  while (1)
-    {
-      unsigned long curtime = get_global_timestamp(d);
-      if (d->start_time != (curtime & ~1))
-        {                             /* scale forward */
-          validate_fast(d, 2);
-          d->start_time = curtime & ~1;
-        }
-      mutex_lock();
-      if (curtime & 1)   /* there is, or was, already an inevitable thread */
-        {
-          /* should we spinloop here, or abort (and likely come back
-             in try_inevitable() very soon)?  unclear.  For now
-             let's try to spinloop, after the waiting done by
-             acquiring the mutex */
-        }
-      else
-        {
-          if (change_global_timestamp(d, curtime, curtime + 1))
-            break;
-        }
-      mutex_unlock();
-      tx_spinloop(6);
-    }
-  make_inevitable(d);   /* inevitable from now on */
-
-#ifdef RPY_STM_DEBUG_PRINT
-  PYPY_DEBUG_STOP("stm-inevitable");
-#endif
-}
-
-void stm_abort_and_retry(void)
-{
-  tx_abort(7);     /* manual abort */
-}
-
-/************************************************************/
-
-static __thread void *rpython_tls_object;
-
-void stm_set_tls(void *newtls)
-{
-  rpython_tls_object = newtls;
-}
-
-void *stm_get_tls(void)
-{
-  return rpython_tls_object;
-}
-
-void stm_del_tls(void)
-{
-  rpython_tls_object = NULL;
-}
-
-void *stm_tldict_lookup(void *key)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  wlog_t* found;
-  REDOLOG_FIND(d->redolog, key, found, goto not_found);
-  return found->val;
-
- not_found:
-  return NULL;
-}
-
-void stm_tldict_add(void *key, void *value)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  assert(d != NULL);
-  redolog_insert(&d->redolog, key, value);
-}
-
-void stm_tldict_enum(void)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  wlog_t *item;
-  void *tls = stm_get_tls();
-
-  REDOLOG_LOOP_FORWARD(d->redolog, item)
-    {
-      pypy_g__stm_enum_callback(tls, item->addr, item->val);
-    } REDOLOG_LOOP_END;
-}
-
-long stm_in_transaction(void)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  return d->active;
-}
-
-long stm_is_inevitable(void)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  return is_inevitable(d);
-}
-
-static long stm_regular_length_limit = LONG_MAX;
-
-void stm_add_atomic(long delta)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  d->atomic += delta;
-  update_reads_size_limit(d);
-}
-
-long stm_get_atomic(void)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  return d->atomic;
-}
-
-long stm_should_break_transaction(void)
-{
-  struct tx_descriptor *d = thread_descriptor;
-
-  /* a single comparison to handle all cases:
-
-     - if d->atomic, then we should return False.  This is done by
-       forcing reads_size_limit to LONG_MAX as soon as atomic > 0.
-
-     - otherwise, if is_inevitable(), then we should return True.
-       This is done by forcing both reads_size_limit and
-       reads_size_limit_nonatomic to 0 in that case.
-
-     - finally, the default case: return True if d->reads.size is
-       greater than reads_size_limit == reads_size_limit_nonatomic.
-  */
-#ifdef RPY_STM_ASSERT
-  /* reads_size_limit is LONG_MAX if d->atomic, or else it is equal to
-     reads_size_limit_nonatomic. */
-  assert(d->reads_size_limit == (d->atomic ? LONG_MAX :
-                                     d->reads_size_limit_nonatomic));
-  /* if is_inevitable(), reads_size_limit_nonatomic should be 0
-     (and thus reads_size_limit too, if !d->atomic.) */
-  if (is_inevitable(d))
-    assert(d->reads_size_limit_nonatomic == 0);
-#endif
-
-  return d->reads.size >= d->reads_size_limit;
-}
-
-void stm_set_transaction_length(long length_max)
-{
-  struct tx_descriptor *d = thread_descriptor;
-  stm_try_inevitable(STM_EXPLAIN1("set_transaction_length"));
-  stm_regular_length_limit = length_max;
-}
-
-#define END_MARKER   ((void*)-8)   /* keep in sync with stmframework.py */
-
-void stm_perform_transaction(long(*callback)(void*, long), void *arg,
-                             void *save_and_restore)
-{
-  jmp_buf _jmpbuf;
-  long volatile v_counter = 0;
-  void **volatile v_saved_value;
-  long volatile v_atomic = thread_descriptor->atomic;
-  assert((!thread_descriptor->active) == (!v_atomic));
-  v_saved_value = *(void***)save_and_restore;
-  /***/
-  setjmp(_jmpbuf);
-  /* After setjmp(), the local variables v_* are preserved because they
-   * are volatile.  The other variables are only declared here. */
-  struct tx_descriptor *d = thread_descriptor;
-  long counter, result;
-  void **restore_value;
-  counter = v_counter;
-  d->atomic = v_atomic;
-  restore_value = v_saved_value;
-  if (!d->atomic)
-    {
-      /* In non-atomic mode, we are now between two transactions.
-         It means that in the next transaction's collections we know
-         that we won't need to access the shadows stack beyond its
-         current position.  So we add an end marker. */
-      *restore_value++ = END_MARKER;
-    }
-  *(void***)save_and_restore = restore_value;
-
-  do
-    {
-      v_counter = counter + 1;
-      /* initialize 'reads_size_limit_nonatomic' from the configured
-         length limit, scaled down by a factor of 2 for each time we
-         retry an aborted transaction.  Note that as soon as such a
-         shortened transaction succeeds, the next one will again have
-         full length, for now. */
-      d->reads_size_limit_nonatomic = stm_regular_length_limit >> counter;
-      if (!d->atomic)
-        begin_transaction(&_jmpbuf);
-
-      /* invoke the callback in the new transaction */
-      result = callback(arg, counter);
-
-      v_atomic = d->atomic;
-      if (!d->atomic)
-        stm_commit_transaction();
-      counter = 0;
-    }
-  while (result == 1);  /* also stops if we got an RPython exception */
-
-  if (d->atomic && thread_descriptor->setjmp_buf == &_jmpbuf)
-    stm_try_inevitable(STM_EXPLAIN1("perform_transaction left with atomic"));
-
-  *(void***)save_and_restore = v_saved_value;
-}
-
-#undef GETVERSION
-#undef GETVERSIONREF
-#undef SETVERSION
-#undef GETTID

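For contrast with the new et.c below, here is a condensed sketch (not from the commit; struct tx, begin() and commit() are illustrative names only) of the protocol the deleted core.c implemented: one global timestamp whose low bit, when set, means an inevitable transaction is running, with regular commits bumping it by 2 under CAS:

    struct tx { unsigned long start_time; };
    static volatile unsigned long global_timestamp = 2;

    static void begin(struct tx *d)
    {
        d->start_time = global_timestamp & ~1UL;  /* mask the "inevitable" bit */
    }

    static unsigned long commit(struct tx *d)
    {
        unsigned long t;
        do {
            t = global_timestamp;   /* odd: an inevitable tx is running */
        } while ((t & 1) ||
                 !__sync_bool_compare_and_swap(&global_timestamp, t, t + 2));
        /* if t != d->start_time, someone committed in between and the
           read set must be revalidated (validate() in the code above) */
        return t + 2;               /* this transaction's end_time */
    }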
File pypy/translator/stm/src_stm/et.c

 /* XXX assumes that time never wraps around (in a 'long'), which may be
  * correct on 64-bit machines but not on 32-bit machines if the process
  * runs for long enough.
- *
- * XXX measure the overhead of the global_timestamp
  */
 
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <assert.h>
+#include <pthread.h>
 
-#define USE_PTHREAD_MUTEX    /* can be made optional */
-#ifdef USE_PTHREAD_MUTEX
-# include <pthread.h>
-#endif
+#define RPY_STM_DEBUG_PRINT     1
+#define PYPY_DEBUG_START(s)     fprintf(stderr, "start: %s\n", s)
+#define PYPY_DEBUG_STOP(s)      fprintf(stderr, " stop: %s\n", s)
+#define PYPY_HAVE_DEBUG_PRINTS  1
+#define PYPY_DEBUG_FILE         stderr
 
-#include "src_stm/et.h"
-#include "src_stm/atomic_ops.h"
-
-#ifdef PYPY_STANDALONE         /* obscure: cannot include debug_print.h if compiled */
-# define RPY_STM_DEBUG_PRINT   /* via ll2ctypes; only include it in normal builds */
-# include "src/debug_print.h"
-#else
-# define RPY_STM_ASSERT    1
-#endif
-
-#ifdef RPY_STM_ASSERT
-# include <assert.h>
-#else
-# undef assert
-# define assert(x) /* nothing */
-#endif
+#include "et.h"
+#include "atomic_ops.h"
 
 /************************************************************/
 
-/* This is the same as the object header structure HDR
- * declared in stmgc.py */
+#define INEVITABLE  ((revision_t)-1)
+#define LOCKED  ((revision_t)-0x10000)
 
-typedef struct pypy_header0 orec_t;
+#include "lists.c"
 
 /************************************************************/
 
-#define IS_LOCKED(num)  ((num) < 0)
-#define IS_LOCKED_OR_NEWER(num, max_age) \
-  __builtin_expect(((unsigned long)(num)) > ((unsigned long)(max_age)), 0)
+#define ABORT_REASONS 4
+#define SPINLOOP_REASONS 3
 
-typedef long owner_version_t;
+struct tx_descriptor {
+  jmp_buf *setjmp_buf;
+  revision_t start_time;
+  revision_t my_lock;
+  int active;    /* 0 = inactive, 1 = regular, 2 = inevitable */
+  int readonly_updates;
+  unsigned int num_commits;
+  unsigned int num_aborts[ABORT_REASONS];
+  unsigned int num_spinloops[SPINLOOP_REASONS];
+  struct GcPtrList list_of_read_objects;
+  struct GcPtrList gcroots;
+  struct G2L global_to_local;
+  struct FXCache recent_reads_cache;
+};
 
-#define get_orec(addr)  ((volatile orec_t *)(addr))
+static volatile revision_t global_cur_time = 2;              /* always even */
+static volatile revision_t next_locked_value = LOCKED + 3;   /* always odd */
+static __thread struct tx_descriptor *thread_descriptor = NULL;
 
 /************************************************************/
 
-#include "src_stm/lists.c"
-#include "src_stm/core.c"
+static void ValidateDuringTransaction(struct tx_descriptor *);
+static void CancelLocks(struct tx_descriptor *d);
+static void AbortTransaction(int num);
+static void SpinLoop(int num);
+static gcptr Localize(struct tx_descriptor *d, gcptr R);
+
+static _Bool is_inevitable(struct tx_descriptor *d)
+{
+  /* Assert that we are running a transaction.
+   * Returns True if this transaction is inevitable. */
+  assert(d->active == 1 + !d->setjmp_buf);
+  return d->active == 2;
+}
+
+static pthread_mutex_t mutex_inevitable = PTHREAD_MUTEX_INITIALIZER;
+#define inev_mutex_acquire()  pthread_mutex_lock(&mutex_inevitable)
+#define inev_mutex_release()  pthread_mutex_unlock(&mutex_inevitable)
 
 /************************************************************/
+
+gcptr Allocate(size_t size, int gctid)
+{
+  gcptr W = malloc(size);
+  W->h_tid = gctid;
+  W->h_revision = REV_INITIAL;
+  return W;
+}
+
+static inline void PossiblyUpdateChain(
+        struct tx_descriptor *d,
+        gcptr G, gcptr R, gcptr R_Container, size_t offset)
+{
+  if (R != G && --d->readonly_updates < 0)
+    {
+      d->readonly_updates = 148;   /* XXX tweak */
+      // compress the chain
+      while ((gcptr)G->h_revision != R)
+        {
+          gcptr G_next = (gcptr)G->h_revision;
+          G->h_revision = (revision_t)R;
+          G = G_next;
+        }
+      // update the original field
+      if (R_Container != NULL)
+        {
+          gcptr *ref = (gcptr *)(((char *)R_Container) + offset);
+          *ref = R;
+        }
+    }
+}
+
+static gcptr LatestGlobalRevision(struct tx_descriptor *d, gcptr G,
+                                  gcptr R_Container, size_t offset)
+{
+  gcptr R = G;
+  revision_t v;
+ retry:
+  while (!((v = R->h_revision) & 1))   // "is a pointer", i.e.
+    {                                  //   "has a more recent revision"
+      R = (gcptr)v;
+    }
+  if (__builtin_expect(v > d->start_time, 0))   // object too recent?
+    {
+      if (v >= LOCKED)
+        {
+          SpinLoop(1);     // spinloop until it is no longer LOCKED
+          goto retry;
+        }
+      ValidateDuringTransaction(d);    // try to move start_time forward
+      goto retry;                      // restart searching from R
+    }
+  PossiblyUpdateChain(d, G, R, R_Container, offset);
+  return R;
+}
+
+static inline gcptr AddInReadSet(struct tx_descriptor *d, gcptr R)
+{
+  switch (fxcache_add(&d->recent_reads_cache, R)) {
+
+  case 0:
+      /* not in the cache: it may be the first time we see it,
+       * so insert it into the list */
+      gcptrlist_insert(&d->list_of_read_objects, R);
+      break;
+
+  case 2:
+      /* already in the cache, and FX_THRESHOLD reached */
+      return Localize(d, R);
+  }
+  return R;
+}
+
+static inline gcptr _direct_read_barrier(gcptr G, gcptr R_Container,
+                                         size_t offset)
+{
+  gcptr R;
+  struct tx_descriptor *d = thread_descriptor;
+  assert(d->active);
+  if (!(G->h_tid & GCFLAG_POSSIBLY_OUTDATED))
+    {
+      R = G;
+    }
+  else
+    {
+      R = LatestGlobalRevision(d, G, R_Container, offset);
+      if (R->h_tid & GCFLAG_POSSIBLY_OUTDATED)
+        {
+          wlog_t *entry;
+          gcptr L;
+          G2L_FIND(d->global_to_local, R, entry, goto not_found);
+          L = entry->val;
+          if (R_Container && !(R_Container->h_tid & GCFLAG_GLOBAL))
+            {    /* R_Container is a local object */
+              gcptr *ref = (gcptr *)(((char *)R_Container) + offset);
+              *ref = L;   /* fix in-place */
+            }
+          return L;
+
+        not_found:;
+        }
+    }
+  R = AddInReadSet(d, R);
+  return R;
+}
+
+gcptr _DirectReadBarrier(gcptr G)
+{
+  return _direct_read_barrier(G, NULL, 0);
+}
+
+gcptr _DirectReadBarrierFromR(gcptr G, gcptr R_Container, size_t offset)
+{
+  return _direct_read_barrier(G, R_Container, offset);
+}
+
+gcptr _NonTransactionalReadBarrier(gcptr P)
+{
+  /* testing only: use this outside transactions to check the state */
+  revision_t v;
+  struct tx_descriptor *d = thread_descriptor;
+  assert(d == NULL || !d->active);
+
+  assert(P->h_tid & GCFLAG_GLOBAL);
+
+  while (!((v = P->h_revision) & 1))   // "is a pointer", i.e.
+    {                                  //   "has a more recent revision"
+      assert(P->h_tid & GCFLAG_POSSIBLY_OUTDATED);
+      fprintf(stderr, "[%p->%p]\n", P, (gcptr)v);
+      P = (gcptr)v;
+    }
+  if (P->h_tid & GCFLAG_POSSIBLY_OUTDATED)
+    fprintf(stderr, "[---%p possibly outdated---]\n", P);
+  return P;
+}
+
+static gcptr Localize(struct tx_descriptor *d, gcptr R)
+{
+  wlog_t *entry;
+  gcptr L;
+  size_t size;
+  G2L_FIND(d->global_to_local, R, entry, goto not_found);
+  L = entry->val;
+  return L;
+
+ not_found:
+  size = pypy_g__stm_getsize(R);
+  L = malloc(size);
+  memcpy(L, R, size);
+  L->h_tid &= ~(GCFLAG_GLOBAL | GCFLAG_POSSIBLY_OUTDATED);
+  assert(L->h_tid & GCFLAG_NOT_WRITTEN);
+  L->h_revision = (revision_t)R;     /* back-reference to the original */
+  g2l_insert(&d->global_to_local, R, L);
+  return L;
+}
+
+gcptr _WriteBarrier(gcptr P)
+{
+  gcptr R, W;
+  if (!(P->h_tid & GCFLAG_GLOBAL))
+    {
+      W = P;
+      R = (gcptr)W->h_revision;
+    }
+  else
+    {
+      struct tx_descriptor *d = thread_descriptor;
+      assert(d->active);
+      if (P->h_tid & GCFLAG_POSSIBLY_OUTDATED)
+        R = LatestGlobalRevision(d, P, NULL, 0);
+      else
+        R = P;
+      W = Localize(d, R);
+    }
+  W->h_tid &= ~GCFLAG_NOT_WRITTEN;
+  R->h_tid |= GCFLAG_POSSIBLY_OUTDATED;
+  return W;
+}
+
+gcptr _WriteBarrierFromReady(gcptr R)
+{
+  gcptr W;
+  if (!(R->h_tid & GCFLAG_GLOBAL))
+    {
+      W = R;
+      R = (gcptr)W->h_revision;
+    }
+  else
+    {
+      struct tx_descriptor *d = thread_descriptor;
+      assert(d->active);
+      W = Localize(d, R);
+    }
+  W->h_tid &= ~GCFLAG_NOT_WRITTEN;
+  R->h_tid |= GCFLAG_POSSIBLY_OUTDATED;
+  return W;
+}
+
+/************************************************************/
+
+static revision_t GetGlobalCurTime(struct tx_descriptor *d)
+{
+  revision_t t;
+  assert(!is_inevitable(d));    // this thread must not itself be inevitable
+  while (1)
+    {
+      t = global_cur_time;
+      if (t != INEVITABLE)
+        return t;
+      // there is another transaction that is inevitable
+      inev_mutex_acquire();     // wait until released
+      inev_mutex_release();
+      // retry
+    }
+}
+
+static void ValidateDuringTransaction(struct tx_descriptor *d)
+{
+
+  long i, size = d->list_of_read_objects.size;
+  gcptr *items = d->list_of_read_objects.items;
+
+  assert(!is_inevitable(d));
+  d->start_time = GetGlobalCurTime(d);   // copy from the global time
+
+  for (i=0; i<size; i++)
+    {
+      gcptr R = items[i];
+      revision_t v;
+    retry:
+      v = R->h_revision;
+      if (!(v & 1))               // "is a pointer", i.e.
+        AbortTransaction(1);      //   "has a more recent revision"
+      if (v >= LOCKED)            // locked
+        goto retry;
+    }
+}
+
+static _Bool ValidateDuringCommit(struct tx_descriptor *d)
+{
+  long i, size = d->list_of_read_objects.size;
+  gcptr *items = d->list_of_read_objects.items;
+  revision_t my_lock = d->my_lock;
+
+  for (i=0; i<size; i++)
+    {
+      gcptr R = items[i];
+      revision_t v = R->h_revision;
+      if (!(v & 1))               // "is a pointer", i.e.
+        return 0;                 //   "has a more recent revision"
+      if (v >= LOCKED)            // locked
+        if (v != my_lock)         // and not by me
+          return 0;               // XXX abort or spinloop??
+    }
+  return 1;
+}
+
+/************************************************************/
+
+static void SpinLoop(int num)
+{
+  struct tx_descriptor *d = thread_descriptor;
+  assert(d->active);
+  assert(num < SPINLOOP_REASONS);
+  d->num_spinloops[num]++;
+  spinloop();
+}
+
+static void AbortTransaction(int num)
+{
+  struct tx_descriptor *d = thread_descriptor;
+  assert(d->active);
+  assert(!is_inevitable(d));
+  assert(num < ABORT_REASONS);
+  d->num_aborts[num]++;
+
+  CancelLocks(d);
+
+#ifdef RPY_STM_DEBUG_PRINT
+  PYPY_DEBUG_START("stm-abort");
+  if (PYPY_HAVE_DEBUG_PRINTS)
+      fprintf(PYPY_DEBUG_FILE, "thread %lx aborting %d\n",
+                               (long)d->my_lock, num);
+  PYPY_DEBUG_STOP("stm-abort");
+#endif
+
+  // notifies the CPU that we're potentially in a spin loop
+  SpinLoop(0);
+  // jump back to the setjmp_buf (this call does not return)
+  d->active = 0;
+  longjmp(*d->setjmp_buf, 1);
+}
+
+/************************************************************/
+
+void BeginTransaction(jmp_buf* buf)
+{
+  struct tx_descriptor *d = thread_descriptor;
+  assert(d->active == 0);
+  d->active = 1;
+  d->setjmp_buf = buf;
+  gcptrlist_clear(&d->list_of_read_objects);
+  gcptrlist_clear(&d->gcroots);
+  g2l_clear(&d->global_to_local);
+  fxcache_clear(&d->recent_reads_cache);
+  d->start_time = GetGlobalCurTime(d);
+}
+
+#if 0
+static int compare_by_R(const void *a, const void *b)
+{
+    gcptr Ra = *(const gcptr *)a;
+    gcptr Rb = *(const gcptr *)b;
+    if (Ra < Rb)
+        return -1;
+    else if (Ra == Rb)
+        return 0;
+    else
+        return 1;
+}
+#endif
+
+static void AcquireLocks(struct tx_descriptor *d)
+{
+  revision_t my_lock = d->my_lock;
+  struct gcroot_s *item = (struct gcroot_s *)d->gcroots.items;
+#if 0
+  // gcroots should be sorted in some deterministic order by construction
+  qsort(item, d->gcroots.size / 3, sizeof(struct gcroot_s), &compare_by_R);
+#endif
+  while (item->R != NULL)
+    {
+      gcptr R = item->R;
+      revision_t v;
+    retry:
+      v = R->h_revision;
+      if (!(v & 1))            // "is a pointer", i.e.
+        AbortTransaction(0);   //   "has a more recent revision"
+      if (v >= LOCKED)         // already locked by someone else
+        {
+          // we can always spinloop here: deadlocks should be impossible,
+          // because FindRootsForLocalCollect's G2L_LOOP_FORWARD should
+          // ensure a consistent ordering of the R's.
+          SpinLoop(2);
+          goto retry;
+        }
+      if (!bool_cas((volatile revision_t *)&R->h_revision, v, my_lock))
+        goto retry;
+
+      item->v = v;
+      item++;
+    }
+}
+
+static void CancelLocks(struct tx_descriptor *d)
+{
+  long i, lastitem = d->gcroots.size - 3;
+  gcptr *items = d->gcroots.items;
+  for (i=0; i<=lastitem; i+=3)
+    {
+      gcptr R = items[i];
+      gcptr v = items[i+2];
+      if (v != 0)
+        {
+          R->h_revision = (revision_t)v;
+          // if we're going to retry later, and abort,
+          // then we must not re-cancel the same entries
+          items[i+2] = 0;
+        }
+    }
+}
+
+static void UpdateChainHeads(struct tx_descriptor *d, revision_t cur_time)
+{
+  struct gcroot_s *item, *itemstart = (struct gcroot_s *)d->gcroots.items;
+  revision_t new_revision = cur_time + 1;     // make an odd number
+  assert(new_revision & 1);
+
+  for (item = itemstart; item->R != NULL; item++)
+    {
+      gcptr L = item->L;
+      assert((L->h_tid & (GCFLAG_GLOBAL |
+                          GCFLAG_NOT_WRITTEN |
+                          GCFLAG_POSSIBLY_OUTDATED)) ==
+              (GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN));
+      L->h_revision = new_revision;
+    }
+  smp_wmb();
+  for (item = itemstart; item->R != NULL; item++)
+    {
+      gcptr L = item->L;
+      gcptr R = item->R;
+      assert((R->h_tid & (GCFLAG_GLOBAL |
+                          GCFLAG_NOT_WRITTEN |
+                          GCFLAG_POSSIBLY_OUTDATED)) ==
+              (GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN | GCFLAG_POSSIBLY_OUTDATED));
+      R->h_revision = (revision_t)L;
+    }
+}
+
+struct gcroot_s *FindRootsForLocalCollect(void)
+{
+  struct tx_descriptor *d = thread_descriptor;
+  wlog_t *item;
+  G2L_LOOP_FORWARD(d->global_to_local, item)
+    {
+      gcptr R = item->addr;
+      gcptr L = item->val;
+      if (L->h_tid & GCFLAG_NOT_WRITTEN)
+        {
+          assert(L->h_revision == (revision_t)R);
+          L->h_tid |= GCFLAG_GLOBAL | GCFLAG_POSSIBLY_OUTDATED;
+          continue;
+        }
+      gcptrlist_insert3(&d->gcroots, R, L, (gcptr)0);
+    } G2L_LOOP_END;
+  gcptrlist_insert(&d->gcroots, NULL);
+  return (struct gcroot_s *)d->gcroots.items;
+}
+
+int _FakeReach(gcptr P)
+{
+  if (P->h_tid & GCFLAG_GLOBAL)
+    return 0;
+  P->h_tid |= GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN;
+  return 1;
+}
+
+revision_t CommitTransaction(void)
+{
+  revision_t cur_time;
+  struct tx_descriptor *d = thread_descriptor;
+  assert(d->active != 0);
+
+  AcquireLocks(d);
+
+  if (is_inevitable(d))
+    {
+      // no-one else can have changed global_cur_time if I'm inevitable
+      cur_time = d->start_time;
+      if (!bool_cas(&global_cur_time, INEVITABLE, cur_time + 2))
+        {
+          assert(!"global_cur_time modified even though we are inev.");
+          abort();
+        }
+      inev_mutex_release();
+    }
+  else
+    {
+      while (1)
+        {
+          cur_time = global_cur_time;
+          if (cur_time == INEVITABLE)
+            {
+              CancelLocks(d);
+              inev_mutex_acquire();   // wait until released
+              inev_mutex_release();
+              AcquireLocks(d);
+              continue;
+            }
+          if (bool_cas(&global_cur_time, cur_time, cur_time + 2))
+            break;
+        }
+      // validate (but skip validation if nobody else committed)
+      if (cur_time != d->start_time)
+        if (!ValidateDuringCommit(d))
+          AbortTransaction(2);
+    }
+  UpdateChainHeads(d, cur_time);
+
+  d->num_commits++;
+  d->active = 0;
+  return cur_time;
+}
+
+/************************************************************/
+
+void BecomeInevitable(void)
+{
+  revision_t cur_time;
+  struct tx_descriptor *d = thread_descriptor;
+  if (is_inevitable(d))
+    return;
+
+  inev_mutex_acquire();
+  cur_time = global_cur_time;
+  while (!bool_cas(&global_cur_time, cur_time, INEVITABLE))
+    cur_time = global_cur_time;     /* try again */
+  assert(cur_time != INEVITABLE);
+
+  if (d->start_time != cur_time)
+    {
+      d->start_time = cur_time;
+      if (!ValidateDuringCommit(d))
+        {
+          global_cur_time = cur_time;   // must restore the old value
+          inev_mutex_release();
+          AbortTransaction(3);
+        }
+    }
+  d->active = 2;
+  d->setjmp_buf = NULL;   /* cannot abort any more */
+}
+
+/************************************************************/
+
+inline static gcptr GlobalizeForComparison(struct tx_descriptor *d, gcptr P)
+{
+  if (P == NULL)
+    return NULL;
+  else if (P->h_tid & GCFLAG_GLOBAL)
+    return LatestGlobalRevision(d, P, NULL, 0);
+  else if (P->h_revision != REV_INITIAL)
+    return (gcptr)P->h_revision;    // return the original global obj
+  else
+    return P;        // local, allocated during this transaction
+}
+
+_Bool PtrEq(gcptr P1, gcptr P2)
+{
+  struct tx_descriptor *d = thread_descriptor;
+  return GlobalizeForComparison(d, P1) == GlobalizeForComparison(d, P2);
+}
+
+/************************************************************/
+
+revision_t DescriptorInit(void)
+{
+  assert(thread_descriptor == NULL);
+  if (1)
+    {
+      struct tx_descriptor *d = malloc(sizeof(struct tx_descriptor));
+      memset(d, 0, sizeof(struct tx_descriptor));
+
+#ifdef RPY_STM_DEBUG_PRINT
+      PYPY_DEBUG_START("stm-init");
+#endif
+
+      /* initialize 'my_lock' to be a unique odd number >= LOCKED */
+      while (1)
+        {
+          d->my_lock = next_locked_value;
+          if (bool_cas(&next_locked_value, d->my_lock, d->my_lock + 2))
+            break;
+        }
+      if (d->my_lock < LOCKED)
+        {
+          /* XXX fix this limitation */
+          fprintf(stderr, "XXX error: too many threads ever created "
+                          "in this process");
+          abort();
+        }
+      assert(d->my_lock & 1);
+      thread_descriptor = d;
+
+#ifdef RPY_STM_DEBUG_PRINT
+      if (PYPY_HAVE_DEBUG_PRINTS)
+        fprintf(PYPY_DEBUG_FILE, "thread %lx starting (pthread=%lx)\n",
+                (long)d->my_lock, (long)pthread_self());
+      PYPY_DEBUG_STOP("stm-init");
+#endif
+      return d->my_lock;
+    }
+}
+
+void DescriptorDone(void)
+{
+  struct tx_descriptor *d = thread_descriptor;
+  assert(d != NULL);
+  assert(d->active == 0);
+
+  thread_descriptor = NULL;
+
+#ifdef RPY_STM_DEBUG_PRINT
+  PYPY_DEBUG_START("stm-done");
+  if (PYPY_HAVE_DEBUG_PRINTS) {
+    int num_aborts = 0, num_spinloops = 0;
+    int i;
+    char line[256], *p = line;
+
+    for (i=0; i<ABORT_REASONS; i++)
+      num_aborts += d->num_aborts[i];
+    for (i=0; i<SPINLOOP_REASONS; i++)
+      num_spinloops += d->num_spinloops[i];
+
+    p += sprintf(p, "thread %lx: %d commits, %d aborts\n",
+                 (long)d->my_lock,
+                 d->num_commits,
+                 num_aborts);
+
+    for (i=0; i<ABORT_REASONS; i++)
+      p += sprintf(p, "%c%d", i == 0 ? '[' : ',',
+                   d->num_aborts[i]);
+
+    for (i=1; i<SPINLOOP_REASONS; i++)  /* num_spinloops[0] == num_aborts */
+      p += sprintf(p, "%c%d", i == 1 ? '|' : ',',
+                   d->num_spinloops[i]);
+
+    p += sprintf(p, "]\n");
+    fwrite(line, 1, p - line, PYPY_DEBUG_FILE);
+  }
+  PYPY_DEBUG_STOP("stm-done");
+#endif
+
+  free(d);
+}

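The key encoding the new et.c relies on is that h_revision holds either an odd number (a timestamp: the object is the newest revision) or an even value, which is really a pointer to a more recent revision of the same object. A sketch of the chain walk, i.e. LatestGlobalRevision() minus the locking and validation (not part of the commit):

    #include <stdint.h>

    typedef uintptr_t revision_t;
    typedef struct hdr { long h_tid; revision_t h_revision; } *gcptr;

    /* Pointers are aligned, hence even: an even h_revision means
       "follow me"; an odd one is a timestamp marking the chain head. */
    static gcptr chain_head(gcptr G)
    {
        revision_t v;
        while (!((v = G->h_revision) & 1))
            G = (gcptr)v;
        return G;
    }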
File pypy/translator/stm/src_stm/et.h

 /*** Extendable Timestamps
  *
- * This is a (heavily modified) C version of rstm_r5/stm/et.hpp.
+ * Documentation:
+ * https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/stm2012/stmimpl.rst
+ *
+ * This is very indirectly based on rstm_r5/stm/et.hpp.
  * See http://www.cs.rochester.edu/research/synchronization/rstm/api.shtml
  *
+ * Stand-alone version of these files, including random stress-tests:
+ * https://bitbucket.org/arigo/arigo/raw/default/hack/stm/c2
+ *
  */
 
 #ifndef _ET_H
 #define _ET_H
 
+#include <stddef.h>
+#include <stdint.h>
 #include <setjmp.h>
 
 
-/* see comments in ../stmgcintf.py */
-void stm_set_tls(void *);
-void *stm_get_tls(void);
-void stm_del_tls(void);
+#define GCFLAG_GLOBAL              0x10000
+#define GCFLAG_POSSIBLY_OUTDATED   0x20000
+#define GCFLAG_NOT_WRITTEN         0x40000
 
-void *stm_tldict_lookup(void *);
-void stm_tldict_add(void *, void *);
-void stm_tldict_enum(void);
+#define GCFLAG_PREBUILT            (GCFLAG_GLOBAL|GCFLAG_NOT_WRITTEN)
+#define REV_INITIAL                1
 
-long stm_descriptor_init(void);
-void stm_descriptor_done(void);
+typedef uintptr_t revision_t;
 
-void stm_begin_inevitable_transaction(void);
-void stm_commit_transaction(void);
+typedef struct pypy_header0 {
+    long h_tid;
+    revision_t h_revision;
+} *gcptr;
 
-long stm_in_transaction(void);
-long stm_is_inevitable(void);
-void stm_add_atomic(long);
-long stm_get_atomic(void);
-long stm_should_break_transaction(void);
-void stm_set_transaction_length(long);
 
-void stm_perform_transaction(long(*)(void*, long), void*, void*);
+#define STM_READ_BARRIER_P(P)                                           \
+    (__builtin_expect((((gcptr)(P))->h_tid & GCFLAG_GLOBAL) == 0, 1) ?  \
+     (P) : (typeof(P))_DirectReadBarrier((gcptr)(P)))
 
-/* these functions are declared by generated C code from the GC
-   (see llop.nop(...)) */
-extern long pypy_g__stm_getsize(void *);
-extern void pypy_g__stm_enum_callback(void *, void *, void *);
+#define STM_READ_BARRIER_P_FROM_R(P, R_container, offset)               \
+    (__builtin_expect((((gcptr)(P))->h_tid & GCFLAG_GLOBAL) == 0, 1) ?  \
+     (P) : (typeof(P))_DirectReadBarrierFromR((gcptr)(P),               \
+                                              (gcptr)(R_container),     \
+                                              offset))
 
-char      stm_read_int1(void *, long);
-short     stm_read_int2(void *, long);
-int       stm_read_int4(void *, long);
-long long stm_read_int8(void *, long);
-double    stm_read_int8f(void *, long);
-float     stm_read_int4f(void *, long);
+#define STM_WRITE_BARRIER_P(R)                                          \
+    (__builtin_expect((((gcptr)(R))->h_tid & GCFLAG_NOT_WRITTEN) == 0, 1) ? \
+     (R) : (typeof(R))_WriteBarrier((gcptr)(R)))
 
+#define STM_WRITE_BARRIER_R(R)                                          \
+    (__builtin_expect((((gcptr)(R))->h_tid & GCFLAG_NOT_WRITTEN) == 0, 1) ? \
+     (R) : (typeof(R))_WriteBarrierFromReady((gcptr)(R)))
 
-#if 1  /* #ifdef RPY_STM_ASSERT --- but it's always useful to have this info */
-#  define STM_CCHARP1(arg)    char* arg
-#  define STM_EXPLAIN1(info)  info
-#else
-#  define STM_CCHARP1(arg)    void
-#  define STM_EXPLAIN1(info)  /* nothing */
-#endif
+#define STM_NONTRANSACTIONAL_READ_BARRIER(P)                            \
+    ((typeof(P))_NonTransactionalReadBarrier((gcptr)(P)))
 
+#define _REACH(P) _FakeReach((gcptr)(P))
 
-void stm_try_inevitable(STM_CCHARP1(why));
-void stm_abort_and_retry(void);
+#define BEGIN_TRANSACTION                         \
+  {                                               \
+    jmp_buf _jmpbuf;                              \
+    setjmp(_jmpbuf);                              \
+    BeginTransaction(&_jmpbuf);                   \
+    {
 
-void stm_copy_transactional_to_raw(void *src, void *dst, long size);
+#define END_TRANSACTION                           \
+    }                                             \
+    CommitTransaction();                          \
+  }
+#define _END_TRANSACTION_NUM(t)                   \
+    }                                             \
+    t = CommitTransaction();                      \
+  }
 
-
-/************************************************************/
-
-/* These are the same two flags as defined in stmgc.py */
-
-enum {
-  first_gcflag      = 1L << (PYPY_LONG_BIT / 2),
-  GCFLAG_GLOBAL     = first_gcflag << 0,
-  GCFLAG_WAS_COPIED = first_gcflag << 1
+struct gcroot_s {
+    gcptr R, L;
+    revision_t v;
 };
 
+void BeginTransaction(jmp_buf *);
+struct gcroot_s *FindRootsForLocalCollect(void);
+int _FakeReach(gcptr);
+revision_t CommitTransaction(void);
+void BecomeInevitable(void);
+//void BeginInevitableTransaction(void);
+revision_t DescriptorInit(void);
+void DescriptorDone(void);
 
-#define RPY_STM_ARRAY(T, size, ptr, field)                      \
-    _RPY_STM(T, size, ptr, ((char*)&field)-((char*)ptr), field)
+gcptr Allocate(size_t size, int gctid);
+_Bool PtrEq(gcptr P1, gcptr P2);
 
-#define RPY_STM_FIELD(T, size, STRUCT, ptr, field)              \
-    _RPY_STM(T, size, ptr, offsetof(STRUCT, field), ptr->field)
+gcptr _DirectReadBarrier(gcptr);
+gcptr _DirectReadBarrierFromR(gcptr, gcptr, size_t);
+gcptr _WriteBarrier(gcptr);
+gcptr _WriteBarrierFromReady(gcptr);
+gcptr _NonTransactionalReadBarrier(gcptr);
 
-#define _RPY_STM(T, size, ptr, offset, field)           \
-    (((*(long*)ptr) & GCFLAG_GLOBAL) == 0 ? field :     \
-     (T)stm_read_int##size(ptr, offset))
+
+extern size_t pypy_g__stm_getsize(gcptr);
 
 
 #endif  /* _ET_H */

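A sketch of hypothetical client code (not in the commit) tying these macros together: objects embed struct pypy_header0 as their first field, reads of possibly-global objects go through a read barrier, and writes through a write barrier, all inside a transaction block. struct node and increment() are invented for illustration:

    struct node {
        struct pypy_header0 hdr;   /* h_tid and h_revision come first */
        long value;
    };

    void increment(struct node *p)
    {
        BEGIN_TRANSACTION
            struct node *r = STM_READ_BARRIER_P(p);
            long v = r->value;
            struct node *w = STM_WRITE_BARRIER_P(r);
            w->value = v + 1;   /* on conflict, longjmp retries the block */
        END_TRANSACTION
    }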
File pypy/translator/stm/src_stm/lists.c

 
 /************************************************************/
 
-/* The redolog_xx functions are implemented as a tree, supporting
-   very high performance in REDOLOG_FIND in the common case where
+/* The g2l_xx functions ("global_to_local") are implemented as a tree,
+   supporting very high performance in G2L_FIND in the common case where
    there are no or few elements in the tree, but scaling correctly
    if the number of items becomes large. */
 
 #define TREE_MASK   ((TREE_ARITY - 1) * sizeof(void*))
 
 typedef struct {
-  void* addr;
-  void *val;
-  owner_version_t p;   // the previous version number (if locked)
+  gcptr addr;
+  gcptr val;
 } wlog_t;
 
 typedef struct {
   char *items[TREE_ARITY];
 } wlog_node_t;
 
-struct RedoLog {
+struct G2L {
   char *raw_start, *raw_current, *raw_end;
   wlog_node_t toplevel;
 };
 
-static void _redolog_clear_node(wlog_node_t *node)
+static void _g2l_clear_node(wlog_node_t *node)
 {
   memset(node, 0, sizeof(wlog_node_t));
 }
 
-static void redolog_clear(struct RedoLog *redolog)
+static void g2l_clear(struct G2L *g2l)
 {
-  if (redolog->raw_current != redolog->raw_start)
+  if (g2l->raw_current != g2l->raw_start)
     {
-      _redolog_clear_node(&redolog->toplevel);
-      redolog->raw_current = redolog->raw_start;
+      _g2l_clear_node(&g2l->toplevel);
+      g2l->raw_current = g2l->raw_start;
     }
 }
 
-static int redolog_any_entry(struct RedoLog *redolog)
+#if 0
+static int g2l_any_entry(struct G2L *g2l)
 {
-  return redolog->raw_current != redolog->raw_start;
+  return g2l->raw_current != g2l->raw_start;
 }
+#endif
 
-#define _REDOLOG_LOOP(redolog, item, INITIAL, _PLUS_)                   \
+#define _G2L_LOOP(g2l, item, INITIAL, _PLUS_)                           \
 {                                                                       \
   struct { char **next; char **end; } _stack[TREE_DEPTH_MAX], *_stackp; \
   char **_next, **_end, *_entry;                                        \
   /* initialization */                                                  \
   _stackp = _stack;      /* empty stack */                              \
-  _next = (redolog).toplevel.items + INITIAL;                           \
+  _next = (g2l).toplevel.items + INITIAL;                               \
   _end = _next _PLUS_ TREE_ARITY;                                       \
   /* loop */                                                            \
   while (1)                                                             \
       /* points to a wlog_t item */                                     \
       item = (wlog_t *)_entry;
 
-#define REDOLOG_LOOP_FORWARD(redolog, item)                             \
-                       _REDOLOG_LOOP(redolog, item, 0, +)
-#define REDOLOG_LOOP_BACKWARD(redolog, item)                            \
-                       _REDOLOG_LOOP(redolog, item, (TREE_ARITY-1), -)
-#define REDOLOG_LOOP_END     } }
+#define G2L_LOOP_FORWARD(g2l, item)                             \
+                       _G2L_LOOP(g2l, item, 0, +)
+#define G2L_LOOP_BACKWARD(g2l, item)                            \
+                       _G2L_LOOP(g2l, item, (TREE_ARITY-1), -)
+#define G2L_LOOP_END     } }
 
-#define REDOLOG_FIND(redolog, addr1, result, goto_not_found)    \
+#define G2L_FIND(g2l, addr1, result, goto_not_found)            \
 {                                                               \
   unsigned long _key = (unsigned long)(addr1);                  \
-  char *_p = (char *)((redolog).toplevel.items);                \
+  char *_p = (char *)((g2l).toplevel.items);                    \
   char *_entry = *(char **)(_p + (_key & TREE_MASK));           \
   if (__builtin_expect(_entry == NULL, 1))                      \
     goto_not_found;    /* common case, hopefully */             \
-  result = _redolog_find(_entry, addr1);                        \
+  result = _g2l_find(_entry, addr1);                            \
   if (result == NULL || result->addr != (addr1))                \
     goto_not_found;                                             \
 }
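
Because G2L_FIND is a statement-like macro whose last argument is executed wholesale on a miss, call sites typically hand it a goto. A hypothetical caller (the global_to_local field name and the helpers are invented for the example, not taken from this commit):

    /* look up the local copy of 'obj', creating one on a miss */
    wlog_t *entry;
    G2L_FIND(d->global_to_local, obj, entry, goto not_found);
    use(entry->val);              /* hit: reuse the recorded copy */
    goto done;
  not_found:
    g2l_insert(&d->global_to_local, obj, make_local_copy(obj));
  done:;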
 
-static wlog_t *_redolog_find(char *entry, long* addr)
+static wlog_t *_g2l_find(char *entry, gcptr addr)
 {
   unsigned long key = (unsigned long)addr;
   while (((long)entry) & 1)
   return (wlog_t *)entry;   /* may be NULL */
 }
 
-static void redolog_insert(struct RedoLog *redolog, void* addr, void *val);
+static void g2l_insert(struct G2L *g2l, gcptr addr, gcptr val);
 
-static void _redolog_grow(struct RedoLog *redolog, long extra)
+static void _g2l_grow(struct G2L *g2l, long extra)
 {
-  struct RedoLog newredolog;
-  wlog_t *item, *newitem;
-  long alloc = redolog->raw_end - redolog->raw_start;
+  struct G2L newg2l;
+  wlog_t *item;
+  long alloc = g2l->raw_end - g2l->raw_start;
   long newalloc = (alloc + extra + (alloc >> 2) + 31) & ~15;  /* grow by ~25% plus 'extra', rounded to a multiple of 16 */
   //fprintf(stderr, "growth: %ld\n", newalloc);
   char *newitems = malloc(newalloc);
-  newredolog.raw_start = newitems;
-  newredolog.raw_current = newitems;
-  newredolog.raw_end = newitems + newalloc;
-  _redolog_clear_node(&newredolog.toplevel);
-  REDOLOG_LOOP_FORWARD(*redolog, item)
+  newg2l.raw_start = newitems;
+  newg2l.raw_current = newitems;
+  newg2l.raw_end = newitems + newalloc;
+  _g2l_clear_node(&newg2l.toplevel);
+  G2L_LOOP_FORWARD(*g2l, item)
     {
-      assert(item->p == -1);
-      redolog_insert(&newredolog, item->addr, item->val);
-    } REDOLOG_LOOP_END;
-  free(redolog->raw_start);
-  *redolog = newredolog;
+      g2l_insert(&newg2l, item->addr, item->val);
+    } G2L_LOOP_END;
+  free(g2l->raw_start);
+  *g2l = newg2l;
 }
 
-static char *_redolog_grab(struct RedoLog *redolog, long size)
+static char *_g2l_grab(struct G2L *g2l, long size)
 {
   char *result;
-  result = redolog->raw_current;
-  redolog->raw_current += size;
-  if (redolog->raw_current > redolog->raw_end)
+  result = g2l->raw_current;
+  g2l->raw_current += size;
+  if (g2l->raw_current > g2l->raw_end)
     {
-      _redolog_grow(redolog, size);
+      _g2l_grow(g2l, size);   /* rebuilds the tree into a fresh buffer; caller must retry */
       return NULL;
     }
   return result;
 }
 
-static void redolog_insert(struct RedoLog *redolog, void* addr, void *val)
+static void g2l_insert(struct G2L *g2l, gcptr addr, gcptr val)
 {
  retry:;
   wlog_t *wlog;
   unsigned long key = (unsigned long)addr;
   int shift = 0;
-  char *p = (char *)(redolog->toplevel.items);
+  char *p = (char *)(g2l->toplevel.items);
   char *entry;
   assert((key & (sizeof(void*)-1)) == 0);   /* only for aligned keys */
   while (1)
           assert(wlog1->addr != addr);
           /* collision: there is already a different wlog here */
           wlog_node_t *node = (wlog_node_t *)
-                _redolog_grab(redolog, sizeof(wlog_node_t));
+                _g2l_grab(g2l, sizeof(wlog_node_t));
           if (node == NULL) goto retry;
-          _redolog_clear_node(node);
+          _g2l_clear_node(node);
           unsigned long key1 = (unsigned long)(wlog1->addr);
           char *p1 = (char *)(node->items);
           *(wlog_t **)(p1 + ((key1 >> shift) & TREE_MASK)) = wlog1;
           p = p1;
         }
     }
-  wlog = (wlog_t *)_redolog_grab(redolog, sizeof(wlog_t));
+  wlog = (wlog_t *)_g2l_grab(g2l, sizeof(wlog_t));
   if (wlog == NULL) goto retry;
   wlog->addr = addr;
   wlog->val = val;
-  wlog->p = -1;
   *(char **)p = (char *)wlog;
 }
 
 /************************************************************/
 
-/* The oreclist_xx functions are implemented as an array that grows
+/* The gcptrlist_xx functions are implemented as an array that grows
    as needed. */
 
-struct OrecList {
+struct GcPtrList {
   long size, alloc;
-  unsigned long locked;
-  orec_t **items;
+  gcptr *items;
 };
 
-static void _oreclist_grow(struct OrecList *oreclist)
+static void gcptrlist_clear(struct GcPtrList *gcptrlist)
 {
-  long newalloc = oreclist->alloc + (oreclist->alloc >> 1) + 16;
-  orec_t **newitems = malloc(newalloc * sizeof(orec_t *));
-  long i;
-  for (i=0; i<oreclist->size; i++)
-    newitems[i] = oreclist->items[i];
-  while (!bool_cas(&oreclist->locked, 0, 1))
-    /* rare case */ ;
-  free(oreclist->items);
-  oreclist->items = newitems;
-  oreclist->alloc = newalloc;
-  CFENCE;
-  oreclist->locked = 0;
+  gcptrlist->size = 0;
 }
 
-static void oreclist_insert(struct OrecList *oreclist, orec_t *newitem)
+static void _gcptrlist_grow(struct GcPtrList *gcptrlist)
 {
-  /* XXX it would be a good idea to try to compactify the list,
-     by removing duplicate entries.  We are only interested in
-     the set of entries present, and the order doesn't matter.
-     See for example if, in practice, there are many consecutive
-     duplicate entries; if so, add a check here.  Otherwise, or
-     in addition, we need a general de-duplicator logic called
-     at every local_collection().
-  */
-  if (__builtin_expect(oreclist->size == oreclist->alloc, 0))
-    _oreclist_grow(oreclist);
-  oreclist->items[oreclist->size++] = newitem;
+  long newalloc = gcptrlist->alloc + (gcptrlist->alloc >> 1) + 16;
+  gcptr *newitems = malloc(newalloc * sizeof(gcptr));
+  long i;
+  for (i=0; i<gcptrlist->size; i++)
+    newitems[i] = gcptrlist->items[i];
+  free(gcptrlist->items);
+  gcptrlist->items = newitems;
+  gcptrlist->alloc = newalloc;
+}
+
+static inline void gcptrlist_insert(struct GcPtrList *gcptrlist, gcptr newitem)
+{
+  if (__builtin_expect(gcptrlist->size == gcptrlist->alloc, 0))
+    _gcptrlist_grow(gcptrlist);
+  gcptrlist->items[gcptrlist->size++] = newitem;
+}
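
A GcPtrList usage sketch (caller names are hypothetical). Note that a zero-initialized struct is already a valid empty list: _gcptrlist_grow() copies zero items and free(NULL) is a no-op, so the first insert simply allocates the initial 16-entry buffer:

    struct GcPtrList pending = { 0, 0, NULL };   /* valid empty list */
    long i;
    gcptrlist_insert(&pending, some_object);     /* grows on demand */
    for (i = 0; i < pending.size; i++)
      visit(pending.items[i]);
    gcptrlist_clear(&pending);  /* size = 0; allocation kept for reuse */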
+
+static void gcptrlist_insert3(struct GcPtrList *gcptrlist, gcptr newitem1,
+                              gcptr newitem2, gcptr newitem3)
+{
+  gcptr *items;
+  long i = gcptrlist->size;
+  if (__builtin_expect((gcptrlist->alloc - i) < 3, 0))
+    _gcptrlist_grow(gcptrlist);