Commits

Anonymous committed 22f596c

For spin lock, asm vs C is 15ns vs 17ns

Comments (0)

Files changed (2)

 /**
  * Acquire a ticket spin lock, busy-waiting until it is the caller's turn.
  *
  * The lock word is split into two 16-bit fields: the owner (currently served
  * ticket) in bits 0~15 and the next ticket to hand out in bits 16~31.
  * The caller takes a ticket by atomically adding 0x10000 to the word and
  * then spins until the owner field equals that ticket.
  *
  * Two implementations are kept side by side; "#if 1" selects the C one and
  * compiles the x86 inline-asm one out. In the asm variant, "lock xaddl" adds
  * inc to *lock and leaves the previous lock word in inc; its low 16 bits
  * (the owner at that moment) go to tmp and "shrl $16" reduces inc to the
  * caller's ticket. The loop at label 1 compares the two, executes "pause",
  * and reloads the low 16 bits of *lock until they match.
  *
  * @param lock  spin lock word to acquire
  * @return always 0
  */
 int pthread_spin_lock(pthread_spinlock_t *lock)
 {
     /* owner: 0~15, ticket: 16~31 */
+#if 1
     /* Take a ticket: the upper half of the value returned by the fetch-add.
      * NOTE(review): assumes atomic_fetch_and_add returns the value from
      * before the addition - confirm against its definition. */
     long ticket = 0xFFFF & (atomic_fetch_and_add(lock, 0x10000) >> 16);
 
     /* Wait until the owner field reaches our ticket. */
     while ((0xFFFF & atomic_read(lock)) != ticket)
         cpu_relax();
+#else
+    int inc = 0x10000;
+    int tmp;
+
+    asm volatile("lock xaddl %0, %1\n"
+                 "movzwl %w0, %2\n\t"
+                 "shrl $16, %0\n\t"
+                 "1:\t"
+                 "cmpl %0, %2\n\t"
+                 "je 2f\n\t"
+                 "pause\n\t"
+                 "movzwl %1, %2\n\t"
+                 "jmp 1b\n"
+                 "2:"
+                 : "+r" (inc), "+m" (*lock), "=&r" (tmp)
+                 :
+                 : "memory", "cc");
+#endif
 
     return 0;
 }
 /**
  * Try to acquire the ticket spin lock without waiting.
  *
  * The lock is free only when the owner field (bits 0~15) equals the ticket
  * field (bits 16~31). In that case the ticket field is advanced by one with
  * a compare-and-swap, so the attempt also fails if the lock word changed
  * after it was read.
  *
  * Two implementations are kept side by side; "#if 1" selects the C one and
  * compiles the x86 inline-asm one out. The asm variant loads the lock word,
  * rotates a copy by 16 bits and compares it with the original: they are
  * equal only when both halves match, i.e. the lock is free. If not, it
  * jumps to label 1 with ZF clear; otherwise "lock cmpxchgl" tries to store
  * word + 0x10000. "sete" captures ZF, so tmp ends up 1 on success and 0 on
  * failure.
  *
  * @param lock  spin lock word to acquire
  * @return 0 when the lock was acquired, EBUSY when it is held or contended
  */
 int pthread_spin_trylock(pthread_spinlock_t *lock)
 {
     /* owner: 0~15, ticket: 16~31 */
+#if 1
     long tmp = atomic_read(lock);
     if ((tmp & 0xFFFF) == (0xFFFF & (tmp >> 16))) {
         /* Free: claim the next ticket, but only if nobody raced us.
          * NOTE(review): atomic_cmpxchg(ptr, new, expected) is assumed to
          * return the previous value - confirm against its definition. */
         if (atomic_cmpxchg(lock, tmp + 0x10000, tmp) == tmp)
             return 0;
     }
 
     return EBUSY;
+#else
+    int tmp;
+    int ticket;
+
+    asm volatile("movl %2, %0\n\t"
+                 "movl %0, %1\n\t"
+                 "roll $16, %0\n\t"
+                 "cmpl %0, %1\n\t"
+                 "jne 1f\n\t"
+                 "addl $0x10000, %1\n\t"
+                 "lock cmpxchgl %1, %2\n\t"
+                 "1:"
+                 "sete %b1\n\t"
+                 "movzbl %b1, %0\n\t"
+                 : "=&a" (tmp), "=&q" (ticket), "+m" (*lock)
+                 :
+                 : "memory", "cc");
+
+    return tmp ? 0 : EBUSY;
+#endif
 }
 
 /**
 int pthread_spin_unlock(pthread_spinlock_t *lock)
 {
     /* owner: 0~15, ticket: 16~31 */
     /*
      * Release the ticket spin lock by advancing the owner field (bits 0~15)
      * by one, wrapping within 16 bits, so the next ticket holder may enter.
      * Always returns 0.
      *
      * Waiters add 0x10000 to the same word atomically while we are here, so
      * the C variant must not read the whole word and store it back with
      * plain accesses: a ticket taken between the read and the store would
      * be overwritten and handed out twice. The compare-and-swap loop
      * retries whenever the word changed underneath us. The x86 asm variant
      * avoids the problem by incrementing only the low 16-bit half.
      * NOTE(review): assumes atomic_cmpxchg(ptr, new, expected) returns the
      * previous value and orders prior writes before the release - confirm.
      */
+
+#if 1
     long old;
     long next;
 
     do {
         old = atomic_read(lock);
         /* Bump only the owner half; mask first so the add cannot overflow. */
         next = (old & 0xFFFF0000) | (0xFFFF & ((old & 0xFFFF) + 1));
     } while (atomic_cmpxchg(lock, next, old) != old);
+#else
+    asm volatile("incw %0"
+                 : "+m" (*lock)
+                 :
+                 : "memory", "cc");
+#endif
+
     return 0;
 }
 

test/test_speed.c

     LeaveCriticalSection(&cs);
 
     clock_gettime(CLOCK_MONOTONIC, &tp);
-    for(i = 0; i < TEST_TIMES; i++) {
+    for(i = TEST_TIMES * 100; i > 0; i--) {
         EnterCriticalSection(&cs);
         LeaveCriticalSection(&cs);
     }
 
     DeleteCriticalSection(&cs);
 
-    fprintf(stdout, "EnterCriticalSectio/LeaveCriticalSection: %7.3lf us\n", (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
+    fprintf(stdout, "EnterCriticalSectio/LeaveCriticalSection: %7.3lf us\n",
+        (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 100000.0));
 }
 
 void test_tid()
     GetCurrentThreadId();
 
     clock_gettime(CLOCK_MONOTONIC, &tp);
-    for(i = 0; i < TEST_TIMES; i++) {
+    for(i = TEST_TIMES * 1000; i > 0; i--) {
         GetCurrentThreadId();
     }
     clock_gettime(CLOCK_MONOTONIC, &tp2);
 
-    fprintf(stdout, "                      GetCurrentThreadId: %7.3lf us\n", (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
+    fprintf(stdout, "                      GetCurrentThreadId: %7.3lf us\n",
+        (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000000.0));
 }
 
 void test_evt()
 
     CloseHandle(event);
 
-    fprintf(stdout, "            SetEvent/WaitForSingleObject: %7.3lf us\n", (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
+    fprintf(stdout, "            SetEvent/WaitForSingleObject: %7.3lf us\n",
+        (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
 }
 
 void test_sem()
 
     CloseHandle(handle);
 
-    fprintf(stdout, "    ReleaseSemaphore/WaitForSingleObject: %7.3lf us\n", (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
+    fprintf(stdout, "    ReleaseSemaphore/WaitForSingleObject: %7.3lf us\n",
+        (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
 }
 
 void test_lps()
 
     CloseHandle(handle);
 
-    fprintf(stdout, "                                get_ncpu: %7.3lf us\n", (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
+    fprintf(stdout, "                                get_ncpu: %7.3lf us\n",
+        (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
 }
 
 void test_spin()
     pthread_spin_unlock(&lock);
 
     clock_gettime(CLOCK_MONOTONIC, &tp);
-    for(i = 0; i < TEST_TIMES; i++) {
+    for(i = TEST_TIMES * 100; i > 0; i--) {
         pthread_spin_lock(&lock);
         pthread_spin_unlock(&lock);
     }
     clock_gettime(CLOCK_MONOTONIC, &tp2);
 
-    fprintf(stdout, "   pthread_spin_lock/pthread_spin_unlock: %7.3lf us\n", (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
+    fprintf(stdout, "   pthread_spin_lock/pthread_spin_unlock: %7.3lf us\n",
+        (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 100000.0));
 }
 
 #ifdef _MSC_VER
     }
     clock_gettime(CLOCK_MONOTONIC, &tp2);
 
-    fprintf(stdout, "                          spin_count(%02d): %7.3lf us\n", count, (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
+    fprintf(stdout, "                          spin_count(%02d): %7.3lf us\n", count,
+        (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
 }
 
 void test_mutex()
     }
 
     clock_gettime(CLOCK_MONOTONIC, &tp);
-    for(i = 0; i < TEST_TIMES; i++) {
+    for(i = TEST_TIMES * 100; i > 0; i--) {
         pthread_mutex_lock(&mutex);
         pthread_mutex_unlock(&mutex);
     }
     clock_gettime(CLOCK_MONOTONIC, &tp2);
 
-    fprintf(stdout, " pthread_mutex_lock/pthread_mutex_unlock: %7.3lf us\n", (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
+    fprintf(stdout, " pthread_mutex_lock/pthread_mutex_unlock: %7.3lf us\n",
+        (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 100000.0));
 }
 
 #ifndef _MSC_VER
     }
     clock_gettime(CLOCK_MONOTONIC, &tp2);
 
-    fprintf(stdout, "               QueryPerformanceFrequency: %7.3lf us\n", (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
+    fprintf(stdout, "               QueryPerformanceFrequency: %7.3lf us\n",
+        (tp2.tv_nsec - tp.tv_nsec + (tp2.tv_sec - tp.tv_sec) * POW10_9) / (TEST_TIMES * 1000.0));
 }
 
 int main(int argc, char *argv[])