kamailio/fastlock.h


/*
* fast architecture specific locking
*
* $Id$
*
*
*
* Copyright (C) 2001-2003 FhG Fokus
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
 *
 * History:
 * --------
 *  2002-02-05  created by andrei
 *  2003-01-16  added PPC locking code contributed by Dinos Dorkofikis
 *               <kdor@intranet.gr>
 *  2004-09-12  added MIPS locking for ISA>=2 (>r3000) (andrei)
 *  2004-12-16  for now use the same locking code for sparc32 as for sparc64
 *               (it will work only if NOSMP is defined) (andrei)
 *  2005-04-27  added alpha locking code (andrei)
 *  2005-05-25  PPC locking code enabled for PPC64; added a lwsync to
 *               the tsl part and replaced the sync with a lwsync for the
 *               unlock part (andrei)
 *  2006-03-08  mips2 NOSMP (skip sync), optimized x86 & mips clobbers and
 *               input/output constraints (andrei)
 *  2006-04-03  optimization: call lock_get memory barrier outside tsl, in the
 *               calling function, only if the lock operation succeeded
 *               (membar_getlock()) (andrei)
 *              added try_lock(); more x86 optimizations, x86 release_lock
 *               fix (andrei)
 *  2006-04-04  sparc* optimizations, sparc32 smp support, armv6 no smp
 *               support, ppc, mips*, alpha optimizations (andrei)
 *  2006-04-05  ppc fixes (s/stw/stwx/, s/lwz/lwzx/), early clobber added
 *               where needed (andrei)
 *  2006-11-22  arm early clobber added: according to the swp instruction
 *               specification the address register must be != from the
 *               other 2 (Julien Blache <jblache@debian.org>)
 *
*/
/*
* WARNING: the code was not tested on the following architectures:
* - arm6 (cross-compiles ok, no test)
* - alpha (cross-compiles ok, no test)
* - mips64 (cross-compiles ok)
* - ppc64 (compiles ok)
* - sparc32 (tested on a sparc64)
*/
#ifndef fastlock_h
#define fastlock_h
#include "sched_yield.h"
#define SPIN_OPTIMIZE /* if defined, optimize spinning on the lock:
			try the lock first with non-atomic/non-memory-locking
			operations, and only if the lock appears to be free
			switch to the more expensive version
			(see the illustrative C sketch below) */
typedef volatile int fl_lock_t;
#define init_lock( l ) (l)=0
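
/* Illustrative C-level sketch of the SPIN_OPTIMIZE idea above
 * (test-and-test-and-set): check the lock with a cheap plain load first and
 * fall back to an atomic exchange only when it looks free. This is not part
 * of the original API; the real tsl() below implements the same pattern in
 * per-architecture inline assembly. __sync_lock_test_and_set() is used here
 * only as a generic stand-in for the architecture-specific atomic op. */
inline static int spin_optimize_sketch(fl_lock_t* lock)
{
	if (*lock!=0)
		return 1;	/* looks held: cheap, non-atomic check, no bus locking */
	/* lock appears free: do the expensive atomic test-and-set;
	 * returns the previous value (0 => we acquired the lock) */
	return __sync_lock_test_and_set(lock, 1);
}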
/* what membar to use (if any) after taking a lock. This
 * was separated from the lock code to allow better optimizations:
 * e.g. use membar_getlock only after getting the lock and don't use
 * it if lock_get fails / when spinning on tsl.
 * There is no corresponding membar_release_lock (because lock_release
 * must always include the needed memory barrier).
 * WARNING: this is intended only for internal fastlock use */
#if defined(__CPU_i386) || defined(__CPU_x86_64)
#define membar_getlock() /* not needed on x86 */
#elif defined(__CPU_sparc64)
#ifndef NOSMP
#define membar_getlock() \
asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
* since ldstub acts both as a store and as a load */
#else
/* no need for a compiler barrier, that is already included in lock_get/tsl*/
#define membar_getlock() /* not needed if no smp*/
#endif /* NOSMP */
#elif defined(__CPU_sparc)
#define membar_getlock()/* no need for a compiler barrier, already included */
#elif defined __CPU_arm || defined __CPU_arm6
#ifndef NOSMP
#warning smp not supported on arm* (no membars), try compiling with -DNOSMP
#endif /* NOSMP */
#define membar_getlock()
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
#ifndef NOSMP
#define membar_getlock() \
asm volatile("lwsync \n\t" : : : "memory");
#else
#define membar_getlock()
#endif /* NOSMP */
#elif defined __CPU_mips2 || defined __CPU_mips64
#ifndef NOSMP
#define membar_getlock() \
asm volatile("sync \n\t" : : : "memory");
#else
#define membar_getlock()
#endif /* NOSMP */
#elif defined __CPU_mips
#ifndef NOSMP
#warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
#endif
#define membar_getlock()
#elif defined __CPU_alpha
#ifndef NOSMP
#define membar_getlock() \
asm volatile("mb \n\t" : : : "memory");
#else
#define membar_getlock()
#endif /* NOSMP */
#else /* __CPU_xxx */
#error "unknown architecture"
#endif
/* test and set lock: returns !=0 if the lock is held by someone else,
 * 0 otherwise (the lock was acquired).
 * WARNING: no memory barriers included; if you use this function directly
 * (not recommended) and it gets the lock (ret==0), you should call
 * membar_getlock() after it */
inline static int tsl(fl_lock_t* lock)
{
int val;
#if defined(__CPU_i386) || defined(__CPU_x86_64)
#ifdef NOSMP
asm volatile(
" xor %0, %0 \n\t"
" btsl $0, %2 \n\t"
" setc %b0 \n\t"
: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory", "cc"
);
#else
asm volatile(
#ifdef SPIN_OPTIMIZE
" cmpb $0, %2 \n\t"
" mov $1, %0 \n\t"
" jnz 1f \n\t"
#else
" mov $1, %0 \n\t"
#endif
" xchgb %2, %b0 \n\t"
"1: \n\t"
: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory"
#ifdef SPIN_OPTIMIZE
, "cc"
#endif
);
#endif /*NOSMP*/
#elif defined(__CPU_sparc64)
asm volatile(
#ifdef SPIN_OPTIMIZE
" ldub [%2], %0 \n\t"
" brnz,a,pn %0, 1f \n\t"
" nop \n\t"
#endif
" ldstub [%2], %0 \n\t"
"1: \n\t"
/* membar_getlock must be called outside this function */
: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
);
#elif defined(__CPU_sparc)
asm volatile(
#ifdef SPIN_OPTIMIZE
" ldub [%2], %0 \n\t"
" tst %0 \n\t"
" bne,a 1f \n\t"
" nop \n\t"
#endif
" ldstub [%2], %0 \n\t"
"1: \n\t"
/* membar_getlock must be called outside this function */
: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
#ifdef SPIN_OPTIMIZE
, "cc"
#endif
);
#elif defined __CPU_arm
asm volatile(
"swp %0, %2, [%3] \n\t"
: "=&r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
);
#elif defined __CPU_arm6
asm volatile(
" ldrex %0, [%2] \n\t"
" cmp %0, #0 \n\t"
" strexeq %0, %3, [%2] \n\t" /* executed only if Z=1 */
/* if %0!=0 => either it was 1 initially or was 0
* and somebody changed it just before the strexeq (so the
* lock is taken) => it's safe to return %0 */
: "=&r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
);
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
asm volatile(
"1: \n\t"
#ifdef SPIN_OPTIMIZE
" lwzx %0, 0, %2 \n\t"
" cmpwi %0, 0 \n\t"
" bne- 2f \n\t" /* predict: not taken */
#endif
" lwarx %0, 0, %2\n\t"
" cmpwi %0, 0\n\t"
" bne- 2f\n\t"
" stwcx. %3, 0, %2\n\t"
" bne- 1b\n\t"
/* membar_getlock must be called outside this function */
"2:\n\t"
: "=&r" (val), "=m"(*lock) : "r"(lock), "r"(1) : "memory", "cc"
);
#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
|| defined __CPU_mips64
long tmp;
asm volatile(
".set push \n\t"
".set noreorder\n\t"
".set mips2 \n\t"
#ifdef SPIN_OPTIMIZE
" lw %1, %2 \n\t"
" bne %1, $0, 2f \n\t"
" nop \n\t"
#endif
"1: ll %1, %2 \n\t"
" bne %1, $0, 2f \n\t"
" li %0, 1 \n\t" /* delay slot */
" sc %0, %2 \n\t"
" beqz %0, 1b \n\t"
" nop \n\t"
"2: \n\t"
/* membar_getlock must be called outside this function */
".set pop\n\t"
: "=&r" (tmp), "=&r" (val), "=m" (*lock)
: "m" (*lock)
: "memory"
);
#elif defined __CPU_alpha
long tmp;
tmp=0;
/* lock low bit set to 1 when the lock is held and to 0 otherwise */
asm volatile(
"1: ldl %0, %1 \n\t"
" blbs %0, 2f \n\t" /* optimization if locked */
" ldl_l %0, %1 \n\t"
" blbs %0, 2f \n\t"
" lda %2, 1 \n\t" /* or: or $31, 1, %2 ??? */
" stl_c %2, %1 \n\t"
" beq %2, 3f \n\t" /* back cond. jumps are always predicted to be
taken => make forward jump */
/* membar_getlock must be called outside this function */
"2: \n\t"
".subsection 2 \n\t"
"3: br 1b \n\t"
".previous \n\t"
:"=&r" (val), "=m"(*lock), "=&r"(tmp)
:"m"(*lock)
: "memory"
);
#else
#error "unknown architecture"
#endif
return val;
}
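
/* Illustrative sketch (not part of the original API): using tsl() directly,
 * as described in the warning above. A hypothetical bounded spin that gives
 * up after a fixed number of attempts; on success the required
 * membar_getlock() is issued before entering the critical section. */
inline static int bounded_tsl_sketch(fl_lock_t* lock, int max_spins)
{
	while(max_spins--){
		if (tsl(lock)==0){
			membar_getlock();	/* acquire barrier only on success */
			return 0;	/* got the lock */
		}
	}
	return -1;	/* gave up, lock still held by someone else */
}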
inline static void get_lock(fl_lock_t* lock)
{
#ifdef ADAPTIVE_WAIT
int i=ADAPTIVE_WAIT_LOOPS;
#endif
while(tsl(lock)){
#ifdef BUSY_WAIT
#elif defined ADAPTIVE_WAIT
if (i>0) i--;
else sched_yield();
#else
sched_yield();
#endif
}
membar_getlock();
}
/* like get_lock, but it doesn't wait: returns 0 if it gets the lock,
 * <0 (-1) otherwise */
inline static int try_lock(fl_lock_t* lock)
{
if (tsl(lock)){
return -1;
}
membar_getlock();
return 0;
}
inline static void release_lock(fl_lock_t* lock)
{
#if defined(__CPU_i386)
#ifdef NOSMP
asm volatile(
" movb $0, %0 \n\t"
: "=m"(*lock) : : "memory"
);
#else /* ! NOSMP */
int val;
/* a simple mov $0, (lock) does not force StoreStore ordering on all
x86 versions and it doesn't seem to force LoadStore either */
asm volatile(
" xchgb %b0, %1 \n\t"
: "=q" (val), "=m" (*lock) : "0" (0) : "memory"
);
#endif /* NOSMP */
#elif defined(__CPU_x86_64)
asm volatile(
" movb $0, %0 \n\t" /* on amd64 membar StoreStore | LoadStore is
implicit (at least on the same mem. type) */
: "=m"(*lock) : : "memory"
);
#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
asm volatile(
#ifndef NOSMP
#ifdef __CPU_sparc64
"membar #LoadStore | #StoreStore \n\t"
#else /* __CPU_sparc */
"stbar \n\t"
#endif /* __CPU_sparc64 */
#endif
"stb %%g0, [%1] \n\t"
: "=m"(*lock) : "r" (lock) : "memory"
);
#elif defined __CPU_arm || defined __CPU_arm6
#ifndef NOSMP
#warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
#endif
asm volatile(
" str %1, [%2] \n\r"
: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
);
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
asm volatile(
/* "sync\n\t" lwsync is faster and will work
* here too
* [IBM Programming Environments Manual, D.4.2.2]
*/
"lwsync\n\t"
"stwx %1, 0, %2\n\t"
: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
);
#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
|| defined __CPU_mips64
asm volatile(
".set push \n\t"
".set noreorder \n\t"
".set mips2 \n\t"
#ifndef NOSMP
#ifdef __CPU_mips
#warning mips1 smp mode not supported (no membars), try compiling with -DNOSMP
#else
" sync \n\t"
#endif
#endif
" sw $0, %0 \n\t"
".set pop \n\t"
: "=m" (*lock) : /* no input */ : "memory"
);
#elif defined __CPU_alpha
asm volatile(
#ifndef NOSMP
" mb \n\t"
#endif
" stl $31, %0 \n\t"
: "=m"(*lock) :/* no input*/ : "memory" /* because of the mb */
);
#else
#error "unknown architecture"
#endif
}
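
/* Illustrative usage sketch (not part of this header's original content):
 * a hypothetical counter protected by a fastlock. The lock is assumed to
 * live in memory shared by all contending processes/threads and to have
 * been initialized once with init_lock(*lock) before first use. */
inline static void fastlock_usage_example(fl_lock_t* lock, int* counter)
{
	get_lock(lock);		/* spins / yields until the lock is acquired */
	(*counter)++;		/* critical section */
	release_lock(lock);	/* store + the needed memory barrier */
}

/* same idea, but without blocking: give up immediately if the lock is busy */
inline static int fastlock_try_example(fl_lock_t* lock, int* counter)
{
	if (try_lock(lock)!=0)
		return -1;	/* lock held by someone else, don't wait */
	(*counter)++;		/* critical section */
	release_lock(lock);
	return 0;
}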
#endif