Code is written for MSVC++.
There are two different implementations. Both implementations use the stack to store the queue, so the size of a spinlock is not increased.
The first uses a linked list. The local flag for busy-waiting is located on the stack of the current processor (principle of locality). It is not required to align memory to prevent cache contention. The downside is that it requires one busy-waiting loop when releasing the spinlock, but that shouldn’t happen too often.
Code: Select all
// One waiter's node in the linked-list queued spinlock.
// Each node is a local variable of the function that takes the lock.
struct queued_spinlock_entry
{
// Link to the entry queued after this one; 0 while nobody has queued behind it.
// Must stay the first member for as long as any inline assembly addresses it
// as a bare [reg] with no field name (i.e. relies on offset 0).
queued_spinlock_entry* next;
// Busy-wait flag: set to 1 before the entry is enqueued; the owner of the
// entry spins while it is 1, and the previous holder stores 0 to hand over.
int state;
};
// The lock word itself: 0 when the lock is free, otherwise the address of the
// most recently enqueued entry (each acquirer atomically swaps its own entry in).
queued_spinlock_entry* head = 0;
// Linked-list queued spinlock: acquire, critical section, release.
//
// The queue node lives on this function's stack, so the lock word (`head`)
// stays one pointer wide. A waiter spins only on its own node's `state`.
//
// Target: MSVC, 32-bit x86 inline assembly.
// Registers used inside the __asm blocks: eax, ebx, ecx, edx, flags.
void some_process_1()
{
    queued_spinlock_entry proce_qe;     // this caller's queue node

    // ---- acquire -------------------------------------------------------
    __asm
    {
        lea     eax, proce_qe
        lea     ebx, head
        xor     ecx, ecx
        mov     edx, eax                                // edx = &proce_qe (xchg below overwrites eax)
        // Clear the link from a register so the store is unambiguously
        // 32 bits wide; an unsized `mov [eax], 0` leaves the width to the
        // assembler's default while the release path reads a full dword.
        mov     [eax]queued_spinlock_entry.next, ecx    // proce_qe.next  = 0
        mov     [eax]queued_spinlock_entry.state, 1     // proce_qe.state = 1 (must wait)
        lock xchg [ebx], eax                            // eax = previous entry, head = &proce_qe
        test    eax, eax
        jz      enter_section                           // queue was empty: lock is ours
        mov     [eax]queued_spinlock_entry.next, edx    // previous->next = &proce_qe
    wait_for_turn:
        pause
        cmp     [edx]queued_spinlock_entry.state, 1     // spin until the previous holder clears our flag
        je      wait_for_turn
    enter_section:
    }

    // do some work

    // ---- release -------------------------------------------------------
    __asm
    {
        lea     eax, proce_qe
        lea     ebx, head
        xor     ecx, ecx
        mov     edx, eax                                // edx = &proce_qe (cmpxchg overwrites eax on failure)
        lock cmpxchg [ebx], ecx                         // if (head == &proce_qe) head = 0
        je      exit_section                            // nobody queued behind us
        // Someone has swapped itself into head but may not have stored its
        // address into our `next` yet; wait for that link to appear.
    wait_for_link:
        pause
        cmp     [edx]queued_spinlock_entry.next, ecx
        je      wait_for_link
        mov     eax, [edx]queued_spinlock_entry.next    // eax = successor
        mov     [eax]queued_spinlock_entry.state, ecx   // successor->state = 0: hand the lock over
    exit_section:
    }
}
Code: Select all
// The lock word for the second variant: 0 when the lock is free, otherwise the
// address of the flag belonging to the most recent acquirer (each acquirer
// atomically swaps the address of its own flag in).
int* position = 0;
// Flag-chain queued spinlock: acquire, critical section, release.
//
// Each acquirer publishes the address of a flag on its own stack in
// `position` and spins on the flag of whoever was there before it.
//
// Flag values:
//   1 - owner holds the lock or is still waiting for it
//   0 - owner has released; the successor may enter
//   2 - successor has seen the release and will not touch the flag again
//
// The 0 -> 2 acknowledgement is required because the flag is a stack local
// that another caller is spinning on. If the releaser returned right after
// storing 0, the slot could be rewritten (for instance by the same caller
// re-entering this function and storing 1) before the successor noticed the
// 0, and both sides would then wait on each other forever.
//
// Target: MSVC, 32-bit x86 inline assembly.
// Registers used inside the __asm blocks: eax, ebx, ecx, edx, flags.
void some_process_2()
{
    int proce_qe;                       // this caller's flag (see values above)

    // ---- acquire -------------------------------------------------------
    __asm
    {
        lea     eax, proce_qe
        lea     ebx, position
        mov     dword ptr [eax], 1      // mark ourselves busy before becoming visible
        lock xchg [ebx], eax            // eax = predecessor's flag address, position = &proce_qe
        test    eax, eax
        jz      enter_section           // lock was free
    wait_for_release:
        pause
        cmp     dword ptr [eax], 1      // spin while the predecessor is still busy
        je      wait_for_release
        mov     dword ptr [eax], 2      // acknowledge: predecessor may now leave its frame
    enter_section:
    }

    // do some work

    // ---- release -------------------------------------------------------
    __asm
    {
        lea     eax, proce_qe
        lea     ebx, position
        xor     ecx, ecx
        mov     edx, eax                // edx = &proce_qe (cmpxchg overwrites eax on failure)
        lock cmpxchg [ebx], ecx         // if (position == &proce_qe) position = 0
        je      exit_section            // no successor ever saw our flag
        mov     [edx], ecx              // proce_qe = 0: let the successor in
    wait_for_ack:
        pause
        cmp     [edx], ecx              // keep the frame alive until the successor acknowledges
        je      wait_for_ack
    exit_section:
    }
}