Stack alignment issue with using floating point
Posted: Mon Dec 02, 2013 9:26 pm
So I've got an issue. Apparently, if a memory operand is involved with a floating-point operation, it has to be 16-byte aligned. If not, a GPF gets issued...
This is the problem code:
Wherein rbp-160 isn't 16-byte aligned.
That's the vicinity of the problem code.
According to helpful individuals on IRC, the compiler is supposed to preserve the alignment of the stack during function calls, whereby RSP+8 is 16-byte aligned at the call instruction.
This seems to work. However, I suspect that my context-switching business is trashing some of that alignment...
This is the context switching code:
(don't mind the inefficient code lol)
Here is the process setup code:
And this is the scheduler code itself:
Again don't mind the inefficient scheduler, it's mostly a placeholder for now.
Sorry for the codedump -- I really can't pinpoint the problem (ie. the exact point where the stack gets misaligned), except I have a hunch the scheduler is to blame, because if I execute the problem code before the scheduler starts scheduling, no problem.
Thanks.
This is the problem code:
Code: Select all
movaps dqword ptr ss:[rbp-160], xmm0
Code: Select all
{
// decimal numbers, because why not.
double mem = K_SystemMemoryInBytes;
uint8_t inx = 0;
while(mem > 1024)
{
mem /= 1024;
inx++;
}
PrintFormatted("%.2f %s of memory detected.\n\n", mem, K_BinaryUnits[inx]);
}
According to helpful individuals on IRC, the compiler is supposed to preserve the alignment of the stack during function calls, whereby RSP+8 is 16-byte aligned at the call instruction.
This seems to work. However, I suspect that my context-switching business is trashing some of that alignment...
This is the context switching code:
Code: Select all
TaskSwitcher:
push %r15
push %r14
push %r13
push %r12
push %r11
push %r10
push %r9
push %r8
push %rdx
push %rcx
push %rbx
push %rax
push %rbp
push %rsi
push %rdi
// Load the kernel data segment
movw $0x10, %bp
movl %ebp, %ds
movl %ebp, %es
movl %ebp, %fs
movl %ebp, %gs
// Modify the tss to change rsp0 to point here
// movq %rsp, 0x504
movq %rsp, %rdi
call SwitchProcess
movq %rax, %rsp
// xchg %bx, %bx
mov $0x20, %al
outb %al, $0x20
cmp $0xFADE, %r11
jne NoRing3
DoRing3:
cmpq $0x0, 0x600
je DoRetR3
// if not zero, do a CR3 switch.
movq 0x600, %rax
mov %rax, %cr3
// if this is ring3, we need to...
// do ring3!
jmp PopRegR3
DoRetR3:
// reset to the kernel's PML4.
mov $0x1000, %rax
mov %rax, %cr3
PopRegR3:
pop %rdi
pop %rsi
pop %rbp
pop %rax
pop %rbx
pop %rcx
pop %rdx
pop %r8
pop %r9
pop %r10
pop %r11
pop %r12
pop %r13
pop %r14
pop %r15
// screw around with the stack
// change the data-segment to user-mode
// also modify the eflags and code-segment values.
mov $0x23, %ax
mov %ax, %ds
mov %ax, %es
mov %ax, %fs
mov %ax, %gs
iretq
NoRing3:
cmpq $0x0, 0x600
je DoRetR0
// if not zero, do a CR3 switch.
movq 0x600, %rax
mov %rax, %cr3
jmp PopReg
DoRetR0:
// reset to the kernel's PML4.
mov $0x1000, %rax
mov %rax, %cr3
PopReg:
pop %rdi
pop %rsi
pop %rbp
pop %rax
pop %rbx
pop %rcx
pop %rdx
pop %r8
pop %r9
pop %r10
pop %r11
pop %r12
pop %r13
pop %r14
pop %r15
iretq
Here is the process setup code:
Code: Select all
void CreateTask(const char name[64], uint8_t Flags, void (*Function)())
{
Task_type* Task = new ((void*)Kernel::HardwareAbstraction::MemoryManager::DynamicMinimal::Allocate(sizeof(Task_type))) Task_type();
// Allocate kernel stack
uint64_t f = Physical::AllocatePage_NoMap();
for(int i = 0; i < DefaultRing3StackSize / 0x1000; i++)
Virtual::MapAddr(f + (i * 0x1000), (i == 0) ? f : Physical::AllocatePage_NoMap(), 0x07);
// allocate user stack.
uint64_t k = Physical::AllocatePage_NoMap();
for(int i = 0; i < DefaultRing3StackSize / 0x1000; i++)
Virtual::MapAddr(k + (i * 0x1000), (i == 0) ? k : Physical::AllocatePage_NoMap(), Flags & 0x1 ? 0x7 : 0x3);
Task->TopOfStack = DefaultRing3StackSize + f;
Task->StackPointer = Task->TopOfStack;
Task->Flags = Flags;
Task->Thread = Function;
Task->State = 1;
Task->ProcessID = GetNumberOfTasks();
Task->CR3 = 0;
String::Copy(Task->Name, name);
uint64_t* stack = (uint64_t*)Task->StackPointer;
if(!(Task->Flags & 0x1))
{
// kernel thread
*--stack = 0x10; // SS
*--stack = k + DefaultRing3StackSize; // User stack pointer
*--stack = 0x202; // RFLAGS
*--stack = 0x08; // CS
*--stack = (uint64_t)Function; // RIP
}
else
{
// user thread
*--stack = 0x23; // SS
*--stack = k + DefaultRing3StackSize; // User stack pointer
*--stack = 0x202; // RFLAGS
*--stack = 0x1B; // CS
*--stack = (uint64_t)Function; // RIP
}
*--stack = 0x0; // R15
*--stack = 0x0; // R14
*--stack = 0x0; // R13
*--stack = 0x0; // R12
*--stack = 0x0; // R11
*--stack = 0x0; // R10
*--stack = 0x0; // R9
*--stack = 0x0; // R8
*--stack = 0x0; // RDX
*--stack = 0x0; // RCX
*--stack = 0x0; // RBX
*--stack = 0x0; // RAX
*--stack = 0x0; // RBP
*--stack = 0x0; // RSI
*--stack = 0x0; // RDI
Task->StackPointer = (uint64_t)stack;
TaskList->Add(Task);
Log("Created new kernel thread: KStack: %x (%d b); UStack: %x(%d b); PID: %d",
f, DefaultRing3StackSize, k, DefaultRing3StackSize, Task->ProcessID);
}
And this is the scheduler code itself:
Code: Select all
extern "C" uint64_t SwitchProcess(uint64_t context)
{
TicksSinceInit++;
if(TaskList->Size() < 1)
return context;
if(!IsFirst)
{
TaskList->Get(CurrentPID)->StackPointer = context;
}
else
IsFirst = false;
if(CurrentPID < TaskList->Size() - 1 && NextPIDToSchedule == 0)
CurrentPID++;
else if(NextPIDToSchedule != 0)
{
CurrentPID = NextPIDToSchedule;
NextPIDToSchedule = 0;
}
else
CurrentPID = 0;
// decrease the sleep counters of all threads.
for(int i = 0; i < TaskList->Size(); i++)
{
if(TaskList->Get(i)->Sleep > 0 && i != CurrentPID)
{
TaskList->Get(i)->Sleep--;
if(TaskList->Get(i)->Sleep == 0)
{
NextPIDToSchedule = i;
}
}
}
if(!TaskList->Get(CurrentPID)->State)
{
CurrentPID = 0;
return TaskList->Get(CurrentPID)->StackPointer;
}
if(TaskList->Get(CurrentPID)->Sleep > 0)
{
TaskList->Get(CurrentPID)->Sleep--;
CurrentPID = 0;
return TaskList->Get(CurrentPID)->StackPointer;
}
if(TaskList->Get(CurrentPID)->Flags & 0x1)
{
// this tells switch.s (on return) that we need to return to user-mode.
asm volatile("mov $0xFADE, %%r11" ::: "%r11");
}
if(TaskList->Get(CurrentPID)->CR3 != 0)
{
asm volatile("movq %[page], 0x600" :: [page]"r"(TaskList->Get(CurrentPID)->CR3): "memory");
}
else
{
asm volatile("movq $0x1000, 0x600" ::: "memory");
}
asm volatile("mov %[stacktop], 0x504" :: [stacktop]"r"(TaskList->Get(CurrentPID)->TopOfStack));
return TaskList->Get(CurrentPID)->StackPointer; // Return new task's context.
}
Sorry for the codedump -- I really can't pinpoint the problem (ie. the exact point where the stack gets misaligned), except I have a hunch the scheduler is to blame, because if I execute the problem code before the scheduler starts scheduling, no problem.
Thanks.