I have a bug that's driving me crazy. I think I have narrowed it down to something to do with my task switching. The symptoms seem to be that on a task switch it's going to an incorrect eip and page faulting. I think my stacks are getting messed up or something.
I've been at this one bug for days, if anyone has any ideas, please tell me
here is my irq asm stubs:
Code: Select all
syscall_irq:
push byte 0
push byte 48
jmp irq_common_stub
extern irq_handler
irq_common_stub:
cld
pushad
push ds
push es
push fs
push gs
mov eax, [current]
mov [eax], esp
mov ax, 0x10
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
push esp
call irq_handler
add esp, 4
jmp common_end
global switch_to
switch_to:
mov eax, [current]
mov esp, [eax]
mov ebx, [eax+8];put content of the k-stack field into ebx.
mov [morts_tss+4],ebx ;update system tss. (esp)
mov ebx, [eax+20]
mov cr3, ebx
common_end:
mov eax, [esp]
cmp eax, 40
jl master_reg
mov al, 0x20
out 0xA0, al
master_reg:
mov al, 0x20
out 0x20, al
pop gs
pop fs
pop es
pop ds
popad
add esp, 8
iret
the task structure is:
Code: Select all
struct task_struct {
uint32_t esp;
uint32_t ss;
uint32_t kstack;
uint32_t kstack_mem;
uint32_t ustack;
uint32_t cr3;
long state;
long counter;
long priority;
uint8_t signal[NSIG];
fn_ptr sig_restore;
fn_ptr sig_fns[NSIG];
int exit_code;
unsigned long brk,start_stack;
long pid,parent,pgrp,session,leader;
unsigned short uid,euid,suid;
unsigned short gid,egid,sgid;
long alarm;
long utime,stime,cutime,cstime,start_time;
unsigned short used_math;
int tty;
unsigned short umask;
struct ext2fs_inode_table *pwd; // TODO: should be a vfs node
struct ext2fs_fs_data *pwd_fsd;
struct ext2fs_inode_table *root; // TODO: should be a vfs node
struct ext2fs_fs_data *root_fsd;
unsigned long close_on_exec;
struct file *filp[NR_OPEN];
struct task_struct *next;
struct task_struct *prev;
};
The error seems to occur when I use my disk buffer code. To start with it only happened on real PCs and Qemu, but not Bochs or Vmware, so I rewrote my buffer code, and now it crashes on everything, so I switched back and studied it deeper, and it seems to do the wrong things but get away with it in bochs & vmware.
I have learnt thanks to Qemu & Gdb (yay OS Faq2!) that the offending eip is in my malloc function, (i am using a malloc function about the same as solar's PDClib malloc, at first I had my own malloc, but that faulted in free) but I have discovered that by adding breakpoints, that it gets to an eip in the malloc code without calling the malloc function, which made me think it's my stacks?
My stacks are 8k each, and are allocated using the malloc command. Also the error appears to happen in ring 3 and ring 0.
If anyone has any hints. please help me..
Thanks
Andrew