[Solved] Weird fork() Bug in Custom x86 Kernel – Child Process Never Runs
Posted: Tue Apr 22, 2025 9:27 pm
Currently, I'm implementing fork() and multitasking in a custom x86 kernel and have encountered a very strange bug. I've debugged carefully and reviewed the code multiple times, but I still can't figure out what's wrong, so I'm seeking advice from anyone with OS development experience.
Here's the flow of execution:
init_process() is called in kernel mode. It allocates and initializes the first user process structure, then calls exec_fn() to jump into user space.
exec_fn() removes all previous user virtual memory mappings, initializes a new address space (currently only the code and stack regions are defined, with static base/size), copies the function pointed to by func into the user code segment, marks it read-only, and jumps to the entry point.
init_process_code() is a simple test to check whether fork() and multitasking are working properly. All functions used in it are marked with __attribute__((always_inline)), so they are inlined into the copied code and do not cause a call into kernel address space. System calls are made via int $0x80.
However, only the else block executes, and the child process (if (pid == 0) block) never runs. It seems like only the parent continues executing, and I can’t figure out why. To narrow things down, I simplified the code like this:
Here’s my fork() implementation:
The create_child() function clones the current process’s virtual memory, allocates a new kernel stack, and sets up the new task_struct. stack_copy_and_adjust() copies the kernel stack and adjusts the esp.
Here’s the stack_copy_and_adjust() assembly code:
I’ve verified that esi and edi point correctly and memory is copied successfully.
Here’s the task switching function:
To test if a child is running, I added a debug check:
When .doneVAS: is reached, the four registers pushed in stack_copy_and_adjust() are restored, and after the ret, execution should resume from the if (current == child) condition.
Here’s the modified sys_fork() for debugging:
Here’s the weird part: If I comment out the printk() line, esp and esp0 are incorrect. But if the line is active, the output values are exactly correct. This suggests that something is uninitialized or corrupted and printk() is accidentally fixing it.
At this point, I’m completely stuck. Any advice, insight, or suggestions would be deeply appreciated, especially from those with experience implementing fork() or task switching on x86.
Also, I can't use GDB as it freezes after jumping into user mode.
Here's the flow of execution:
Code: Select all
/*
 * Create the task_struct for the first user process, publish it as `current`,
 * and start it by passing init_process_code to exec_fn().
 *
 * Panics through do_panic() when the task structure cannot be allocated.
 */
void init_process(void)
{
    struct task_struct *init_task = kmalloc(sizeof *init_task);

    if (init_task == NULL)
        do_panic("init process failed");

    /* Identity and lineage. */
    init_task->pid = INIT_PROCESS_PID;
    init_task->uid = init_task->pid; /* stub: uid mirrors pid for now */
    init_task->parent = NULL;

    /* Execution context: reuse the active page directory and the stack
     * whose top is the `stack_top` symbol. */
    init_task->cr3 = current_cr3();
    init_task->esp0 = (uint32_t)&stack_top;

    /* Scheduling state. */
    init_task->time_slice_remaining = DEFAULT_TIMESLICE;
    init_task->state = PROCESS_RUNNING;

    /* Start with empty virtual-memory bookkeeping trees. */
    init_task->vblocks.by_base = RB_ROOT;
    init_task->vblocks.by_size = RB_ROOT;
    init_task->mapping_files.by_base = RB_ROOT;

    init_list_head(&init_task->children);
    init_list_head(&init_task->ready);

    /* Register first, then make it the running task. */
    pid_table_register(init_task);
    current = init_task;

    exec_fn(init_process_code);
}
Code: Select all
void exec_fn(void (*func)())
{
user_vspace_clean(¤t->vblocks, ¤t->mapping_files, CL_TLB_INVL | CL_RECYCLE);
user_vspace_init();
memcpy((void *)USER_CODE_BASE, func, USER_CODE_SIZE);
set_rdonly();
jmp_to_entry_point();
}
Code: Select all
/*
 * User-mode smoke test for fork() and multitasking.
 *
 * Forks ten times. Each child prints its id and an incremented copy of
 * magic_number, then spins forever; the parent prints the child's pid and
 * keeps looping. After the loop the parent checks that its own
 * magic_number is still untouched.
 *
 * This body is copied into the user code region by exec_fn(), so it must
 * not be split into out-of-line helpers: per the accompanying post, every
 * callee is always_inline and system calls go through int $0x80.
 */
void init_process_code(void)
{
    char digits[11] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '\0'};
    char text[10];
    int magic_number = 1234567;
    int round = 0;

    while (round < 10) {
        int pid = fork();

        if (pid < 0) {
            write("fork failed\n");
            for (;;)
                ;
        }

        if (pid == 0) {
            /* Child: report, modify the private copy, then park. */
            write("child created my pid: ");
            t_number_to_string(text, getuid(), 10, digits);
            write(text);
            magic_number++;
            write("magic number: ");
            t_number_to_string(text, magic_number, 10, digits);
            write(text);
            write("\n");
            for (;;)
                ;
        }

        /* Parent: the child never leaves its branch, so no else is needed. */
        write("fork succeed child pid: ");
        t_number_to_string(text, pid, 10, digits);
        write(text);
        write("\n");

        round++;
    }

    /* The parent's value must be unchanged by the children's increments. */
    if (magic_number == 1234567)
        write("ok..");
    else
        write("cow failed.. \n");

    for (;;)
        ;
}
However, only the else block executes, and the child process (if (pid == 0) block) never runs. It seems like only the parent continues executing, and I can’t figure out why. To narrow things down, I simplified the code like this:
Code: Select all
/*
 * Reduced reproduction: issue a single fork() and then spin forever.
 * The fork() result is stored but deliberately not inspected.
 */
void init_process_code(void)
{
    int pid = fork();

    (void)pid;
    for (;;)
        ;
}
Code: Select all
/*
 * Allocate and populate a task_struct for a forked child of `current`.
 *
 * out_child: receives the new task on success; left untouched on failure.
 *
 * Returns 0 on success, -EAGAIN when no pid is available, and -ENOMEM for
 * every other failure (task allocation, kernel stack allocation, or a
 * failed vblocks/mapping_files clone).
 *
 * The child's saved esp is not set here.
 */
static inline int create_child(struct task_struct **out_child)
{
    int err = -ENOMEM;
    struct task_struct *new_task = kmalloc(sizeof *new_task);

    if (!new_task)
        goto out;

    new_task->pid = alloc_pid();
    if (new_task->pid == PID_NONE) {
        err = -EAGAIN;
        goto out_clean_child;
    }
    new_task->uid = new_task->pid; /* stub: uid mirrors pid for now */

    /* esp0 is stored as the TOP of the freshly allocated kernel stack. */
    new_task->esp0 = (uint32_t)kmalloc(KERNEL_STACK_SIZE);
    if (!new_task->esp0)
        goto out_clean_pid;
    new_task->esp0 += KERNEL_STACK_SIZE;

    /* Clone the virtual-memory bookkeeping into empty trees. */
    new_task->vblocks.by_base = RB_ROOT;
    new_task->vblocks.by_size = RB_ROOT;
    if (vblocks_clone(&new_task->vblocks) < 0)
        goto out_clean_kstack;

    new_task->mapping_files.by_base = RB_ROOT;
    if (mapping_files_clone(&new_task->mapping_files) < 0)
        goto out_clean_vblocks;

    /* NOTE(review): pgdir_clone()'s result is stored without a failure
     * check - confirm whether it can fail. */
    pages_set_cow();
    new_task->cr3 = pgdir_clone();

    new_task->parent = current;
    new_task->state = PROCESS_READY;
    new_task->time_slice_remaining = DEFAULT_TIMESLICE;
    init_list_head(&new_task->children);

    *out_child = new_task;
    return 0;

    /* Unwind in reverse order of acquisition. */
out_clean_vblocks:
    vblocks_clean(&new_task->vblocks);
out_clean_kstack:
    kstack_free(new_task);
out_clean_pid:
    free_pid(new_task->pid);
out_clean_child:
    kfree(new_task);
out:
    return err;
}
Code: Select all
/*
 * fork() system call body.
 *
 * Builds the child task with create_child(), snapshots the current kernel
 * stack into the child via stack_copy_and_adjust(), and then takes one of
 * two paths depending on which task is `current` when the comparison runs:
 *   - current == child: return 0;
 *   - otherwise: link the child under `current`, register its pid, put it
 *     on the ready queue, and return the child's pid.
 *
 * Returns a negative error code from create_child() on failure.
 *
 * NOTE(review): the child is expected to reach the `current == child` test
 * by resuming on its copied stack. stack_copy_and_adjust() copies the saved
 * ebp verbatim, so if this code is compiled with a frame pointer the child
 * may read `child` through the parent's stack - confirm compiler flags.
 */
static inline int sys_fork(void)
{
struct task_struct *child;
int ret;
ret = create_child(&child);
if (ret < 0)
return ret;
/* Copies the kernel stack as it is at this call and records the child's saved esp. */
stack_copy_and_adjust(child);
if (current == child)
return 0;
/* Child becomes schedulable only after the stack snapshot above. */
add_child_to_parent(current, child);
pid_table_register(child);
ready_queue_enqueue(child);
return child->pid;
}
Here’s the stack_copy_and_adjust() assembly code:
Code: Select all
; stack_copy_and_adjust(child)
;
; Copies the current task's kernel stack into the child's kernel stack and
; stores, in the child's saved-esp field, the address inside the child's
; stack that corresponds to the current esp.
;
; Argument (on the stack): pointer to the child task structure.
;
; NOTE(review): the constants 8192 (bytes subtracted from esp0) and 2048
; (count passed to memcpy32, presumably dwords) are hard-coded; they must
; agree with the size of both kernel stacks, including the stack whose top
; is used as the first task's esp0 - confirm.
; NOTE(review): the saved ebp pushed below is copied unchanged, so in the
; child's copy it still holds an address inside the parent's stack. Code
; that addresses locals through ebp would then use the parent's stack -
; confirm.
section .text
global stack_copy_and_adjust
extern current
extern memcpy32
stack_copy_and_adjust:
; Save four registers; this is the same set, in the same order, that
; switch_to_task pops before its ret.
push ebx
push esi
push edi
push ebp
; eax = current task pointer
mov eax, [current]
; ebx = first argument (4 pushes + return address = 20 bytes above esp)
mov ebx, [esp + 20]
; esi = current->esp0 - 8192 (lowest address of the parent's stack)
mov esi, [eax + OFFSET_TASK_ESP0]
sub esi, 8192
; edi = child->esp0 - 8192 (lowest address of the child's stack)
mov edi, [ebx + OFFSET_TASK_ESP0]
sub edi, 8192
; memcpy32(edi, esi, 2048): arguments are pushed right to left
push 2048
push esi
push edi
call memcpy32
add esp, 12
; NOTE(review): the code below relies on ebx, esi and edi surviving the
; call to memcpy32 - confirm memcpy32 preserves them.
; eax = offset of the current esp from the bottom of the parent's stack
mov eax, esp
sub eax, esi
; edi = same offset applied to the bottom of the child's stack
add edi, eax
mov [ebx + OFFSET_TASK_ESP], edi
; Restore ebx, esi, edi from their save slots and drop all four slots;
; ebp is not reloaded (it is not modified in this routine).
mov ebx, [esp + 12]
mov esi, [esp + 8]
mov edi, [esp + 4]
add esp, 16
ret
Here’s the task switching function:
Code: Select all
; switch_to_task(next)
;
; Saves the running task's esp, makes `next` the current task, switches to
; its saved esp, updates the esp0 slot reached through `tss`, reloads cr3
; when the address space differs, and returns on the new task's stack.
;
; Argument (on the stack): pointer to the task structure to switch to.
global switch_to_task
extern current
extern tss
switch_to_task:
; Save four registers on the outgoing task's stack.
push ebx
push esi
push edi
push ebp
; Record the outgoing task's stack pointer.
mov esi, [current]
mov [esi + OFFSET_TASK_ESP], esp
; edi = first argument (4 pushes + return address = 20 bytes above esp)
mov edi, [esp + 20]
mov [current], edi
; From here on we run on the incoming task's stack.
mov esp, [edi + OFFSET_TASK_ESP]
mov eax, [edi + OFFSET_TASK_CR3]
mov edx, [edi + OFFSET_TASK_ESP0]
; NOTE(review): this loads the dword stored AT symbol `tss` and uses it as
; a base address. That is right only if `tss` is a pointer variable; if
; `tss` is the TSS structure itself, the address (mov ebx, tss) would be
; needed - confirm.
mov ebx, [tss]
mov [ebx + OFFSET_TSS_ESP0], edx
; Skip the cr3 write when the incoming task uses the same value.
mov ecx, cr3
cmp eax, ecx
je .doneVAS
mov cr3, eax
.doneVAS:
; Pop in reverse push order from the incoming task's stack, then return to
; the address stored there.
pop ebp
pop edi
pop esi
pop ebx
ret
Code: Select all
/*
 * Hand the CPU to the task that follows `current` on the ready list.
 *
 * Contains a temporary debug trap: when the task with pid 2 is current,
 * print a marker and halt instead of scheduling.
 */
static inline void schedule(void)
{
    /* Debug trap - proves that pid 2 actually got to run. */
    if (current->pid == 2) {
        printk("aaaaaaa\n");
        for (;;)
            ;
    }

    struct task_struct *successor = list_next_entry(current, ready);

    /* Order matters if successor == current: it must end up RUNNING. */
    current->state = PROCESS_READY;
    successor->state = PROCESS_RUNNING;

    switch_to_task(successor);
}
Code: Select all
/* Debug check: if the task with pid 2 (presumably the first forked child -
 * confirm against INIT_PROCESS_PID) is current, print a marker and halt. */
if (current->pid == 2) {
printk("aaaaaaa\n");
while (true);
}
Here’s the modified sys_fork() for debugging:
Code: Select all
/* Path taken when the new child is the current task: dump its pid, saved
 * esp and esp0, then return 0. */
if (current == child) {
printk("child pid = %u esp = %x esp0 = %x \n", current->pid, current->esp, current->esp0);
return 0;
}
/* Disabled dump of the child's fields on the other path. The post reports
 * that enabling this line changes the esp/esp0 values that are observed. */
// printk("child pid = %u esp = %x esp0 = %x \n", child->pid, child->esp, child->esp0);
At this point, I’m completely stuck. Any advice, insight, or suggestions would be deeply appreciated, especially from those with experience implementing fork() or task switching on x86.
Also, I can't use GDB as it freezes after jumping into user mode.