SMP: Problems with starting APs
Posted: Sun Aug 07, 2022 8:06 am
Hi,
I am writing a long mode OS kernel and I have implemented quite a few features (memory allocation, parsing ACPI tables, task switching) but I have had a few problems with multiprocessing. I have successfully initialized the local APIC on the BSP and I can send self-IPIs. I have also implemented the IO-APIC for the purposes of calibrating the time-stamp counter (for udelay()). Right now, I am trying to boot application processors.
This is the code I use for starting an AP (apboot.c):
Here are my implementations of APIC functions (if you'd need them, but you probably won't). The implementations for xAPIC and x2APIC reside in different files and I use a struct apic_interface to call these functions.
My code panics and prints
The linker script places it at 0x8000. (apboot.S, AT&T syntax):
This part of the linker script causes .realmode to be placed at 0x8000:
In bochs, I get the following logs:
When I turn off the machine, I get the following logs:
I genuinely don't know how to debug this, so any help would be appreciated.
This is my first post on these forums, so if I did anything wrong, please say it.
I am writing a long mode OS kernel and I have implemented quite a few features (memory allocation, parsing ACPI tables, task switching) but I have had a few problems with multiprocessing. I have successfully initialized the local APIC on the BSP and I can send self-IPIs. I have also implemented the IO-APIC for the purposes of calibrating the time-stamp counter (for udelay()). Right now, I am trying to boot application processors.
This is the code I use for starting an AP (apboot.c):
Code: Select all
extern volatile u8 __ap_flag;
void start_ap(struct cpu *ap)
{
printk("starting CPU%u...\n", ap->cpu_id);
__ap_flag = 0;
apic_write_icr(ap->lapic_id, 0xc500); /* send INIT level assert */
apic_wait_icr();
apic_write_icr(ap->lapic_id, 0x8500); /* send INIT level de-assert */
apic_wait_icr();
udelay(10000);
u32 apic_error;
apic_read_esr();
apic_write_icr(ap->lapic_id, 0x608);
udelay(200);
apic_wait_icr();
apic_error = apic_read_esr();
if(apic_error)
panic("APIC error 0x%x when starting CPU%u", apic_error, ap->cpu_id);
udelay(10000);
if(!__ap_flag)
panic("failed to start CPU%u", ap->cpu_id);
printk("started CPU%u\n", ap->cpu_id);
}
Code: Select all
static void normal_apic_write_icr(u32 dst, u32 flags)
{
mmapic_write_reg(MMAPIC_ICR_HIGH, dst << 24);
mmapic_write_reg(MMAPIC_ICR_LOW, flags);
}
static void normal_apic_wait_icr(void)
{
while(mmapic_read_reg(MMAPIC_ICR_LOW) & (1 << 12))
asm volatile("pause" : : : "memory");
}
static void x2apic_write_icr(u32 dst, u32 flags)
{
write_msr(MSR_X2APIC_ICR, ((u64) dst << 32) | flags);
}
static void x2apic_wait_icr(void) {}
This is the code that should run on the AP, shamelessly copied from SMP.failed to start CPU1
The linker script places it at 0x8000. (apboot.S, AT&T syntax):
Code: Select all
.code16
.section .realmode
__ap_entry: /* 0x8000 */
cli
cld
ljmp $0, $0x8040
.align 16
__gdt32: /* 0x8010 */
.long 0, 0
.long 0x0000ffff, 0x00cf9a00
.long 0x0000ffff, 0x008f9200
.long 0x00000068, 0x00cf8900
__gdt_ptr: /* 0x8030 */
.word 0x1f
.long 0x8010
.long 0, 0
.align 64
__ap_jmp_target: /* 0x8040 */
xorw %ax, %ax
movw %ax, %ds
lgdtl 0x8030
movl %cr0, %eax
orl $1, %eax
movl %eax, %cr0
ljmp $8, $8060
.code32
.align 32
__ap_pm: /* 0x8060 */
movw $16, %ax
movw %ax, %ds
movw %ax, %ss
movb $1, __ap_flag
__hlt_loop:
cli
hlt
jmp __hlt_loop
.align 16
.globl __ap_flag
__ap_flag:
.byte 0
Code: Select all
ENTRY(_start)
SECTIONS
{
/*
* AP startup code needs to be placed below 1MB.
* Therefore we give it a separate segment.
*/
. = 0x8000;
__realmode_start = .;
.realmode :
{
*(.realmode)
}
. = ALIGN(4K);
__realmode_end = .;
... (boot, text, data etc)
}
Code: Select all
00133390123i[CPU0 ] WRMSR: wrote 00000000:fee00800 to MSR_APICBASE
00133390123i[APIC0 ] allocate APIC id=0 (MMIO enabled) to 0x0000fee00000
00134628551i[APIC1 ] Deliver INIT IPI
00134628551i[CPU1 ] cpu software reset
00134628551i[APIC1 ] allocate APIC id=1 (MMIO enabled) to 0x0000fee00000
00134628551i[CPU1 ] CPU[1] is an application processor. Halting until SIPI.
// I removed some CPUID things, they're just unneeded
00134668576i[APIC1 ] Deliver Start Up IPI
00134668576i[CPU1 ] CPU 1 started up at 0800:00000000 by APIC
Code: Select all
00259752000p[XGUI ] >>PANIC<< POWER button turned off.
========================================================================
Bochs is exiting with the following message:
[XGUI ] POWER button turned off.
========================================================================
00259752000i[CPU0 ] CPU is in long mode (active)
00259752000i[CPU0 ] CS.mode = 64 bit
00259752000i[CPU0 ] SS.mode = 64 bit
00259752000i[CPU0 ] EFER = 0x00000500
00259752000i[CPU0 ] | RAX=0000000000000000 RBX=ffff8000007ade28
00259752000i[CPU0 ] | RCX=000000000000000a RDX=ffffffff81071c20
00259752000i[CPU0 ] | RSP=ffffffff81088eda RBP=ffffffff81088f2a
00259752000i[CPU0 ] | RSI=0000000000000000 RDI=0000000000000004
00259752000i[CPU0 ] | R8=0000000000000000 R9=0000000000000000
00259752000i[CPU0 ] | R10=0000000000000000 R11=ffffffff81088d8a
00259752000i[CPU0 ] | R12=ffffffff8106ea51 R13=ffffffff803b3814
00259752000i[CPU0 ] | R14=0000000080000000 R15=00000000003b326a
00259752000i[CPU0 ] | IOPL=0 ID vip vif ac vm rf nt of df if tf sf ZF af PF cf
00259752000i[CPU0 ] | SEG sltr(index|ti|rpl) base limit G D
00259752000i[CPU0 ] | CS:0008( 0001| 0| 0) 00000000 ffffffff 1 0
00259752000i[CPU0 ] | DS:0010( 0002| 0| 0) 00000000 ffffffff 1 1
00259752000i[CPU0 ] | SS:0010( 0002| 0| 0) 00000000 ffffffff 1 1
00259752000i[CPU0 ] | ES:0010( 0002| 0| 0) 00000000 ffffffff 1 1
00259752000i[CPU0 ] | FS:0010( 0002| 0| 0) 00000000 ffffffff 1 1
00259752000i[CPU0 ] | GS:0010( 0002| 0| 0) 7f746000 ffffffff 1 1
00259752000i[CPU0 ] | MSR_FS_BASE:0000000000000000
00259752000i[CPU0 ] | MSR_GS_BASE:ffff80007f746000
00259752000i[CPU0 ] | RIP=ffffffff8104f100 (ffffffff8104f100)
00259752000i[CPU0 ] | CR0=0xe0000011 CR2=0x0000000000000000
00259752000i[CPU0 ] | CR3=0x0000000001002000 CR4=0x000000a0
00259752000i[CPU1 ] CPU is in real mode (active)
00259752000i[CPU1 ] CS.mode = 16 bit
00259752000i[CPU1 ] SS.mode = 16 bit
00259752000i[CPU1 ] EFER = 0x00000000
00259752000i[CPU1 ] | EAX=00000000 EBX=00000000 ECX=00000000 EDX=00000000
00259752000i[CPU1 ] | ESP=0000fff6 EBP=00005a8d ESI=00000000 EDI=000081f4
00259752000i[CPU1 ] | IOPL=0 id vip vif ac vm rf nt of df if tf sf zf af pf cf
00259752000i[CPU1 ] | SEG sltr(index|ti|rpl) base limit G D
00259752000i[CPU1 ] | CS:0000( 1e00| 0| 0) 00000000 0000ffff 0 0
00259752000i[CPU1 ] | DS:0000( 0000| 0| 0) 00000000 0000ffff 0 0
00259752000i[CPU1 ] | SS:0000( 0000| 0| 0) 00000000 0000ffff 0 0
00259752000i[CPU1 ] | ES:0000( 0000| 0| 0) 00000000 0000ffff 0 0
00259752000i[CPU1 ] | FS:0000( 0000| 0| 0) 00000000 0000ffff 0 0
00259752000i[CPU1 ] | GS:0000( 0000| 0| 0) 00000000 0000ffff 0 0
00259752000i[CPU1 ] | EIP=0000000b (0000000b)
00259752000i[CPU1 ] | CR0=0x60000010 CR2=0x00000000
00259752000i[CPU1 ] | CR3=0x00000000 CR4=0x00000000
00259752000i[CMOS ] Last time is 1659879937 (Sun Aug 7 15:45:37 2022)
00259752000i[XGUI ] Exit
00259752000i[SIM ] quit_sim called with exit code 1
This is my first post on these forums, so if I did anything wrong, please say it.