Ethin wrote: Could you post your code here if possible?
The whole code can be found in my git repo, if you want to test it yourself:
https://github.com/cristian-vijelie/unikraft/tree/smp
To configure it: run make menuconfig, then select Platform Configuration -> KVM guest and Platform Interface Options -> SMP support
Then, to enable debug messages: in the main config menu: Library Configuration -> ukdebug -> Enable debug messages, Kernel message level -> Show all types of messages
Finally, to build and run it:
make && sudo qemu-system-x86_64 -smp 2 -enable-kvm -m 128 -cpu host -kernel build/unikraft_kvm-x86_64 -serial stdio
To use gdb: sudo qemu-system-x86_64 -s -S -smp 2 -enable-kvm -m 128 -cpu host -kernel build/unikraft_kvm-x86_64 -serial stdio
and, in another terminal: gdb --eval-command="target remote :1234" ./build/unikraft_kvm-x86_64.dbg
I'll also post parts of the code:
Defines:
Code: Select all
/* MSR holding the local APIC base address and the EN/EXTD mode bits */
#define IA32_APIC_BASE 0x1b

/* x2APIC registers are accessed as MSRs starting at 0x800 */
#define x2APIC_BASE 0x800
#define x2APIC_SPUR 0x80F /* Spurious Interrupt Vector Register */
#define x2APIC_ESR 0x828  /* Error Status Register */
#define x2APIC_ICR 0x830  /* Interrupt Command Register (64-bit) */

/* Bit positions in IA32_APIC_BASE */
#define x2APIC_BASE_EXTD 10 /* x2APIC mode enable */
#define x2APIC_BASE_EN 11   /* xAPIC global enable */

/* CPUID.01H:ECX bit reporting x2APIC support */
#define x2APIC_CPUID_BIT 21

/* APIC software-enable bit position in the spurious interrupt register */
#define x2APIC_SPUR_EN 8

/* ICR (low dword) delivery modes */
#define x2APIC_ICR_DMODE_SMI 0x200
#define x2APIC_ICR_DMODE_NMI 0x400
#define x2APIC_ICR_DMODE_INIT 0x500
#define x2APIC_ICR_DMODE_SUP 0x600

/* ICR (low dword) destination mode, level and trigger mode flags */
#define x2APIC_ICR_DESTMODE_LOGICAL 0x800
#define x2APIC_ICR_LEVEL_ASSERT 0x4000
#define x2APIC_ICR_TRIGGER_LEVEL 0x8000

/*
 * Derive the 32-bit logical x2APIC ID from an x2APIC ID, as defined by the
 * Intel SDM: bits 31:16 hold the cluster (x2APIC ID[19:4]) and bits 15:0
 * hold a one-hot mask selected by x2APIC ID[3:0].
 *
 * Note: the argument is evaluated twice; do not pass expressions with side
 * effects.
 */
#define x2apic_logical_dest(x) \
	(((((x) >> 4) & 0xffffU) << 16) | (1U << ((x) & 0xfU)))
The function that enables the APs:
Code: Select all
void enable_cores(__u8 numcores)
{
__u8 bspid, ret;
int i, j;
__u32 ecx, eax, edx;
bspid = ukplat_lcpu_id();
uk_pr_info("Bootstrapping processor has the ID %d\n", bspid);
if (numcores > smp_numcores) {
uk_pr_info("Too many cores have been selected to be enabled. "
"Truncating to %d!\n",
smp_numcores);
numcores = smp_numcores;
}
memcpy((void *)0x8000, &_lcpu_start16, 4096);
uk_pr_info("Copied AP boot code to 0x8000\n");
uk_pr_debug("Computed logical ID for core 0: %d\n",
((lapic_ids[0] & 0xff00) << 16)
| (1 << (lapic_ids[0] & 0x00ff)));
rdmsr(0x80D, &eax, &edx);
uk_pr_debug("Logical ID from LDR: %d\n", eax);
for (i = 0; i < numcores; i++) {
if (i == bspid)
continue;
/* clear APIC errors */
wrmsr(x2APIC_ESR, 0, 0);
/* select AP and trigger INIT IPI */
eax = x2APIC_ICR_LEVEL_ASSERT | x2APIC_ICR_DESTMODE_LOGICAL
| x2APIC_ICR_DMODE_INIT;
edx = x2apic_logical_dest(lapic_ids[i]);
uk_pr_debug("eax: 0x%x, edx: 0x%x\n", eax, edx);
wrmsr(x2APIC_ICR, eax, edx);
/* deassert */
eax = x2APIC_ICR_DESTMODE_LOGICAL | x2APIC_ICR_DMODE_INIT;
edx = x2apic_logical_dest(lapic_ids[i]);
uk_pr_debug("eax: 0x%x, edx: 0x%x\n", eax, edx);
wrmsr(x2APIC_ICR, eax, edx);
/* wait 10 msec */
mdelay(10);
for (j = 0; j < 2; j++) {
/* clear APIC errors */
wrmsr(x2APIC_ESR, 0, 0);
/* select AP and trigger STARTUP IPI for 0x8000 */
eax = x2APIC_ICR_TRIGGER_LEVEL | x2APIC_ICR_LEVEL_ASSERT
| x2APIC_ICR_DESTMODE_LOGICAL
| x2APIC_ICR_DMODE_SUP | 0x08;
edx = x2apic_logical_dest(lapic_ids[i]);
uk_pr_debug("eax: 0x%x, edx: 0x%x\n", eax, edx);
wrmsr(x2APIC_ICR, eax, edx);
/* wait 200 usec */
udelay(200);
}
mdelay(10);
}
bspdone = 1;
}
Initialization:
Code: Select all
/*
 * Prepare the boot processor for SMP: switch to x2APIC mode, make sure the
 * enable bit in the spurious interrupt register is set, locate the MADT and
 * collect the local APIC IDs.
 *
 * Returns 0 on success. On failure it returns -1 converted to __u8
 * (i.e. 255), so callers should test for non-zero.
 */
__u8 smp_init()
{
	const __u32 spur_en_mask = 1 << x2APIC_SPUR_EN;
	__u32 svr_lo, svr_hi;

	if (enable_x2apic() != 0) {
		uk_pr_err("x2APIC could not be enabled!\n");
		return -1;
	}

	rdmsr(x2APIC_SPUR, &svr_lo, &svr_hi);
	uk_pr_debug(
	    "Spurious Interrupt Register has the values %x; EN bit: %d\n",
	    svr_lo, (svr_lo & spur_en_mask) != 0);

	/* Only touch the register when the enable bit is still clear. */
	if (!(svr_lo & spur_en_mask)) {
		svr_lo |= spur_en_mask;
		wrmsr(x2APIC_SPUR, svr_lo, svr_hi);
		uk_pr_debug("Spurious interrupt enabled\n");
	}

	/* find_madt() is expected to set the global `madt` on success. */
	find_madt();
	if (!madt)
		return -1;

	get_lapicid();

	return 0;
}
Enabling the x2APIC mode:
Code: Select all
/*
 * Switch the local APIC of the calling CPU into x2APIC mode.
 *
 * Checks CPUID.01H:ECX for x2APIC support, then sets the EN and EXTD bits
 * of IA32_APIC_BASE.
 *
 * Returns 0 on success, 1 if the CPU does not report x2APIC support.
 */
static __u8 enable_x2apic(void)
{
	__u32 eax, ebx, ecx, edx;

	/*
	 * CPUID overwrites EAX, EBX, ECX and EDX; declare all of them as
	 * operands so the compiler does not keep live values in them.
	 */
	eax = 1;
	ecx = 0;
	__asm__ __volatile__("cpuid"
			     : "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx));
	(void)ebx;

	if (ecx & (1 << x2APIC_CPUID_BIT))
		uk_pr_debug("x2APIC is supported; enabling\n");
	else {
		uk_pr_info("x2APIC is not supported\n");
		return 1;
	}

	rdmsr(IA32_APIC_BASE, &eax, &edx);
	uk_pr_debug(
	    "IA32_APIC_BASE has the value %x; EN bit: %d, EXTD bit: %d\n", eax,
	    (eax & (1 << x2APIC_BASE_EN)) != 0,
	    (eax & (1 << x2APIC_BASE_EXTD)) != 0);

	/*
	 * x2APIC mode requires EN = 1 and EXTD = 1; EXTD = 1 with EN = 0 is
	 * an invalid combination. A disabled APIC must first be brought to
	 * xAPIC mode (EN only) before EXTD may be set.
	 */
	if (!(eax & (1 << x2APIC_BASE_EN))) {
		eax |= (1 << x2APIC_BASE_EN);
		wrmsr(IA32_APIC_BASE, eax, edx);
	}

	/* set the x2APIC enable bit */
	eax |= (1 << x2APIC_BASE_EXTD);
	wrmsr(IA32_APIC_BASE, eax, edx);
	uk_pr_info("x2APIC is enabled\n");

	return 0;
}
The trampoline code, which doesn't seem to be reached:
Code: Select all
/*
 * AP boot trampoline: 16-bit entry point, 32-bit stage that enables PAE,
 * long mode and paging, and 64-bit stage that waits for the BSP and then
 * jumps to _lcpu_entry_default.
 */

/* ENTRY(x): export x as a global function symbol and place its label. */
#define ENTRY(x) .globl x; .type x,%function; x:
/* END(x): record the size of x as the distance from its label to here. */
#define END(x) .size x, . - x
.code16
ENTRY(_lcpu_start16)
/* r_base marks the start of the trampoline; offsets below are relative
 * to it.
 */
r_base = .
/* mask interrupts, clear the direction flag, write back and invalidate
 * caches
 */
cli
cld
wbinvd
/* make all data/stack segments equal to the code segment */
mov %cs, %ax
mov %ax, %ds
mov %ax, %es
mov %ax, %ss
/* stack pointer = offset of the trampoline stack top from r_base */
movw $(trampoline_stack_end - r_base), %sp
/* set CR0 bit 0 (protection enable) */
movl %cr0, %eax
orl $1, %eax
movl %eax, %cr0
/* NOTE(review): no lgdt is executed before this far jump in the code
 * shown, so selector 8 in _lcpu_start32_vector may not refer to a valid
 * descriptor on the AP. Also, the vector's offset is relative to r_base;
 * confirm that matches the base of the target code segment.
 */
ljmpl *(_lcpu_start32_vector - r_base)
END(_lcpu_start16)
.code32
.align 32
ENTRY(_lcpu_start32)
cld
/* 1: enable pae */
movl %cr4, %eax
orl $X86_CR4_PAE, %eax
movl %eax, %cr4
/* 2: enable long mode */
/* 0xc0000080 is the EFER MSR; set the LME and NXE flags */
movl $0xc0000080, %ecx
rdmsr
orl $X86_EFER_LME, %eax
orl $X86_EFER_NXE, %eax
wrmsr
/* 3: load pml4 pointer */
movl $cpu_pml4, %eax
movl %eax, %cr3
/* 4: enable paging */
movl %cr0, %eax
orl $X86_CR0_PG, %eax
movl %eax, %cr0
/* NOTE(review): this is a near jump into code assembled with .code64
 * while CS has not yet been reloaded with a 64-bit code descriptor;
 * the lgdt/lret that does so lives in _lcpu_start64. Verify the
 * instructions there decode as intended in this mode.
 */
jmp _lcpu_start64
/* NOTREACHED */
haltme2:
cli
hlt
jmp haltme2
END(_lcpu_start32)
/* GDT used from _lcpu_start64: null descriptor, 64-bit code, data, and
 * two zeroed slots for the TSS descriptor.
 */
.align 64
gdt64:
.quad 0x0000000000000000
gdt64_cs:
.quad GDT_DESC_CODE_VAL /* 64bit CS */
gdt64_ds:
.quad GDT_DESC_DATA_VAL /* DS */
.quad 0x0000000000000000 /* TSS part 1 (via C) */
.quad 0x0000000000000000 /* TSS part 2 (via C) */
gdt64_end:
.align 64
/* lgdt operand: 16-bit limit followed by the 64-bit base of gdt64 */
.type gdt64_ptr, @object
gdt64_ptr:
.word gdt64_end-gdt64-1
.quad gdt64
/* MXCSR default value; not referenced by the code shown here */
.type mxcsr_ptr, @object
mxcsr_ptr:
.long 0x1f80 /* Intel SDM power-on default */
#include "pagetable.S"
.code64
.align 32
ENTRY(_lcpu_start64)
/* NOTE(review): gdt64_ptr, bspdone, smp_aprunning and
 * _lcpu_entry_default are referenced by their link-time addresses, not
 * relative to r_base like the 16-bit stage; confirm this is intended
 * given that the trampoline is executed from a copy at 0x8000.
 */
lgdt (gdt64_ptr)
/* let lret jump just one instruction ahead, but set %cs
 * to the correct GDT entry while doing that.
 */
pushq $(gdt64_cs-gdt64)
pushq $1f
lretq
1:
/* Set up the remaining segment registers */
movq $(gdt64_ds-gdt64), %rax
movq %rax, %ds
movq %rax, %es
movq %rax, %ss
xorq %rax, %rax
movq %rax, %fs
movq %rax, %gs
/* spinlock, wait for the BSP to finish */
/* loops while the byte at bspdone is 0 */
spin:
pause
cmpb $0, bspdone
jz spin
/* atomically count this AP as running, then enter the default entry */
lock incb smp_aprunning
movq $_lcpu_entry_default, %rax
jmp *%rax
END(_lcpu_start64)
.align 32
/* far pointer for the ljmpl in _lcpu_start16: 32-bit offset of
 * _lcpu_start32 from r_base, then selector 8
 */
_lcpu_start32_vector:
.long _lcpu_start32 - r_base
.word 8, 0
.align 32
/* same layout for _lcpu_start64 with selector 16; not referenced by the
 * code shown here
 */
_lcpu_start64_vector:
.long _lcpu_start64 - r_base
.word 16, 0
/* 0x1000-byte stack used by the 16-bit stage.
 * NOTE(review): together with the code above this exceeds the 4096
 * bytes copied by enable_cores(); confirm the copy size.
 */
trampoline_stack:
.space 0x1000
trampoline_stack_end: