I may be able to answer only some of Brendan's questions. The PoCs have not been tested running raw (i.e. without qemu-kvm or an OS) on a physical machine. The behaviour of MSR 0x309 is as follows: if we write a value -x into it, the PMI fires after the CPU retires (x+1) instructions of the appropriate type.
Below is a PoC which sets MSR 0x309 to -2147483648 and counts user-mode instructions only.
The PMI fires after 2147483649 instructions have been executed by the CPU in user mode. I did not actually count the instructions, but as we reduce the value of x, the delay between the CPU resets shrinks accordingly. If the number of instructions is kept small enough, one can verify the exact point at which the PMI fires.
I believe that this PoC is sufficient for the OP to ascertain that qemu-kvm does indeed fire the PMI after the desired number of user-mode instructions has been retired. The OP may add NMI/IRQ handlers to further verify whether it works. Whether delivery modes other than NMI (i.e. ordinary IRQ vectors) work still needs to be verified (OP?).
The code is based on this article.
It was run under qemu-kvm on an i5-3330 host.
main.asm
Code: Select all
; main.asm - boot sector (NASM, Intel syntax, flat binary loaded at 0x0000:0x7C00).
; Normalises the real-mode environment, then hands over to SwitchToLongMode
; (lm.asm), which builds the page tables at FREE_SPACE and enters long mode.

%define FREE_SPACE 0x9000               ; Page-aligned 16KiB scratch area for the paging structures.

ORG 0x7C00
BITS 16

; Main entry point where BIOS leaves us.
Main:
        jmp     0x0000:.FlushCS         ; Some BIOS' may load us at 0x0000:0x7C00 while other may load us at 0x07C0:0x0000.
                                        ; Do a far jump to fix this issue, and reload CS to 0x0000.
.FlushCS:
        xor     ax, ax

        ; Set up segment registers.
        mov     ss, ax
        ; Set up stack so that it starts below Main.
        ; Load all 32 bits of ESP, not just SP: the long-mode code in lm.asm
        ; pushes through the full stack pointer, so stale upper bits left by
        ; the firmware would move the stack away from 0x7C00 (possibly outside
        ; the identity-mapped first 2MiB).
        mov     esp, Main
        mov     ds, ax
        mov     es, ax
        mov     fs, ax
        mov     gs, ax
        cld

        ; Point edi to a free space bracket.
        mov     edi, FREE_SPACE
        ; Switch to Long Mode.
        jmp     SwitchToLongMode

BITS 64
; Idle loop in 64-bit code. Nothing in main.asm or lm.asm jumps here:
; lm.asm far-jumps to its own LongMode label instead.
.Long:
        hlt
        jmp     .Long

BITS 16
%include "lm.asm"

; Pad out file.
times 510 - ($-$$) db 0
dw 0xAA55                               ; Boot signature in the last two bytes of the sector.
lm.asm
Code: Select all
; Paging-structure entry flag bits (OR-ed into PML4E/PDPTE/PDE values).
; bit 0: entry is present
%define PAGE_PRESENT 0x01
; bit 1: writable
%define PAGE_WRITE 0x02
; bit 2: accessible from user mode
%define PAGE_USER 0x04
; bit 7: entry maps a large page instead of pointing to a lower-level table
%define PAGE_SIZE 0x80

; GDT selectors for the ring-0 code and data descriptors.
%define CODE_SEG 0x0008
%define DATA_SEG 0x0010

; Zero-length IDT pseudo-descriptor (16-bit limit, 32-bit base), loaded with
; lidt by SwitchToLongMode.
align 4
IDT:
.Length:        dw 0
.Base:          dd 0
; Function to switch directly to long mode from real mode.
; Identity maps the first 2MiB (user-accessible 2MiB page) and additionally
; identity maps the 2MiB page at 0xFEE00000 (supervisor-only), which the
; long-mode code writes to at offset 0x340.
; Uses Intel syntax.
; es:edi Should point to a valid page-aligned 16KiB buffer, for the PML4, the PDPT and two PDs
;        (PML4 at +0x0000, PDPT at +0x1000, PD for 0..1GiB at +0x2000, PD for 3..4GiB at +0x3000).
; ss:sp  Should point to memory that can be used as a small stack (a single 16-bit push is made).
; Does not return: ends with a far jump to LongMode in 64-bit code.
SwitchToLongMode:
; Zero out the 16KiB buffer.
; Since we are doing a rep stosd, count should be bytes/4 (0x1000 dwords = 16KiB).
push di ; REP STOSD alters DI.
mov ecx, 0x1000
xor eax, eax
cld
rep stosd
pop di ; Get DI back.
; Build the Page Map Level 4.
; es:di points to the Page Map Level 4 table.
; Only the low dword of each 8-byte entry is written below; the high dword
; stays zero from the clearing above.
lea eax, [es:di + 0x1000] ; Put the address of the Page Directory Pointer Table in to EAX.
or eax, PAGE_PRESENT | PAGE_WRITE | PAGE_USER; Or EAX with the flags - present, writable and user flags.
mov [es:di], eax ; Store the value of EAX as the first PML4E.
; Build the Page Directory Pointer Table (entry 0, covering 0..1GiB).
lea eax, [es:di + 0x2000] ; Put the address of the first Page Directory in to EAX.
or eax, PAGE_PRESENT | PAGE_WRITE | PAGE_USER; Or EAX with the flags - present, writable and user flags.
mov [es:di + 0x1000], eax ; Store the value of EAX as the first PDPTE.
; Build the Page Directory.
xor eax, eax
; No page table is used (the former "lea eax, [es:di + 0x3000]" is disabled):
; with PAGE_SIZE set, this PDE maps the 2MiB page at physical address 0 directly.
or eax, PAGE_SIZE | PAGE_PRESENT | PAGE_WRITE | PAGE_USER; Or EAX with the flags - large page, present, writeable and user flags.
mov [es:di + 0x2000], eax ; Store to value of EAX as the first PDE.
; Map 0xFEE00000. The address split into paging indices
; (PML4 | PDPT | PD | remaining 21 bits = offset inside the 2MiB page):
; 000000000 000000011 111110111 000000000 000000000000
; i.e. PML4 index 0, PDPT index 3 (byte offset 0x18), PD index 0x1F7 (byte offset 0xFB8).
; Hook a second Page Directory into the Page Directory Pointer Table.
lea eax, [es:di + 0x3000] ; Put the address of the second Page Directory in to EAX.
or eax, PAGE_PRESENT | PAGE_WRITE ; Or EAX with the flags - present flag, writable flag (no user access).
mov [es:di + 0x1000 + 0x18], eax ; Store the value of EAX as PDPTE number 3.
mov eax, 0xfee00000
or eax, PAGE_SIZE | PAGE_PRESENT | PAGE_WRITE ; Or EAX with the flags - large page, present flag, writeable flag (no user access).
mov [es:di + 0x3000 + 0xfb8], eax ; Store to value of EAX as PDE number 0x1F7 of the second Page Directory.
; Disable IRQs
mov al, 0xFF ; Out 0xFF to 0xA1 and 0x21 to disable all IRQs.
out 0xA1, al
out 0x21, al
lidt [IDT] ; Load a zero length IDT so that any NMI causes a triple fault.
; Enter long mode.
mov eax, 10100000b ; Set the PAE and PGE bit.
mov cr4, eax
mov edx, edi ; Point CR3 at the PML4.
mov cr3, edx
mov ecx, 0xC0000080 ; Read from the EFER MSR.
rdmsr
or eax, 0x00000100 ; Set the LME bit.
wrmsr
mov ebx, cr0 ; Activate long mode -
or ebx,0x80000001 ; - by enabling paging and protection simultaneously.
mov cr0, ebx
lgdt [GDT.Pointer] ; Load GDT.Pointer defined below.
jmp CODE_SEG:LongMode ; Load CS with 64 bit segment and flush the instruction cache
; Global Descriptor Table.
; Selector layout: 0x08 ring-0 code, 0x10 ring-0 data,
;                  0x18 ring-3 code, 0x20 ring-3 data.
; Each descriptor is composed from its access byte (descriptor bits 40..47)
; and, for code segments, the 64-bit "L" flag (0x20 in descriptor bits 48..55).
; Base and limit fields are left at zero.
%define GDT_ACCESS(b)   ((b) << 40)
%define GDT_LONG_MODE   (0x20 << 48)

align 4
GDT:
.Null:
        dq 0                                        ; Null Descriptor - should be present.
.Kernel:
        dq GDT_LONG_MODE | GDT_ACCESS(0x9A)         ; 64-bit code descriptor (exec/read), DPL 0.
        dq GDT_ACCESS(0x92)                         ; 64-bit data descriptor (read/write), DPL 0.
.User:
        dq GDT_LONG_MODE | GDT_ACCESS(0xFA)         ; 64-bit code descriptor (exec/read), DPL 3.
        dq GDT_ACCESS(0xF2)                         ; 64-bit data descriptor (read/write), DPL 3.
.Pointer:
        dw GDT.Pointer - GDT - 1                    ; 16-bit Size (Limit) of GDT.
        dd GDT                                      ; 32-bit Base Address of GDT. (CPU will zero extend to 64-bit)
[BITS 64]
; LongModeUser - the ring-3 payload JumpUser returns into.
; A one-instruction endless loop: it just keeps retiring user-mode jumps.
LongModeUser:
        jmp     $                       ; Branch to itself forever.
; JumpUser - leave ring 0 and continue at LongModeUser in ring 3.
; Builds a 64-bit interrupt-return frame on the current stack
; (SS, RSP, RFLAGS, CS, RIP - pushed in that order) and executes iretq.
; The user stack pointer is the value RSP had on entry; RFLAGS is passed
; through unchanged. Clobbers RAX. Does not return.
%define USER_SS_RPL3    (0x20 | 3)      ; Ring-3 data selector with RPL 3 (= 0x23).
%define USER_CS_RPL3    (0x18 | 3)      ; Ring-3 code selector with RPL 3 (= 0x1b).

JumpUser:
        mov     rax, rsp                ; Capture RSP before the frame is built.
        push    qword USER_SS_RPL3      ; ss
        push    rax                     ; rsp
        pushfq                          ; rflags
        push    qword USER_CS_RPL3      ; cs
        push    qword LongModeUser      ; rip
        iretq
; LongMode - 64-bit ring-0 entry point reached from SwitchToLongMode.
; Arms fixed-function counter 0 so that it overflows after a chosen number of
; retired user-mode instructions, routes the resulting performance-monitoring
; interrupt (PMI) as an NMI, then drops to ring 3 via JumpUser.
; Because a zero-length IDT is loaded, the NMI triple-faults the CPU; the
; reset is the visible sign that the PMI fired.
; Clobbers RAX, RCX, RDX. Does not return.
;
; NOTE(review): this relies on the local APIC already being enabled by the
; firmware; confirm if running on something other than qemu-kvm.
%define LAPIC_BASE              0xfee00000  ; Local APIC MMIO page (mapped by SwitchToLongMode).
%define LAPIC_LVT_PERFMON       0x340       ; LVT performance-counter register offset.
%define LVT_DELIVER_NMI         0x400       ; Delivery mode = NMI, entry not masked.
%define MSR_FIXED_CTR0          0x309       ; Fixed counter 0 (instructions retired).
%define MSR_FIXED_CTR_CTRL      0x38d       ; Fixed-counter control.
%define MSR_PERF_GLOBAL_CTRL    0x38f       ; Global counter enable.

LongMode:
        ; Deliver counter-overflow interrupts as NMI.
        mov     rcx, LAPIC_BASE
        mov     dword [rcx + LAPIC_LVT_PERFMON], LVT_DELIVER_NMI

        ; Globally enable fixed counter 0 only (bit 32 of EDX:EAX).
        ; The general-purpose counter bits are deliberately left clear: they
        ; are not used here, and setting enable bits for counters the (virtual)
        ; CPU does not implement raises #GP, which would reset the machine
        ; before the counter is even armed.
        xor     eax, eax
        mov     edx, 0x1
        mov     ecx, MSR_PERF_GLOBAL_CTRL
        wrmsr

        ; Preload the counter with 0x0000FFFF80000000, i.e. -0x80000000 as a
        ; 48-bit value, so it overflows after roughly 2^31 counted instructions.
        mov     edx, 0xffff
        mov     eax, 0x80000000
        mov     ecx, MSR_FIXED_CTR0
        wrmsr

        ; Start counting: bit 1 = count in user mode only, bit 3 = raise a PMI
        ; on overflow.
        xor     edx, edx
        mov     eax, 0xa                ; track only user-mode instructions
        mov     ecx, MSR_FIXED_CTR_CTRL
        wrmsr

        jmp     JumpUser

        ; Not reached: JumpUser never returns. Kept as a safe parking loop.
.Hang:
        hlt
        jmp     .Hang
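Compilation step (the command itself is missing from this post; the run step below expects a flat-binary image named `hd`, e.g. something like `nasm -f bin main.asm -o hd`):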
Running step:
Code: Select all
# Boot the flat image "hd" under KVM with the host CPU model.
# The disk is attached with an explicit raw format so QEMU does not have to
# probe the image type. The monitor listens on TCP port 4444, and -d logs
# interrupts, guest errors and CPU resets.
qemu-system-x86_64 -cpu host -enable-kvm \
-monitor tcp::4444,server,nowait \
-d int,guest_errors,unimp,pcall,cpu_reset \
-drive file=hd,format=raw,index=0,media=disk