If you get stuck, you can cheat and borrow my code.
It seems to work fine -- I just tried it on my laptop -- but I haven't used it more than once. It SHOULD work, and it looks right to me, but I don't promise that it's flawless.
But I felt like sharing. You'll need NASM to assemble this as is.
<pre>
;-----------------------------------------------------------------------
; Switch the CPU into "unreal mode": 16-bit real mode in which DS, ES, FS
; and GS keep the large segment limits they were given during a short
; visit to protected mode (4GB, assuming `dseg` describes a 4GB data
; segment as the original author's comments state).
;
; Syntax: NASM (Intel operand order). This is an inline sequence with no
;         RET; execution falls out of the bottom in 16-bit real mode.
; Needs:  - Symbols GDT, dseg and ds16, all defined outside this fragment.
;           GDT is used both as the LGDT operand and as the base for the
;           selector arithmetic (dseg-GDT, ds16-GDT), so the LGDT operand
;           presumably sits in the table's unused null slot
;           -- TODO confirm against the GDT definition.
;         - Selector 0x08 must be a 32-bit code segment and selector 0x18
;           a 16-bit code segment with a 64KB limit (see the far jumps).
;         - Labels must equal linear addresses and this code must live in
;           the first 64KB of memory, because the final far jump reloads
;           CS with zero.
;         - The .xxx names are NASM local labels; they attach to the
;           nearest preceding non-local label in the surrounding file.
; Out:    CS = DS = ES = FS = GS = SS = 0; interrupts and NMIs enabled.
; Clobb:  EAX, DX, flags.
;-----------------------------------------------------------------------

UNREAL_CMOS_INDEX   equ 0x70            ; I/O port whose bit 7 masks NMIs
UNREAL_NMI_OFF      equ 0x80            ; bit 7 set = NMIs disabled
UNREAL_CR0_PE       equ 0x01            ; CR0 bit 0 = protection enable
UNREAL_SEL_CODE32   equ 0x08            ; GDT selector: 32-bit code segment
UNREAL_SEL_CODE16   equ 0x18            ; GDT selector: 16-bit code segment

[BITS 16]                               ; We start in pure real mode.
        cli                             ; Nothing may interrupt the switch.

        ; Mask NMIs as well; CLI does not affect them.
        ; NOTE(review): this reads port 0x70 back before modifying it. Some
        ; references describe that port as write-only -- confirm on the
        ; target hardware that the read returns something meaningful.
        mov     dx, UNREAL_CMOS_INDEX
        in      al, dx
        or      al, UNREAL_NMI_OFF
        out     dx, al

        ; LGDT fetches its operand through DS. Every other address in this
        ; sequence is treated as linear (segment base zero), so force DS to
        ; zero here instead of trusting whatever the caller left in it.
        ; EAX is reloaded from CR0 right below, so nothing of value is lost.
        xor     ax, ax
        mov     ds, ax
        lgdt    [GDT]                   ; Load the new segment descriptors.

        mov     eax, cr0                ; Turn on protected mode.
        or      al, UNREAL_CR0_PE
        mov     cr0, eax
        jmp     dword UNREAL_SEL_CODE32:.pmode32
                                        ; Far jump sets CS to the 32-bit
                                        ; code segment.

[BITS 32]                               ; From here on the CPU decodes
                                        ; 32-bit code.
.pmode32:
        ; Loading a selector refreshes the hidden descriptor cache of the
        ; segment register; that cached limit is what survives the return
        ; to real mode.
        mov     eax, dseg-GDT           ; Selector of the big data segment.
        mov     ds, eax
        mov     es, eax
        mov     fs, eax
        mov     gs, eax

        ; The stack segment must stay on the 64KB descriptor: in unreal
        ; mode the stack pointer is SP -- *not* ESP.
        mov     eax, ds16-GDT
        mov     ss, eax

        jmp     dword UNREAL_SEL_CODE16:.pmode16
                                        ; Keep the code-segment limit at
                                        ; 64KB by reloading CS from the
                                        ; 16-bit code segment.

[BITS 16]
.pmode16:
        mov     eax, cr0                ; Turn off protected mode. The CPU
        and     al, ~UNREAL_CR0_PE & 0xFF ; keeps interpreting code as
        mov     cr0, eax                ; 16-bit.
        jmp     word 0x00:.rmode        ; Give CS a real-mode value (zero).
                                        ; THIS IS WHY THE CODE MUST SIT IN
                                        ; THE FIRST 64KB OF MEMORY.

.rmode:
        ; Put proper real-mode values (zero) into every segment register.
        ; The enlarged limits set for DS, ES, FS and GS while in protected
        ; mode are not changed by these loads.
        xor     ax, ax
        mov     ds, ax
        mov     es, ax
        mov     fs, ax
        mov     gs, ax
        mov     ss, ax

        mov     dx, UNREAL_CMOS_INDEX   ; Re-enable NMIs (clear bit 7).
        in      al, dx
        and     al, ~UNREAL_NMI_OFF & 0xFF
        out     dx, al
        sti                             ; Re-enable interrupts.
</pre>