Page 1 of 1

previously missed bug in my FAT file reading code

Posted: Fri Aug 19, 2022 10:53 pm
by Schol-R-LEA
I have moved into a new phase with my boot loader, having created a new project called Ordo, a simple C based OS kernel which uses the Verbum boot loader as a sub-module.

I have been running into a problem with the code I use to read files into memory. While this code works for reading the second stage boot loader, it hangs when trying to read the kernel file.

The working code is

Code: Select all

        ;; Fill in the stage-2 parameter block. Every field is addressed
        ;; downward from BP using the offsets of struc stg2_parameters.
        mov cx, fat_buffer
        mov [bp - stg2_parameters.drive], dx
        mov [bp - stg2_parameters.fat_0], cx
        mov [bp - stg2_parameters.directory_buffer], word dir_buffer
        mov [bp - stg2_parameters.PnP_Entry_Seg], bx ; BX == old ES value
        mov [bp - stg2_parameters.PnP_Entry_Off], di
        mov [bp - stg2_parameters.boot_sig], word bootsig
        mov [bp - stg2_parameters.bpb], word boot_bpb

;;; reset the disk drive
        ;; reset_disk takes the disk ID in DL; DX is the value that was
        ;; just stored as the drive parameter. It overwrites SI, DI and AH.
        call near reset_disk

        mov ax, Reserved_Sectors          ; get location of the first FAT sector
        mov bx, fat_buffer
        ;; read_fat is not shown here; presumably it loads the FAT into
        ;; the buffer at BX - confirm against its definition.
        call read_fat

        mov ax, dir_sectors
        mov bx, dir_buffer
        ;; read_root_directory is not shown here; presumably it loads the
        ;; root directory into dir_buffer - confirm against its definition.
        call near read_root_directory

        ;; Look up the second-stage file in the directory buffer.
        ;; seek_directory_entry returns DI = 0 when no entry matches.
        mov si, snd_stage_file
        mov di, dir_buffer
        mov cx, Root_Entries
        mov bx, dir_entry_size
        call near seek_directory_entry
        cmp di, word 0
        jz .no_file

        ;; BX := starting cluster taken from the matched entry at ES:DI
        call read_directory_details

        ;; Follow the cluster chain from BX, using the FAT at DI, and read
        ;; each sector into consecutive positions starting at SI.
        mov di, fat_buffer
        mov si, stage2_buffer
        call near fat_to_file

    .stg2_read_finished:
The code which is failing uses the existing FAT and directory buffers generated by the code above.

Code: Select all

;;; load_kernel_code - find the kernel file in the root directory that is
;;;                    already in memory and read its cluster chain to
;;;                    kernel_base:kcode_offset
;;; Inputs:
;;;       BP - base of the stage-2 parameter block (fields at [bp - offset])
;;;       ES - segment of the directory buffer (used by seek_directory_entry)
;;; Exits:
;;;       jumps to find_kernel_code_block once the file has been read;
;;;       jumps to local_halt_loop if no directory entry matches.
;;; NOTE(review): the read buffer handed down to INT 13h is
;;;       kernel_base:kcode_offset. With the values FFFF:1000 quoted in
;;;       this thread that is linear 0x100FF0, i.e. above 1 MiB, and
;;;       BIOSes are reported to differ in how they treat such a buffer.
load_kernel_code:
        ;; search the directory buffer for the kernel's entry
        mov bx, dir_entry_size
        mov cx, Root_Entries
        mov di, word [bp - stg2_parameters.directory_buffer]
        mov si, kernel_filename
        call near seek_directory_entry
        cmp di, word 0                  ; DI == 0 means "no such entry"
        je .kernel_missing

        call read_directory_details     ; BX = first cluster of the file
        write kernel_file_found

        ; reset the disk drive
        mov dl, byte [bp - stg2_parameters.drive]
        call near reset_disk            ; overwrites SI and DI, set below

        ;; point ES at the kernel segment for the duration of the read
        push ax
        push es
        mov ax, kernel_base
        mov es, ax
        mov si, kcode_offset            ; destination offset within ES
        mov di, [bp - stg2_parameters.fat_0]    ; FAT buffer
        mov dl, byte [bp - stg2_parameters.drive]
        call near fat_to_file
        pop es
        pop ax
        write kernel_loaded
        jmp find_kernel_code_block

    .kernel_missing:
        write no_kernel
        jmp local_halt_loop

The latter used the following string constants:

Code: Select all

;; Name matched against directory entries by seek_directory_entry.
;; It is 11 characters long: "KERNEL" padded with two spaces, then "SYS",
;; with no dot. Presumably this is the on-disk 8.3 form - confirm that
;; filename_length is 11.
kernel_filename              db "KERNEL  SYS", NULL

;; Shown when no directory entry matches kernel_filename.
no_kernel                    db 'KERNEL.SYS not found.', NULL

;; Progress messages: the first has no line ending, the second ends the line.
kernel_file_found            db 'KERNEL.SYS found...', NULL
kernel_loaded                db 'loaded.', CR, LF, NULL
and the following memory offsets:

Code: Select all

;; Segment loaded into ES by load_kernel_code before the kernel file is read.
;; NOTE(review): FFFF:1000 (kernel_base:kcode_offset) is linear 0x100FF0,
;; above the 1 MiB mark; see the discussion in this thread about INT 13h
;; buffers above 1 MiB.
kernel_base       equ 0xffff

;; Not referenced by the code shown here; presumably the offset within
;; kernel_base of the KData block described below - confirm.
kdata_offset      equ 0xfffc
;; Layout of the data block passed to the kernel. Every field is reserved
;; in dword units; High_Mem_Map_size and fat_size are defined elsewhere.
struc KData
    .mmap_cnt     resd 1
    .mmap         resd High_Mem_Map_size
    .drive        resd 1
    .fat          resd fat_size
endstruc

;; Offset within kernel_base at which load_kernel_code starts writing
;; the kernel file (passed to fat_to_file in SI).
kcode_offset      equ 0x1000

The second stage parameter block, which transfers critical data from the boot sector to the second stage, is defined as:

Code: Select all

;; Parameter block handed from the boot sector to the second stage.
;; All fields are words: 7 named words plus 9 reserved = 32 bytes.
;; The boot code and load_kernel_code address fields as
;; [bp - stg2_parameters.<field>]; read_LBA_sector reads .drive as
;; [bp + stg2_parameters.drive]. The two forms agree only because .drive
;; is the first field (offset 0).
struc stg2_parameters
        .drive                resw 1   ; include a padding byte for stack alignment
        .fat_0                resw 1   ; offset for the FAT buffer
        .directory_buffer     resw 1   ; offset of the root directory buffer
        .PnP_Entry_Seg        resw 1   ; ES value at boot entry (stored from BX)
        .PnP_Entry_Off        resw 1   ; DI value at boot entry
        .boot_sig             resw 1   ; stored as the address 'bootsig'
        .bpb                  resw 1   ; stored as the address 'boot_bpb'
        .reserved             resw 9
endstruc
The disk reading code is spread across several source files, the most relevant ones being

Code: Select all

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; routines for basic floppy disk handling

%ifndef SIMPLE_DISK_HANDLING_CODE__INC
%define SIMPLE_DISK_HANDLING_CODE__INC

%include "bios.inc"
%include "macros.inc"
%include "bpb.inc"
%include "dir_entry.inc"
%include "stage2_parameters.inc"

bits 16

tries            equ 0x03        ; attempt limit for each BIOS disk operation

;;; reset_disk - ask the BIOS to reset the drive, retrying on failure
;;; Inputs:
;;;        DL - the disk ID
;;; Outputs:
;;;        returns only after a reset succeeded (carry clear from the BIOS)
;;; Clobbers:
;;;        AH, SI (set to 0), DI (retry counter)
;;; Failure:
;;;        after 'tries' failed resets, control goes to 'halted' and never
;;;        returns (the error messages are currently commented out)
reset_disk:
        mov si, 0
        mov di, tries           ; DI counts the attempts still allowed
    .attempt:
        mov ah, disk_reset
        int DBIOS
        jc short .failed
        ret                     ; common case: the reset worked
    .failed:
        dec di
        jnz short .attempt
        ;; out of attempts; reporting is disabled for now:
 ;       write failure_state
 ;       write reset_failed
 ;       write exit
        jmp halted


;;; read_LBA_sector - read one sector given its Linear Block Address
;;; Inputs:
;;;       AX = Linear Block Address to read from
;;;       ES = segment of the destination buffer
;;;       BX = offset of the destination buffer
;;;       [bp + stg2_parameters.drive] = drive number (low byte is used)
;;; Outputs:
;;;       AX = LBA + 1, ready for the next sector
;;;       ES:BX buffer filled; every other register is preserved
;;;       (the read is bracketed by PUSHA/POPA)

read_LBA_sector:
        pusha
        call LBA_to_CHS                         ; CH = cyl, DH = head, CL = sector
        mov dl, [bp + stg2_parameters.drive]    ; DL = drive, DH keeps the head
        mov al, 1                               ; exactly one sector
        call read_sectors
        popa                                    ; AX is the caller's LBA again
        inc ax
        ret



;;; LBA_to_CHS - compute the cylinder, head, and sector
;;;              from a linear block address
;;; Inputs:
;;;       AX = Linear Block Address
;;; Outputs:
;;;       CH = Cylinder
;;;       DH = Head
;;;       CL = Sector (bits 0-5)
;;; Clobbers:
;;;       AX and DL are left holding intermediate division results;
;;;       BX is saved and restored.
;;; Notes:
;;;       Only the low byte of the cylinder quotient is returned (in CH);
;;;       nothing is merged into the upper bits of CL.
;;;       NOTE(review): Sectors_Per_Cylinder is used as the divisor for the
;;;       sector number and is multiplied by Heads for the cylinder, so it
;;;       appears to hold sectors per track - confirm against bpb.inc.
LBA_to_CHS:
        push bx
        push ax                 ; save so it can be used twice
        zero(dx)                ; macro from elsewhere; presumably clears DX so DX:AX = LBA
        mov bx, Sectors_Per_Cylinder
        ;; Sector =  (LBA % sectors per cyl) + 1    => in DL
        div bx
        inc dl                  ; sector numbers are 1-based
        mov cl, dl
        pop ax                  ; retrieve LBA value

        ;; Cylinder = LBA / (sectors per cyl * # of heads)   => in AL
        imul bx, Heads          ; BX = sectors per cyl * heads
        zero(dx)
        div bx                  ; AX = Cyl #, DL = partial result for Head
        mov ch, al              ; put previous AL into CH

        ;; Head = (LBA % (sectors per cyl * # of heads)) / sectors per cyl
        ;;     => first part in DL, final in AL
        mov ax, dx              ; remainder of the cylinder division
        zero(dx)
        mov bx, Sectors_Per_Cylinder
        div bx                  ; get the final value for Head
        mov dh, al
        pop bx
        ret

;;; read_sectors - read sectors through the BIOS disk service, retrying
;;;                (with a controller reset in between) when a read fails
;;; Inputs:
;;;       AL = # of sectors to read
;;;       DL = drive number
;;;       CH = Cylinder
;;;       DH = Head
;;;       CL = Sector (bits 0-5)
;;;       ES:BX = segment and offset for the buffer
;;;               to save the read sector(s) into
;;; Outputs:
;;;       buffer at ES:BX filled; all general registers are preserved
;;;       (PUSHA/POPA)
;;; Failure:
;;;       after 'tries' failed attempts control goes to 'halted' and never
;;;       returns
;;; Note:
;;;       the reset between attempts adds a few bytes; check the size
;;;       budget if this file is assembled into the boot sector.
read_sectors:
        pusha
        mov si, 0
        mov di, tries        ; set count of attempts for disk reads
        mov ah, disk_read
  .try_read:
        push ax              ; the BIOS returns a status in AH; keep the request
        int DBIOS
        pop ax
        jnc short .read_end

        ;; A failed floppy read is often transient (e.g. the motor is still
        ;; spinning up), so reset the controller before trying again rather
        ;; than re-issuing the same request immediately.
        push ax
        mov ah, disk_reset
        int DBIOS            ; DL still holds the drive number
        pop ax

        dec di               ; DEC leaves CF alone; only ZF matters here
        jnz short .try_read

        jmp halted

  .read_end:
        popa
        ret

%endif

Code: Select all

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; routine for finding a directory entry

%ifndef DIRECTORY_ENTRY_SEEK_CODE__INC
%define DIRECTORY_ENTRY_SEEK_CODE__INC

bits 16

%include "bios.inc"
%include "macros.inc"
%include "bpb.inc"
%include "dir_entry.inc"
%include "stage2_parameters.inc"

;;; seek_directory_entry - seek the directory for the given filename
;;; Inputs:
;;;       DS:SI - address of the filename to match against
;;;       ES:DI - directory buffer
;;;       CX - max. number of entries (0 is treated as "not found")
;;;       BX - size of an entry
;;; Outputs:
;;;       DI - location of the matching entry, or 0 if none matched
;;; Clobbers:
;;;       CX (counts down the entries examined); the direction flag is
;;;       left clear. SI and BX are unchanged.
seek_directory_entry:
        cld                     ; CMPSB must walk upwards; do not rely on the caller's DF
        jcxz .not_found         ; with CX = 0, LOOP would wrap and scan 65536 entries
    .dir_entry_test:
        push di
        push si
        push cx
        mov cx, filename_length
    repe cmpsb                  ; does the directory entry match?
        pop cx                  ; POP does not touch the flags, so ZF from
        pop si                  ; the comparison is still valid below
        pop di
        je .entry_found
        add di, bx              ; step to the next directory entry
        loop .dir_entry_test
    .not_found:
        mov di, 0x0000          ; if not found, return 0
    .entry_found:
        ret


;;; read_directory_details - fetch the starting cluster of a file from
;;;                          its directory entry
;;; Inputs:
;;;       ES:DI - directory entry
;;; Outputs:
;;;       BX - First FAT entry of the file (the entry's cluster_lobits word)
read_directory_details:
        ;; segment override written in NASM's bracketed form
        mov bx, [es:di + directory_entry.cluster_lobits]
        ret

%endif
and

Code: Select all

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; routines for reading a file by following its FAT12 cluster chain

%ifndef FAT_TO_FILE_CODE__INC
%define FAT_TO_FILE_CODE__INC

bits 16

%include "bios.inc"
%include "macros.inc"
%include "bpb.inc"
%include "dir_entry.inc"
%include "stage2_parameters.inc"
%include "simple_disk_handling_code.inc"
%include "simple_text_print_code.inc"
%include "print_hex_code.inc"

;; ANDed with the 16-bit word read for an even-numbered entry:
;; keeps the low 12 bits (despite the name).
hi_nibble_mask        equ 0x0FFF
;; Right-shift applied to the word read for an odd-numbered entry:
;; keeps the upper 12 bits.
lo_nibble_shift       equ 4
;; fat_to_file compares the next entry against this value (it is not used
;; as a bit mask): anything >= 0x0FF8 ends the read loop.
end_of_chain_mask     equ 0x0FF8
    

;;; fat_to_file - read a chain of FAT entries to
;;;               get the file sectors it maps to
;;;               and read those sectors into a buffer
;;; Inputs:
;;;       BX - starting FAT entry (cluster number)
;;;       DI - FAT buffer (read through DS)
;;;       ES:SI - buffer to read the file into
;;; Outputs:
;;;       the file's sectors stored consecutively from ES:SI
;;; Clobbers:
;;;       AX, BX (last FAT value read), SI (advanced past the data read)
;;; Failure:
;;;       a cluster number outside the usable range (0, 1, or 0xFF0 and
;;;       above before an end-of-chain marker is seen) means the chain or
;;;       the FAT buffer is corrupt. Following it would read arbitrary
;;;       sectors or loop forever, so control goes to 'halted', as the
;;;       disk routines do on an unrecoverable error.
;;; Notes:
;;;       One sector is read per cluster: the sector is computed as
;;;       cluster - 2 + first_data_sector, with no sectors-per-cluster
;;;       factor.

fat12_first_data_cluster   equ 2        ; clusters 0 and 1 never hold file data
fat12_first_reserved       equ 0x0FF0   ; 0xFF0-0xFF7 are reserved/bad; 0xFF8 and up end a chain

fat_to_file:
    .read_loop:
        ;; AX = cluster - 2. Clusters 0 and 1 wrap to 0xFFFE/0xFFFF, so one
        ;; unsigned compare rejects both "too small" and "reserved or above".
        lea ax, [bx - fat12_first_data_cluster]
        cmp ax, fat12_first_reserved - fat12_first_data_cluster
        jae .bad_chain
        add ax, first_data_sector       ; AX = LBA of the cluster's sector
        push bx
        mov bx, si                      ; read_LBA_sector writes to ES:BX
        call read_LBA_sector
        pop bx
        call extract_next_fat12_entry   ; AX = FAT value stored for cluster BX
        mov bx, ax
        add si, Bytes_Per_Sector        ; next sector lands right after this one
        cmp ax, end_of_chain_mask
        jb .read_loop                   ; below 0xFF8: another cluster follows

    .end_of_file:
        ret

    .bad_chain:
        jmp halted

;;; extract_next_fat12_entry - look up the FAT12 value stored for a
;;;               cluster, i.e. the number of the cluster that follows
;;;               it (or an end marker)
;;; Inputs:
;;;       DI - FAT entry buffer (read through DS)
;;;       BX - current FAT entry's value (cluster number)
;;; Outputs:
;;;       AX - next FAT entry's value (12 bits)
;;;       DI and BX are unchanged
extract_next_fat12_entry:
;; Entries are 12 bits wide, so entry N starts at byte N + N/2 of the FAT.
;; The 16-bit word read there holds the entry in its low 12 bits when N is
;; even and in its high 12 bits when N is odd.
        push di
        mov ax, bx
        shr ax, 1               ; N / 2
        add ax, bx              ; N + N / 2 == byte offset of the entry
        add di, ax
        mov ax, [di]            ; word containing the 12-bit entry
        pop di
        test bl, 1              ; parity of the cluster number picks the half
        jnz .odd_cluster
        and ax, hi_nibble_mask  ; even: keep the low 12 bits
        ret
    .odd_cluster:
        shr ax, lo_nibble_shift ; odd: keep the high 12 bits
        ret

%endif

Re: previously missed bug in my FAT file reading code

Posted: Fri Aug 19, 2022 11:54 pm
by Schol-R-LEA
Never mind; it turned out that the problem was unrelated to this code. A broken memcopy routine was clobbering the FAT buffer.

Re: previously missed bug in my FAT file reading code

Posted: Sat Aug 20, 2022 12:26 am
by Octocontrabass
I think they call that rubber duck debugging.

Anyway, before you figured it out, I did spot one potential problem:
Schol-R-LEA wrote:

Code: Select all

kernel_base       equ 0xffff
You're giving an address above 1MB to INT 0x13. Different BIOSes have different ideas of what to do in this situation; for example, the IBM 5170 BIOS always wraps back around to 0, but the Bochs BIOS will access above 1MB even with A20 disabled. (Assuming a floppy disk. With hard disks, both of those BIOSes obey the A20 gate.)

Re: previously missed bug in my FAT file reading code

Posted: Sat Aug 20, 2022 12:34 am
by Schol-R-LEA
Damn. OK, I am going to have to reconsider my approach to loading the kernel file. I was likely going to have to do this anyway, though, as once I identify the location of the text section, I was going to have to move that to a page boundary so that I could map it to the higher half region. This may even make that easier in the long run, I'm not sure yet.

Mind you, the 'kernel' file I am using right now is just a placeholder, as I was mostly trying to get the second stage loader to read the file into memory and trace through the header to the text section. Once I have that nailed down, I need to get the text section mapped, and only after I have done that will I attend to getting the kernel code and linker script in order.

Re: previously missed bug in my FAT file reading code

Posted: Sat Aug 20, 2022 4:27 am
by nullplan
The usual approach to this is to either use function 87 of interrupt 15, or doing what it is doing manually (which is of course made more difficult by the fact that the A20 gate still exists, and whether or not interrupt 13 functions will modify it is up in the hands of the little baby Jesus, and he's a baby, he can't hold anything). There are a couple of strategies you can employ (only load a block at a time, or allocate a large buffer in low memory and load the kernel via that, and I can't think of a third one), but those are generally the way to do it. Oh, and of course, just assuming that the whole kernel file will fit into the free memory after the 1MB mark, which is generally a reasonable assumption (you ought to have something like 14MB there at the least).

Having 16MB of RAM as system requirement is probably reasonable these days.

The alternative is to create a full protected mode driver for the storage medium, but then you run into the trouble that you have to know your storage medium, and have to be able to figure out where it is from the BIOS drive number (and function 48 of interrupt 13 if supported) alone. Well, you wouldn't need a full driver, a simple read-only PIO driver would suffice for the task. But it is still a daunting task to possibly create a USB driver (for all the HCIs and all the mass storage sub-standards, like CBI and UAS), and all in the space constraints of a bootloader.

Re: previously missed bug in my FAT file reading code

Posted: Sat Aug 20, 2022 11:25 am
by Schol-R-LEA
nullplan wrote:The usual approach to this is to either use function 87 of interrupt 15, or doing what it is doing manually
I've already got A20 enabled, so my plan was to manually load the executable section to FFFF:0200 (the first page entirely in the old DOS HMA) and then (following Octocontrabass' suggestion) map that to 0xC0000000 (the start of the higher half of memory) upon entering protected mode. This way, I don't need to concern myself with the physical memory layout at all (well, I will, but not regarding where I load that code), except for a few locations which I would identity map due to their importance for hardware memory-mapped devices and so forth (including the entirety of the first 1MiB). All this assumes that the kernel executable file is less than 60KiB total, of course, but given that this isn't intended as a longer term project, I am confident that this will be feasible.

My current setup, which I changed due to Octocontrabass' earlier point about the vagaries of BIOS implementations, actually has a limit on the executable file size of 16KiB, which again should be sufficient for the purposes of this project; I can re-write it if this proves to be insufficient, but I wanted to go with the path of least resistance at this point.

I also have a section of data to be passed on the stack, which I am also loading into the HMA at the very top. I'll map this section to the stack of the higher half when the time comes.

Right now I am concentrating on how to find the text sections of the ELF file which is being loaded into memory, and figuring out how to copy those sections into the desired location. After that I will need to study up on how to set up page tables and enable paging, as well as how to map pages into memory. All of which I will have to do in assembly, as things stand.

Re: previously missed bug in my FAT file reading code

Posted: Sat Aug 20, 2022 2:08 pm
by Octocontrabass
Schol-R-LEA wrote:once I identify the location of the text section, I was going to have to move that to a page boundary so that I could map it to the higher half region.
Nowadays, binaries are usually linked so that sections are aligned to page boundaries, which makes it unnecessary to move anything as long as the binary is loaded to a page boundary in the first place. Back when wasting an average of 2kB of disk space per section was a serious concern, binaries would instead be linked so that the address in the binary was aligned to a sector boundary while the address in memory was aligned to a page boundary, so the loader could load each section to a page boundary and no moving would be necessary.

You could simplify your bootloader (at the expense of some disk space) by enforcing this kind of alignment in the kernel binary.
Schol-R-LEA wrote:I've already got A20 enabled, so my plan was to manually load the executable section to FFFF:0200 (the first page entirely in the old DOS HMA)
You mean FFFF:0010? Page alignment applies to linear addresses, not virtual addresses. (And pages are 0x1000 bytes, not 0x200.)
Schol-R-LEA wrote:All this assumes that the kernel executable file is less than 60KiB total, of course, but given that this isn't intended as a longer term project, I am confident that this will be feasible.
The usual reason to load the binary above 1MB is to access a contiguous block of physical memory that's bigger than 640kB. If you're limiting yourself to memory you can access with 16-bit addressing, there's no reason to choose the HMA over conventional memory.
Schol-R-LEA wrote:a limit on the executable file size of 16KiB
...But I guess it doesn't matter where you load your binary if it'll fit wherever you choose.

Re: previously missed bug in my FAT file reading code

Posted: Sat Aug 20, 2022 9:49 pm
by Schol-R-LEA
Octocontrabass wrote:
Schol-R-LEA wrote:once I identify the location of the text section, I was going to have to move that to a page boundary so that I could map it to the higher half region.
Nowadays, binaries are usually linked so that sections are aligned to page boundaries, which makes it unnecessary to move anything as long as the binary is loaded to a page boundary in the first place. Back when wasting an average of 2kB of disk space per section was a serious concern, binaries would instead be linked so that the address in the binary was aligned to a sector boundary while the address in memory was aligned to a page boundary, so the loader could load each section to a page boundary and no moving would be necessary.

You could simplify your bootloader (at the expense of some disk space) by enforcing this kind of alignment in the kernel binary.
Ah, that helps a great deal, actually. Thank you for pointing this out. Though ironically, I'd just finished writing the code to transfer the executable binary to the HMA, so...
Octocontrabass wrote:
Schol-R-LEA wrote:I've already got A20 enabled, so my plan was to manually load the executable section to FFFF:0200 (the first page entirely in the old DOS HMA)
You mean FFFF:0010? Page alignment applies to linear addresses, not virtual addresses. (And pages are 0x1000 bytes, not 0x200.)
Erk, I actually meant FFFF:1000, not FFFF:0200, that was a bit of a brainfart on my part. But if that isn't actually necessary, then yes, I would load it at FFFF:0010 instead.
Octocontrabass wrote:
Schol-R-LEA wrote:All this assumes that the kernel executable file is less than 60KiB total, of course, but given that this isn't intended as a longer term project, I am confident that this will be feasible.
The usual reason to load the binary above 1MB is to access a contiguous block of physical memory that's bigger than 640kB. If you're limiting yourself to memory you can access with 16-bit addressing, there's no reason to choose the HMA over conventional memory.
In this case, it was mostly that I wanted it clear of the lower 1MiB of memory so I could identity map that without moving things around any more than I just had.
Octocontrabass wrote:
Schol-R-LEA wrote:a limit on the executable file size of 16KiB
...But I guess it doesn't matter where you load your binary if it'll fit wherever you choose.
I suppose so. This specific project is more about 'climbing the mountain because it is there' than anything else; I feel the need to at least demonstrate the basic ability to start a protected mode kernel from Legacy BIOS because it had hung over my head so long, and I wouldn't truly feel accomplished on the subject if I didn't scratch it off my list before going on to bigger and better things.