Why ALIGN affects section's file offset?

Question about which tools to use, bugs, the best way to implement a function, etc should go here. Don't forget to see if your question is answered in the wiki first! When in doubt post here.
Post Reply
haojn
Posts: 3
Joined: Thu Jan 06, 2011 12:50 am

Why ALIGN affects section's file offset?

Post by haojn »

While testing my code, I found a strange relationship between "align" and "file offset": the file offset of the first section in the ELF depends on the largest alignment used in the asm source.

My understanding is that "align" only fills the gap with "nop" or "resb 1" to reach the correct VMA; so how does "align 4096" in the .data section affect the file offset of the .loaderheader section?

My code basically enters long mode and hangs. The code is not robust; it is just used to generate the ELF.
Note the "align 4096" in loader.s. If we change it to "align 8192", the final binary will be "loaderheader.offset=8192". What's the content before .loaderheader?

Did I miss something?

loader.s:

Code: Select all

; Make the entry point visible to the linker.
global loader

; ---- Multiboot header fields (see the GRUB docs for details) ----
MAGIC        equ 0x1BADB002             ; 'magic number' that lets the bootloader find the header
MODULEALIGN  equ 1 << 0                 ; flag: align loaded modules on page boundaries
MEMINFO      equ 1 << 1                 ; flag: provide memory map
FLAGS        equ MODULEALIGN | MEMINFO  ; the Multiboot 'flag' field
CHECKSUM     equ -(MAGIC + FLAGS)       ; required checksum field

; ---- Constants used by the loader code ----
STACKSIZE    equ 0x4000                 ; initial kernel stack size: 16k
CR4_PAE      equ 0x20                   ; bit OR-ed into CR4 by the loader
CR0_PG       equ 0x80000000             ; bit OR-ed into CR0 by the loader
MSR_EFER     equ 0xc0000080             ; MSR number passed to rdmsr/wrmsr
MSR_EFER_LME equ 0x100                  ; bit OR-ed into that MSR

COM1_PORT    equ 0x3F8                  ; defined here; not referenced by the code in this file

[BITS 32]

; The header lives in its own section so the linker script can place it first.
section .loaderheader
MultiBootHeader:
        dd      MAGIC
        dd      FLAGS
        dd      CHECKSUM


section .text
align 4

; Constants for the 32-bit entry path. Selectors are byte offsets into entry_gdt.
SEL_CODE32      equ 0x08                ; CODE32 descriptor
SEL_CODE64      equ 0x18                ; CODE64 descriptor
PDE_2M_FLAGS    equ 0x83                ; low flag bits written into every page-directory entry
PDE_STEP        equ 1 << 21             ; address increment between consecutive entries (2MB)
PDE_COUNT       equ 512 * 8             ; number of entries in entry_pde

;-----------------------------------------------------------------------
; loader -- entry point (32-bit code).
; Sets up the stack and GDT, fills entry_pde, enables long mode and
; far-jumps to entry64. Does not return.
; Uses: eax, ecx, edx, edi, esp. ebx is left untouched (entry64 reads it).
;-----------------------------------------------------------------------
loader:
        mov     esp, stack + STACKSIZE          ; stack top = end of the reserved area

        ; Reload the 32-bit GDTR, then far-jump so CS comes from the new GDT.
        lgdt    [entry_gdtr32]
        jmp     SEL_CODE32:gdtreload32
gdtreload32:

        ; Fill entry_pde: edx:eax is the 64-bit entry value, advanced by
        ; PDE_STEP per entry; edi walks the table 8 bytes at a time.
        mov     eax, (0 << 21) + PDE_2M_FLAGS
        mov     edi, entry_pde
        mov     ecx, PDE_COUNT
        xor     edx, edx
.fill_entry:
        mov     [edi], eax                      ; low dword of the entry
        mov     [edi + 4], edx                  ; high dword of the entry
        add     edi, 8
        add     eax, PDE_STEP
        adc     edx, 0                          ; carry into the high dword
        loop    .fill_entry

        ; Enter long mode: CR4 |= CR4_PAE, CR3 = entry_pml4,
        ; MSR_EFER |= MSR_EFER_LME, CR0 |= CR0_PG.
        mov     eax, cr4
        or      eax, CR4_PAE
        mov     cr4, eax

        mov     eax, entry_pml4
        mov     cr3, eax

        mov     ecx, MSR_EFER
        rdmsr
        or      eax, MSR_EFER_LME
        wrmsr

        mov     eax, cr0
        or      eax, CR0_PG
        mov     cr0, eax

        ;xchg bx, bx            ;bochs magic break

        jmp     SEL_CODE64:entry64              ; continue in the 64-bit code segment

[BITS 64]

; outb port, value
;   Writes the byte %2 to I/O port %1.
;   Overwrites dx and al.
%macro outb 2
        mov     dx, %1
        mov     al, %2
        out     dx, al
%endmacro


SEL_DATA64      equ 0x20                ; DATA64 descriptor in entry_gdt
TSS_DESC_OFFSET equ 0x28                ; byte offset of the TSS descriptor in entry_gdt

;-----------------------------------------------------------------------
; entry64 -- 64-bit continuation of loader (reached by far jump).
; Reloads the data segment registers, patches the TSS base address into
; the GDT, loads the 64-bit GDTR and the task register, then halts.
; Does not return.
;-----------------------------------------------------------------------
entry64:
        mov     eax, SEL_DATA64
        mov     ds, eax
        mov     ss, eax
        mov     es, eax

        ; Initialize the TSS base address: the address of tss_64 is split
        ; across bytes 2-3, 4, 7 and 8-11 of the descriptor.
        mov     rax, entry_gdt + TSS_DESC_OFFSET
        mov     rcx, tss_64
        mov     word [rax + 2], cx              ; address bits 0-15
        shr     rcx, 16
        mov     byte [rax + 4], cl              ; address bits 16-23
        shr     rcx, 8
        mov     byte [rax + 7], cl              ; address bits 24-31
        shr     rcx, 8
        mov     [rax + 8], ecx                  ; address bits 32-63

        ;xchg bx, bx            ;bochs magic break

        lgdt    [entry_gdtr64]

        ltr     [tss_selector]                  ; load tss

        ; rdi = ebx, zero-extended
        xor     rdi, rdi
        mov     edi, ebx

        xchg    bx, bx                          ; bochs magic break

        cli
hang:
        hlt                                     ; halt machine should kernel return
        jmp     hang

section .data
; 64-bit TSS, immediately followed by its I/O permission bitmap.
; entry64 patches the address of tss_64 into the TSS descriptor of
; entry_gdt and loads tss_selector with ltr.
align 4
global tss_64
tss_64:
        dd      0                       ; reserved
        dq      0                       ; rsp0
        dq      0                       ; rsp1
        dq      0                       ; rsp2
        dq      0                       ; reserved
        dq      0                       ; ist1
        dq      0                       ; ist2
        dq      0                       ; ist3
        dq      0                       ; ist4
        dq      0                       ; ist5
        dq      0                       ; ist6
        dq      0                       ; ist7
        dq      0                       ; reserved
        dw      0                       ; reserved
        dw      io_bitmap - tss_64      ; i/o map base address (offset from tss_64)

; i/o permission bitmap: one bit per port, 65536 ports, all bits clear
io_bitmap:
        times 65536/8 db 0
        ; The bitmap must be followed by a byte with ALL bits set.
        ; This was 0xf, which sets only the low four bits.
        db      0xff

; tss selector: memory operand for ltr (offset of the TSS descriptor in entry_gdt)
tss_selector:
        dw      0x28
; GDT pseudo-descriptors (operands for lgdt) and the GDT itself
align 4

; Used by the 32-bit code: limit covers the first five 8-byte descriptors.
entry_gdtr32:
        dw      (5 * 8) - 1
        dd      entry_gdt

; Used by the 64-bit code: limit covers all seven 8-byte slots.
entry_gdtr64:
        dw      (7 * 8) - 1
        dq      entry_gdt

global entry_gdt
entry_gdt:
        dq      0x0000000000000000      ; 0x00 null descriptor
        dq      0x00CF9B000000FFFF      ; 0x08 CODE32, DPL=0
        dq      0x00CF93000000FFFF      ; 0x10 DATA32, DPL=0
        dq      0x00AF9B000000FFFF      ; 0x18 CODE64, DPL=0
        dq      0x00AF93000000FFFF      ; 0x20 DATA64, DPL=0
        dq      0x009F89000000FFFF      ; 0x28 64-bit TSS lower half (base patched in by entry64)
        dq      0                       ; 0x30 64-bit TSS higher half
; Page tables (2MB pages).
; The forum colour markup that wrapped the next line has been removed; it
; is not valid assembly. This is the alignment the question is about.
align 4096
entry_pml4:
        dq      entry_pdpt + 0x7                ; first 512GB
        times 512 - 1 dq 0

entry_pdpt:
        dq      entry_pde + 0*4096 + 0x7        ; 0-1GB
        dq      entry_pde + 1*4096 + 0x7        ; 1-2GB
        dq      entry_pde + 2*4096 + 0x7        ; 2-3GB
        dq      entry_pde + 3*4096 + 0x7        ; 3-4GB
        dq      entry_pde + 4*4096 + 0x7        ; 4-5GB
        dq      entry_pde + 5*4096 + 0x7        ; 5-6GB
        dq      entry_pde + 6*4096 + 0x7        ; 6-7GB
        dq      entry_pde + 7*4096 + 0x7        ; 7-8GB
        times 512 - 8 dq 0

; 8 page directories of 512 entries each; zero here, filled at run time
; by the loop in loader.
entry_pde:
        ;dq (0<<21)+0x83                        ;0-2MB
        times 512 * 8 dq 0
		
section .bss
align 4                                 ; doubleword boundary

; Initial kernel stack; loader points esp at stack + STACKSIZE.
stack:
        resb    STACKSIZE               ; 0x4000 bytes = 16k
linker.ld

Code: Select all

/* Links everything into a single PT_LOAD segment starting at 1 MB. */
ENTRY(loader)
OUTPUT_FORMAT(elf64-x86-64)

PHDRS
{
    header PT_LOAD;
}

SECTIONS
{
    . = 0x00100000;

    /* Only the first section names a segment; the sections after it
       carry no :phdr and therefore land in the same segment. */
    .loaderheader ALIGN(0x1000) :
    {
        *(.loaderheader)
    } :header

    .text ALIGN(0x1000) :
    {
        *(.text)
    }

    .rodata ALIGN(0x1000) :
    {
        *(.rodata)
    }

    .data ALIGN(0x1000) :
    {
        *(.data)
    }

    /* sbss / ebss mark the start and end of the .bss output section. */
    .bss :
    {
        sbss = .;
        *(COMMON)
        *(.bss)
        ebss = .;
    }
}
Makefile:

Code: Select all

# Builds kernel64.bin from loader.s (and any C sources listed below),
# linked with linker.ld.
SOURCES_H=
SOURCES_C=
SOURCES_ASM=loader.s
BIN=kernel64.bin
LINKER=linker.ld

OBJS_C=$(SOURCES_C:.c=.o)
OBJS_ASM=$(SOURCES_ASM:.s=.o)

CFLAGS=-Wall -Wextra -nostdlib -nostartfiles -nodefaultlibs -m64
CFLAGS+=-mno-red-zone -fno-stack-protector -std=gnu99 -gdwarf-2

all: $(BIN)

# Only real ld options are passed here: -nodefaultlibs and -nostdinc are
# gcc driver flags, and the -z keyword is spelled max-page-size.
$(BIN): $(OBJS_C) $(OBJS_ASM) $(LINKER)
	ld -nostdlib -n -melf_x86_64 -z max-page-size=0x1000 -T $(LINKER) -o $@ $(OBJS_ASM) $(OBJS_C)

clean:
	rm -f *.o $(BIN)

# One object per C source; every header is a prerequisite of every object.
%.o: %.c $(SOURCES_H)
	gcc $(CFLAGS) -c $< -o $@

# One object per assembly source.
%.o: %.s
	yasm -g dwarf2 -f elf64 -o $@ $<

.PHONY: all clean
When "align 4096":
objdump -h kernel64.bin:
Sections:
Idx Name Size VMA LMA File off Algn
0 .loaderheader 0000000c 0000000000100000 0000000000100000 00001000 2**0
CONTENTS, ALLOC, LOAD, READONLY, DATA
1 .text 000000f6 0000000000101000 0000000000101000 00002000 2**4
CONTENTS, ALLOC, LOAD, READONLY, CODE
...


When "align 8192":
objdump -h kernel64.bin:
Sections:
Idx Name Size VMA LMA File off Algn
0 .loaderheader 0000000c 0000000000100000 0000000000100000 00002000 2**0
CONTENTS, ALLOC, LOAD, READONLY, DATA
1 .text 000000f6 0000000000101000 0000000000101000 00003000 2**4
CONTENTS, ALLOC, LOAD, READONLY, CODE


One related thread in forum is http://forum.osdev.org/viewtopic.php?f=1&t=19128 but I cannot solve the problem by adding "-n".
Last edited by haojn on Thu Jun 30, 2011 12:33 am, edited 1 time in total.
User avatar
Karlosoft
Member
Member
Posts: 277
Joined: Thu Feb 14, 2008 10:46 am
Location: Italy
Contact:

Re: Why ALIGN affects section's file offset?

Post by Karlosoft »

Your data is less than 4096, so the size of this section is 4096... :)
If it were 4097 bytes long, it would have been 8192.
haojn
Posts: 3
Joined: Thu Jan 06, 2011 12:50 am

Re: Why ALIGN affects section's file offset?

Post by haojn »

Thank you for quick reply.

Yes. We can tell the size of section may change if alignment changes because we need nop to satisfy the VMA alignment.

But what I am concerned about is the "file offset", i.e. the offset of .loaderheader (the first section) in kernel64.bin. Why does this value change to 8192 when "ALIGN 8192" is set? What is the content of the gap between the ELF header and offset 8192 in kernel64.bin?
haojn
Posts: 3
Joined: Thu Jan 06, 2011 12:50 am

Re: Why ALIGN affects section's file offset?

Post by haojn »

berkus wrote: Because according to ELF spec, alignment affects START of the section.
You are right. I also found some evidence elsewhere.

http://sourceware.org/ml/binutils/2009-04/msg00099.html
http://forum.soft32.com/linux2/ELF-Segm ... 14787.html

First, ld guarantees that file offset % pagesize = VMA % pagesize. This makes it easy for the loader to load a segment on a page boundary.
Since "the default page size for x86_64 is 0x200000", I suspect you would not see this file offset issue if you generated elf32.

The other thing I missed is that ld fills the binary file contiguously for each segment (the loader treats one segment as one unit to load). In my previous script, every section is put in one segment.
ld will try to satisfy the VMA required by the "align 4096" line first, because it is the largest alignment in the segment (the VMA is fixed; ld calculates the file offset). This leads to padding at the beginning of the segment, which in turn changes the file offset.
For other instructions aligned on smaller boundaries, I guess ld uses "nop" without moving the whole segment.

Experiment:
If I use following script to put .loaderheader in a different segment, the file offset of .loaderheader will change.

Code: Select all

/* Variant with two PT_LOAD segments: .loaderheader goes into "header",
   every other section into "other". Sections are not page-aligned here. */
ENTRY(loader)
OUTPUT_FORMAT(elf64-x86-64)

PHDRS
{
    header PT_LOAD;
    other  PT_LOAD;
}

SECTIONS
{
    . = 0x00100000;

    .loaderheader :
    {
        *(.loaderheader)
    } :header

    .text :
    {
        *(.text)
    } :other

    .rodata :
    {
        *(.rodata)
    } :other

    .data :
    {
        *(.data)
    } :other

    /* sbss / ebss mark the start and end of the .bss output section. */
    .bss :
    {
        sbss = .;
        *(COMMON)
        *(.bss)
        ebss = .;
    } :other
}
objdump -x

Code: Select all

Program Header:
    LOAD off    0x00000000000000b0 vaddr 0x0000000000100000 paddr 0x0000000000100000 align 2**0
         filesz 0x000000000000000c memsz 0x000000000000000c flags r--
    LOAD off    0x0000000000001010 vaddr 0x0000000000100010 paddr 0x0000000000100010 align 2**12
         filesz 0x000000000000dff0 memsz 0x0000000000011ff0 flags rwx

Sections:
Idx Name          Size      VMA               LMA               File off  Algn
  0 .loaderheader 0000000c  0000000000100000  0000000000100000  000000b0  2**0
                  CONTENTS, ALLOC, LOAD, READONLY, DATA
  1 .text         000000f6  0000000000100010  0000000000100010  00001010  2**4
                  CONTENTS, ALLOC, LOAD, READONLY, CODE
  2 .iplt         00000000  0000000000100108  0000000000100108  00001108  2**2
                  CONTENTS, ALLOC, LOAD, READONLY, CODE
  3 .rela.dyn     00000000  0000000000100108  0000000000100108  00001108  2**3
                  CONTENTS, ALLOC, LOAD, READONLY, DATA
  4 .data         0000d000  0000000000101000  0000000000101000  00002000  2**12
                  CONTENTS, ALLOC, LOAD, DATA
  5 .igot.plt     00000000  000000000010e000  000000000010e000  0000f000  2**3
                  CONTENTS, ALLOC, LOAD, DATA
  6 .bss          00004000  000000000010e000  000000000010e000  0000f000  2**2
                  ALLOC
Please correct me if I misunderstood.
Post Reply