Page 1 of 1

Interrupt problem [SOLVED]

Posted: Tue Mar 25, 2008 12:43 pm
by inx
Hello all. I've been trying to fix this *&^$ bug for two days now, and I can't seem to figure out why it's continuing to happen. My kernel is non-preemptible (yea, I know, but I want it that way for now), and I have interrupts as firmly shut off as I can manage, but after a random amount of time (sometimes it runs fine for 15 minutes, sometimes it's the first timer interrupt), my task state becomes corrupted. After looking at my dump, it seems to me like the timer interrupt is interrupting my syscall handler. I'm stumped, so here's my assembly for the ISRs:

Code: Select all

.extern kstack, trapHandler, syscallHandler

.global exception_00, exception_01, exception_03, exception_04, exception_05
.global exception_06, exception_07, exception_08, exception_09, exception_10
.global exception_11, exception_12, exception_13, exception_14, exception_16

.global irq_00, irq_01, irq_02, irq_03, irq_04, irq_05, irq_06, irq_07, irq_08
.global irq_09, irq_10, irq_11, irq_12, irq_13, irq_14, irq_15

.global system_call

.global reservedISR

/*
 * exceptionISRWithErrorCode id
 *
 * Entry stub for an exception whose CPU-pushed frame already contains an
 * error code. Saves the interrupted state and calls trapHandler through
 * exceptionTemplate, then discards the error code and returns with iret.
 *
 * The error code sits between the saved registers and the EIP/CS/EFLAGS
 * return frame, so it must be removed before iret; otherwise iret would
 * pop the error code as the return EIP.
 *
 * NOTE(review): the leading cli only takes effect once it has executed.
 * If this stub is installed through a trap gate, an interrupt can still be
 * delivered before it runs; an interrupt gate clears IF on entry and
 * closes that window.
 */
.macro exceptionISRWithErrorCode id
	cli
	exceptionTemplate	\id
	addl	$4, %esp		/* Drop the CPU-pushed error code */
	iret
.endm

/*
 * exceptionISR id
 *
 * Entry stub for an exception that arrives without an error code. A zero
 * placeholder is pushed first so the saved frame has the same layout as the
 * one built by exceptionISRWithErrorCode; it is removed again before iret.
 *
 * NOTE(review): cli here cannot prevent an interrupt that arrives before it
 * executes; that requires entering through an interrupt gate.
 */
.macro exceptionISR id
	cli
	pushl	$0			/* Placeholder error code to keep the frame layout uniform */
	exceptionTemplate	\id
	addl	$4, %esp		/* Discard the placeholder error code */
	iret
.endm

/*
 * exceptionTemplate id
 *
 * Shared body of the exception stubs. Pushes the general registers and the
 * four data segment registers on the current stack, reloads the segment
 * registers with 0x10, switches to kstack, and calls trapHandler with the
 * trap type 1 and the given id. The value trapHandler leaves in eax is then
 * loaded into esp and the same register set is popped from there.
 *
 * The caller is expected to have an error code slot (real or placeholder)
 * on the stack already and to remove it after this macro returns.
 *
 * NOTE(review): the address of the saved frame is not passed to
 * trapHandler; presumably the handler locates it by other means (e.g. via
 * the current task structure) - confirm against the C side.
 */
.macro exceptionTemplate id
	/* Save general registers; the pops below must mirror this order. */
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	pushl	%ebx
	pushl	%ebp
	pushl	%esi
	pushl	%edi
	/* Segment registers are saved as 16-bit values (8 bytes in total). */
	pushw	%ds
	pushw	%es
	pushw	%fs
	pushw	%gs
	/* Reload data segments with selector 0x10 (presumably kernel data). */
	movl	$0x10, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs
	/* Run the handler on the dedicated kernel stack. */
	movl	$kstack, %esp
	/* Pushed id first, type last: presumably trapHandler(type, id) under cdecl. */
	push	$\id
	push	$1				/* Trap type: exception */
	call	trapHandler
	/* The pushed arguments are not popped; esp is replaced outright. */
	movl	%eax, %esp			/* Load new task state from the handler's return value */
	popw	%gs
	popw	%fs
	popw	%es
	popw	%ds
	popl	%edi
	popl	%esi
	popl	%ebp
	popl	%ebx
	popl	%edx
	popl	%ecx
	popl	%eax
.endm

/*
 * irqISRMaster id
 *
 * IRQ stub variant that hands port 0x20 to irqTemplate as the PIC command
 * port for the end-of-interrupt write.
 */
.macro irqISRMaster id
	irqTemplate	pic=0x20, id=\id
.endm

/*
 * irqISRSlave id
 *
 * IRQ stub variant that hands port 0xA0 to irqTemplate as the PIC command
 * port for the end-of-interrupt write.
 */
.macro irqISRSlave id
	irqTemplate	pic=0xA0, id=\id
.endm

/*
 * irqTemplate pic, id
 *
 * Complete hardware-IRQ entry stub.
 *   pic - command port of the PIC that raised the IRQ (0x20 or 0xA0 in
 *         this file)
 *   id  - IRQ number handed to trapHandler
 *
 * Pushes a placeholder error code so the saved frame matches the exception
 * stubs, saves the general and data segment registers, reloads the segment
 * registers with 0x10, switches to kstack and calls trapHandler with trap
 * type 2 and the id. The value trapHandler leaves in eax becomes the new
 * esp; the PIC is acknowledged, the registers are popped from the new
 * frame, the placeholder is dropped and iret resumes the task.
 *
 * End of interrupt: when pic is not the master port 0x20, the request
 * reached the CPU through the master's cascade input, so the master is
 * acknowledged as well as the given PIC. Without that second EOI the
 * master keeps the cascade line marked in service.
 *
 * NOTE(review): cli here cannot prevent an interrupt that arrives before it
 * executes; that requires entering through an interrupt gate.
 */
.macro irqTemplate pic, id
	cli
	pushl	$0				/* Placeholder error code */
	/* Save general registers; the pops below must mirror this order. */
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	pushl	%ebx
	pushl	%ebp
	pushl	%esi
	pushl	%edi
	/* Segment registers are saved as 16-bit values (8 bytes in total). */
	pushw	%ds
	pushw	%es
	pushw	%fs
	pushw	%gs
	/* Reload data segments with selector 0x10 (presumably kernel data). */
	movl	$0x10, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs
	/* Run the handler on the dedicated kernel stack. */
	movl	$kstack, %esp
	push	$\id
	push	$2				/* Trap type: hardware IRQ */
	call	trapHandler
	movl	%eax, %esp			/* Load new task state */
	/* al is free to clobber here: eax is restored from the frame below. */
	movb	$0x20, %al			/* Non-specific EOI command */
	outb	%al, $\pic
	.ifne	(\pic) - 0x20
	outb	%al, $0x20			/* Also acknowledge the master for cascaded IRQs */
	.endif
	popw	%gs
	popw	%fs
	popw	%es
	popw	%ds
	popl	%edi
	popl	%esi
	popl	%ebp
	popl	%ebx
	popl	%edx
	popl	%ecx
	popl	%eax
	addl	$4, %esp			/* Discard the placeholder error code */
	iret
.endm

/*
 * reservedISRTemplate
 *
 * Complete entry stub used by reservedISR. Builds the same saved frame as
 * the other stubs (placeholder error code, general registers, data segment
 * registers), switches to kstack and calls trapHandler with trap type 0 and
 * id 0. The value trapHandler leaves in eax becomes the new esp, from which
 * the registers are popped before the placeholder is dropped and iret runs.
 *
 * NOTE(review): cli here cannot prevent an interrupt that arrives before it
 * executes; that requires entering through an interrupt gate.
 */
.macro reservedISRTemplate
	cli
	pushl	$0				/* Placeholder error code */
	/* Save general registers; the pops below must mirror this order. */
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	pushl	%ebx
	pushl	%ebp
	pushl	%esi
	pushl	%edi
	/* Segment registers are saved as 16-bit values (8 bytes in total). */
	pushw	%ds
	pushw	%es
	pushw	%fs
	pushw	%gs
	/* Reload data segments with selector 0x10 (presumably kernel data). */
	movl	$0x10, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs
	/* Run the handler on the dedicated kernel stack. */
	movl	$kstack, %esp
	push	$0				/* Id argument: always 0 for this stub */
	push	$0				/* Trap type */
	call	trapHandler
	movl	%eax, %esp			/* Load new task state */
	popw	%gs
	popw	%fs
	popw	%es
	popw	%ds
	popl	%edi
	popl	%esi
	popl	%ebp
	popl	%ebx
	popl	%edx
	popl	%ecx
	popl	%eax
	addl	$4, %esp			/* Discard the placeholder error code */
	iret
.endm

/*
 * Exception entry points. Each label expands one stub macro with the number
 * that is reported to trapHandler. Entries 8 and 10-14 use the variant that
 * expects an error code to be on the stack already; all others use the
 * variant that pushes a placeholder.
 */
exception_00:	exceptionISR			0
exception_01:	exceptionISR			1
exception_03:	exceptionISR			3
exception_04:	exceptionISR			4
exception_05:	exceptionISR			5
exception_06:	exceptionISR			6
exception_07:	exceptionISR			7
exception_08:	exceptionISRWithErrorCode	8
exception_09:	exceptionISR			9
exception_10:	exceptionISRWithErrorCode	10
exception_11:	exceptionISRWithErrorCode	11
exception_12:	exceptionISRWithErrorCode	12
exception_13:	exceptionISRWithErrorCode	13
exception_14:	exceptionISRWithErrorCode	14
exception_16:	exceptionISR			16

/*
 * Hardware IRQ entry points. IRQs 0-7 expand irqISRMaster and IRQs 8-15
 * expand irqISRSlave; the argument is the IRQ number reported to
 * trapHandler.
 */
irq_00:		irqISRMaster	0
irq_01:		irqISRMaster	1
irq_02:		irqISRMaster	2
irq_03:		irqISRMaster	3
irq_04:		irqISRMaster	4
irq_05:		irqISRMaster	5
irq_06:		irqISRMaster	6
irq_07:		irqISRMaster	7

irq_08:		irqISRSlave	8
irq_09:		irqISRSlave	9
irq_10:		irqISRSlave	10
irq_11:		irqISRSlave	11
irq_12:		irqISRSlave	12
irq_13:		irqISRSlave	13
irq_14:		irqISRSlave	14
irq_15:		irqISRSlave	15

/*
 * system_call
 *
 * System call entry stub. Builds the same saved frame as the exception and
 * IRQ stubs (placeholder error code, general registers, data segment
 * registers), switches to kstack and calls syscallHandler with no stack
 * arguments. The value syscallHandler leaves in eax becomes the new esp,
 * from which the registers are popped before the placeholder is dropped and
 * iret returns to the task.
 *
 * NOTE(review): cli here cannot prevent an interrupt that arrives before it
 * executes. If this entry is reached through a trap gate, an IRQ taken at
 * the first instruction pushes an extra return frame on top of the task's
 * frame; an interrupt gate clears IF on entry and avoids that.
 */
system_call:
	cli
	pushl	$0				/* Placeholder error code */
	/* Save general registers; the pops below must mirror this order. */
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	pushl	%ebx
	pushl	%ebp
	pushl	%esi
	pushl	%edi
	/* Segment registers are saved as 16-bit values (8 bytes in total). */
	pushw	%ds
	pushw	%es
	pushw	%fs
	pushw	%gs
	/* Reload data segments with selector 0x10 (presumably kernel data). */
	movl	$0x10, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs
	/* Run the handler on the dedicated kernel stack. */
	movl	$kstack, %esp
	call	syscallHandler
	movl	%eax, %esp			/* Load new task state */
	popw	%gs
	popw	%fs
	popw	%es
	popw	%ds
	popl	%edi
	popl	%esi
	popl	%ebp
	popl	%ebx
	popl	%edx
	popl	%ecx
	popl	%eax
	addl	$4, %esp			/* Discard the placeholder error code */
	iret

/* Entry point that reports trap type 0, id 0 to trapHandler. */
reservedISR:	reservedISRTemplate
Screenshot of when the problem is detected:
Image

The 0xDEADD065 in the ebx field is what the task had in eax, and the value in ecx matches a line in an objdump of my kernel:
c000270c g .text 00000000 system_call

The fields for SS through EIP seem to stay pristine, making me think it's being interrupted right before the iret, except for the fact that ecx contains that odd value.. I'm not sure what to think any more. Any help would be appreciated.

Posted: Tue Mar 25, 2008 1:01 pm
by t0xic
Make sure you declare any variables you use in the handlers as 'volatile'.

Posted: Tue Mar 25, 2008 3:35 pm
by inx
Right now, the only variable used is the currentTask pointer, which is declared volatile, and the one task struct it points to, which is malloc()ed, and I've commented out the call to schedule() for now. The problem is only semi-reproducible, as in it always happens, but whether it happens the first time the timer interrupt fires or half an hour down the road is random. There are currently no counters or anything of the sort, I've pared it down to entering the ISR, checking for state corruption, and returning, in hopes of finding the bug. Works fine for anywhere from 5 to 60-some timer interrupts, then dies.

Posted: Tue Mar 25, 2008 7:24 pm
by inx
I've tried to further isolate the problem, and I've found that it does not occur either when IRQs are masked and only syscalls are looped, or when syscalls are not issued and only the timer is firing. It does appear that the problem is caused by the timer ISR running while I'm still in the kernel for a system call. But each of my ISRs begins with a CLI instruction, so I can't see how this is possible. I guess I'll try a newer version of Bochs.

Posted: Tue Mar 25, 2008 9:46 pm
by inx
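Problem solved. The timer interrupt was interrupting the system call before CLI could be executed: a trap gate leaves the interrupt flag set on entry, so there is a window before the handler's first instruction runs, whereas an interrupt gate clears the flag as part of entering the handler. I changed from trap gates to interrupt gates and removed all references to CLI/STI. Everything works perfectly now. Thank you for the reply, t0xic. :)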