Page 1 of 1

[solved but funny] interrupt: complex bug

Posted: Fri Nov 04, 2011 8:57 am
by lemonyii
Hey guys!
I don't think it's a good idea to post something this complex here, since you can hardly figure out where I went wrong without knowing the details. But recently someone always seems to hit the right point, so I will try to describe it in as much detail as I can.
The situation is this: a thread prints an increasing number in a loop and generates an interrupt after each print. The interrupt code saves the registers, switches to a procedure (which does nothing) and then restores the registers immediately.
The problem is that the increasing number is not always increasing (maybe 5% of the time). I saw this on my first run on VMware, then on Bochs and QEMU, but I can hardly reproduce it now. Fortunately I got a screenshot :mrgreen:[img]bug.jpg[/img]

Code: Select all

/*
 * Per-thread control block.
 *
 * The members from `space` through `intstk` mirror, in memory order, what the
 * interrupt entry path (CPU frame, Handler stub, inth_prep) pushes and what
 * `recover` pops again, so their order and sizes must not change.
 * NOTE(review): this assumes uchar is 1 byte and ulong/pointers are 8 bytes
 * (256 bytes in total) -- confirm against the project's typedefs.
 */
struct thread {
	uchar	lock;
	uchar	stat;
	uchar	prio;
	uchar	time;		// how much time is left?
	uchar	rsvd0[4];
	ulong	rsvd1[4];
	struct process*	proc;
	struct thread*	next;
	struct thread*	myself; // points back at this structure; inth_prep loads it from the slot just below `space`
	ulong	space;			// page table: inth_prep stores cr3 here, recover reloads cr3 from here
	ulong	regs[16];		// saved registers as pushed by inth_prep: [0]=rax [1]=rbx [2]=rcx [3]=rdx [4]=rsi [5]=rdi
						// [6]=stack pointer inth_prep switches to (written by int_recover_thread, never pushed)
						// [7]=rbp [8]..[15]=r8..r15
	ulong	intvect;		// interrupt vector number pushed by the Handler stub
	ulong	errcode;		// error code: pushed by the CPU for some exceptions, otherwise a dummy 0 from the stub
	ulong	intstk[5];		// frame consumed by iretq; inth_dispatch prints [0] as rip and [3] as rsp
}__attribute__((packed));
/*
 * Idle thread body: print a status line carrying an ever-increasing counter,
 * then raise software interrupt 255, forever.  Never returns.
 *
 * me: the thread this loop runs as; only printed.
 */
void thread_idle( thread_t* me ){
	ulong count = 0;
	for ( ;; ) {
		/* Looked up again on every pass. */
		struct x64_cpu* cpu = cpu_who_am_i();
		/* A shorter print ("%d\t" with just the counter) makes the reported
		 * symptom rarer but does not remove it. */
		vga_prt("idle %x running on cpu %x ! COUNT= %d\n", me, cpu->id, count );
		count++;
		/* The "memory" clobber keeps the compiler from moving memory
		 * accesses across the interrupt. */
		__asm volatile( "int $255" : : : "memory" );
	}
}
void inth_dispatch( struct thread* t ){
	vga_prt( "INT %x.%x : thread=%x, rip=%x, rsp=%x\n", t->intvect, t->errcode, t, t->intstk[0], t->intstk[3] );
	//vga_prt( "." );    // use this instead will reduce the problem, but not solve
	int_recover_thread( t );
}

void int_recover_thread( struct thread* t ){
	struct x64_cpu* me = cpu_who_am_i();
	me->tss.rsp[0] = (ulong)t + sizeof(*t);
	me->tss.ist[7] = (ulong)t + sizeof(*t);
	t->regs[6] = (ulong)me + (me->sz);
	//vga_prt( " t : %x\n", t );
	recover( & (t->space) );
}

Code: Select all

; Handler <vector>
; Emits the entry stub inth_prep_<vector> for one interrupt vector.
; The stub leaves a uniform frame on the stack (error code, then vector
; number), saves rax on top of it and jumps to the common inth_prep code,
; which pops rax back before saving the full register set.
%macro	Handler	1		
	section .text
	bits	64
	align	8
	global	inth_prep_%1
	inth_prep_%1:
		; The CPU itself pushes an error code for exceptions 8 (#DF), 10 (#TS),
		; 11 (#NP), 12 (#SS), 13 (#GP), 14 (#PF) and 17 (#AC).  Every other
		; vector gets a dummy 0 so the frame layout is identical.  Vector 17
		; was missing from this list, which would have shifted the whole frame
		; by 8 bytes on an alignment-check exception.
		; NOTE(review): newer CPUs define further error-code exceptions
		; (e.g. 21 #CP) -- add them here if those features are enabled.
		%if ((%1!=8)&&(%1!=10)&&(%1!=11)&&(%1!=12)&&(%1!=13)&&(%1!=14)&&(%1!=17))
		push	0		; no err code, push one.
		%endif
		push	%1		; int vector
		push	rax		; rax is needed as scratch for the absolute jump below
		mov		rax, qword inth_prep
		jmp		rax
%endmacro

; inth_prep: common interrupt entry, reached by jmp from every Handler stub.
; On entry the stack holds, from the top: the rax saved by the stub, the vector
; number, the error code (real or dummy) and the CPU's interrupt frame.
; The pushes below fill struct thread's regs[15]..regs[0] and then `space`.
; NOTE(review): this relies on the CPU having entered with rsp at the end of
; the current struct thread (int_recover_thread sets the TSS stack slots that
; way) -- confirm the IDT entries actually select that stack.
inth_prep:
	pop		rax			; restore the rax the stub saved before using it as jump scratch
	push	r15			; regs[15]
	push	r14	
	push	r13
	push	r12
	push	r11			
	push	r10
	push	r9
	push	r8			; regs[8]
	push	rbp			; regs[7]
	sub		rsp,8			; regs[6]: skip, do not push rsp. This slot already holds the stack pointer to use for interrupt handlers.
	push	rdi			; regs[5]
	push	rsi			
	push	rdx			
	push	rcx			
	push	rbx			
	push	rax			; regs[0]
	mov		rax,cr3		
	push	rax			; thread.space = current cr3; rsp now points at thread.space
	mov		rdi,[rsp-8]		; thread.myself sits just below thread.space: a thread_t* pointing at the structure itself.
	mov		rsp,[rsp+8*7]	; switch to the handler stack stored in regs[6] (7 slots above thread.space)
	mov		rax,qword inth_dispatch
	call	rax				; inth_dispatch( thread_t* me ), argument in rdi. If it ever returned, execution would fall through into the code that follows.


; recover: resume a thread from its saved state; does not return to its caller.
; rdi = address of the thread's `space` member (int_recover_thread passes
; &t->space).  The pops undo inth_prep's pushes in reverse order, then iretq
; consumes the interrupt frame stored after intvect/errcode.
recover:
	mov		rsp,rdi		; use the saved-state area itself as the stack
	pop		rax
	mov		cr3,rax		; reload the page table saved in thread.space
	pop		rax			; regs[0]
	pop		rbx
	pop		rcx
	pop		rdx
	pop		rsi
	pop		rdi			; regs[5]
	add		rsp,8		; skip regs[6], the handler-stack slot that inth_prep stepped over with sub rsp,8
	pop		rbp			; regs[7]
	pop		r8
	pop		r9
	pop		r10
	pop		r11
	pop		r12
	pop		r13
	pop		r14
	pop		r15			; regs[15]
	add		rsp,16		; drop int vector and err code.
	iretq				; return through the saved interrupt frame
If the procedure had a problem, it wouldn't work at all. If I make the print simpler, I can hardly see the problem, and if both of the simple prints mentioned in the code are used, I can't see the problem at all.
I've been working on this for half a day, so I need you guys to point out what might lead to this. Thank you so much!

Re: interrupt: complex bug

Posted: Fri Nov 04, 2011 9:17 am
by lemonyii
I used these instead of the previous functions; the only change is the simplified output, as mentioned before:

Code: Select all

/*
 * Simplified C-level interrupt handler: emit a single "." per interrupt
 * instead of the full vector/rip/rsp line, then resume the interrupted
 * thread through int_recover_thread().
 *
 * t: thread whose saved state was just captured.
 */
void inth_dispatch( struct thread* t )
{
	vga_prt( "." );

	int_recover_thread( t );
}

Code: Select all

/*
 * Simplified idle thread body: print only the running counter followed by a
 * tab, then raise software interrupt 255, forever.  Never returns.
 *
 * me: the thread this loop runs as; unused by this variant.
 */
void thread_idle( thread_t* me ){
	ulong count = 0;
	for ( ;; ) {
		/* Still looked up on every pass, although the short print below
		 * no longer uses it. */
		struct x64_cpu* cpu = cpu_who_am_i();
		vga_prt("%d\t", count );
		count++;
		/* The "memory" clobber keeps the compiler from moving memory
		 * accesses across the interrupt. */
		__asm volatile( "int $255" : : : "memory" );
	}
}
It has been running for minutes and has counted past 10M with no bug found. The numbers in odd lines always end with 5, 6, 7, 8, 9 and those in even lines with 0, 1, 2, 3, 4; any error would break this pattern, so I can check it very easily. What's wrong? I'm confused.[img]nobug.jpg[/img]

Re: interrupt: complex bug

Posted: Fri Nov 04, 2011 9:28 am
by lemonyii
You know what? I think I know why now! The problem is not a problem...
My vga_prt() scrolls by copying the screen data, moving it two lines up on each scroll, so I see identical lines when the output is paused.
I spent too much time on this problem...
Sorry! :mrgreen: